Shallow Water¶
Python Numpy¶
from __future__ import print_function
"""
Shallow Water
-------------
So what does this code example illustrate?
Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
"""
import numpy as np
from benchpress.benchmarks import util
bench = util.Benchmark("Shallow Water", "height*width*iterations")
g = 9.80665 # gravitational acceleration
def droplet(height, width, data_type=np.float32):
"""Generate grid of droplets"""
x = np.array(np.linspace(-1, 1, num=width, endpoint=True), dtype=data_type)
y = np.array(np.linspace(-1, 1, num=width, endpoint=True), dtype=data_type)
(xx, yy) = np.meshgrid(x, y)
droplet = height * np.exp(-5 * (xx ** 2 + yy ** 2))
return droplet
def model(height, width, dtype=np.float32):
assert height >= 16
assert width >= 16
m = np.ones((height, width), dtype=dtype)
D = droplet(8, 8) # simulate a water drop
droploc = height // 4
(dropx, dropy) = D.shape
m[droploc:droploc + dropx, droploc:droploc + dropy] += D
droploc = height // 2
(dropx, dropy) = D.shape
m[droploc:droploc + dropx, droploc:droploc + dropy] += D
return {"H": m, "U": np.zeros_like(m), "V": np.zeros_like(m)}
def simulate(state, timesteps):
# FLOP count: i*(12*s + 4*s**2 + 14*s**2 + 9*s**2 + 4*s**2 + 9*s**2 + 14*s**2 + 6*s**2 + 19*s**2 + 19*s**2)
# where s is size and i is iterations
def loop_body(H, U, V, dt=0.02, dx=1.0, dy=1.0):
# Reflecting boundary conditions
H[:, 0] = H[:, 1];
U[:, 0] = U[:, 1];
V[:, 0] = -V[:, 1]
H[:, -1] = H[:, -2];
U[:, -1] = U[:, -2];
V[:, -1] = -V[:, -2]
H[0, :] = H[1, :];
U[0, :] = -U[1, :];
V[0, :] = V[1, :]
H[-1, :] = H[-2, :];
U[-1, :] = -U[-2, :];
V[-1, :] = V[-2, :]
# First half step
# height
Hx = (H[1:, 1:-1] + H[:-1, 1:-1]) / 2 - dt / (2 * dx) * (U[1:, 1:-1] - U[:-1, 1:-1])
# x momentum
Ux = (U[1:, 1:-1] + U[:-1, 1:-1]) / 2 - \
dt / (2 * dx) * ((U[1:, 1:-1] ** 2 / H[1:, 1:-1] + g / 2 * H[1:, 1:-1] ** 2) -
(U[:-1, 1:-1] ** 2 / H[:-1, 1:-1] + g / 2 * H[:-1, 1:-1] ** 2))
# y momentum
Vx = (V[1:, 1:-1] + V[:-1, 1:-1]) / 2 - \
dt / (2 * dx) * ((U[1:, 1:-1] * V[1:, 1:-1] / H[1:, 1:-1]) -
(U[:-1, 1:-1] * V[:-1, 1:-1] / H[:-1, 1:-1]))
# height
Hy = (H[1:-1, 1:] + H[1:-1, :-1]) / 2 - dt / (2 * dy) * (V[1:-1, 1:] - V[1:-1, :-1])
# x momentum
Uy = (U[1:-1, 1:] + U[1:-1, :-1]) / 2 - \
dt / (2 * dy) * ((V[1:-1, 1:] * U[1:-1, 1:] / H[1:-1, 1:]) -
(V[1:-1, :-1] * U[1:-1, :-1] / H[1:-1, :-1]))
# y momentum
Vy = (V[1:-1, 1:] + V[1:-1, :-1]) / 2 - \
dt / (2 * dy) * ((V[1:-1, 1:] ** 2 / H[1:-1, 1:] + g / 2 * H[1:-1, 1:] ** 2) -
(V[1:-1, :-1] ** 2 / H[1:-1, :-1] + g / 2 * H[1:-1, :-1] ** 2))
# Second half step
# height
H[1:-1, 1:-1] -= (dt / dx) * (Ux[1:, :] - Ux[:-1, :]) + (dt / dy) * (Vy[:, 1:] - Vy[:, :-1])
# x momentum
U[1:-1, 1:-1] -= (dt / dx) * ((Ux[1:, :] ** 2 / Hx[1:, :] + g / 2 * Hx[1:, :] ** 2) -
(Ux[:-1, :] ** 2 / Hx[:-1, :] + g / 2 * Hx[:-1, :] ** 2)) + \
(dt / dy) * ((Vy[:, 1:] * Uy[:, 1:] / Hy[:, 1:]) -
(Vy[:, :-1] * Uy[:, :-1] / Hy[:, :-1]))
# y momentum
V[1:-1, 1:-1] -= (dt / dx) * ((Ux[1:, :] * Vx[1:, :] / Hx[1:, :]) -
(Ux[:-1, :] * Vx[:-1, :] / Hx[:-1, :])) + \
(dt / dy) * ((Vy[:, 1:] ** 2 / Hy[:, 1:] + g / 2 * Hy[:, 1:] ** 2) -
(Vy[:, :-1] ** 2 / Hy[:, :-1] + g / 2 * Hy[:, :-1] ** 2))
bench.plot_surface(H, "3d", 0, 0, 5.5)
bench.do_while(loop_body, timesteps, state['H'], state['U'], state['V'])
def main():
H = bench.args.size[0]
W = bench.args.size[1]
I = bench.args.size[2]
state = bench.load_data()
if state is None:
state = model(H, W, dtype=bench.dtype)
bench.start()
simulate(state, I)
bench.stop()
bench.save_data(state)
bench.pprint()
if __name__ == "__main__":
main()
C99 Seq¶
// Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
// Saved images may be converted into an animated gif with:
// convert -delay 20 -loop 0 swater*.png swater.gif
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <bp_util.h>
#define iH(y,x) (H[(x)+(y)*(n+2)])
#define iU(y,x) (U[(x)+(y)*(n+2)])
#define iV(y,x) (V[(x)+(y)*(n+2)])
#define iHx(y,x) (Hx[(x)+(y)*(n+1)])
#define iUx(y,x) (Ux[(x)+(y)*(n+1)])
#define iVx(y,x) (Vx[(x)+(y)*(n+1)])
#define iHy(y,x) (Hy[(x)+(y)*(n+1)])
#define iUy(y,x) (Uy[(x)+(y)*(n+1)])
#define iVy(y,x) (Vy[(x)+(y)*(n+1)])
int main (int argc, char **argv)
{
bp_util_type bp = bp_util_create(argc, argv, 3);
if (bp.args.has_error) {
return 1;
}
const int n = bp.args.sizes[0];
const int other_n = bp.args.sizes[1];
const int T = bp.args.sizes[2];
if (n != other_n) {
fprintf(stderr, "Implementation only supports quadratic grid.\n");
exit(0);
}
const double g = 9.8; // gravitational constant
const double dt = 0.02; // hardwired timestep
const double dx = 1.0;
const double dy = 1.0;
const int droploc = n/4;
double *H = (double*) malloc((n+2)*(n+2)*sizeof(double));
double *U = (double*) malloc((n+2)*(n+2)*sizeof(double));
double *V = (double*) malloc((n+2)*(n+2)*sizeof(double));
double *Hx = (double*) malloc((n+1)*(n+1)*sizeof(double));
double *Ux = (double*) malloc((n+1)*(n+1)*sizeof(double));
double *Vx = (double*) malloc((n+1)*(n+1)*sizeof(double));
double *Hy = (double*) malloc((n+1)*(n+1)*sizeof(double));
double *Uy = (double*) malloc((n+1)*(n+1)*sizeof(double));
double *Vy = (double*) malloc((n+1)*(n+1)*sizeof(double));
for(int i=0; i<n+2; i++)
for(int j=0; j<n+2; j++)
{
iH(i,j)=0.1;
iU(i,j)=0.1;
iV(i,j)=0.1;
}
for(int i=0; i<n+1; i++)
for(int j=0; j<n+1; j++)
{
iHx(i,j)=0.1;
iUx(i,j)=0.1;
iVx(i,j)=0.1;
iHy(i,j)=0.1;
iUy(i,j)=0.1;
iVy(i,j)=0.1;
}
iH(droploc,droploc) += 5.0;
bp.timer_start();
for(int iter=0; iter < T; iter++)
{
// Reflecting boundary conditions
for(int i=0; i<n+2; i++)
{
iH(i,0) = iH(i,1) ; iU(i,0) = iU(i,1) ; iV(i,0) = -iV(i,1);
iH(i,n+1) = iH(i,n) ; iU(i,n+1) = iU(i,n) ; iV(i,n+1) = -iV(i,n);
iH(0,i) = iH(1,i) ; iU(0,i) = -iU(1,i) ; iV(0,i) = iV(1,i);
iH(n+1,i) = iH(n,i) ; iU(n+1,i) = -iU(n,i) ; iV(n+1,i) = iV(n,i);
}
//
// First half step
//
for(int i=0; i<n+1; i++)
{
for(int j=0; j<n; j++)
{
// height
iHx(i,j) = (iH(i+1,j+1)+iH(i,j+1))/2 - dt/(2*dx)*(iU(i+1,j+1)-iU(i,j+1));
// x momentum
iUx(i,j) = (iU(i+1,j+1)+iU(i,j+1))/2 - \
dt/(2*dx)*((pow(iU(i+1,j+1),2)/iH(i+1,j+1) + \
g/2*pow(iH(i+1,j+1),2)) - \
(pow(iU(i,j+1),2)/iH(i,j+1) + \
g/2*pow(iH(i,j+1),2)));
// y momentum
iVx(i,j) = (iV(i+1,j+1)+iV(i,j+1))/2 - \
dt/(2*dx)*((iU(i+1,j+1) * \
iV(i+1,j+1)/iH(i+1,j+1)) - \
(iU(i,j+1) * \
iV(i,j+1)/iH(i,j+1)));
}
}
for(int i=0; i<n; i++)
{
for(int j=0; j<n+1; j++)
{
//height
iHy(i,j) = (iH(i+1,j+1)+iH(i+1,j))/2 - dt/(2*dy)*(iV(i+1,j+1)-iV(i+1,j));
//x momentum
iUy(i,j) = (iU(i+1,j+1)+iU(i+1,j))/2 - \
dt/(2*dy)*((iV(i+1,j+1) * \
iU(i+1,j+1)/iH(i+1,j+1)) - \
(iV(i+1,j) * \
iU(i+1,j)/iH(i+1,j)));
//y momentum
iVy(i,j) = (iV(i+1,j+1)+iV(i+1,j))/2 - \
dt/(2*dy)*((pow(iV(i+1,j+1),2)/iH(i+1,j+1) + \
g/2*pow(iH(i+1,j+1),2)) - \
(pow(iV(i+1,j),2)/iH(i+1,j) + \
g/2*pow(iH(i+1,j),2)));
}
}
//
// Second half step
//
for(int i=1; i<n+1; i++)
{
for(int j=1; j<n+1; j++)
{
//height
iH(i,j) -= (dt/dx)*(iUx(i,j-1)-iUx(i-1,j-1)) - (dt/dy)*(iVy(i-1,j)-iVy(i-1,j-1));
// x momentum
iU(i,j) -= (dt/dx)*((pow(iUx(i,j-1),2)/iHx(i,j-1) + g/2*pow(iHx(i,j-1),2)) - \
(pow(iUx(i-1,j-1),2)/iHx(i-1,j-1) + g/2*pow(iHx(i-1,j-1),2))) - \
(dt/dy)*((iVy(i-1,j) * iUy(i-1,j)/iHy(i-1,j)) -
(iVy(i-1,j-1) * iUy(i-1,j-1)/iHy(i-1,j-1)));
// y momentum - score
iV(i,j) -= (dt/dx)*((iUx(i,j-1) * iVx(i,j-1)/iHx(i,j-1)) - \
(iUx(i-1,j-1)*iVx(i-1,j-1)/iHx(i-1,j-1))) - \
(dt/dy)*((pow(iVy(i-1,j),2)/iHy(i-1,j) + g/2*pow(iHy(i-1,j),2)) - \
(pow(iVy(i-1,j-1),2)/iHy(i-1,j-1) + g/2*pow(iHy(i-1,j-1),2)));
}
}
}
/* NumPy implementation does not do this!
// res = numpy.add.reduce(numpy.add.reduce(H / n))
double res = 0.0;
for(int i=0;i<n+2;i++)
for(int j=0;j<n+2;j++)
res+=iH(i,j)/n;
*/
bp.timer_stop();
bp.print("shallow_water(c99_seq)");
}
Cpp11 Boost¶
// Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
// Saved images may be converted into an animated gif with:
// convert -delay 20 -loop 0 swater*.png swater.gif
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <bp_util.h>
#include <boost/multi_array.hpp>
int main (int argc, char **argv)
{
bp_util_type bp = bp_util_create(argc, argv, 3);
if (bp.args.has_error) {
return 1;
}
const int n = bp.args.sizes[0];
const int other_n = bp.args.sizes[1];
const int T = bp.args.sizes[2];
if (n != other_n) {
fprintf(stderr, "Implementation only supports quadratic grid.\n");
exit(0);
}
const double g = 9.8; // gravitational constant
const double dt = 0.02; // hardwired timestep
const double dx = 1.0;
const double dy = 1.0;
const int droploc = n/4;
typedef boost::multi_array<double, 2> array_type;
array_type H(boost::extents[n+2][n+2]);
array_type U(boost::extents[n+2][n+2]);
array_type V(boost::extents[n+2][n+2]);
array_type Hx(boost::extents[n+1][n+1]);
array_type Ux(boost::extents[n+1][n+1]);
array_type Vx(boost::extents[n+1][n+1]);
array_type Hy(boost::extents[n+1][n+1]);
array_type Uy(boost::extents[n+1][n+1]);
array_type Vy(boost::extents[n+1][n+1]);
for(int i=0; i<n+2; i++)
for(int j=0; j<n+2; j++)
{
H[i][j]=0.1;
U[i][j]=0.1;
V[i][j]=0.1;
}
for(int i=0; i<n+1; i++)
for(int j=0; j<n+1; j++)
{
Hx[i][j]=0.1;
Ux[i][j]=0.1;
Vx[i][j]=0.1;
Hy[i][j]=0.1;
Uy[i][j]=0.1;
Vy[i][j]=0.1;
}
H[droploc][droploc] += 5.0;
bp.timer_start();
for(int iter=0; iter < T; iter++)
{
// Reflecting boundary conditions
for(int i=0; i<n+2; i++)
{
H[i][0] = H[i][1] ; U[i][0] = U[i][1] ; V[i][0] = -V[i][1];
H[i][n+1] = H[i][n] ; U[i][n+1] = U[i][n] ; V[i][n+1] = -V[i][n];
H[0][i] = H[1][i] ; U[0][i] = -U[1][i] ; V[0][i] = V[1][i];
H[n+1][i] = H[n][i] ; U[n+1][i] = -U[n][i] ; V[n+1][i] = V[n][i];
}
//
// First half step
//
for(int i=0; i<n+1; i++)
{
for(int j=0; j<n; j++)
{
// height
Hx[i][j] = (H[i+1][j+1]+H[i][j+1])/2 - dt/(2*dx)*(U[i+1][j+1]-U[i][j+1]);
// x momentum
Ux[i][j] = (U[i+1][j+1]+U[i][j+1])/2 - \
dt/(2*dx)*((pow(U[i+1][j+1],2)/H[i+1][j+1] + \
g/2*pow(H[i+1][j+1],2)) - \
(pow(U[i][j+1],2)/H[i][j+1] + \
g/2*pow(H[i][j+1],2)));
// y momentum
Vx[i][j] = (V[i+1][j+1]+V[i][j+1])/2 - \
dt/(2*dx)*((U[i+1][j+1] * \
V[i+1][j+1]/H[i+1][j+1]) - \
(U[i][j+1] * \
V[i][j+1]/H[i][j+1]));
}
}
for(int i=0; i<n; i++)
{
for(int j=0; j<n+1; j++)
{
//height
Hy[i][j] = (H[i+1][j+1]+H[i+1][j])/2 - dt/(2*dy)*(V[i+1][j+1]-V[i+1][j]);
//x momentum
Uy[i][j] = (U[i+1][j+1]+U[i+1][j])/2 - \
dt/(2*dy)*((V[i+1][j+1] * \
U[i+1][j+1]/H[i+1][j+1]) - \
(V[i+1][j] * \
U[i+1][j]/H[i+1][j]));
//y momentum
Vy[i][j] = (V[i+1][j+1]+V[i+1][j])/2 - \
dt/(2*dy)*((pow(V[i+1][j+1],2)/H[i+1][j+1] + \
g/2*pow(H[i+1][j+1],2)) - \
(pow(V[i+1][j],2)/H[i+1][j] + \
g/2*pow(H[i+1][j],2)));
}
}
//
// Second half step
//
for(int i=1; i<n+1; i++)
{
for(int j=1; j<n+1; j++)
{
//height
H[i][j] -= (dt/dx)*(Ux[i][j-1]-Ux[i-1][j-1]) - (dt/dy)*(Vy[i-1][j]-Vy[i-1][j-1]);
// x momentum
U[i][j] -= (dt/dx)*((pow(Ux[i][j-1],2)/Hx[i][j-1] + g/2*pow(Hx[i][j-1],2)) - \
(pow(Ux[i-1][j-1],2)/Hx[i-1][j-1] + g/2*pow(Hx[i-1][j-1],2))) - \
(dt/dy)*((Vy[i-1][j] * Uy[i-1][j]/Hy[i-1][j]) -
(Vy[i-1][j-1] * Uy[i-1][j-1]/Hy[i-1][j-1]));
// y momentum - score
V[i][j] -= (dt/dx)*((Ux[i][j-1] * Vx[i][j-1]/Hx[i][j-1]) - \
(Ux[i-1][j-1]*Vx[i-1][j-1]/Hx[i-1][j-1])) - \
(dt/dy)*((pow(Vy[i-1][j],2)/Hy[i-1][j] + g/2*pow(Hy[i-1][j],2)) - \
(pow(Vy[i-1][j-1],2)/Hy[i-1][j-1] + g/2*pow(Hy[i-1][j-1],2)));
}
}
}
// res = numpy.add.reduce(numpy.add.reduce(H / n))
double res = 0.0;
for(int i=0;i<n+2;i++)
for(int j=0;j<n+2;j++)
res+=H[i][j]/n;
bp.timer_stop();
bp.print("shallow_water(cpp11_boost)");
}
Cpp11 Omp¶
// Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
// Saved images may be converted into an animated gif with:
// convert -delay 20 -loop 0 swater*.png swater.gif
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <bp_util.h>
#define WIDTH 5000
#define HEIGHT 5000
int main (int argc, char **argv)
{
bp_util_type bp = bp_util_create(argc, argv, 3);
if (bp.args.has_error) {
return 1;
}
const int height = bp.args.sizes[0];
const int width = bp.args.sizes[1];
const int iterations = bp.args.sizes[2];
if (width != WIDTH) {
fprintf(stderr, "Unsupported size, size must be %dx%d\n", WIDTH, HEIGHT);
exit(0);
}
if (height != WIDTH) {
fprintf(stderr, "Unsupported size, size must be %dx%d\n", WIDTH, HEIGHT);
exit(0);
}
if (width != height) {
fprintf(stderr, "Implementation only supports quadratic grid.\n");
exit(0);
}
const double g = 9.8; // gravitational constant
const double dt = 0.02; // hardwired timestep
const double dx = 1.0;
const double dy = 1.0;
const int droploc = WIDTH/4;
auto H = new double[WIDTH+2][WIDTH+2];
auto U = new double[WIDTH+2][WIDTH+2];
auto V = new double[WIDTH+2][WIDTH+2];
auto Hx = new double[WIDTH+1][WIDTH+1];
auto Ux = new double[WIDTH+1][WIDTH+1];
auto Vx = new double[WIDTH+1][WIDTH+1];
auto Hy = new double[WIDTH+1][WIDTH+1];
auto Uy = new double[WIDTH+1][WIDTH+1];
auto Vy = new double[WIDTH+1][WIDTH+1];
#pragma omp parallel for collapse(2)
for(int i=0; i<HEIGHT+2; i++) {
for(int j=0; j<WIDTH+2; j++) {
H[i][j]=0.1;
U[i][j]=0.1;
V[i][j]=0.1;
}
}
#pragma omp parallel for collapse(2)
for(int i=0; i<HEIGHT+1; i++) {
for(int j=0; j<WIDTH+1; j++) {
Hx[i][j]=0.1;
Ux[i][j]=0.1;
Vx[i][j]=0.1;
Hy[i][j]=0.1;
Uy[i][j]=0.1;
Vy[i][j]=0.1;
}
}
H[droploc][droploc] += 5.0;
bp.timer_start();
for(int iter=0; iter < iterations; iter++) {
#pragma omp parallel for
for(int i=0; i<WIDTH+2; i++) { // Reflecting boundary conditions
H[i][0] = H[i][1];
U[i][0] = U[i][1];
V[i][0] = -V[i][1];
H[i][WIDTH+1] = H[i][WIDTH];
U[i][WIDTH+1] = U[i][WIDTH];
V[i][WIDTH+1] = -V[i][WIDTH];
H[0][i] = H[1][i];
U[0][i] = -U[1][i];
V[0][i] = V[1][i];
H[HEIGHT+1][i] = H[HEIGHT][i];
U[HEIGHT+1][i] = -U[HEIGHT][i];
V[HEIGHT+1][i] = V[HEIGHT][i];
}
//
// First half step
//
#pragma omp parallel for collapse(2)
for(int i=0; i<HEIGHT+1; i++) {
for(int j=0; j<WIDTH; j++) {
// height
Hx[i][j] = (H[i+1][j+1]+H[i][j+1])/2 - dt/(2*dx)*(U[i+1][j+1]-U[i][j+1]);
// x momentum
Ux[i][j] = (U[i+1][j+1]+U[i][j+1])/2 - \
dt/(2*dx)*((pow(U[i+1][j+1],2)/H[i+1][j+1] + \
g/2*pow(H[i+1][j+1],2)) - \
(pow(U[i][j+1],2)/H[i][j+1] + \
g/2*pow(H[i][j+1],2)));
// y momentum
Vx[i][j] = (V[i+1][j+1]+V[i][j+1])/2 - \
dt/(2*dx)*((U[i+1][j+1] * \
V[i+1][j+1]/H[i+1][j+1]) - \
(U[i][j+1] * \
V[i][j+1]/H[i][j+1]));
}
}
#pragma omp parallel for collapse(2)
for(int i=0; i<HEIGHT; i++) {
for(int j=0; j<WIDTH+1; j++) {
//height
Hy[i][j] = (H[i+1][j+1]+H[i+1][j])/2 - dt/(2*dy)*(V[i+1][j+1]-V[i+1][j]);
//x momentum
Uy[i][j] = (U[i+1][j+1]+U[i+1][j])/2 - \
dt/(2*dy)*((V[i+1][j+1] * \
U[i+1][j+1]/H[i+1][j+1]) - \
(V[i+1][j] * \
U[i+1][j]/H[i+1][j]));
//y momentum
Vy[i][j] = (V[i+1][j+1]+V[i+1][j])/2 - \
dt/(2*dy)*((pow(V[i+1][j+1],2)/H[i+1][j+1] + \
g/2*pow(H[i+1][j+1],2)) - \
(pow(V[i+1][j],2)/H[i+1][j] + \
g/2*pow(H[i+1][j],2)));
}
}
//
// Second half step
//
#pragma omp parallel for collapse(2)
for(int i=1; i<HEIGHT+1; i++) {
for(int j=1; j<WIDTH+1; j++) {
//height
H[i][j] -= (dt/dx)*(Ux[i][j-1]-Ux[i-1][j-1]) - (dt/dy)*(Vy[i-1][j]-Vy[i-1][j-1]);
// x momentum
U[i][j] -= (dt/dx)*((pow(Ux[i][j-1],2)/Hx[i][j-1] + g/2*pow(Hx[i][j-1],2)) - \
(pow(Ux[i-1][j-1],2)/Hx[i-1][j-1] + g/2*pow(Hx[i-1][j-1],2))) - \
(dt/dy)*((Vy[i-1][j] * Uy[i-1][j]/Hy[i-1][j]) -
(Vy[i-1][j-1] * Uy[i-1][j-1]/Hy[i-1][j-1]));
// y momentum - score
V[i][j] -= (dt/dx)*((Ux[i][j-1] * Vx[i][j-1]/Hx[i][j-1]) - \
(Ux[i-1][j-1]*Vx[i-1][j-1]/Hx[i-1][j-1])) - \
(dt/dy)*((pow(Vy[i-1][j],2)/Hy[i-1][j] + g/2*pow(Hy[i-1][j],2)) - \
(pow(Vy[i-1][j-1],2)/Hy[i-1][j-1] + g/2*pow(Hy[i-1][j-1],2)));
}
}
}
/* NumPy implementation does not do this!?
// res = numpy.add.reduce(numpy.add.reduce(H / n))
double res = 0.0;
#pragma omp parallel for collapse(2) reduction(+:res)
for(int i=0;i<HEIGHT+2;i++) {
for(int j=0;j<WIDTH+2;j++) {
res+=H[i][j]/WIDTH;
}
}
*/
bp.timer_stop();
bp.print("shallow_water(cpp11_omp)");
}
Cpp11 Seq¶
// Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
// Saved images may be converted into an animated gif with:
// convert -delay 20 -loop 0 swater*.png swater.gif
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <bp_util.h>
#define WIDTH 5000
#define HEIGHT 5000
int main (int argc, char **argv)
{
bp_util_type bp = bp_util_create(argc, argv, 3);
if (bp.args.has_error) {
return 1;
}
const int height = bp.args.sizes[0];
const int width = bp.args.sizes[1];
const int T = bp.args.sizes[2];
if (width != WIDTH) {
fprintf(stderr, "Unsupported size, size must be %dx%d\n", WIDTH, HEIGHT);
exit(0);
}
if (height != WIDTH) {
fprintf(stderr, "Unsupported size, size must be %dx%d\n", WIDTH, HEIGHT);
exit(0);
}
if (width != height) {
fprintf(stderr, "Implementation only supports quadratic grid.\n");
exit(0);
}
const double g = 9.8; // gravitational constant
const double dt = 0.02; // hardwired timestep
const double dx = 1.0;
const double dy = 1.0;
const int droploc = WIDTH/4;
auto H = new double[WIDTH+2][WIDTH+2];
auto U = new double[WIDTH+2][WIDTH+2];
auto V = new double[WIDTH+2][WIDTH+2];
auto Hx = new double[WIDTH+1][WIDTH+1];
auto Ux = new double[WIDTH+1][WIDTH+1];
auto Vx = new double[WIDTH+1][WIDTH+1];
auto Hy = new double[WIDTH+1][WIDTH+1];
auto Uy = new double[WIDTH+1][WIDTH+1];
auto Vy = new double[WIDTH+1][WIDTH+1];
for(int i=0; i<HEIGHT+2; i++) {
for(int j=0; j<WIDTH+2; j++) {
H[i][j]=0.1;
U[i][j]=0.1;
V[i][j]=0.1;
}
}
for(int i=0; i<HEIGHT+1; i++) {
for(int j=0; j<WIDTH+1; j++) {
Hx[i][j]=0.1;
Ux[i][j]=0.1;
Vx[i][j]=0.1;
Hy[i][j]=0.1;
Uy[i][j]=0.1;
Vy[i][j]=0.1;
}
}
H[droploc][droploc] += 5.0;
bp.timer_start();
for(int iter=0; iter < T; iter++) {
for(int i=0; i<WIDTH+2; i++) { // Reflecting boundary conditions
H[i][0] = H[i][1];
U[i][0] = U[i][1];
V[i][0] = -V[i][1];
H[i][WIDTH+1] = H[i][WIDTH];
U[i][WIDTH+1] = U[i][WIDTH];
V[i][WIDTH+1] = -V[i][WIDTH];
H[0][i] = H[1][i];
U[0][i] = -U[1][i];
V[0][i] = V[1][i];
H[HEIGHT+1][i] = H[HEIGHT][i];
U[HEIGHT+1][i] = -U[HEIGHT][i];
V[HEIGHT+1][i] = V[HEIGHT][i];
}
//
// First half step
//
for(int i=0; i<HEIGHT+1; i++) {
for(int j=0; j<WIDTH; j++) {
// height
Hx[i][j] = (H[i+1][j+1]+H[i][j+1])/2 - dt/(2*dx)*(U[i+1][j+1]-U[i][j+1]);
// x momentum
Ux[i][j] = (U[i+1][j+1]+U[i][j+1])/2 - \
dt/(2*dx)*((pow(U[i+1][j+1],2)/H[i+1][j+1] + \
g/2*pow(H[i+1][j+1],2)) - \
(pow(U[i][j+1],2)/H[i][j+1] + \
g/2*pow(H[i][j+1],2)));
// y momentum
Vx[i][j] = (V[i+1][j+1]+V[i][j+1])/2 - \
dt/(2*dx)*((U[i+1][j+1] * \
V[i+1][j+1]/H[i+1][j+1]) - \
(U[i][j+1] * \
V[i][j+1]/H[i][j+1]));
}
}
for(int i=0; i<HEIGHT; i++) {
for(int j=0; j<WIDTH+1; j++) {
//height
Hy[i][j] = (H[i+1][j+1]+H[i+1][j])/2 - dt/(2*dy)*(V[i+1][j+1]-V[i+1][j]);
//x momentum
Uy[i][j] = (U[i+1][j+1]+U[i+1][j])/2 - \
dt/(2*dy)*((V[i+1][j+1] * \
U[i+1][j+1]/H[i+1][j+1]) - \
(V[i+1][j] * \
U[i+1][j]/H[i+1][j]));
//y momentum
Vy[i][j] = (V[i+1][j+1]+V[i+1][j])/2 - \
dt/(2*dy)*((pow(V[i+1][j+1],2)/H[i+1][j+1] + \
g/2*pow(H[i+1][j+1],2)) - \
(pow(V[i+1][j],2)/H[i+1][j] + \
g/2*pow(H[i+1][j],2)));
}
}
//
// Second half step
//
for(int i=1; i<HEIGHT+1; i++) {
for(int j=1; j<WIDTH+1; j++) {
//height
H[i][j] -= (dt/dx)*(Ux[i][j-1]-Ux[i-1][j-1]) - (dt/dy)*(Vy[i-1][j]-Vy[i-1][j-1]);
// x momentum
U[i][j] -= (dt/dx)*((pow(Ux[i][j-1],2)/Hx[i][j-1] + g/2*pow(Hx[i][j-1],2)) - \
(pow(Ux[i-1][j-1],2)/Hx[i-1][j-1] + g/2*pow(Hx[i-1][j-1],2))) - \
(dt/dy)*((Vy[i-1][j] * Uy[i-1][j]/Hy[i-1][j]) -
(Vy[i-1][j-1] * Uy[i-1][j-1]/Hy[i-1][j-1]));
// y momentum - score
V[i][j] -= (dt/dx)*((Ux[i][j-1] * Vx[i][j-1]/Hx[i][j-1]) - \
(Ux[i-1][j-1]*Vx[i-1][j-1]/Hx[i-1][j-1])) - \
(dt/dy)*((pow(Vy[i-1][j],2)/Hy[i-1][j] + g/2*pow(Hy[i-1][j],2)) - \
(pow(Vy[i-1][j-1],2)/Hy[i-1][j-1] + g/2*pow(Hy[i-1][j-1],2)));
}
}
}
/* NumPy implementation does not do this!?
// res = numpy.add.reduce(numpy.add.reduce(H / n))
double res = 0.0;
for(int i=0;i<HEIGHT+2;i++) {
for(int j=0;j<WIDTH+2;j++) {
res+=H[i][j]/WIDTH;
}
}
*/
bp.timer_stop();
bp.print("shallow_water(cpp11_seq)");
}
Csharp Numcil¶
#region Copyright
/*
This file is part of Bohrium and copyright (c) 2012 the Bohrium:
team <http://www.bh107.org>.
Bohrium is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3
of the License, or (at your option) any later version.
Bohrium is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the
GNU Lesser General Public License along with Bohrium.
If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
//Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using R = NumCIL.Range;
using NumCIL;
namespace shallow_water
{
public static class ShallowWaterSolver
{
/// <summary>
/// NumCIL equivalent to [:]
/// </summary>
public static readonly R ALL = R.All;
/// <summary>
/// NumCIL equivalent to [1:-1]
/// </summary>
public static readonly R INNER = R.Slice(1, -1);
/// <summary>
/// NumCIL equivalent to [0]
/// </summary>
public static readonly R FIRST = R.El(0);
/// <summary>
/// NumCIL equivalent to [1]
/// </summary>
public static readonly R SECOND = R.El(1);
/// <summary>
/// NumCIL equivalent to [-1]
/// </summary>
public static readonly R LAST = R.El(-1);
/// <summary>
/// NumCIL equivalent to [:-1]
/// </summary>
public static readonly R ZM1 = R.Slice(0, -1);
/// <summary>
/// NumCIL equivalent to [1:]
/// </summary>
public static readonly R SKIP1 = R.Slice(1, 0);
public class DataDouble
{
public readonly long N;
public readonly NumCIL.Double.NdArray H;
public readonly NumCIL.Double.NdArray U;
public readonly NumCIL.Double.NdArray V;
public readonly NumCIL.Double.NdArray Hx;
public readonly NumCIL.Double.NdArray Ux;
public readonly NumCIL.Double.NdArray Vx;
public readonly NumCIL.Double.NdArray Hy;
public readonly NumCIL.Double.NdArray Uy;
public readonly NumCIL.Double.NdArray Vy;
public DataDouble(long n)
{
N = n;
H = NumCIL.Double.Generate.Ones(n + 2, n + 2);
U = NumCIL.Double.Generate.Ones(n + 2, n + 2);
V = NumCIL.Double.Generate.Ones(n + 2, n + 2);
Hx = NumCIL.Double.Generate.Ones(n + 1, n + 1);
Ux = NumCIL.Double.Generate.Ones(n + 1, n + 1);
Vx = NumCIL.Double.Generate.Ones(n + 1, n + 1);
Hy = NumCIL.Double.Generate.Ones(n + 1, n + 1);
Uy = NumCIL.Double.Generate.Ones(n + 1, n + 1);
Vy = NumCIL.Double.Generate.Ones(n + 1, n + 1);
}
}
public static System.Double SolveDouble(long timesteps, DataDouble data)
{
//Convenience indices
R RN = R.El(data.N); //Same as [n]
R RN1 = R.El(data.N + 1); //Same as [n+1]
System.Double g = 9.8f;// gravitational constant
System.Double dt = 0.02f; // hardwired timestep
System.Double dx = 1.0f;
System.Double dy = 1.0f;
long droploc = data.N / 4;
var H = data.H;
var U = data.U;
var V = data.V;
var Hx = data.Hx;
var Ux = data.Ux;
var Vx = data.Vx;
var Hy = data.Hy;
var Uy = data.Uy;
var Vy = data.Vy;
//Splash!!!
H[droploc, droploc] += 5.0f;
for (int i = 0; i < timesteps; i++)
{
H.Flush();
// Reflecting boundary conditions
H[ALL, FIRST] = H[ALL, SECOND];
U[ALL, FIRST] = U[ALL, SECOND];
V[ALL, FIRST] = -V[ALL, SECOND];
H[ALL, RN1] = H[ALL, RN];
U[ALL, RN1] = U[ALL, RN];
V[ALL, RN1] = -V[ALL, RN];
H[FIRST, ALL] = H[SECOND, ALL];
U[FIRST, ALL] = -U[SECOND, ALL];
V[FIRST, ALL] = V[SECOND, ALL];
H[RN1, ALL] = H[RN, ALL];
U[RN1, ALL] = -U[RN, ALL];
V[RN1, ALL] = V[RN, ALL];
//First half-step
//Height
Hx[ALL, R.Slice(0, -1)] = (H[SKIP1,INNER]+H[ZM1,INNER])/2 -
dt/(2*dx)*(U[SKIP1,INNER]-U[ZM1,INNER]);
//x momentum
Ux[ALL, R.Slice(0, -1)] = (U[SKIP1,INNER]+U[ZM1,INNER])/2 -
dt/(2*dx)*((U[SKIP1,INNER].Pow(2)/H[SKIP1,INNER] +
g/2*H[SKIP1,INNER].Pow(2)) -
(U[ZM1,INNER].Pow(2)/H[ZM1,INNER] +
g/2*H[ZM1,INNER].Pow(2)));
// y momentum
Vx[ALL, ZM1] = (V[SKIP1,INNER]+V[ZM1,INNER])/2 -
dt/(2*dx)*((U[SKIP1,INNER] *
V[SKIP1,INNER]/H[SKIP1, INNER]) -
(U[ZM1,INNER] *
V[ZM1,INNER]/H[ZM1,INNER]));
// height
Hy[ZM1,ALL] = (H[INNER,SKIP1]+H[INNER,ZM1])/2 -
dt/(2*dy)*(V[INNER,SKIP1]-V[INNER,ZM1]);
// x momentum
Uy[ZM1,ALL] = (U[INNER,SKIP1]+U[INNER,ZM1])/2 -
dt/(2*dy)*((V[INNER,SKIP1] *
U[INNER,SKIP1]/H[INNER,SKIP1]) -
(V[INNER,ZM1] *
U[INNER,ZM1]/H[INNER,ZM1]));
// y momentum
Vy[ZM1,ALL] = (V[INNER,SKIP1]+V[INNER,ZM1])/2 -
dt/(2*dy)*((V[INNER,SKIP1].Pow(2)/H[INNER,SKIP1] +
g/2*H[INNER,SKIP1].Pow(2)) -
(V[INNER,ZM1].Pow(2)/H[INNER,ZM1] +
g/2*H[INNER,ZM1].Pow(2)));
// Second half step
// height
H[INNER, INNER] = H[INNER, INNER] -
(dt/dx)*(Ux[SKIP1,ZM1]-Ux[ZM1,ZM1]) -
(dt/dy)*(Vy[ZM1,SKIP1]-Vy[ZM1, ZM1]);
// x momentum
U[INNER, INNER] = U[INNER, INNER] -
(dt/dx)*((Ux[SKIP1,ZM1].Pow(2)/Hx[SKIP1,ZM1] +
g/2*Hx[SKIP1,ZM1].Pow(2)) -
(Ux[ZM1,ZM1].Pow(2)/Hx[ZM1,ZM1] +
g / 2 * Hx[ZM1, ZM1].Pow(2))) -
(dt/dy)*((Vy[ZM1,SKIP1] *
Uy[ZM1,SKIP1]/Hy[ZM1,SKIP1]) -
(Vy[ZM1,ZM1] *
Uy[ZM1,ZM1]/Hy[ZM1,ZM1]));
// y momentum
V[R.Slice(1, -1),R.Slice(1, -1)] = V[R.Slice(1, -1),R.Slice(1, -1)] -
(dt/dx)*((Ux[SKIP1,ZM1] *
Vx[SKIP1,ZM1]/Hx[SKIP1,ZM1]) -
(Ux[ZM1,ZM1]*Vx[ZM1,ZM1]/Hx[ZM1,ZM1])) -
(dt / dy) * ((Vy[ZM1, SKIP1].Pow(2) / Hy[ZM1, SKIP1] +
g / 2 * Hy[ZM1, SKIP1].Pow(2)) -
(Vy[ZM1, ZM1].Pow(2) / Hy[ZM1, ZM1] +
g / 2 * Hy[ZM1, ZM1].Pow(2)));
}
//Make sure we have the actual data and use it as a checksum
return NumCIL.Double.Add.Reduce(NumCIL.Double.Add.Reduce(H / data.N)).Value[0];
}
public class DataFloat
{
public readonly long N;
public readonly NumCIL.Float.NdArray H;
public readonly NumCIL.Float.NdArray U;
public readonly NumCIL.Float.NdArray V;
public readonly NumCIL.Float.NdArray Hx;
public readonly NumCIL.Float.NdArray Ux;
public readonly NumCIL.Float.NdArray Vx;
public readonly NumCIL.Float.NdArray Hy;
public readonly NumCIL.Float.NdArray Uy;
public readonly NumCIL.Float.NdArray Vy;
public DataFloat(long n)
{
N = n;
H = NumCIL.Float.Generate.Ones(n + 2, n + 2);
U = NumCIL.Float.Generate.Ones(n + 2, n + 2);
V = NumCIL.Float.Generate.Ones(n + 2, n + 2);
Hx = NumCIL.Float.Generate.Ones(n + 1, n + 1);
Ux = NumCIL.Float.Generate.Ones(n + 1, n + 1);
Vx = NumCIL.Float.Generate.Ones(n + 1, n + 1);
Hy = NumCIL.Float.Generate.Ones(n + 1, n + 1);
Uy = NumCIL.Float.Generate.Ones(n + 1, n + 1);
Vy = NumCIL.Float.Generate.Ones(n + 1, n + 1);
}
}
public static System.Single SolveFloat(long timesteps, DataFloat data)
{
//Convenience indices
R RN = R.El(data.N); //Same as [n]
R RN1 = R.El(data.N + 1); //Same as [n+1]
System.Single g = 9.8f;// gravitational constant
System.Single dt = 0.02f; // hardwired timestep
System.Single dx = 1.0f;
System.Single dy = 1.0f;
long droploc = data.N / 4;
var H = data.H;
var U = data.U;
var V = data.V;
var Hx = data.Hx;
var Ux = data.Ux;
var Vx = data.Vx;
var Hy = data.Hy;
var Uy = data.Uy;
var Vy = data.Vy;
//Splash!!!
H[droploc, droploc] += 5.0f;
for (int i = 0; i < timesteps; i++)
{
H.Flush();
// Reflecting boundary conditions
H[ALL, FIRST] = H[ALL, SECOND];
U[ALL, FIRST] = U[ALL, SECOND];
V[ALL, FIRST] = -V[ALL, SECOND];
H[ALL, RN1] = H[ALL, RN];
U[ALL, RN1] = U[ALL, RN];
V[ALL, RN1] = -V[ALL, RN];
H[FIRST, ALL] = H[SECOND, ALL];
U[FIRST, ALL] = -U[SECOND, ALL];
V[FIRST, ALL] = V[SECOND, ALL];
H[RN1, ALL] = H[RN, ALL];
U[RN1, ALL] = -U[RN, ALL];
V[RN1, ALL] = V[RN, ALL];
//First half-step
//Height
Hx[ALL, R.Slice(0, -1)] = (H[SKIP1,INNER]+H[ZM1,INNER])/2 -
dt/(2*dx)*(U[SKIP1,INNER]-U[ZM1,INNER]);
//x momentum
Ux[ALL, R.Slice(0, -1)] = (U[SKIP1,INNER]+U[ZM1,INNER])/2 -
dt/(2*dx)*((U[SKIP1,INNER].Pow(2)/H[SKIP1,INNER] +
g/2*H[SKIP1,INNER].Pow(2)) -
(U[ZM1,INNER].Pow(2)/H[ZM1,INNER] +
g/2*H[ZM1,INNER].Pow(2)));
// y momentum
Vx[ALL, ZM1] = (V[SKIP1,INNER]+V[ZM1,INNER])/2 -
dt/(2*dx)*((U[SKIP1,INNER] *
V[SKIP1,INNER]/H[SKIP1, INNER]) -
(U[ZM1,INNER] *
V[ZM1,INNER]/H[ZM1,INNER]));
// height
Hy[ZM1,ALL] = (H[INNER,SKIP1]+H[INNER,ZM1])/2 -
dt/(2*dy)*(V[INNER,SKIP1]-V[INNER,ZM1]);
// x momentum
Uy[ZM1,ALL] = (U[INNER,SKIP1]+U[INNER,ZM1])/2 -
dt/(2*dy)*((V[INNER,SKIP1] *
U[INNER,SKIP1]/H[INNER,SKIP1]) -
(V[INNER,ZM1] *
U[INNER,ZM1]/H[INNER,ZM1]));
// y momentum
Vy[ZM1,ALL] = (V[INNER,SKIP1]+V[INNER,ZM1])/2 -
dt/(2*dy)*((V[INNER,SKIP1].Pow(2)/H[INNER,SKIP1] +
g/2*H[INNER,SKIP1].Pow(2)) -
(V[INNER,ZM1].Pow(2)/H[INNER,ZM1] +
g/2*H[INNER,ZM1].Pow(2)));
// Second half step
// height
H[INNER, INNER] = H[INNER, INNER] -
(dt/dx)*(Ux[SKIP1,ZM1]-Ux[ZM1,ZM1]) -
(dt/dy)*(Vy[ZM1,SKIP1]-Vy[ZM1, ZM1]);
// x momentum
U[INNER, INNER] = U[INNER, INNER] -
(dt/dx)*((Ux[SKIP1,ZM1].Pow(2)/Hx[SKIP1,ZM1] +
g/2*Hx[SKIP1,ZM1].Pow(2)) -
(Ux[ZM1,ZM1].Pow(2)/Hx[ZM1,ZM1] +
g / 2 * Hx[ZM1, ZM1].Pow(2))) -
(dt/dy)*((Vy[ZM1,SKIP1] *
Uy[ZM1,SKIP1]/Hy[ZM1,SKIP1]) -
(Vy[ZM1,ZM1] *
Uy[ZM1,ZM1]/Hy[ZM1,ZM1]));
// y momentum
V[R.Slice(1, -1),R.Slice(1, -1)] = V[R.Slice(1, -1),R.Slice(1, -1)] -
(dt/dx)*((Ux[SKIP1,ZM1] *
Vx[SKIP1,ZM1]/Hx[SKIP1,ZM1]) -
(Ux[ZM1,ZM1]*Vx[ZM1,ZM1]/Hx[ZM1,ZM1])) -
(dt / dy) * ((Vy[ZM1, SKIP1].Pow(2) / Hy[ZM1, SKIP1] +
g / 2 * Hy[ZM1, SKIP1].Pow(2)) -
(Vy[ZM1, ZM1].Pow(2) / Hy[ZM1, ZM1] +
g / 2 * Hy[ZM1, ZM1].Pow(2)));
}
//Make sure we have the actual data and use it as a checksum
return NumCIL.Float.Add.Reduce(NumCIL.Float.Add.Reduce(H / data.N)).Value[0];
}
}
}