Shallow Water

Python Numpy

from __future__ import print_function
"""
Shallow Water
-------------

So what does this code example illustrate?

Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
"""
from benchpress import util
import numpy as np

g = 9.80665 # gravitational acceleration

def droplet(height, width, data_type=np.float32):
    """Generate grid of droplets"""

    x = np.array(np.linspace(-1, 1, num=width, endpoint=True), dtype=data_type)
    y = np.array(np.linspace(-1, 1, num=width, endpoint=True), dtype=data_type)

    (xx, yy) = np.meshgrid(x, y)

    droplet = height * np.exp(-5 * (xx ** 2 + yy ** 2))

    return droplet

def model(height, width, dtype=np.float32):
    assert height >= 16
    assert width >= 16

    m = np.ones((height, width), dtype=dtype)
    D = droplet(8, 8)  # simulate a water drop
    droploc = height / 4
    (dropx, dropy) = D.shape
    m[droploc:droploc + dropx, droploc:droploc + dropy] += D
    droploc = height / 2
    (dropx, dropy) = D.shape
    m[droploc:droploc + dropx, droploc:droploc + dropy] += D

    return m

#FLOP count: i*(12*s + 4*s**2 + 14*s**2 + 9*s**2 + 4*s**2 + 9*s**2 + 14*s**2 + 6*s**2 + 19*s**2 + 19*s**2)
#where s is size and i is iterations
def step(H, U, V, dt=0.02, dx=1.0, dy=1.0):
    # Reflecting boundary conditions
    H[:,0] = H[:,1]   ; U[:,0] = U[:,1]     ; V[:,0] = -V[:,1]
    H[:,-1] = H[:,-2] ; U[:,-1] = U[:,-2]   ; V[:,-1] = -V[:,-2]
    H[0,:] = H[1,:]   ; U[0,:] = -U[1,:]    ; V[0,:] = V[1,:]
    H[-1,:] = H[-2,:] ; U[-1,:] = -U[-2,:]  ; V[-1,:] = V[-2,:]

    #First half step

    # height
    Hx = (H[1:,1:-1]+H[:-1,1:-1])/2 - dt/(2*dx)*(U[1:,1:-1]-U[:-1,1:-1])

    # x momentum
    Ux = (U[1:,1:-1]+U[:-1,1:-1])/2 - \
         dt/(2*dx) * ((U[1:,1:-1]**2/H[1:,1:-1] + g/2*H[1:,1:-1]**2) -
                      (U[:-1,1:-1]**2/H[:-1,1:-1] + g/2*H[:-1,1:-1]**2))

    # y momentum
    Vx = (V[1:,1:-1]+V[:-1,1:-1])/2 - \
         dt/(2*dx) * ((U[1:,1:-1]*V[1:,1:-1]/H[1:,1:-1]) -
                      (U[:-1,1:-1]*V[:-1,1:-1]/H[:-1,1:-1]))

    # height
    Hy = (H[1:-1,1:]+H[1:-1,:-1])/2 - dt/(2*dy)*(V[1:-1,1:]-V[1:-1,:-1])

    #x momentum
    Uy = (U[1:-1,1:]+U[1:-1,:-1])/2 - \
         dt/(2*dy)*((V[1:-1,1:]*U[1:-1,1:]/H[1:-1,1:]) -
                    (V[1:-1,:-1]*U[1:-1,:-1]/H[1:-1,:-1]))
    #y momentum
    Vy = (V[1:-1,1:]+V[1:-1,:-1])/2 - \
         dt/(2*dy)*((V[1:-1,1:]**2/H[1:-1,1:] + g/2*H[1:-1,1:]**2) -
                    (V[1:-1,:-1]**2/H[1:-1,:-1] + g/2*H[1:-1,:-1]**2))

    #Second half step

    # height
    H[1:-1,1:-1] -= (dt/dx)*(Ux[1:,:]-Ux[:-1,:]) + (dt/dy)*(Vy[:,1:]-Vy[:,:-1])

    # x momentum
    U[1:-1,1:-1] -= (dt/dx)*((Ux[1:,:]**2/Hx[1:,:] + g/2*Hx[1:,:]**2) -
                             (Ux[:-1,:]**2/Hx[:-1,:] + g/2*Hx[:-1,:]**2)) + \
                    (dt/dy)*((Vy[:,1:] * Uy[:,1:]/Hy[:,1:]) -
                             (Vy[:,:-1] * Uy[:,:-1]/Hy[:,:-1]))
    # y momentum
    V[1:-1,1:-1] -= (dt/dx)*((Ux[1:,:] * Vx[1:,:]/Hx[1:,:]) -
                             (Ux[:-1,:]*Vx[:-1,:]/Hx[:-1,:])) + \
                    (dt/dy)*((Vy[:,1:]**2/Hy[:,1:] + g/2*Hy[:,1:]**2) -
                             (Vy[:,:-1]**2/Hy[:,:-1] + g/2*Hy[:,:-1]**2))
    return (H, U, V)

def simulate(H, timesteps, visualize=False):
    U = np.zeros_like(H)
    V = np.zeros_like(H)

    for i in range(timesteps):
        (H, U, V) = step(H, U, V)
        util.Benchmark().flush()
        if visualize:
            util.plot_surface(H, "3d", 0, 0, 5.5)
    return H

def main():
    B = util.Benchmark()
    H = B.size[0]
    W = B.size[1]
    I = B.size[2]

    if B.inputfn:
        M = B.load_array()
    else:
        M = model(H, W, dtype=B.dtype)

    if B.dumpinput:
        B.dump_arrays("shallow_water", {'input':M})

    B.start()
    M = simulate(M, I, visualize=B.visualize)
    B.stop()

    B.pprint()
    if B.outputfn:
        B.tofile(B.outputfn, {'res': M})

    if B.visualize:
        util.confirm_exit()

if __name__ == "__main__":
    main()

C99 Seq

// Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
// Saved images may be converted into an animated gif with:
// convert   -delay 20   -loop 0   swater*.png   swater.gif


#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <bp_util.h>

#define iH(y,x) (H[(x)+(y)*(n+2)])
#define iU(y,x) (U[(x)+(y)*(n+2)])
#define iV(y,x) (V[(x)+(y)*(n+2)])

#define iHx(y,x) (Hx[(x)+(y)*(n+1)])
#define iUx(y,x) (Ux[(x)+(y)*(n+1)])
#define iVx(y,x) (Vx[(x)+(y)*(n+1)])

#define iHy(y,x) (Hy[(x)+(y)*(n+1)])
#define iUy(y,x) (Uy[(x)+(y)*(n+1)])
#define iVy(y,x) (Vy[(x)+(y)*(n+1)])

int main (int argc, char **argv)
{
    bp_util_type bp = bp_util_create(argc, argv, 3);
    if (bp.args.has_error) {
        return 1;
    }
    const int n = bp.args.sizes[0];
    const int other_n = bp.args.sizes[1];
    const int T = bp.args.sizes[2];

    if (n != other_n) {
        fprintf(stderr, "Implementation only supports quadratic grid.\n");
        exit(0);
    }

    const double g   = 9.8;       // gravitational constant
    const double dt  = 0.02;      // hardwired timestep
    const double dx  = 1.0;
    const double dy  = 1.0;
    const int droploc = n/4;

    double *H = (double*) malloc((n+2)*(n+2)*sizeof(double));
    double *U = (double*) malloc((n+2)*(n+2)*sizeof(double));
    double *V = (double*) malloc((n+2)*(n+2)*sizeof(double));

    double *Hx = (double*) malloc((n+1)*(n+1)*sizeof(double));
    double *Ux = (double*) malloc((n+1)*(n+1)*sizeof(double));
    double *Vx = (double*) malloc((n+1)*(n+1)*sizeof(double));

    double *Hy = (double*) malloc((n+1)*(n+1)*sizeof(double));
    double *Uy = (double*) malloc((n+1)*(n+1)*sizeof(double));
    double *Vy = (double*) malloc((n+1)*(n+1)*sizeof(double));


    for(int i=0; i<n+2; i++)
        for(int j=0; j<n+2; j++)
        {
          iH(i,j)=0.1;
          iU(i,j)=0.1;
          iV(i,j)=0.1;
        }


    for(int i=0; i<n+1; i++)
        for(int j=0; j<n+1; j++)
        {
          iHx(i,j)=0.1;
          iUx(i,j)=0.1;
          iVx(i,j)=0.1;

          iHy(i,j)=0.1;
          iUy(i,j)=0.1;
          iVy(i,j)=0.1;
        }

    iH(droploc,droploc) += 5.0;

    
    bp.timer_start();

    for(int iter=0; iter < T; iter++)
    {
        // Reflecting boundary conditions
        for(int i=0; i<n+2; i++)
        {
            iH(i,0) = iH(i,1)   ; iU(i,0) = iU(i,1)     ; iV(i,0) = -iV(i,1);
            iH(i,n+1) = iH(i,n) ; iU(i,n+1) = iU(i,n)   ; iV(i,n+1) = -iV(i,n);
            iH(0,i) = iH(1,i)   ; iU(0,i) = -iU(1,i)    ; iV(0,i) = iV(1,i);
            iH(n+1,i) = iH(n,i) ; iU(n+1,i) = -iU(n,i)  ; iV(n+1,i) = iV(n,i);
        }
        //
        // First half step
        //
        for(int i=0; i<n+1; i++)
        {
            for(int j=0; j<n; j++)
            {

                // height
                iHx(i,j) = (iH(i+1,j+1)+iH(i,j+1))/2 - dt/(2*dx)*(iU(i+1,j+1)-iU(i,j+1));

                // x momentum
                iUx(i,j) = (iU(i+1,j+1)+iU(i,j+1))/2 -          \
                dt/(2*dx)*((pow(iU(i+1,j+1),2)/iH(i+1,j+1) +	\
                          g/2*pow(iH(i+1,j+1),2)) -		\
                         (pow(iU(i,j+1),2)/iH(i,j+1) +	        \
                          g/2*pow(iH(i,j+1),2)));

                // y momentum
                iVx(i,j) = (iV(i+1,j+1)+iV(i,j+1))/2 -          \
                      dt/(2*dx)*((iU(i+1,j+1) *                 \
                                  iV(i+1,j+1)/iH(i+1,j+1)) -    \
                                 (iU(i,j+1) *                   \
                                  iV(i,j+1)/iH(i,j+1)));
            }
        }

        for(int i=0; i<n; i++)
        {
            for(int j=0; j<n+1; j++)
            {
                //height
                iHy(i,j) = (iH(i+1,j+1)+iH(i+1,j))/2 - dt/(2*dy)*(iV(i+1,j+1)-iV(i+1,j));

                //x momentum
                iUy(i,j) = (iU(i+1,j+1)+iU(i+1,j))/2 -	   \
                dt/(2*dy)*((iV(i+1,j+1) *                  \
                           iU(i+1,j+1)/iH(i+1,j+1)) -	   \
                                (iV(i+1,j) *               \
                                 iU(i+1,j)/iH(i+1,j)));

                //y momentum
                iVy(i,j) = (iV(i+1,j+1)+iV(i+1,j))/2 -	\
                dt/(2*dy)*((pow(iV(i+1,j+1),2)/iH(i+1,j+1) +	\
                           g/2*pow(iH(i+1,j+1),2)) -	\
                          (pow(iV(i+1,j),2)/iH(i+1,j) +	\
                           g/2*pow(iH(i+1,j),2)));
            }
        }
        //
        // Second half step
        //

        for(int i=1; i<n+1; i++)
        {
            for(int j=1; j<n+1; j++)
            {
                //height
                iH(i,j) -= (dt/dx)*(iUx(i,j-1)-iUx(i-1,j-1)) - (dt/dy)*(iVy(i-1,j)-iVy(i-1,j-1));

                // x momentum
                iU(i,j) -= (dt/dx)*((pow(iUx(i,j-1),2)/iHx(i,j-1) + g/2*pow(iHx(i,j-1),2)) -        \
                                    (pow(iUx(i-1,j-1),2)/iHx(i-1,j-1) + g/2*pow(iHx(i-1,j-1),2))) - \
                           (dt/dy)*((iVy(i-1,j) * iUy(i-1,j)/iHy(i-1,j)) -
                                    (iVy(i-1,j-1) * iUy(i-1,j-1)/iHy(i-1,j-1)));

                // y momentum    - score
                iV(i,j) -= (dt/dx)*((iUx(i,j-1) * iVx(i,j-1)/iHx(i,j-1)) -                         \
                                    (iUx(i-1,j-1)*iVx(i-1,j-1)/iHx(i-1,j-1))) -                    \
                           (dt/dy)*((pow(iVy(i-1,j),2)/iHy(i-1,j) + g/2*pow(iHy(i-1,j),2)) -       \
                                    (pow(iVy(i-1,j-1),2)/iHy(i-1,j-1) + g/2*pow(iHy(i-1,j-1),2)));

            }
        }
    }

    /* NumPy implementation does not do this!
    // res = numpy.add.reduce(numpy.add.reduce(H / n))
    double res = 0.0;
    for(int i=0;i<n+2;i++)
        for(int j=0;j<n+2;j++)
            res+=iH(i,j)/n;
    */
    bp.timer_stop();
    bp.print("shallow_water(c99_seq)");
}

Cpp11 Boost

// Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
// Saved images may be converted into an animated gif with:
// convert   -delay 20   -loop 0   swater*.png   swater.gif
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <bp_util.h>

#include <boost/multi_array.hpp>

int main (int argc, char **argv)
{
    bp_util_type bp = bp_util_create(argc, argv, 3);
    if (bp.args.has_error) {
        return 1;
    }
    const int n         = bp.args.sizes[0];
    const int other_n   = bp.args.sizes[1];
    const int T         = bp.args.sizes[2];

    if (n != other_n) {
        fprintf(stderr, "Implementation only supports quadratic grid.\n");
        exit(0);
    }

    const double g   = 9.8;       // gravitational constant
    const double dt  = 0.02;      // hardwired timestep
    const double dx  = 1.0;
    const double dy  = 1.0;
    const int droploc = n/4;

    typedef boost::multi_array<double, 2> array_type;

    array_type H(boost::extents[n+2][n+2]);
    array_type U(boost::extents[n+2][n+2]);
    array_type V(boost::extents[n+2][n+2]);

    array_type Hx(boost::extents[n+1][n+1]);
    array_type Ux(boost::extents[n+1][n+1]);
    array_type Vx(boost::extents[n+1][n+1]);
    array_type Hy(boost::extents[n+1][n+1]);
    array_type Uy(boost::extents[n+1][n+1]);
    array_type Vy(boost::extents[n+1][n+1]);


    for(int i=0; i<n+2; i++)
        for(int j=0; j<n+2; j++)
        {
          H[i][j]=0.1;
          U[i][j]=0.1;
          V[i][j]=0.1;
        }


    for(int i=0; i<n+1; i++)
        for(int j=0; j<n+1; j++)
        {
          Hx[i][j]=0.1;
          Ux[i][j]=0.1;
          Vx[i][j]=0.1;

          Hy[i][j]=0.1;
          Uy[i][j]=0.1;
          Vy[i][j]=0.1;
        }

    H[droploc][droploc] += 5.0;

    bp.timer_start();

    for(int iter=0; iter < T; iter++)
    {
        // Reflecting boundary conditions
        for(int i=0; i<n+2; i++)
        {
            H[i][0] = H[i][1]   ; U[i][0] = U[i][1]     ; V[i][0] = -V[i][1];
            H[i][n+1] = H[i][n] ; U[i][n+1] = U[i][n]   ; V[i][n+1] = -V[i][n];
            H[0][i] = H[1][i]   ; U[0][i] = -U[1][i]    ; V[0][i] = V[1][i];
            H[n+1][i] = H[n][i] ; U[n+1][i] = -U[n][i]  ; V[n+1][i] = V[n][i];
        }
        //
        // First half step
        //
        for(int i=0; i<n+1; i++)
        {
            for(int j=0; j<n; j++)
            {

                // height
                Hx[i][j] = (H[i+1][j+1]+H[i][j+1])/2 - dt/(2*dx)*(U[i+1][j+1]-U[i][j+1]);

                // x momentum
                Ux[i][j] = (U[i+1][j+1]+U[i][j+1])/2 -          \
                dt/(2*dx)*((pow(U[i+1][j+1],2)/H[i+1][j+1] +	\
                          g/2*pow(H[i+1][j+1],2)) -		\
                         (pow(U[i][j+1],2)/H[i][j+1] +	        \
                          g/2*pow(H[i][j+1],2)));

                // y momentum
                Vx[i][j] = (V[i+1][j+1]+V[i][j+1])/2 -          \
                      dt/(2*dx)*((U[i+1][j+1] *                 \
                                  V[i+1][j+1]/H[i+1][j+1]) -    \
                                 (U[i][j+1] *                   \
                                  V[i][j+1]/H[i][j+1]));
            }
        }

        for(int i=0; i<n; i++)
        {
            for(int j=0; j<n+1; j++)
            {
                //height
                Hy[i][j] = (H[i+1][j+1]+H[i+1][j])/2 - dt/(2*dy)*(V[i+1][j+1]-V[i+1][j]);

                //x momentum
                Uy[i][j] = (U[i+1][j+1]+U[i+1][j])/2 -	   \
                dt/(2*dy)*((V[i+1][j+1] *                  \
                           U[i+1][j+1]/H[i+1][j+1]) -	   \
                                (V[i+1][j] *               \
                                 U[i+1][j]/H[i+1][j]));

                //y momentum
                Vy[i][j] = (V[i+1][j+1]+V[i+1][j])/2 -	\
                dt/(2*dy)*((pow(V[i+1][j+1],2)/H[i+1][j+1] +	\
                           g/2*pow(H[i+1][j+1],2)) -	\
                          (pow(V[i+1][j],2)/H[i+1][j] +	\
                           g/2*pow(H[i+1][j],2)));
            }
        }
        //
        // Second half step
        //

        for(int i=1; i<n+1; i++)
        {
            for(int j=1; j<n+1; j++)
            {
                //height
                H[i][j] -= (dt/dx)*(Ux[i][j-1]-Ux[i-1][j-1]) - (dt/dy)*(Vy[i-1][j]-Vy[i-1][j-1]);

                // x momentum
                U[i][j] -= (dt/dx)*((pow(Ux[i][j-1],2)/Hx[i][j-1] + g/2*pow(Hx[i][j-1],2)) -        \
                                    (pow(Ux[i-1][j-1],2)/Hx[i-1][j-1] + g/2*pow(Hx[i-1][j-1],2))) - \
                           (dt/dy)*((Vy[i-1][j] * Uy[i-1][j]/Hy[i-1][j]) -
                                    (Vy[i-1][j-1] * Uy[i-1][j-1]/Hy[i-1][j-1]));

                // y momentum    - score
                V[i][j] -= (dt/dx)*((Ux[i][j-1] * Vx[i][j-1]/Hx[i][j-1]) -                         \
                                    (Ux[i-1][j-1]*Vx[i-1][j-1]/Hx[i-1][j-1])) -                    \
                           (dt/dy)*((pow(Vy[i-1][j],2)/Hy[i-1][j] + g/2*pow(Hy[i-1][j],2)) -       \
                                    (pow(Vy[i-1][j-1],2)/Hy[i-1][j-1] + g/2*pow(Hy[i-1][j-1],2)));

            }
        }
    }

    // res = numpy.add.reduce(numpy.add.reduce(H / n))
    double res = 0.0;
    for(int i=0;i<n+2;i++)
        for(int j=0;j<n+2;j++)
            res+=H[i][j]/n;

    bp.timer_stop();
    bp.print("shallow_water(cpp11_boost)");
}

Cpp11 Bxx

/*
This file is part of Bohrium and Copyright (c) 2012 the Bohrium team:
http://bohrium.bitbucket.org

Bohrium is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3
of the License, or (at your option) any later version.

Bohrium is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the
GNU Lesser General Public License along with bohrium.

If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <vector>
#include <bxx/bohrium.hpp>
#include <bp_util.h>

using namespace std;
using namespace bxx;

static double g = 9.80665;  // Gravitational acceleration

template <typename T>
void step(multi_array<T>& H, multi_array<T>& U, multi_array<T>& V, T dt, T dx, T dy)
{
    multi_array<T> Hx, Ux, Vx, Hy, Uy, Vy;

    // Reflecting boundary conditions
    H[_ALL()][0]   =       H[_ALL()][1];
    U[_ALL()][0]   =       U[_ALL()][1];
    V[_ALL()][0]   = (T)-1*V[_ALL()][1];

    H[_ALL()][-1]  =       H[_ALL()][-2];
    U[_ALL()][-1]  =       U[_ALL()][-2];
    V[_ALL()][-1]  = (T)-1*V[_ALL()][-2];

    H[0][_ALL()]   =       H[1][_ALL()];
    U[0][_ALL()]   = (T)-1*U[1][_ALL()];
    V[0][_ALL()]   =       V[1][_ALL()];

    H[-1][_ALL()]  =       H[-2][_ALL()];
    U[-1][_ALL()]  = (T)-1*U[-2][_ALL()];
    V[-1][_ALL()]  =       V[-2][_ALL()];

    // First half step
    // height
    Hx = (H[_ABF()][_INNER()] + H[_ABL()][_INNER()]) / (T)2 - dt/((T)2*dx) * (U[_ABF()][_INNER()] - U[_ABL()][_INNER()]);

    // x momentum
    Ux =                (U[_ABF()][_INNER()]       + U[_ABL()][_INNER()]) / (T)2 -
    dt/((T)2*dx) * ((pow(U[_ABF()][_INNER()], 2.0) / H[_ABF()][_INNER()] + g / (T)2 * pow(H[_ABF()][_INNER()], 2.0)) -
                    (pow(U[_ABL()][_INNER()], 2.0) / H[_ABL()][_INNER()] + g / (T)2 * pow(H[_ABL()][_INNER()], 2.0)));
    
    // y momentum
    Vx =            (V[_ABF()][_INNER()] + V[_ABL()][_INNER()]) / (T)2 -
    dt/((T)2*dx) * ((U[_ABF()][_INNER()] * V[_ABF()][_INNER()]  / H[_ABF()][_INNER()]) -
                    (U[_ABL()][_INNER()] * V[_ABL()][_INNER()]  / H[_ABL()][_INNER()]));

    // height
    Hy = (H[_INNER()][_ABF()] + H[_INNER()][_ABL()]) / (T)2 - dt/((T)2*dy) * (V[_INNER()][_ABF()] - V[_INNER()][_ABL()]);

    // x momentum
    Uy =          (U[_INNER()][_ABF()] + U[_INNER()][_ABL()]) / (T)2 -
    dt/((T)2*dy)*((V[_INNER()][_ABF()] * U[_INNER()][_ABF()]  / H[_INNER()][_ABF()]) -
                  (V[_INNER()][_ABL()] * U[_INNER()][_ABL()]  / H[_INNER()][_ABL()]));

    // y momentum
    Vy =              (V[_INNER()][_ABF()] +     V[_INNER()][_ABL()]) / (T)2 -
    dt/((T)2*dy)*((pow(V[_INNER()][_ABF()],2.0)/ H[_INNER()][_ABF()] + g/(T)2*pow(H[_INNER()][_ABF()],2.0)) -
                  (pow(V[_INNER()][_ABL()],2.0)/ H[_INNER()][_ABL()] + g/(T)2*pow(H[_INNER()][_ABL()],2.0)));
    //
    // Second half step

    // height
    H[_INNER()][_INNER()] -= (dt/dx)*(Ux[_ABF()][_ALL()] - Ux[_ABL()][_ALL()]) + (dt/dy) * (Vy[_ALL()][_ABF()] - Vy[_ALL()][_ABL()]);

    // x momentum
    U[_INNER()][_INNER()] -= (dt/dx)*((pow(Ux[_ABF()][_ALL()],2.0) / Hx[_ABF()][_ALL()] + g/(T)2*pow(Hx[_ABF()][_ALL()],2.0)) -
                                      (pow(Ux[_ABL()][_ALL()],2.0) / Hx[_ABL()][_ALL()] + g/(T)2*pow(Hx[_ABL()][_ALL()],2.0))) +
                             (dt/dy)*((Vy[_ALL()][_ABF()] * Uy[_ALL()][_ABF()] / Hy[_ALL()][_ABF()]) -
                                      (Vy[_ALL()][_ABL()] * Uy[_ALL()][_ABL()] / Hy[_ALL()][_ABL()]));
    // y momentum
    V[_INNER()][_INNER()] -= (dt/dx)*((    Ux[_ABF()][_ALL()] *      Vx[_ABF()][_ALL()] /            Hx[_ABF()][_ALL()]) -
                                      (    Ux[_ABL()][_ALL()] *      Vx[_ABL()][_ALL()] /            Hx[_ABL()][_ALL()])) +
                             (dt/dy)*((pow(Vy[_ALL()][_ABF()],2.0) / Hy[_ALL()][_ABF()] + g/(T)2*pow(Hy[_ALL()][_ABF()],2.0)) -
                                      (pow(Vy[_ALL()][_ABL()],2.0) / Hy[_ALL()][_ABL()] + g/(T)2*pow(Hy[_ALL()][_ABL()],2.0)));
}

template <typename T>
void simulate(multi_array<T>& H, multi_array<T>& U, multi_array<T>& V, int64_t timesteps, int visualize)
{
    for(int timestep=0; timestep<timesteps; ++timestep) {
        step(H, U, V, 0.02, 1.0, 1.0);
        if (visualize) {
            plot_surface(H, 1, 0, 0, 5.5);
        }
    }
}

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 3);
    if (bp.args.has_error) {
        return 0;
    }
    const size_t height     = bp.args.sizes[0];
    const size_t width      = bp.args.sizes[1];
    const int64_t timesteps = bp.args.sizes[2];

    multi_array<double> H, U, V;

    H = ones<double>(height, width);
    U = zeros<double>(height, width);
    V = zeros<double>(height, width);

    multi_array<double> x, y, xx, yy, droplet;  // Create droplet
    x = linspace<double>(-1, 1, 8, true);
    y = linspace<double>(-1, 1, 8, true);
    xx = gridify(x, 0);
    yy = gridify(x, 1);
    
    droplet = 8.0 * exp(-5.0 * (pow(xx,2.0) + pow(yy,2.0)));
                                                // Let it drip into the water
    size_t droploc = height / 2;
    H[_(droploc,droploc+7)][_(droploc,droploc+7)] += droplet;
    droploc = height / 4;
    H[_(droploc,droploc+7)][_(droploc,droploc+7)] += droplet;

    Runtime::instance().flush();                // Run the simulation
    bp.timer_start();
    simulate(H, U, V, timesteps, (int)bp.args.visualize);
    Runtime::instance().flush();
    bp.timer_stop();
    
    bp.print("shallow_water(cpp11_bxx)");
    if (bp.args.verbose) {                      // and values.
        cout << ", output: " << endl;
    }

    return 0;
}

Cpp11 Omp

// Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
// Saved images may be converted into an animated gif with:
// convert   -delay 20   -loop 0   swater*.png   swater.gif
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <bp_util.h>

#define WIDTH 5000
#define HEIGHT 5000

int main (int argc, char **argv)
{
    bp_util_type bp = bp_util_create(argc, argv, 3);
    if (bp.args.has_error) {
        return 1;
    }
    const int height        = bp.args.sizes[0];
    const int width         = bp.args.sizes[1];
    const int iterations    = bp.args.sizes[2];

    if (width != WIDTH) {
        fprintf(stderr, "Unsupported size, size must be %dx%d\n", WIDTH, HEIGHT);
        exit(0);
    }
    if (height != WIDTH) {
        fprintf(stderr, "Unsupported size, size must be %dx%d\n", WIDTH, HEIGHT);
        exit(0);
    }

    if (width != height) {
        fprintf(stderr, "Implementation only supports quadratic grid.\n");
        exit(0);
    }

    const double g   = 9.8;       // gravitational constant
    const double dt  = 0.02;      // hardwired timestep
    const double dx  = 1.0;
    const double dy  = 1.0;
    const int droploc = WIDTH/4;

    auto H = new double[WIDTH+2][WIDTH+2];
    auto U = new double[WIDTH+2][WIDTH+2];
    auto V = new double[WIDTH+2][WIDTH+2];

    auto Hx = new double[WIDTH+1][WIDTH+1];
    auto Ux = new double[WIDTH+1][WIDTH+1];
    auto Vx = new double[WIDTH+1][WIDTH+1];
    auto Hy = new double[WIDTH+1][WIDTH+1];
    auto Uy = new double[WIDTH+1][WIDTH+1];
    auto Vy = new double[WIDTH+1][WIDTH+1];

    #pragma omp parallel for collapse(2)
    for(int i=0; i<HEIGHT+2; i++) {
        for(int j=0; j<WIDTH+2; j++) {
          H[i][j]=0.1;
          U[i][j]=0.1;
          V[i][j]=0.1;
        }
    }

    #pragma omp parallel for collapse(2)
    for(int i=0; i<HEIGHT+1; i++) {
        for(int j=0; j<WIDTH+1; j++) {
          Hx[i][j]=0.1;
          Ux[i][j]=0.1;
          Vx[i][j]=0.1;

          Hy[i][j]=0.1;
          Uy[i][j]=0.1;
          Vy[i][j]=0.1;
        }
    }
    H[droploc][droploc] += 5.0;

    bp.timer_start();

    for(int iter=0; iter < iterations; iter++) {

        #pragma omp parallel for        
        for(int i=0; i<WIDTH+2; i++) {  // Reflecting boundary conditions
            H[i][0] = H[i][1];
            U[i][0] = U[i][1];
            V[i][0] = -V[i][1];
            H[i][WIDTH+1] = H[i][WIDTH];
            U[i][WIDTH+1] = U[i][WIDTH];
            V[i][WIDTH+1] = -V[i][WIDTH];
            H[0][i] = H[1][i];
            U[0][i] = -U[1][i];
            V[0][i] = V[1][i];
            H[HEIGHT+1][i] = H[HEIGHT][i];
            U[HEIGHT+1][i] = -U[HEIGHT][i];
            V[HEIGHT+1][i] = V[HEIGHT][i];
        }

        //
        // First half step
        //
        #pragma omp parallel for collapse(2)
        for(int i=0; i<HEIGHT+1; i++) {
            for(int j=0; j<WIDTH; j++) {

                // height
                Hx[i][j] = (H[i+1][j+1]+H[i][j+1])/2 - dt/(2*dx)*(U[i+1][j+1]-U[i][j+1]);

                // x momentum
                Ux[i][j] = (U[i+1][j+1]+U[i][j+1])/2 -          \
                dt/(2*dx)*((pow(U[i+1][j+1],2)/H[i+1][j+1] +	\
                          g/2*pow(H[i+1][j+1],2)) -		\
                         (pow(U[i][j+1],2)/H[i][j+1] +	        \
                          g/2*pow(H[i][j+1],2)));

                // y momentum
                Vx[i][j] = (V[i+1][j+1]+V[i][j+1])/2 -          \
                      dt/(2*dx)*((U[i+1][j+1] *                 \
                                  V[i+1][j+1]/H[i+1][j+1]) -    \
                                 (U[i][j+1] *                   \
                                  V[i][j+1]/H[i][j+1]));
            }
        }

        #pragma omp parallel for collapse(2)
        for(int i=0; i<HEIGHT; i++) {
            for(int j=0; j<WIDTH+1; j++) {
                //height
                Hy[i][j] = (H[i+1][j+1]+H[i+1][j])/2 - dt/(2*dy)*(V[i+1][j+1]-V[i+1][j]);

                //x momentum
                Uy[i][j] = (U[i+1][j+1]+U[i+1][j])/2 -	   \
                dt/(2*dy)*((V[i+1][j+1] *                  \
                           U[i+1][j+1]/H[i+1][j+1]) -	   \
                                (V[i+1][j] *               \
                                 U[i+1][j]/H[i+1][j]));

                //y momentum
                Vy[i][j] = (V[i+1][j+1]+V[i+1][j])/2 -	\
                dt/(2*dy)*((pow(V[i+1][j+1],2)/H[i+1][j+1] +	\
                           g/2*pow(H[i+1][j+1],2)) -	\
                          (pow(V[i+1][j],2)/H[i+1][j] +	\
                           g/2*pow(H[i+1][j],2)));
            }
        }

        //
        // Second half step
        //

        #pragma omp parallel for collapse(2)
        for(int i=1; i<HEIGHT+1; i++) {
            for(int j=1; j<WIDTH+1; j++) {
                //height
                H[i][j] -= (dt/dx)*(Ux[i][j-1]-Ux[i-1][j-1]) - (dt/dy)*(Vy[i-1][j]-Vy[i-1][j-1]);

                // x momentum
                U[i][j] -= (dt/dx)*((pow(Ux[i][j-1],2)/Hx[i][j-1] + g/2*pow(Hx[i][j-1],2)) -        \
                                    (pow(Ux[i-1][j-1],2)/Hx[i-1][j-1] + g/2*pow(Hx[i-1][j-1],2))) - \
                           (dt/dy)*((Vy[i-1][j] * Uy[i-1][j]/Hy[i-1][j]) -
                                    (Vy[i-1][j-1] * Uy[i-1][j-1]/Hy[i-1][j-1]));

                // y momentum    - score
                V[i][j] -= (dt/dx)*((Ux[i][j-1] * Vx[i][j-1]/Hx[i][j-1]) -                         \
                                    (Ux[i-1][j-1]*Vx[i-1][j-1]/Hx[i-1][j-1])) -                    \
                           (dt/dy)*((pow(Vy[i-1][j],2)/Hy[i-1][j] + g/2*pow(Hy[i-1][j],2)) -       \
                                    (pow(Vy[i-1][j-1],2)/Hy[i-1][j-1] + g/2*pow(Hy[i-1][j-1],2)));
            }
        }
    }

    /* NumPy implementation does not do this!?
    // res = numpy.add.reduce(numpy.add.reduce(H / n))
    double res = 0.0;
    #pragma omp parallel for collapse(2) reduction(+:res)
    for(int i=0;i<HEIGHT+2;i++) {
        for(int j=0;j<WIDTH+2;j++) {
            res+=H[i][j]/WIDTH;
        }
    }
    */

    bp.timer_stop();
    bp.print("shallow_water(cpp11_omp)");
}

Cpp11 Seq

// Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/
// Saved images may be converted into an animated gif with:
// convert   -delay 20   -loop 0   swater*.png   swater.gif
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <bp_util.h>

#define WIDTH 5000
#define HEIGHT 5000

int main (int argc, char **argv)
{
    bp_util_type bp = bp_util_create(argc, argv, 3);
    if (bp.args.has_error) {
        return 1;
    }
    const int height    = bp.args.sizes[0];
    const int width     = bp.args.sizes[1];
    const int T         = bp.args.sizes[2];

    if (width != WIDTH) {
        fprintf(stderr, "Unsupported size, size must be %dx%d\n", WIDTH, HEIGHT);
        exit(0);
    }
    if (height != WIDTH) {
        fprintf(stderr, "Unsupported size, size must be %dx%d\n", WIDTH, HEIGHT);
        exit(0);
    }

    if (width != height) {
        fprintf(stderr, "Implementation only supports quadratic grid.\n");
        exit(0);
    }

    const double g   = 9.8;       // gravitational constant
    const double dt  = 0.02;      // hardwired timestep
    const double dx  = 1.0;
    const double dy  = 1.0;
    const int droploc = WIDTH/4;

    auto H = new double[WIDTH+2][WIDTH+2];
    auto U = new double[WIDTH+2][WIDTH+2];
    auto V = new double[WIDTH+2][WIDTH+2];

    auto Hx = new double[WIDTH+1][WIDTH+1];
    auto Ux = new double[WIDTH+1][WIDTH+1];
    auto Vx = new double[WIDTH+1][WIDTH+1];
    auto Hy = new double[WIDTH+1][WIDTH+1];
    auto Uy = new double[WIDTH+1][WIDTH+1];
    auto Vy = new double[WIDTH+1][WIDTH+1];

    for(int i=0; i<HEIGHT+2; i++) {
        for(int j=0; j<WIDTH+2; j++) {
          H[i][j]=0.1;
          U[i][j]=0.1;
          V[i][j]=0.1;
        }
    }


    for(int i=0; i<HEIGHT+1; i++) {
        for(int j=0; j<WIDTH+1; j++) {
          Hx[i][j]=0.1;
          Ux[i][j]=0.1;
          Vx[i][j]=0.1;

          Hy[i][j]=0.1;
          Uy[i][j]=0.1;
          Vy[i][j]=0.1;
        }
    }
    H[droploc][droploc] += 5.0;

    bp.timer_start();

    for(int iter=0; iter < T; iter++) {
        
        for(int i=0; i<WIDTH+2; i++) {  // Reflecting boundary conditions
            H[i][0] = H[i][1];
            U[i][0] = U[i][1];
            V[i][0] = -V[i][1];
            H[i][WIDTH+1] = H[i][WIDTH];
            U[i][WIDTH+1] = U[i][WIDTH];
            V[i][WIDTH+1] = -V[i][WIDTH];
            H[0][i] = H[1][i];
            U[0][i] = -U[1][i];
            V[0][i] = V[1][i];
            H[HEIGHT+1][i] = H[HEIGHT][i];
            U[HEIGHT+1][i] = -U[HEIGHT][i];
            V[HEIGHT+1][i] = V[HEIGHT][i];
        }

        //
        // First half step
        //

        for(int i=0; i<HEIGHT+1; i++) {
            for(int j=0; j<WIDTH; j++) {

                // height
                Hx[i][j] = (H[i+1][j+1]+H[i][j+1])/2 - dt/(2*dx)*(U[i+1][j+1]-U[i][j+1]);

                // x momentum
                Ux[i][j] = (U[i+1][j+1]+U[i][j+1])/2 -          \
                dt/(2*dx)*((pow(U[i+1][j+1],2)/H[i+1][j+1] +	\
                          g/2*pow(H[i+1][j+1],2)) -		\
                         (pow(U[i][j+1],2)/H[i][j+1] +	        \
                          g/2*pow(H[i][j+1],2)));

                // y momentum
                Vx[i][j] = (V[i+1][j+1]+V[i][j+1])/2 -          \
                      dt/(2*dx)*((U[i+1][j+1] *                 \
                                  V[i+1][j+1]/H[i+1][j+1]) -    \
                                 (U[i][j+1] *                   \
                                  V[i][j+1]/H[i][j+1]));
            }
        }

        for(int i=0; i<HEIGHT; i++) {
            for(int j=0; j<WIDTH+1; j++) {
                //height
                Hy[i][j] = (H[i+1][j+1]+H[i+1][j])/2 - dt/(2*dy)*(V[i+1][j+1]-V[i+1][j]);

                //x momentum
                Uy[i][j] = (U[i+1][j+1]+U[i+1][j])/2 -	   \
                dt/(2*dy)*((V[i+1][j+1] *                  \
                           U[i+1][j+1]/H[i+1][j+1]) -	   \
                                (V[i+1][j] *               \
                                 U[i+1][j]/H[i+1][j]));

                //y momentum
                Vy[i][j] = (V[i+1][j+1]+V[i+1][j])/2 -	\
                dt/(2*dy)*((pow(V[i+1][j+1],2)/H[i+1][j+1] +	\
                           g/2*pow(H[i+1][j+1],2)) -	\
                          (pow(V[i+1][j],2)/H[i+1][j] +	\
                           g/2*pow(H[i+1][j],2)));
            }
        }

        //
        // Second half step
        //

        for(int i=1; i<HEIGHT+1; i++) {
            for(int j=1; j<WIDTH+1; j++) {
                //height
                H[i][j] -= (dt/dx)*(Ux[i][j-1]-Ux[i-1][j-1]) - (dt/dy)*(Vy[i-1][j]-Vy[i-1][j-1]);

                // x momentum
                U[i][j] -= (dt/dx)*((pow(Ux[i][j-1],2)/Hx[i][j-1] + g/2*pow(Hx[i][j-1],2)) -        \
                                    (pow(Ux[i-1][j-1],2)/Hx[i-1][j-1] + g/2*pow(Hx[i-1][j-1],2))) - \
                           (dt/dy)*((Vy[i-1][j] * Uy[i-1][j]/Hy[i-1][j]) -
                                    (Vy[i-1][j-1] * Uy[i-1][j-1]/Hy[i-1][j-1]));

                // y momentum    - score
                V[i][j] -= (dt/dx)*((Ux[i][j-1] * Vx[i][j-1]/Hx[i][j-1]) -                         \
                                    (Ux[i-1][j-1]*Vx[i-1][j-1]/Hx[i-1][j-1])) -                    \
                           (dt/dy)*((pow(Vy[i-1][j],2)/Hy[i-1][j] + g/2*pow(Hy[i-1][j],2)) -       \
                                    (pow(Vy[i-1][j-1],2)/Hy[i-1][j-1] + g/2*pow(Hy[i-1][j-1],2)));

            }
        }
    }

    /* NumPy implementation does not do this!?
    // res = numpy.add.reduce(numpy.add.reduce(H / n))
    double res = 0.0;
    for(int i=0;i<HEIGHT+2;i++) {
        for(int j=0;j<WIDTH+2;j++) {
            res+=H[i][j]/WIDTH;
        }
    }
    */

    bp.timer_stop();
    bp.print("shallow_water(cpp11_seq)");
}

Csharp Numcil

#region Copyright
/*
This file is part of Bohrium and copyright (c) 2012 the Bohrium:
team <http://www.bh107.org>.

Bohrium is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3
of the License, or (at your option) any later version.

Bohrium is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the
GNU Lesser General Public License along with Bohrium.

If not, see <http://www.gnu.org/licenses/>.
*/
#endregion

//Adapted from: http://people.sc.fsu.edu/~jburkardt/m_src/shallow_water_2d/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using R = NumCIL.Range;
using NumCIL;

namespace shallow_water
{
    public static class ShallowWaterSolver
    {
        /// <summary>
        /// NumCIL equivalent to [:]
        /// </summary>
        public static readonly R ALL = R.All;
        /// <summary>
        /// NumCIL equivalent to [1:-1]
        /// </summary>
        public static readonly R INNER = R.Slice(1, -1);
        /// <summary>
        /// NumCIL equivalent to [0]
        /// </summary>
        public static readonly R FIRST = R.El(0);
        /// <summary>
        /// NumCIL equivalent to [1]
        /// </summary>
        public static readonly R SECOND = R.El(1);
        /// <summary>
        /// NumCIL equivalent to [-1]
        /// </summary>
        public static readonly R LAST = R.El(-1);
        /// <summary>
        /// NumCIL equivalent to [:-1]
        /// </summary>
        public static readonly R ZM1 = R.Slice(0, -1);
        /// <summary>
        /// NumCIL equivalent to [1:]
        /// </summary>
        public static readonly R SKIP1 = R.Slice(1, 0);


        public class DataDouble
        {
            public readonly long N;
            public readonly NumCIL.Double.NdArray H;
            public readonly NumCIL.Double.NdArray U;
            public readonly NumCIL.Double.NdArray V;
            public readonly NumCIL.Double.NdArray Hx;
            public readonly NumCIL.Double.NdArray Ux;
            public readonly NumCIL.Double.NdArray Vx;
            public readonly NumCIL.Double.NdArray Hy;
            public readonly NumCIL.Double.NdArray Uy;
            public readonly NumCIL.Double.NdArray Vy;

            public DataDouble(long n)
            {
                N = n;
                H = NumCIL.Double.Generate.Ones(n + 2, n + 2);
                U = NumCIL.Double.Generate.Ones(n + 2, n + 2);
                V = NumCIL.Double.Generate.Ones(n + 2, n + 2);
                Hx = NumCIL.Double.Generate.Ones(n + 1, n + 1);
                   Ux = NumCIL.Double.Generate.Ones(n + 1, n + 1);
                Vx = NumCIL.Double.Generate.Ones(n + 1, n + 1);
                   Hy = NumCIL.Double.Generate.Ones(n + 1, n + 1);
                Uy = NumCIL.Double.Generate.Ones(n + 1, n + 1);
                Vy = NumCIL.Double.Generate.Ones(n + 1, n + 1);
            }
        }


        public static System.Double SolveDouble(long timesteps, DataDouble data)
        {
            //Convenience indices
            R RN = R.El(data.N); //Same as [n]
            R RN1 = R.El(data.N + 1); //Same as [n+1]

            System.Double g = 9.8f;// gravitational constant
            System.Double dt = 0.02f; // hardwired timestep
            System.Double dx = 1.0f;
            System.Double dy = 1.0f;
            long droploc = data.N / 4;

            var H = data.H;
            var U = data.U;
            var V = data.V;
            var Hx = data.Hx;
            var Ux = data.Ux;
            var Vx = data.Vx;
            var Hy = data.Hy;
            var Uy = data.Uy;
            var Vy = data.Vy;

            //Splash!!!
            H[droploc, droploc] += 5.0f;

            for (int i = 0; i < timesteps; i++)
            {
                H.Flush();
                // Reflecting boundary conditions
                H[ALL, FIRST] = H[ALL, SECOND];
                U[ALL, FIRST] = U[ALL, SECOND];
                V[ALL, FIRST] = -V[ALL, SECOND];
                H[ALL, RN1] = H[ALL, RN];
                U[ALL, RN1] = U[ALL, RN];
                V[ALL, RN1] = -V[ALL, RN];
                H[FIRST, ALL] = H[SECOND, ALL];
                U[FIRST, ALL] = -U[SECOND, ALL];
                V[FIRST, ALL] = V[SECOND, ALL];
                H[RN1, ALL] = H[RN, ALL];
                U[RN1, ALL] = -U[RN, ALL];
                V[RN1, ALL] = V[RN, ALL];

                //First half-step

                //Height
                Hx[ALL, R.Slice(0, -1)] = (H[SKIP1,INNER]+H[ZM1,INNER])/2 -
                        dt/(2*dx)*(U[SKIP1,INNER]-U[ZM1,INNER]);

                //x momentum
                Ux[ALL, R.Slice(0, -1)] = (U[SKIP1,INNER]+U[ZM1,INNER])/2 -
                dt/(2*dx)*((U[SKIP1,INNER].Pow(2)/H[SKIP1,INNER] +
                            g/2*H[SKIP1,INNER].Pow(2)) -
                           (U[ZM1,INNER].Pow(2)/H[ZM1,INNER] +
                            g/2*H[ZM1,INNER].Pow(2)));

                // y momentum
                Vx[ALL, ZM1] = (V[SKIP1,INNER]+V[ZM1,INNER])/2 -
                            dt/(2*dx)*((U[SKIP1,INNER] *
                            V[SKIP1,INNER]/H[SKIP1, INNER]) -
                           (U[ZM1,INNER] *
                            V[ZM1,INNER]/H[ZM1,INNER]));



                // height
                Hy[ZM1,ALL] = (H[INNER,SKIP1]+H[INNER,ZM1])/2 -
                            dt/(2*dy)*(V[INNER,SKIP1]-V[INNER,ZM1]);

                // x momentum
                Uy[ZM1,ALL] = (U[INNER,SKIP1]+U[INNER,ZM1])/2 -
                            dt/(2*dy)*((V[INNER,SKIP1] *
                            U[INNER,SKIP1]/H[INNER,SKIP1]) -
                           (V[INNER,ZM1] *
                            U[INNER,ZM1]/H[INNER,ZM1]));
                // y momentum
                Vy[ZM1,ALL] = (V[INNER,SKIP1]+V[INNER,ZM1])/2 -
                            dt/(2*dy)*((V[INNER,SKIP1].Pow(2)/H[INNER,SKIP1] +
                            g/2*H[INNER,SKIP1].Pow(2)) -
                           (V[INNER,ZM1].Pow(2)/H[INNER,ZM1] +
                            g/2*H[INNER,ZM1].Pow(2)));

                // Second half step

                // height
                H[INNER, INNER] = H[INNER, INNER] -
                            (dt/dx)*(Ux[SKIP1,ZM1]-Ux[ZM1,ZM1]) -
                            (dt/dy)*(Vy[ZM1,SKIP1]-Vy[ZM1, ZM1]);

                // x momentum
                U[INNER, INNER] = U[INNER, INNER] -
                           (dt/dx)*((Ux[SKIP1,ZM1].Pow(2)/Hx[SKIP1,ZM1] +
                             g/2*Hx[SKIP1,ZM1].Pow(2)) -
                            (Ux[ZM1,ZM1].Pow(2)/Hx[ZM1,ZM1] +
                             g / 2 * Hx[ZM1, ZM1].Pow(2))) -
                             (dt/dy)*((Vy[ZM1,SKIP1] *
                                       Uy[ZM1,SKIP1]/Hy[ZM1,SKIP1]) -
                                        (Vy[ZM1,ZM1] *
                                         Uy[ZM1,ZM1]/Hy[ZM1,ZM1]));
                // y momentum
                V[R.Slice(1, -1),R.Slice(1, -1)] = V[R.Slice(1, -1),R.Slice(1, -1)] -
                               (dt/dx)*((Ux[SKIP1,ZM1] *
                                         Vx[SKIP1,ZM1]/Hx[SKIP1,ZM1]) -
                                        (Ux[ZM1,ZM1]*Vx[ZM1,ZM1]/Hx[ZM1,ZM1])) -
                                        (dt / dy) * ((Vy[ZM1, SKIP1].Pow(2) / Hy[ZM1, SKIP1] +
                                                  g / 2 * Hy[ZM1, SKIP1].Pow(2)) -
                                                 (Vy[ZM1, ZM1].Pow(2) / Hy[ZM1, ZM1] +
                                                  g / 2 * Hy[ZM1, ZM1].Pow(2)));
            }
            //Make sure we have the actual data and use it as a checksum
            return NumCIL.Double.Add.Reduce(NumCIL.Double.Add.Reduce(H / data.N)).Value[0];
        }
        public class DataFloat
        {
            public readonly long N;
            public readonly NumCIL.Float.NdArray H;
            public readonly NumCIL.Float.NdArray U;
            public readonly NumCIL.Float.NdArray V;
            public readonly NumCIL.Float.NdArray Hx;
            public readonly NumCIL.Float.NdArray Ux;
            public readonly NumCIL.Float.NdArray Vx;
            public readonly NumCIL.Float.NdArray Hy;
            public readonly NumCIL.Float.NdArray Uy;
            public readonly NumCIL.Float.NdArray Vy;

            public DataFloat(long n)
            {
                N = n;
                H = NumCIL.Float.Generate.Ones(n + 2, n + 2);
                U = NumCIL.Float.Generate.Ones(n + 2, n + 2);
                V = NumCIL.Float.Generate.Ones(n + 2, n + 2);
                Hx = NumCIL.Float.Generate.Ones(n + 1, n + 1);
                   Ux = NumCIL.Float.Generate.Ones(n + 1, n + 1);
                Vx = NumCIL.Float.Generate.Ones(n + 1, n + 1);
                   Hy = NumCIL.Float.Generate.Ones(n + 1, n + 1);
                Uy = NumCIL.Float.Generate.Ones(n + 1, n + 1);
                Vy = NumCIL.Float.Generate.Ones(n + 1, n + 1);
            }
        }


        public static System.Single SolveFloat(long timesteps, DataFloat data)
        {
            //Convenience indices
            R RN = R.El(data.N); //Same as [n]
            R RN1 = R.El(data.N + 1); //Same as [n+1]

            System.Single g = 9.8f;// gravitational constant
            System.Single dt = 0.02f; // hardwired timestep
            System.Single dx = 1.0f;
            System.Single dy = 1.0f;
            long droploc = data.N / 4;

            var H = data.H;
            var U = data.U;
            var V = data.V;
            var Hx = data.Hx;
            var Ux = data.Ux;
            var Vx = data.Vx;
            var Hy = data.Hy;
            var Uy = data.Uy;
            var Vy = data.Vy;

            //Splash!!!
            H[droploc, droploc] += 5.0f;

            for (int i = 0; i < timesteps; i++)
            {
                H.Flush();
                // Reflecting boundary conditions
                H[ALL, FIRST] = H[ALL, SECOND];
                U[ALL, FIRST] = U[ALL, SECOND];
                V[ALL, FIRST] = -V[ALL, SECOND];
                H[ALL, RN1] = H[ALL, RN];
                U[ALL, RN1] = U[ALL, RN];
                V[ALL, RN1] = -V[ALL, RN];
                H[FIRST, ALL] = H[SECOND, ALL];
                U[FIRST, ALL] = -U[SECOND, ALL];
                V[FIRST, ALL] = V[SECOND, ALL];
                H[RN1, ALL] = H[RN, ALL];
                U[RN1, ALL] = -U[RN, ALL];
                V[RN1, ALL] = V[RN, ALL];

                //First half-step

                //Height
                Hx[ALL, R.Slice(0, -1)] = (H[SKIP1,INNER]+H[ZM1,INNER])/2 -
                        dt/(2*dx)*(U[SKIP1,INNER]-U[ZM1,INNER]);

                //x momentum
                Ux[ALL, R.Slice(0, -1)] = (U[SKIP1,INNER]+U[ZM1,INNER])/2 -
                dt/(2*dx)*((U[SKIP1,INNER].Pow(2)/H[SKIP1,INNER] +
                            g/2*H[SKIP1,INNER].Pow(2)) -
                           (U[ZM1,INNER].Pow(2)/H[ZM1,INNER] +
                            g/2*H[ZM1,INNER].Pow(2)));

                // y momentum
                Vx[ALL, ZM1] = (V[SKIP1,INNER]+V[ZM1,INNER])/2 -
                            dt/(2*dx)*((U[SKIP1,INNER] *
                            V[SKIP1,INNER]/H[SKIP1, INNER]) -
                           (U[ZM1,INNER] *
                            V[ZM1,INNER]/H[ZM1,INNER]));



                // height
                Hy[ZM1,ALL] = (H[INNER,SKIP1]+H[INNER,ZM1])/2 -
                            dt/(2*dy)*(V[INNER,SKIP1]-V[INNER,ZM1]);

                // x momentum
                Uy[ZM1,ALL] = (U[INNER,SKIP1]+U[INNER,ZM1])/2 -
                            dt/(2*dy)*((V[INNER,SKIP1] *
                            U[INNER,SKIP1]/H[INNER,SKIP1]) -
                           (V[INNER,ZM1] *
                            U[INNER,ZM1]/H[INNER,ZM1]));
                // y momentum
                Vy[ZM1,ALL] = (V[INNER,SKIP1]+V[INNER,ZM1])/2 -
                            dt/(2*dy)*((V[INNER,SKIP1].Pow(2)/H[INNER,SKIP1] +
                            g/2*H[INNER,SKIP1].Pow(2)) -
                           (V[INNER,ZM1].Pow(2)/H[INNER,ZM1] +
                            g/2*H[INNER,ZM1].Pow(2)));

                // Second half step

                // height
                H[INNER, INNER] = H[INNER, INNER] -
                            (dt/dx)*(Ux[SKIP1,ZM1]-Ux[ZM1,ZM1]) -
                            (dt/dy)*(Vy[ZM1,SKIP1]-Vy[ZM1, ZM1]);

                // x momentum
                U[INNER, INNER] = U[INNER, INNER] -
                           (dt/dx)*((Ux[SKIP1,ZM1].Pow(2)/Hx[SKIP1,ZM1] +
                             g/2*Hx[SKIP1,ZM1].Pow(2)) -
                            (Ux[ZM1,ZM1].Pow(2)/Hx[ZM1,ZM1] +
                             g / 2 * Hx[ZM1, ZM1].Pow(2))) -
                             (dt/dy)*((Vy[ZM1,SKIP1] *
                                       Uy[ZM1,SKIP1]/Hy[ZM1,SKIP1]) -
                                        (Vy[ZM1,ZM1] *
                                         Uy[ZM1,ZM1]/Hy[ZM1,ZM1]));
                // y momentum
                V[R.Slice(1, -1),R.Slice(1, -1)] = V[R.Slice(1, -1),R.Slice(1, -1)] -
                               (dt/dx)*((Ux[SKIP1,ZM1] *
                                         Vx[SKIP1,ZM1]/Hx[SKIP1,ZM1]) -
                                        (Ux[ZM1,ZM1]*Vx[ZM1,ZM1]/Hx[ZM1,ZM1])) -
                                        (dt / dy) * ((Vy[ZM1, SKIP1].Pow(2) / Hy[ZM1, SKIP1] +
                                                  g / 2 * Hy[ZM1, SKIP1].Pow(2)) -
                                                 (Vy[ZM1, ZM1].Pow(2) / Hy[ZM1, ZM1] +
                                                  g / 2 * Hy[ZM1, ZM1].Pow(2)));
            }
            //Make sure we have the actual data and use it as a checksum
            return NumCIL.Float.Add.Reduce(NumCIL.Float.Add.Reduce(H / data.N)).Value[0];
        }
    }
}