Black Scholes

Python Numpy

from __future__ import print_function
from benchpress import util
import numpy as np
import math
try:
    import numpy_force as npf
except ImportError:
    import numpy as npf

def model(N, B):
    """Construct pseudo-data representing price samples?"""
    # Create the outside of bohrium
    S = npf.random.random(N).astype(B.dtype)*4.0 + 58.0 # Price is between 58-62
    S = np.array(S)
    return S

def CND(X):
    """
    Cumulative normal distribution.
    Helper function used by BS(...).
    """

    (a1,a2,a3,a4,a5) = (0.31938153, -0.356563782, 1.781477937, -1.821255978, 1.330274429)
    L = np.absolute(X)
    K = 1.0 / (1.0 + 0.2316419 * L)
    w = 1.0 - 1.0 / math.sqrt(2*np.pi)*np.exp(-L*L/2.) * \
        (a1 * K + \
         a2 * (K**2) + \
         a3 * (K**3) + \
         a4 * (K**4) + \
         a5 * (K**5))

    mask    = X<0
    w       = w * ~mask + (1.0-w)*mask

    return w

def BS(CallPutFlag, S, X, T, r, v):
    """Black Sholes Function."""

    d1 = (np.log(S/X)+(r+v*v/2.)*T)/(v*math.sqrt(T))
    d2 = d1-v*math.sqrt(T)
    if CallPutFlag=='c':
        return S*CND(d1)-X*math.exp(-r*T)*CND(d2)
    else:
        return X*math.exp(-r*T)*CND(-d2)-S*CND(-d1)

def price(S, I, flag='c', X=65.0, dT=(1.0/365.0), r=0.08, v=0.3, visualize=False):
    """Model price as a function of time."""

    T   = dT
    Ps  = []
    N   = len(S)
    for i in range (I):
        P = np.sum(BS(flag, S, X, T, r, v)) / N
        Ps.append(P)
        T += dT
        if visualize:   #NB: this is only for experiments
            np.visualize(P, "3d", 0, 0.0, 10)
        util.Benchmark().flush()

    return Ps

def main():
    B = util.Benchmark()
    (N, I) = B.size

    if B.inputfn:
        S = B.load_array()
    else:
        S = model(N, B)                     # Construct pseudo-data

    if B.dumpinput:
        B.dump_arrays("black_scholes", {'input': S})

    B.start()
    R = price(S, I, visualize=B.visualize)  # Run the model
    B.stop()
    B.pprint()

    for i in range(len(R)):                 # Convert to one array
        if hasattr(R[i], "copy2numpy"):
            R[i] = R[i].copy2numpy()[()]
    if B.outputfn:
        B.tofile(B.outputfn, {'res':R})

    if B.verbose:
        print (R)

if __name__ == "__main__":
    main()

C99 Omp

#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <Random123/philox.h>
#include <bp_util.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

typedef union philox2x32_as_1x64 {
    philox2x32_ctr_t orig;
    uint64_t combined;
} philox2x32_as_1x64_t;

double* model(int64_t samples)
{
    const uint64_t key = 0;
    double* data = (double*)malloc(sizeof(double)*samples);

    #pragma omp parallel for
    for(int64_t count = 0; count<samples; ++count) {
        philox2x32_as_1x64_t counter;
        counter.combined = count;

        uint64_t x_raw = ((philox2x32_as_1x64_t)philox2x32(
          counter.orig,
          (philox2x32_key_t){ { key } }
        )).combined;
        double x = x_raw;
        x /= 18446744073709551616.000000;
        x *= 4.0;
        x += 58.0;          // Model between 58-62

        data[count] = x;
    }
    return data;
}

// The cumulative normal distribution function 
double cnd( double x )
{
    double L, K, w;

    double const a1 =  0.31938153,
                 a2 = -0.356563782,
                 a3 =  1.781477937,
                 a4 = -1.821255978,
                 a5 =  1.330274429;

    L = fabs(x);
    K = 1.0 / (1.0 + 0.2316419 * L);
    w = 1.0 - 1.0 / sqrt(2 * M_PI) * exp(-L *L / 2) * (\
        a1 * K         + \
        a2 * pow(K, 2) + \
        a3 * pow(K, 3) + \
        a4 * pow(K, 4) + \
        a5 * pow(K, 5)
    );

    return (x<0) ? 1.0 - w : w;
}

// The Black and Scholes (1973) Stock option formula
void pricing(double* market, double *prices,
             size_t samples, size_t iterations,
             char flag, double x, double d_t, double r, double v)
{
    double t = d_t;

    for(size_t iter=0; iter<iterations; ++iter) {
        double res = 0;
        #pragma omp parallel for reduction(+:res)
        for(size_t sample=0; sample<samples; ++sample) {
            double d1 = (log(market[sample]/x) + (r+v*v/2)*t) / (v*sqrt(t));
            double d2 = d1-v*sqrt(t);

            if (flag == 'c') {
                res += market[sample] *cnd(d1)-x * exp(-r*t)*cnd(d2);
            } else {
                res += x * exp(-r*t) * cnd(-d2) - market[sample] * cnd(-d1);
            }
        }
        t += d_t;
        prices[iter] = res / samples;
    }
}

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 2);
    if (bp.args.has_error) {
        return 0;
    }
    const int samples    = bp.args.sizes[0];
    const int iterations = bp.args.sizes[1];

    double* market = model(samples);        // Generate pseudo-market data
    double* prices = (double*)malloc(sizeof(double)*iterations); // Prices

    bp.timer_start();
    pricing(
        market, prices,
        samples, iterations,
        'c', 65.0, 1.0 / 365.0,
        0.08, 0.3
    );
    bp.timer_stop();
    
    bp.print("black_scholes(c99_omp)");
    if (bp.args.verbose) {                 // and values.
        printf("output: [ ");
        for(int i=0; i<iterations; ++i) {
            printf("%f ", prices[i]);
        }
        printf("]\n");
    }

    free(market);
    free(prices);
    return 0;
}

C99 Seq

#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <Random123/philox.h>
#include <bp_util.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

typedef union philox2x32_as_1x64 {
    philox2x32_ctr_t orig;
    uint64_t combined;
} philox2x32_as_1x64_t;

double* model(int64_t samples)
{
    const uint64_t key = 0;
    double* data = (double*)malloc(sizeof(double)*samples);

    for(int64_t count = 0; count<samples; ++count) {
        philox2x32_as_1x64_t counter;
        counter.combined = count;

        uint64_t x_raw = ((philox2x32_as_1x64_t)philox2x32(
          counter.orig,
          (philox2x32_key_t){ { key } }
        )).combined;
        double x = x_raw;
        x /= 18446744073709551616.000000;
        x *= 4.0;
        x += 58.0;          // Model between 58-62

        data[count] = x;
    }
    return data;
}

// The cumulative normal distribution function 
double cnd( double x )
{
    double L, K, w;

    double const a1 =  0.31938153,
                 a2 = -0.356563782,
                 a3 =  1.781477937,
                 a4 = -1.821255978,
                 a5 =  1.330274429;

    L = fabs(x);
    K = 1.0 / (1.0 + 0.2316419 * L);
    double K2 = K*K;
    double K4 = K2*K2;
    w = 1.0 - 1.0 / sqrt(2 * M_PI) * exp(-L *L / 2) * (\
        a1 * K         + \
        a2 * K2 + \
        a3 * K*K2 + \
        a4 * K4 + \
        a5 * K*K4
    );

    return (x<0) ? 1.0 - w : w;
}

// The Black and Scholes (1973) Stock option formula
void pricing(double* market, double *prices,
             size_t samples, size_t iterations,
             char flag, double x, double d_t, double r, double v)
{
    double t = d_t;

    for(size_t iter=0; iter<iterations; ++iter) {
        double res = 0;
        for(size_t sample=0; sample<samples; ++sample) {
            double d1 = (log(market[sample]/x) + (r+v*v/2)*t) / (v*sqrt(t));
            double d2 = d1-v*sqrt(t);

            if (flag == 'c') {
                res += market[sample] *cnd(d1)-x * exp(-r*t)*cnd(d2);
            } else {
                res += x * exp(-r*t) * cnd(-d2) - market[sample] * cnd(-d1);
            }
        }
        t += d_t;
        prices[iter] = res / samples;
    }
}

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 2);
    if (bp.args.has_error) {
        return 0;
    }
    const int samples    = bp.args.sizes[0];
    const int iterations = bp.args.sizes[1];

    double* market = model(samples);        // Generate pseudo-market data
    double* prices = (double*)malloc(sizeof(double)*iterations); // Prices

    bp.timer_start();
    pricing(
        market, prices,
        samples, iterations,
        'c', 65.0, 1.0 / 365.0,
        0.08, 0.3
    );
    bp.timer_stop();
    
    bp.print("black_scholes(c99_seq)");
    if (bp.args.verbose) {                 // and values.
        printf("output: [ ");
        for(int i=0; i<iterations; ++i) {
            printf("%f ", prices[i]);
        }
        printf("]\n");
    }

    free(market);
    free(prices);
    return 0;
}

Cpp11 Armadillo

/*
This file is part of Bohrium and Copyright (c) 2012 the Bohrium team:
http://bohrium.bitbucket.org

Bohrium is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as 
published by the Free Software Foundation, either version 3 
of the License, or (at your option) any later version.

Bohrium is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the 
GNU Lesser General Public License along with bohrium. 

If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <armadillo>
#include <bp_util.h>

using namespace std;
using namespace arma;

template <typename T>
Col<T> cnd(Col<T> x)
{
    size_t samples = x.n_elem;
    Col<T> l(samples), k(samples), w(samples);
    T a1 = 0.31938153,
      a2 =-0.356563782,
      a3 = 1.781477937,
      a4 =-1.821255978,
      a5 = 1.330274429,
      pp = 2.5066282746310002; // sqrt(2.0*PI)

    l = abs(x);
    k = 1.0 / (1.0 + 0.2316419 * l);

    w = 1.0 - 1.0 / pp * exp(-1.0*l%l/2.0) % \
        (a1*k + \
         a2*(pow(k,(T)2)) + \
         a3*(pow(k,(T)3)) + \
         a4*(pow(k,(T)4)) + \
         a5*(pow(k,(T)5)));

    uvec mask = x < 0.0;

    return w % (-mask) + (1.0-w) % mask;
}

template <typename T>
T* pricing(size_t samples, size_t iterations, char flag, T x, T d_t, T r, T v)
{
    T* p    = (T*)malloc(sizeof(T)*samples);    // Intermediate results
    T t     = d_t;                              // Initial delta

    Col<T> d1(samples), d2(samples), res(samples);
    Col<T> s = randu<Col<T> >(samples)*4.0 +58.0;      // Model between 58-62

    for(size_t i=0; i<iterations; i++) {
        d1 = (log(s/x) + (r+v*v/2.0)*t) / (v*sqrt(t));
        d2 = d1-v*sqrt(t);
        if (flag == 'c') {
            res = s % cnd<T>(d1) -x * exp(-r*t) * cnd<T>(d2);
        } else {
            res = x * exp(-r*t) * cnd<T>(-1.0*d2) - s*cnd<T>(-1.0*d1);
        }

        t += d_t;                               // Increment delta
        p[i] = sum(res) / (T)samples;           // Result from timestep
    }

    return p;
}

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 2);
    if (bp.args.has_error) {
        return 1;
    }
    const size_t samples    = bp.args.sizes[0];
    const size_t iterations = bp.args.sizes[1];

    bp.timer_start();
    double* prices = pricing(
        samples, iterations,
        'c', 65.0, 1.0 / 365.0,
        0.08, 0.3
    );
    bp.timer_stop();

    bp.print("black_scholes(cpp11_armadillo)");
    if (bp.args.verbose) {
        cout << ", \"output\": [";
        for(size_t i=0; i<iterations; i++) {
            cout << prices[i];
            if (iterations-1!=i) {
                cout << ", ";
            }
        }
        cout << "]" << endl;
    }

    free(prices);
    return 0;
}

Cpp11 Blitz

#include <blitz/array.h>
#include <random/uniform.h>
#include <bp_util.h>

using namespace blitz;
using namespace ranlib;

template <typename T>
Array<T, 1> cnd(Array<T, 1> & x)
{
    int samples = x.numElements();
    Array<T, 1> l(samples), k(samples), w(samples), res(samples);
    Array<bool, 1> mask(samples);
    T a1 = 0.31938153,
      a2 =-0.356563782,
      a3 = 1.781477937,
      a4 =-1.821255978,
      a5 = 1.330274429,
      pp = 2.5066282746310002; // sqrt(2.0*PI)

    l = abs(x);
    k = 1.0 / (1.0 + 0.2316419 * l);
    w = 1.0 - 1.0 / pp * exp(-1.0*l*l/2.0) * \
        (a1*k + \
         a2*(pow(k,(T)2)) + \
         a3*(pow(k,(T)3)) + \
         a4*(pow(k,(T)4)) + \
         a5*(pow(k,(T)5)));

    mask    = x < 0.0;
    res     = (w * cast<T>(!mask) + (1.0-w)* cast<T>(mask));
    return res;
}

template <typename T>
T* pricing(size_t samples, size_t iterations, char flag, T x, T d_t, T r, T v)
{
    Array<T, 1> d1(samples), d2(samples), res(samples);
    T* p    = (T*)malloc(sizeof(T)*samples);    // Intermediate results
    T t     = d_t;                              // Initial delta

    Array<T, 1> s(samples);                     // Initial uniform sampling
    Uniform<T> rand;                            // values between 58-62
    rand.seed((unsigned int)time(0));
    s = rand.random() *4.0 +58.0;

    for(size_t i=0; i<iterations; i++) {
        d1 = (log(s/x) + (r+v*v/2.0)*t) / (v*sqrt(t));
        d2 = d1-v*sqrt(t);
        if (flag == 'c') {
            res = s * cnd(d1) - x * exp(-r * t) * cnd(d2);
        } else {
            Array<T, 1> tmp1(samples), tmp2(samples);
            tmp1 = -1.0*d2;
            tmp2 = -1.0*d1;

            res = x * exp(-r*t) * cnd(tmp1) - s*cnd(tmp2);
        }
        t += d_t;                               // Increment delta
        p[i] = sum(res) / (T)samples;           // Result from timestep
    }

    return p;
}

//FLOP count: 2*s+i*(s*8+2*s*23) where s is samples and i is iterations

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 2);
    if (bp.args.has_error) {
        return 1;
    }
    const size_t samples    = bp.args.sizes[0];
    const size_t iterations = bp.args.sizes[1];

    bp.timer_start();
    double* prices = pricing(
        samples, iterations,
        'c', 65.0, 1.0 / 365.0,
        0.08, 0.3
    );
    bp.timer_stop();
    
    bp.print("black_scholes(cpp11_blitz)");
    if (bp.args.verbose) {
        cout << ", \"output\": [";
        for(size_t i=0; i<iterations; i++) {
            cout << prices[i];
            if (iterations-1!=i) {
                cout << ", ";
            }
        }
        cout << "]" << endl;
    }

    free(prices);
    return 0;
}

Cpp11 Bxx

/*
This file is part of Bohrium and Copyright (c) 2012 the Bohrium team:
http://bohrium.bitbucket.org

Bohrium is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3
of the License, or (at your option) any later version.

Bohrium is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the
GNU Lesser General Public License along with bohrium.

If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <vector>
#include <bxx/bohrium.hpp>
#include <bp_util.h>

using namespace std;
using namespace bxx;

template <typename T>
multi_array<T>& model(uint64_t samples)
{
    return randu<T>(samples)*4.0 + 58.0; // Model between 58-62
}

template <typename T>
multi_array<T>& cnd(multi_array<T>& x)
{
    multi_array<T> L, K, W;
    multi_array<bool> mask;
    T a1 = 0.31938153,
      a2 =-0.356563782,
      a3 = 1.781477937,
      a4 =-1.821255978,
      a5 = 1.330274429,
      PP = 2.5066282746310002; // sqrt(2.0*PI)

    L = abs(x);
    K = 1.0 / (1.0 + 0.2316419 * L);
    W = 1.0 - 1.0 / PP * exp(-1.0*L*L/2.0) * \
        (a1*K + \
         a2*(pow(K, (T)2)) + \
         a3*(pow(K, (T)3)) + \
         a4*(pow(K, (T)4)) + \
         a5*(pow(K, (T)5)));

    mask = x < 0.0;
    return W * as<T>(!mask) + (1.0-W)* as<T>(mask);
}

//FLOP count: 2*s+i*(s*8+2*s*26) where s is samples and i is iterations
template <typename T>
void pricing(multi_array<T>& market, multi_array<T>& prices,
             size_t samples, size_t iterations,
             char flag, T x, T d_t, T r, T v)
{
    T t = d_t;                                  // Initial delta

    for(size_t i=0; i<iterations; i++) {
        multi_array<T> d1, d2, res;
        d1 = (log(market/x) + (r+v*v/2.0)*t) / (v*sqrt(t));
        d2 = d1-v*sqrt(t);

        if (flag == 'c') {
            res = market * cnd(d1) - x * exp(-r * t) * cnd(d2);
        } else {
            res = x * exp(-r*t) * cnd(-1.0*d2) - market*cnd(-1.0*d1);
        }

        t += d_t;                               // Increment delta
        prices[i] = sum(res) / (T)samples;      // Result from timestep
    }
}

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 2);
    if (bp.args.has_error) {
        return 0;
    }
    const size_t samples    = bp.args.sizes[0];
    const size_t iterations = bp.args.sizes[1];

    multi_array<double> market;             // Generate pseudo-market data
    market = model<double>(samples);

    multi_array<double> prices;             // Storage for calculated prices
    prices = empty<double>(iterations);

    bp.timer_start();
    pricing<double>(
        market, prices,
        samples, iterations,
        'c', 65.0, 1.0 / 365.0,
        0.08, 0.3
    );
    bp.timer_stop();
    
    bp.print("black_scholes(cpp11_bxx)");
    if (bp.args.verbose) {                 // and values.
        cout << ", \"output\": " << prices << endl;
    }

    return 0;
}

Cpp11 Eigen

Error

There are issues with the implementation.

Compilation errors.

/*
This file is part of Bohrium and Copyright (c) 2012 the Bohrium team:
http://bohrium.bitbucket.org

Bohrium is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as 
published by the Free Software Foundation, either version 3 
of the License, or (at your option) any later version.

Bohrium is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the 
GNU Lesser General Public License along with bohrium. 

If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <Eigen/Dense>
#include <bp_util.h>

using namespace std;
using namespace Eigen;

template <typename T>
Array<T, Dynamic, 1>& cnd(const Array<T, Dynamic, 1>& x)
{
    size_t samples = x.size();
    Array<T, Dynamic, 1> l(samples), k(samples), w(samples);
    Array<T, Dynamic, 1> mask(samples);
    Array<bool, Dynamic, 1> mask_bool(samples);

    T a1 = 0.31938153,
      a2 =-0.356563782,
      a3 = 1.781477937,
      a4 =-1.821255978,
      a5 = 1.330274429,
      pp = 2.5066282746310002; // sqrt(2.0*PI)

    l = abs(x);
    //k = (T)1.0 / ((T)1.0 + (T)0.2316419 * l);
    //k = (T)0.5 / l / (T)0.5;
    k = l / (T)0.5;
    w = (T)1.0 - (T)1.0 / (T)pp * exp((T)-(T)1.0*l*l/(T)2.0) * \
        ((T)a1*k + \
         (T)a2*(pow(k,(T)2)) + \
         (T)a3*(pow(k,(T)3)) + \
         (T)a4*(pow(k,(T)4)) + \
         (T)a5*(pow(k,(T)5)));

    mask_bool= (x < (T)0.0);
    mask = mask_bool.cast<T>();
    return w * (mask) + ((T)1.0-w);
    //return w * (mask) + ((T)1.0-w)* mask;
    //return w * (!mask) + (1.0-w)* mask;
}

template <typename T>
T* pricing(size_t samples, size_t iterations, char flag, T x, T d_t, T r, T v)
{
    T* p    = (T*)malloc(sizeof(T)*samples);    // Intermediate results
    T t     = d_t;                              // Initial delta

    Array<T, Dynamic, 1> d1(samples), d2(samples), res(samples);
    Array<T, Dynamic, 1> s = Array<T, Dynamic, 1>::Random(samples)*4.0 +58.0;      // Model between 58-62

    for(size_t i=0; i<iterations; i++) {
        d1 = (log(s/x) + (r+v*v/2.0)*t) / (v*sqrt(t));
        d2 = d1-v*sqrt(t);

        if (flag == 'c') {

            size_t samples = x.size();
            Array<T, Dynamic, 1> l(samples), k(samples), w(samples);
            Array<T, Dynamic, 1> mask(samples);
            Array<bool, Dynamic, 1> mask_bool(samples);

            T a1 = 0.31938153,
              a2 =-0.356563782,
              a3 = 1.781477937,
              a4 =-1.821255978,
              a5 = 1.330274429,
              pp = 2.5066282746310002; // sqrt(2.0*PI)

            l = abs(x);
            //k = (T)1.0 / ((T)1.0 + (T)0.2316419 * l);
            //k = (T)0.5 / l / (T)0.5;
            k = l / (T)0.5;
            w = (T)1.0 - (T)1.0 / (T)pp * exp((T)-(T)1.0*l*l/(T)2.0) * \
                ((T)a1*k + \
                 (T)a2*(pow(k,(T)2)) + \
                 (T)a3*(pow(k,(T)3)) + \
                 (T)a4*(pow(k,(T)4)) + \
                 (T)a5*(pow(k,(T)5)));

            mask_bool= (x < (T)0.0);
            mask = mask_bool.cast<T>();
            res =  w * (!mask) + (1.0-w)* mask;

            //cnd<T>(d1);
            //res = s * cnd<T>(d1) -x * exp(-r*t) * cnd<T>(d2);
        } else {
/*
            Array<T, Dynamic, 1> tmp1(samples), tmp2(samples);
            tmp1 = -1.0*d2;
            tmp2 = -1.0*d1;
            res = x * exp(-r*t) * cnd<T>(tmp1) - s*cnd<T>(tmp2);
    */
        }

        t += d_t;                               // Increment delta
        p[i] = res.sum() / (T)samples;           // Result from timestep
    }

    return p;
}

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 2);    // Grab arguments
    if (bp.args.has_error) {
        return 1;
    }
    const size_t samples    = bp.args.sizes[0];
    const size_t iterations = bp.args.sizes[1];

    bp.timer_start();                                   // Start timer
    double* prices = pricing(                           // Run...
        samples, iterations,
        'c', 65.0, 1.0 / 365.0,
        0.08, 0.3
    );
    bp.timer_stop();                                    // Stop timer
    
    bp.print("black_scholes(cpp11_bxx)");               // Print restults..
    if (bp.args.verbose) {                              // ..verbosely.
        cout << ", \"output\": [";
        for(size_t i=0; i<iterations; i++) {
            cout << prices[i];
            if (iterations-1!=i) {
                cout << ", ";
            }
        }
        cout << "]" << endl;
    }
    free(prices);                                       // Cleanup

    return 0;
}

Cpp11 Omp

#include <iostream>
#include <sstream>
#include <string>
#include <cmath>
#include <Random123/philox.h>
#include <bp_util.h>

using namespace std;

typedef union philox2x32_as_1x64 {
    philox2x32_ctr_t orig;
    uint64_t combined;
} philox2x32_as_1x64_t;

double* model(int64_t samples)
{
    const uint64_t key = 0;
    double* data = (double*)malloc(sizeof(double)*samples);

    #pragma omp parallel for
    for(int64_t count = 0; count<samples; ++count) {
        philox2x32_as_1x64_t counter;
        counter.combined = count;

        philox2x32_as_1x64_t x_philox;

        x_philox.orig = philox2x32(
          counter.orig,
          (philox2x32_key_t){ { key } } 
        );

        double x = x_philox.combined;
        x /= 18446744073709551616.000000;
        x *= 4.0;
        x += 58.0;          // Model between 58-62

        data[count] = x;
    }
    return data;
}

// The cumulative normal distribution function 
double cnd( double x )
{
    double L, K, w;

    double const a1 =  0.31938153,
                 a2 = -0.356563782,
                 a3 =  1.781477937,
                 a4 = -1.821255978,
                 a5 =  1.330274429;

    L = fabs(x);
    K = 1.0 / (1.0 + 0.2316419 * L);
    w = 1.0 - 1.0 / sqrt(2 * M_PI) * exp(-L *L / 2) * (\
        a1 * K         + \
        a2 * pow(K, 2) + \
        a3 * pow(K, 3) + \
        a4 * pow(K, 4) + \
        a5 * pow(K, 5)
    );

    return (x<0) ? 1.0 - w : w;
}

// The Black and Scholes (1973) Stock option formula
void pricing(double* market, double *prices,
             size_t samples, size_t iterations,
             char flag, double x, double d_t, double r, double v)
{
    double t = d_t;

    for(size_t iter=0; iter<iterations; ++iter) {
        double res = 0;
        #pragma omp parallel for reduction(+:res)
        for(size_t sample=0; sample<samples; ++sample) {
            double d1 = (log(market[sample]/x) + (r+v*v/2)*t) / (v*sqrt(t));
            double d2 = d1-v*sqrt(t);

            if (flag == 'c') {
                res += market[sample] *cnd(d1)-x * exp(-r*t)*cnd(d2);
            } else {
                res += x * exp(-r*t) * cnd(-d2) - market[sample] * cnd(-d1);
            }
        }
        t += d_t;
        prices[iter] = res / samples;
    }
}

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 2);
    if (bp.args.has_error) {
        return 0;
    }
    const int samples    = bp.args.sizes[0];
    const int iterations = bp.args.sizes[1];

    double* market = model(samples);        // Generate pseudo-market data
    double* prices = (double*)malloc(sizeof(double)*iterations); // Prices

    bp.timer_start();
    pricing(
        market, prices,
        samples, iterations,
        'c', 65.0, 1.0 / 365.0,
        0.08, 0.3
    );
    bp.timer_stop();
    
    bp.print("black_scholes(cpp11_omp)");
    if (bp.args.verbose) {                 // and values.
        printf("output: [ ");
        for(int i=0; i<iterations; ++i) {
            printf("%f ", prices[i]);
        }
        printf("]\n");
    }

    free(market);
    free(prices);
    return 0;
}

Cpp11 Seq

#include <iostream>
#include <sstream>
#include <string>
#include <cmath>
#include <Random123/philox.h>
#include <bp_util.h>

using namespace std;

typedef union philox2x32_as_1x64 {
    philox2x32_ctr_t orig;
    uint64_t combined;
} philox2x32_as_1x64_t;

double* model(int64_t samples)
{
    const uint64_t key = 0;
    double* data = (double*)malloc(sizeof(double)*samples);

    for(int64_t count = 0; count<samples; ++count) {
        philox2x32_as_1x64_t counter;
        counter.combined = count;

        philox2x32_as_1x64_t x_philox;

        x_philox.orig = philox2x32(
          counter.orig,
          (philox2x32_key_t){ { key } } 
        );

        double x = x_philox.combined;
        x /= 18446744073709551616.000000;
        x *= 4.0;
        x += 58.0;          // Model between 58-62

        data[count] = x;
    }
    return data;
}

// The cumulative normal distribution function 
double cnd( double x )
{
    double L, K, w;

    double const a1 =  0.31938153,
                 a2 = -0.356563782,
                 a3 =  1.781477937,
                 a4 = -1.821255978,
                 a5 =  1.330274429;

    L = fabs(x);
    K = 1.0 / (1.0 + 0.2316419 * L);
    w = 1.0 - 1.0 / sqrt(2 * M_PI) * exp(-L *L / 2) * (\
        a1 * K         + \
        a2 * pow(K, 2) + \
        a3 * pow(K, 3) + \
        a4 * pow(K, 4) + \
        a5 * pow(K, 5)
    );

    return (x<0) ? 1.0 - w : w;
}

// The Black and Scholes (1973) Stock option formula
void pricing(double* market, double *prices,
             size_t samples, size_t iterations,
             char flag, double x, double d_t, double r, double v)
{
    double t = d_t;

    for(size_t iter=0; iter<iterations; ++iter) {
        double res = 0;
        for(size_t sample=0; sample<samples; ++sample) {
            double d1 = (log(market[sample]/x) + (r+v*v/2)*t) / (v*sqrt(t));
            double d2 = d1-v*sqrt(t);

            if (flag == 'c') {
                res += market[sample] *cnd(d1)-x * exp(-r*t)*cnd(d2);
            } else {
                res += x * exp(-r*t) * cnd(-d2) - market[sample] * cnd(-d1);
            }
        }
        t += d_t;
        prices[iter] = res / samples;
    }
}

int main(int argc, char* argv[])
{
    bp_util_type bp = bp_util_create(argc, argv, 2);
    if (bp.args.has_error) {
        return 0;
    }
    const int samples    = bp.args.sizes[0];
    const int iterations = bp.args.sizes[1];

    double* market = model(samples);        // Generate pseudo-market data
    double* prices = (double*)malloc(sizeof(double)*iterations); // Prices

    bp.timer_start();
    pricing(
        market, prices,
        samples, iterations,
        'c', 65.0, 1.0 / 365.0,
        0.08, 0.3
    );
    bp.timer_stop();
    
    bp.print("black_scholes(cpp11_seq)");
    if (bp.args.verbose) {                 // and values.
        printf("output: [ ");
        for(int i=0; i<iterations; ++i) {
            printf("%f ", prices[i]);
        }
        printf("]\n");
    }

    free(market);
    free(prices);
    return 0;
}

Csharp Numcil

#region Copyright
/*
This file is part of Bohrium and copyright (c) 2012 the Bohrium:
team <http://www.bh107.org>.

Bohrium is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as 
published by the Free Software Foundation, either version 3 
of the License, or (at your option) any later version.

Bohrium is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the 
GNU Lesser General Public License along with Bohrium. 

If not, see <http://www.gnu.org/licenses/>.
*/
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

using R = NumCIL.Range;

namespace blackscholes
{
	using NumCIL.Double;
	using DATA = System.Double;
	
    public static class BlackScholesSolverDouble
    {
        private static NdArray CND(NdArray X)
        {
            DATA a1 = 0.31938153f, a2 = -0.356563782f, a3 = 1.781477937f, a4 = -1.821255978f, a5 = 1.330274429f;
            var L = X.Abs();
            var K = 1.0f / (1.0f + 0.2316419f * L);
            var w = 1.0f - 1.0f / ((DATA)Math.Sqrt(2 * Math.PI)) * (-L * L / 2.0f).Exp() * (a1 * K + a2 * (K.Pow(2)) + a3 * (K.Pow(3)) + a4 * (K.Pow(4)) + a5 * (K.Pow(5)));
            
            var mask1 = (NdArray)(X < 0);
            var mask2 = (NdArray)(X >= 0);

            w = w * mask2 + (1.0f - w) * mask1;
            return w;
        }

        private static NdArray BlackScholes(bool callputflag, NdArray S, DATA X, DATA T, DATA r, DATA v)
        {
            var d1 = ((S / X).Log() + (r + v * v / 2.0f) * T) / (v * (DATA)Math.Sqrt(T));
            var d2 = d1 - v * (DATA)Math.Sqrt(T);

            if (callputflag)
                return S * CND(d1) - X * (DATA)Math.Exp(-r * T) * CND(d2);
            else
                return X * (DATA)Math.Exp(-r * T) * CND(-d2) - S * CND(-d1);
        }
        
        public static NdArray Create(long size)
        {
            var S = Generate.Random(size);
            S = S * 4.0f - 2.0f + 60.0f; //Price is 58-62
            return S;
        }
        
        public static DATA Sync(NdArray data)
        {
        	return data.Value[0];
        }

        public static DATA Solve(NdArray data, long years)
        {
            DATA X = 65.0f;
            DATA r = 0.08f;
            DATA v = 0.3f;

            DATA day=1.0f/years;
            DATA T = day;

            DATA total = 0.0f;

            for (long i = 0; i < years; i++)
            {
                total += Add.Reduce(BlackScholes(true, data, X, T, r, v)).Value[0] / data.Shape.Length;
                T += day;
            }

            return total;
        }
    }
}

namespace blackscholes
{
	using NumCIL.Float;
	using DATA = System.Single;
	
    public static class BlackScholesSolverSingle
    {
        private static NdArray CND(NdArray X)
        {
            DATA a1 = 0.31938153f, a2 = -0.356563782f, a3 = 1.781477937f, a4 = -1.821255978f, a5 = 1.330274429f;
            var L = X.Abs();
            var K = 1.0f / (1.0f + 0.2316419f * L);
            var w = 1.0f - 1.0f / ((DATA)Math.Sqrt(2 * Math.PI)) * (-L * L / 2.0f).Exp() * (a1 * K + a2 * (K.Pow(2)) + a3 * (K.Pow(3)) + a4 * (K.Pow(4)) + a5 * (K.Pow(5)));
            
            var mask1 = (NdArray)(X < 0);
            var mask2 = (NdArray)(X >= 0);

            w = w * mask2 + (1.0f - w) * mask1;
            return w;
        }

        private static NdArray BlackScholes(bool callputflag, NdArray S, DATA X, DATA T, DATA r, DATA v)
        {
            var d1 = ((S / X).Log() + (r + v * v / 2.0f) * T) / (v * (DATA)Math.Sqrt(T));
            var d2 = d1 - v * (DATA)Math.Sqrt(T);

            if (callputflag)
                return S * CND(d1) - X * (DATA)Math.Exp(-r * T) * CND(d2);
            else
                return X * (DATA)Math.Exp(-r * T) * CND(-d2) - S * CND(-d1);
        }
        
        public static NdArray Create(long size)
        {
            var S = Generate.Random(size);
            S = S * 4.0f - 2.0f + 60.0f; //Price is 58-62
            return S;
        }
        
        public static DATA Sync(NdArray data)
        {
        	return data.Value[0];
        }

        public static DATA Solve(NdArray data, long years)
        {
            DATA X = 65.0f;
            DATA r = 0.08f;
            DATA v = 0.3f;

            DATA day=1.0f/years;
            DATA T = day;

            DATA total = 0.0f;

            for (long i = 0; i < years; i++)
            {
                total += Add.Reduce(BlackScholes(true, data, X, T, r, v)).Value[0] / data.Shape.Length;
                T += day;
            }

            return total;
        }
    }
}