/*
 * numerical integration example, as discussed in textbook:  
 *
 * compute pi by approximating the area under the curve f(x) = 4 / (1 + x*x)
 * between 0 and 1.
 *
 * parallel version using OpenMP with approach taken in MPI program ("SPMD")
 * and an array of partial sums.  possible performance problems because of
 * "false sharing" of array.
 */
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>		/* OpenMP header file */
#define NUM_STEPS 100000000

/* main program */
int main(int argc, char *argv[]) {

    double start_time, end_time;
    double pi;
    double sum = 0.0;
    double *partsum;
    double step = 1.0/(double) NUM_STEPS; 
    int nthreads;
    int k;

    /* record start time */
    start_time = omp_get_wtime();

    /* do computation -- using all available threads */
    #pragma omp parallel 
    {
        /* local variables -- or declare in main and make private */
        double x; 
        int i;
        int myid = omp_get_thread_num();

        /* get number of threads, allocate space for partial sums and init */
        #pragma omp single 
        {
            nthreads = omp_get_num_threads();
            partsum = malloc(sizeof(double) * nthreads);
            for (k = 0; k < nthreads; ++k) partsum[k] = 0.0;
        }

        for (i=myid; i < NUM_STEPS; i += nthreads) {
            x = (i+0.5)*step;
            partsum[myid] = partsum[myid] + 4.0/(1.0+x*x);
        }
    }

    /* sum partial results */
    for (k = 0; k < nthreads; ++k) sum += partsum[k];
    pi = step * sum;

    /* record end time */
    end_time = omp_get_wtime();

    /* print results */
    printf("parallel program results with %d threads:\n", nthreads);
    printf("pi = %g  (%17.15f)\n",pi, pi);
    printf("time to compute = %g seconds\n", end_time - start_time);

    return EXIT_SUCCESS;
}