/*
 * numerical integration example (OpenMP version):
 *
 * compute pi by approximating the area under the curve f(x) = 4 / (1 + x*x)
 * between 0 and 1.
 *
 * command-line argument gives number of "slices".
 *
 * number of threads can be specified with environment variable OMP_NUM_THREADS,
 * or defaults to something implementation-dependent (probably the number of 
 * processing elements).
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* best(?) available value of pi, to check computed approximation */
/* surprisingly, no library-defined constant for this in standard C! */
#define M_PI acos(-1.0)

#include <omp.h>		/* OpenMP header file */

#include "timer.h"

/* main program */
int main(int argc, char *argv[]) {

    /* process command-line arguments */

    if (argc < 2) {
        fprintf(stderr, "usage %s numslices\n", argv[0]);
        return EXIT_FAILURE;
    }
    char *endptr;
    long numslices = strtol(argv[1], &endptr, 10);
    if ((*endptr != '\0') || (numslices <= 0)) {
        fprintf(stderr, "numslices must be positive integer\n");
        return EXIT_FAILURE;
    }

    /* start of "interesting" code */

    double start_time, end_time;
    double x, pi;
    double sum = 0.0;
    double step = 1.0/(double) numslices;
    int nthreads;

    /* record start time */
    /* could use omp_get_wtime but get_time is what sequential code uses */
    start_time = get_time();

    /* do computation -- using all available threads */
    #pragma omp parallel
    {
        /* must put this inside "omp parallel" block, since otherwise
         * the library function just gets a result of 1 */
        #pragma omp master
        {
            nthreads = omp_get_num_threads();
        }
        #pragma omp for private(x) reduction(+:sum) schedule(static)
        for (int i=0; i < numslices; ++i) {
            x = (i+0.5)*step;
            sum += 4.0/(1.0+x*x);
        }
    }
    pi = step * sum;

    /* record end time */
    end_time = get_time();

    /* print results */
    printf("parallel program results with %d threads and %ld steps:\n", 
            nthreads, numslices);
    printf("computed pi = %g  (%17.15f)\n",pi, pi);
    printf("difference between computed pi and math.h M_PI = %g\n", 
            fabs(pi - M_PI));
    printf("time to compute = %g seconds\n", end_time - start_time);

    return EXIT_SUCCESS;
}