/****************************************************************************/
/*
 * Memory-hierarchy benchmark.
 *
 * Repeatedly applies a 4-way unrolled linear-combination kernel to two
 * arrays of doubles, growing the active working set by a constant factor
 * after each measurement, and reports the achieved MFLOPS for every
 * working-set size.  Results go to stdout and, as "<KB> <mflops>" pairs,
 * to the file "prog1.out".
 *
 * NOTE(review): the header names of the original #include lines were lost;
 * the three below are reconstructed from what the code actually uses.
 * Confirm whether a fourth header is needed elsewhere.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/****************************************************************************/
#define STOPCOUNT 19
/****************************************************************************/

/* Explicit prototype: <stdlib.h> hides drand48() in strict ISO C modes. */
double drand48(void);

/* Return the current wall-clock time in seconds (microsecond resolution). */
static double wall_seconds(void)
{
    struct timeval tv;

    /* The timezone argument is obsolete; POSIX says to pass NULL. */
    gettimeofday(&tv, NULL);
    return tv.tv_sec + tv.tv_usec / 1000000.0;
}

/*
 * Program entry point.  Command-line arguments are ignored.
 *
 * Returns 0 on success, EXIT_FAILURE if the output file cannot be opened
 * or the work arrays cannot be allocated.
 */
int main(int argc, char *argv[])
{
    const int maxsize = 1000000;   // 2 arrays of this size = 16MBytes
    const int unroll = 4;
    const int nsamples = 1000;     // large relative to tick, but not too large
    const double factor = 1.6;     // working-set growth per measurement
    // 4 statements per unrolled iteration, each 4 multiplies + 3 adds
    const double flops_per_loop = 28.0;
    int status = EXIT_FAILURE;
    int count = 1;
    int isize = 200;
    double *x = NULL;
    double *y = NULL;
    FILE *out;

    (void)argc;
    (void)argv;

    out = fopen("prog1.out", "w");
    if (out == NULL) {
        perror("prog1.out");
        return EXIT_FAILURE;
    }

    x = malloc(maxsize * sizeof *x);
    y = malloc(maxsize * sizeof *y);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "out of memory allocating work arrays\n");
        goto cleanup;
    }

    printf("Maxsize = %g KB\n", maxsize * sizeof(double) / 1024.0);

    // main loop, keep increasing active set size until as large as
    // allocated memory
    while (isize < (maxsize - unroll)) {
        // total number of flops per timing (note done as double)
        double nflops = flops_per_loop * nsamples * 1.0 * isize / unroll;
        // size in KB of the active part of both arrays together
        double kbytes = 2.0 * isize * 8.0 / 1024.0;
        double a, b, c, d;
        double starttime, elapsed, mflops;

        // initialize and get active set in cache
        for (int i = 0; i < isize + unroll; i++) {
            x[i] = drand48();
            y[i] = drand48();
        }
        a = drand48();
        b = drand48();
        c = drand48();
        d = drand48();

        // timing loop
        starttime = wall_seconds();
        for (int k = 0; k < nsamples; k++) {
            // actual calculation, unrolling to minimize indexing overhead
            for (int i = 0; i < isize; i += unroll) {
                int ip1 = i + 1;
                int ip2 = i + 2;
                int ip3 = i + 3;

                // Each row uses the coefficients a,b,c,d rotated by one.
                // The last row originally repeated d in the third term;
                // it is b here so the rotation is consistent.
                x[i]   = a*y[i] + b*y[ip1] + c*y[ip2] + d*y[ip3];
                x[ip1] = b*y[i] + c*y[ip1] + d*y[ip2] + a*y[ip3];
                x[ip2] = c*y[i] + d*y[ip1] + a*y[ip2] + b*y[ip3];
                x[ip3] = d*y[i] + a*y[ip1] + b*y[ip2] + c*y[ip3];
            }
        }
        elapsed = wall_seconds() - starttime;

        // calculate and print; guard against a zero-length interval
        mflops = (elapsed > 0.0) ? nflops * 1.0e-6 / elapsed : 0.0;
        printf("size(%d) = %g KB; mflops(%d) = %e;\n",
               count, kbytes, count, mflops);
        fprintf(out, "%g %g\n", kbytes, mflops);
        fflush(out);

        count++;
        if (count == STOPCOUNT)
            break;
        isize = (int)(factor * (double)isize);
    }
    status = 0;

cleanup:
    free(y);
    free(x);
    if (fclose(out) != 0) {
        perror("prog1.out");
        status = EXIT_FAILURE;
    }
    return status;
}
/****************************************************************************/