# include # include # include # include # include # include void norm_cplx (fftw_complex data[], ptrdiff_t L, ptrdiff_t M, ptrdiff_t N, ptrdiff_t local_L) { ptrdiff_t i, j, k; for (i = 0; i < local_L; ++i) for (j = 0; j < M; ++j) for (k = 0; k < N; ++k) { data[(i*M*N) + (j*N) + k][0] = data[(i*M*N) + (j*N) + k][0]/(L*M*N); data[(i*M*N) + (j*N) + k][1] = data[(i*M*N) + (j*N) + k][1]/(L*M*N); } } void check_results(fftw_complex x[], fftw_complex y[], ptrdiff_t L, ptrdiff_t M, ptrdiff_t N, ptrdiff_t local_L, int proc_id, int nproc) { ptrdiff_t i, j, k; double diffr, diffi, modulex; double prec = 1.0e-3; double cdiff; double maxdiff = 0.; double tmaxmod, tmaxdiff; double maxmod = FLT_MIN; for (i = 0; i < local_L; ++i) for (j = 0; j < M; ++j) for (k = 0; k < N; ++k) { modulex = sqrt(pow(x[(i*M*N) + (j*N) + k][0],2.)+pow(x[(i*M*N) + (j*N) + k][1],2.)); if (maxmod < modulex) { maxmod = modulex; } } if (nproc > 1) { MPI_Reduce(&maxmod, &tmaxmod, 1, MPI_DOUBLE,MPI_MAX, 0, MPI_COMM_WORLD); MPI_Bcast(&tmaxmod, 1, MPI_DOUBLE,0, MPI_COMM_WORLD); } else { tmaxmod = maxmod; } for (i = 0; i < local_L; ++i) for (j = 0; j < M; ++j) for (k = 0; k < N; ++k) { cdiff = 0.; diffr = fabs(x[(i*M*N) + (j*N) + k][0] - y[(i*M*N) + (j*N) + k][0])/tmaxmod; diffi = fabs(x[(i*M*N) + (j*N) + k][1] - y[(i*M*N) + (j*N) + k][1])/tmaxmod; if ( cdiff < diffr) { cdiff = diffr; } if( cdiff < diffi) { cdiff = diffi; } if (maxdiff < cdiff ) { maxdiff = cdiff; } } if (nproc > 1) { MPI_Reduce(&maxdiff, &tmaxdiff, 1, MPI_DOUBLE,MPI_MAX, 0, MPI_COMM_WORLD); MPI_Bcast(&tmaxdiff, 1, MPI_DOUBLE,0, MPI_COMM_WORLD); } else { tmaxdiff = maxdiff; } if (proc_id == 0) { if(maxdiff > prec) { printf("Results are incorrect! Error Max = %E \n", tmaxdiff); } else { printf("Results are correct! Error Max= %E \n", tmaxdiff); } } } int main(int argc, char **argv) { const ptrdiff_t L = 256, M = 256, N = 256; fftw_plan plan1, plan2; fftw_complex *datain, *dataout, *datanewout; ptrdiff_t alloc_local, local_L, local_L_start, i, j, k, ii; int proc_id, nproc; double xx, yy, zz, rr, r2; double t0, t1, t2, t3, t4, t5, t6, tplan1, tplan2, texec1, texec2, tinit, tnorm; const double amp = 0.25; /* Initialize */ MPI_Init(&argc, &argv); fftw_mpi_init(); MPI_Comm_size(MPI_COMM_WORLD,&nproc); MPI_Comm_rank(MPI_COMM_WORLD,&proc_id); /* Get local data size and allocate */ alloc_local = fftw_mpi_local_size_3d(L, M, N, MPI_COMM_WORLD, &local_L, &local_L_start); datain = fftw_alloc_complex(alloc_local); dataout = fftw_alloc_complex(alloc_local); datanewout = fftw_alloc_complex(alloc_local); /* Create plan for in-place forward DFT */ t0 = MPI_Wtime(); // plan1 = fftw_mpi_plan_dft_3d(L, M, N, datain, dataout, MPI_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE); plan1 = fftw_mpi_plan_dft_3d(L, M, N, datain, dataout, MPI_COMM_WORLD, FFTW_FORWARD, FFTW_MEASURE); // plan1 = fftw_mpi_plan_dft_3d(L, M, N, datain, dataout, MPI_COMM_WORLD, FFTW_FORWARD, FFTW_PATIENT); // plan1 = fftw_mpi_plan_dft_3d(L, M, N, datain, dataout, MPI_COMM_WORLD, FFTW_FORWARD, FFTW_EXHAUSTIVE); t1 = MPI_Wtime(); plan2 = fftw_mpi_plan_dft_3d(L, M, N, dataout, datanewout, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); // plan2 = fftw_mpi_plan_dft_3d(L, M, N, dataout, datanewout, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_MEASURE); // plan2 = fftw_mpi_plan_dft_3d(L, M, N, dataout, datanewout, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_PATIENT); // plan2 = fftw_mpi_plan_dft_3d(L, M, N, dataout, datanewout, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_EXHAUSTIVE); t2 = MPI_Wtime(); t2 = MPI_Wtime(); /* Initialize data to some function my_function(x,y) */ if (L == M && M == N) { for (i = 0; i < local_L; ++i) for (j = 0; j < M; ++j) for (k = 0; k < N; ++k) { ii = i + local_L_start; xx = ( (double) ii - (double)L/2 )/(double)L; yy = ( (double)j - (double)M/2 )/(double)M; zz = ( (double)k - (double)N/2 )/(double)M; r2 = pow(xx, 2) + pow(yy, 2) + pow(zz,2); rr = sqrt(r2); if (rr <= amp) { datain[(i*M*N) + (j*N) + k][0] = 1.; datain[(i*M*N) + (j*N) + k][1] = 0.; } else { datain[(i*M*N) + (j*N) + k][0] = 0.; datain[(i*M*N) + (j*N) + k][1] = 0.; } } } else { for (i = 0; i < local_L; ++i) for (j = 0; j < M; ++j) for (k = 0; k < N; ++k) { datain[(i*M*N) + (j*N) + k][0] = 0; datain[(i*M*N) + (j*N) + k][1] = 0; } } /* Compute transforms, in-place, as many times as desired */ t3 = MPI_Wtime(); fftw_execute(plan1); t4 = MPI_Wtime(); fftw_execute(plan2); t5 = MPI_Wtime(); norm_cplx(datanewout, L, M, N, local_L); t6 = MPI_Wtime(); /* Check results */ check_results(datain, datanewout, L, M, N, local_L, proc_id, nproc); /* Print results */ tplan1 = t1 - t0; tplan2 = t2 - t1; tinit = t3 - t2; texec1 = t4 - t3; texec2 = t5 - t4; tnorm = t6 - t5; if (proc_id == 0) { printf("******************************************\n"); printf("Time to initialize = %E\n", tinit); printf("FFT_FORWARD: T_plan = %E, T_exec = %E \n",tplan1,texec1); printf("FFT_BACKWARD: T_plan = %E, T_exec = %E \n",tplan2,texec2); printf("Time to normalize = %E\n", tnorm); printf("******************************************\n"); } /* deallocate and destroy plans */ fftw_destroy_plan(plan1); fftw_mpi_cleanup(); fftw_free ( datain ); fftw_free ( dataout ); MPI_Finalize(); }