diff --git a/phase_correction.cu b/phase_correction.cu index 95b2933df54ff129a26549b2928ed84715a02c05..4c26890ccb8f1e1205e0a5eb97f7dba8eda06463 100644 --- a/phase_correction.cu +++ b/phase_correction.cu @@ -108,12 +108,18 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in double * gridss_g; mmm=cudaMalloc(&gridss_g, 2*num_w_planes*xaxis*yaxis*sizeof(double)); + printf("CUDA ERROR 1 %s\n",cudaGetErrorString(mmm)); mmm=cudaMalloc(&image_real_g, xaxis*yaxis*sizeof(double)); + printf("CUDA ERROR 2 %s\n",cudaGetErrorString(mmm)); mmm=cudaMalloc(&image_imag_g, xaxis*yaxis*sizeof(double)); + printf("CUDA ERROR 3 %s\n",cudaGetErrorString(mmm)); mmm=cudaMemcpy(gridss_g, gridss, 2*num_w_planes*xaxis*yaxis*sizeof(double), cudaMemcpyHostToDevice); + printf("CUDA ERROR 4 %s\n",cudaGetErrorString(mmm)); mmm=cudaMemset(image_real_g, 0.0, xaxis*yaxis*sizeof(double)); + printf("CUDA ERROR 5 %s\n",cudaGetErrorString(mmm)); mmm=cudaMemset(image_imag_g, 0.0, xaxis*yaxis*sizeof(double)); + printf("CUDA ERROR 6 %s\n",cudaGetErrorString(mmm)); // call the phase correction kernel phase_g <<<Nbl,Nth>>> (xaxis, @@ -131,7 +137,9 @@ void phase_correction(double* gridss, double* image_real, double* image_imag, in nbucket); mmm = cudaMemcpy(image_real, image_real_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); + printf("CUDA ERROR 7 %s\n",cudaGetErrorString(mmm)); mmm = cudaMemcpy(image_imag, image_imag_g, xaxis*yaxis*sizeof(double), cudaMemcpyDeviceToHost); + printf("CUDA ERROR 8 %s\n",cudaGetErrorString(mmm)); #else diff --git a/w-stacking-fftw.c b/w-stacking-fftw.c index 5c6833d42208b37a512942e8b080f574370b86e5..0eda87e9b2dcef70714081129aff0fbeb468fae0 100644 --- a/w-stacking-fftw.c +++ b/w-stacking-fftw.c @@ -70,6 +70,7 @@ int main(int argc, char * argv[]) char logfile[30] = "run.log"; char extension[30] = ".txt"; char srank[4]; + char timingfile[30] = "timings.dat"; double * uu; double * vv; @@ -940,6 +941,17 @@ int main(int argc, char * argv[]) } } + if (rank == 0) + { + pFile = fopen (timingfile,"w"); + if (num_threads == 1) + { + fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time,kernel_time,compose_time,reduce_time,fftw_time,phase_time,tot_time); + } else { + fprintf(pFile, "%f %f %f %f %f %f %f\n",setup_time1,kernel_time1,compose_time1,reduce_time1,fftw_time1,phase_time1,tot_time1); + } + fclose(pFile); + } // Close MPI environment #ifdef USE_MPI