#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "allvars.h"
#include "proto.h"

/*
 * Prepend a new node holding `data` to the singly linked list whose head
 * pointer is *headRef.
 *
 * headRef : address of the list head pointer; updated to point at the new node.
 * data    : value stored in the new node's `index` field.
 *
 * The new node's `next` is set to the previous head, so existing nodes are
 * kept behind it. If the node cannot be allocated the error is reported on
 * stderr and the process exits: silently dropping the node would leave the
 * list shorter than the caller expects.
 */
void Push(struct sectorlist** headRef, long data) {
     struct sectorlist* newNode = malloc(sizeof *newNode);
     if (newNode == NULL) {
          fprintf(stderr, "Push: out of memory while adding index %ld\n", data);
          exit(EXIT_FAILURE);
     }
     newNode->index = data;
     newNode->next = *headRef;
     *headRef = newNode;
}

/*
 * Run the gridding phase and record how long it took.
 *
 * Calls initialize_list() and then gridding_data(); in MPI builds all ranks
 * are synchronised with a barrier before the clocks are stopped.
 * The elapsed time is stored twice in `timing`:
 *   - process_time  : from clock()
 *   - process_time1 : from clock_gettime(CLOCK_MONOTONIC)
 * On exit the global `begin` timestamp is refreshed.
 */
void gridding(){

    if (rank == 0)
    {
        printf("GRIDDING DATA\n");
    }

    // Start both timers
    clock_gettime(CLOCK_MONOTONIC, &begin);
    start = clock();

    // Build the per-sector linked lists and histogram
    initialize_list();

    // Grid the data, one sector at a time
    gridding_data();

    #ifdef USE_MPI
        MPI_Barrier(MPI_COMM_WORLD);
    #endif

    // Stop both timers
    end = clock();
    clock_gettime(CLOCK_MONOTONIC, &finish);

    // clock()-based elapsed time
    timing.process_time = ((double) (end - start)) / CLOCKS_PER_SEC;

    // Monotonic elapsed time: whole seconds first, then the nanosecond part
    timing.process_time1 = (finish.tv_sec - begin.tv_sec);
    timing.process_time1 += (finish.tv_nsec - begin.tv_nsec) / 1000000000.0;

    // Restart the monotonic reference timestamp
    clock_gettime(CLOCK_MONOTONIC, &begin);

}

/*
 * Build the per-sector linked lists of measurement indices and the matching
 * histogram.
 *
 * Allocates and fills two globals:
 *   - sectorhead : nsectors+1 list heads. Every list ends with a sentinel node
 *                  whose index is -1; measurements are pushed in front of it.
 *   - histo_send : nsectors+1 counters, histo_send[s] being the number of
 *                  measurements pushed onto list s.
 *
 * Each measurement goes to the sector binphi = (int)(vv * nsectors). If it
 * lies closer than w_supporth to the upper or lower edge of that sector it is
 * also added to the neighbouring sector.
 *
 * Allocation failures are reported on stderr and terminate the process.
 */
void initialize_list(){

    // Array of list heads: the elements are pointers, so size it by the pointer type
    sectorhead = (struct sectorlist **) malloc((nsectors+1) * sizeof(struct sectorlist *));
    if (sectorhead == NULL)
    {
            fprintf(stderr, "initialize_list: out of memory allocating sector heads\n");
            exit(EXIT_FAILURE);
    }
    for (int isec=0; isec<=nsectors; isec++)
    {
            // Sentinel node marking the end of each list
            sectorhead[isec] = malloc(sizeof *sectorhead[isec]);
            if (sectorhead[isec] == NULL)
            {
                    fprintf(stderr, "initialize_list: out of memory allocating sentinel %d\n", isec);
                    exit(EXIT_FAILURE);
            }
            sectorhead[isec]->index = -1;
            sectorhead[isec]->next = NULL;
    }


    histo_send = (long*) calloc(nsectors+1,sizeof(long));
    if (histo_send == NULL)
    {
            fprintf(stderr, "initialize_list: out of memory allocating histogram\n");
            exit(EXIT_FAILURE);
    }

    for (long iphi = 0; iphi < metaData.Nmeasures; iphi++)
    {
           double vvh = data.vv[iphi];
           // NOTE(review): assumes 0 <= vv <= 1 so that binphi stays within [0, nsectors] - confirm
           int binphi = (int)(vvh*nsectors);
           // check if the point influence also neighboring slabs
           double updist = (double)((binphi+1)*yaxis)*dx - vvh;
           double downdist = vvh - (double)(binphi*yaxis)*dx;
           //
           histo_send[binphi]++;
           Push(&sectorhead[binphi],iphi);
           // Upper neighbour; binphi < nsectors keeps binphi+1 inside the nsectors+1 slots
           if (updist < w_supporth && updist >= 0.0 && binphi < nsectors)
           {
                   histo_send[binphi+1]++;
                   Push(&sectorhead[binphi+1],iphi);
           }
           // Lower neighbour; binphi > 0 keeps binphi-1 non-negative
           if (downdist < w_supporth && binphi > 0 && downdist >= 0.0)
           {
                   histo_send[binphi-1]++;
                   Push(&sectorhead[binphi-1],iphi);
           }
    }
    #ifdef PIPPO
        // Debug dump of every list entry
        struct sectorlist * current;
        long iiii = 0;
        for (int j=0; j<nsectors; j++)
        {
                current = sectorhead[j];
                iiii = 0;
                while (current->index != -1)
                {
                        printf("%d %d %ld %ld %ld\n",rank,j,iiii,histo_send[j],current->index);
                        current = current->next;
                        iiii++;
                }
        }
    #endif

    #ifdef VERBOSE
        for (int iii=0; iii<nsectors+1; iii++)printf("HISTO %d %d %ld\n",rank, iii, histo_send[iii]);
    #endif
}

/*
 * Grid the measurements sector by sector.
 *
 * Allocates the global work grids (gridss, gridss_w, gridss_real, gridss_img)
 * and the destination slab (grid), then for every sector:
 *   1. copies the measurements on that sector's linked list (sectorhead /
 *      histo_send, built by initialize_list()) into temporary arrays, with vv
 *      shifted by isector*shift;
 *   2. calls wstack() to accumulate them onto gridss;
 *   3. moves gridss to its destination: MPI_Reduce or MPI_Accumulate onto rank
 *      `isector` in MPI builds, a copy into gridtot otherwise;
 *   4. zeroes gridss and frees the temporary arrays.
 *
 * Timings for the three steps are accumulated in timing.compose_time*,
 * timing.kernel_time* and timing.reduce_time*. The global `resolution` is
 * also set and printed.
 */
void gridding_data(){

    // Create sector grid

    size_of_grid = 2*num_w_planes*xaxis*yaxis;
    gridss = (double*) calloc(size_of_grid,sizeof(double));
    gridss_w = (double*) calloc(size_of_grid,sizeof(double));
    gridss_real = (double*) calloc(size_of_grid/2,sizeof(double));
    gridss_img = (double*) calloc(size_of_grid/2,sizeof(double));
  
    // Create destination slab
    grid = (double*) calloc(size_of_grid,sizeof(double));
  
    // Create temporary global grid
    // NOTE(review): gridtot is local to this function and is never freed here,
    // while write_grided_data() also refers to a `gridtot` - confirm whether
    // this should assign a file-scope variable instead of declaring a local.
    #ifndef USE_MPI
        double * gridtot = (double*) calloc(2*grid_size_x*grid_size_y*num_w_planes,sizeof(double));
    #endif
    double shift = (double)(dx*yaxis);
    
    // Open the MPI Memory Window for the slab
    #ifdef USE_MPI
        MPI_Win_create(grid, size_of_grid*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &slabwin);
        MPI_Win_fence(0,slabwin);
    #endif

    #ifndef USE_MPI
       file.pFile1 = fopen (out.outfile1,"w");
    #endif

    timing.kernel_time = 0.0;
    timing.kernel_time1 = 0.0;
    timing.reduce_time = 0.0;
    timing.reduce_time1 = 0.0;
    timing.compose_time = 0.0;
    timing.compose_time1 = 0.0; 

    // Largest absolute uv extent. fabs() is used rather than the integer
    // abs(), which would truncate the values if they are floating-point
    // (presumably they are - confirm against allvars.h).
    double uv_extent = MAX(fabs(metaData.uvmin),fabs(metaData.uvmax));

    // calculate the resolution in radians
    resolution = 1.0/uv_extent;
    
    // calculate the resolution in arcsec 
    double resolution_asec = (3600.0*180.0)/uv_extent/PI;
    printf("RESOLUTION = %f rad, %f arcsec\n", resolution, resolution_asec);

    // Declare temporary arrays for the masking
    double * uus;
    double * vvs;
    double * wws;
    float * visreals;
    float * visimgs;
    float * weightss;
    long isector;
    for (long isector_count=0; isector_count<nsectors; isector_count++)
    {
        clock_gettime(CLOCK_MONOTONIC, &begink);
        startk = clock();
        // define local destination sector
        //isector = (isector_count+rank)%size;
        isector = isector_count;
        // allocate sector arrays 
        long Nsec = histo_send[isector];
        uus = (double*) malloc(Nsec*sizeof(double));
        vvs = (double*) malloc(Nsec*sizeof(double));
        wws = (double*) malloc(Nsec*sizeof(double));
        long Nweightss = Nsec*metaData.polarisations;
        long Nvissec = Nweightss*metaData.freq_per_chan;
        weightss = (float*) malloc(Nweightss*sizeof(float));
        visreals = (float*) malloc(Nvissec*sizeof(float));
        visimgs = (float*) malloc(Nvissec*sizeof(float));
       
        // select data for this sector
        long icount = 0;
        long ip = 0;
        long inu = 0;
        struct sectorlist * current;
        current = sectorhead[isector];

         // Walk the list until the sentinel node (index == -1)
         while (current->index != -1)
         {
              long ilocal = current->index;
              //double vvh = data.vv[ilocal];
              //int binphi = (int)(vvh*nsectors);
              //if (binphi == isector || boundary[ilocal] == isector) {
              uus[icount] = data.uu[ilocal];
              // vv is made relative to the start of this sector
              vvs[icount] = data.vv[ilocal]-isector*shift;
              wws[icount] = data.ww[ilocal];
              for (long ipol=0; ipol<metaData.polarisations; ipol++)
              {
                     weightss[ip] = data.weights[ilocal*metaData.polarisations+ipol];
                     ip++;
              }
              for (long ifreq=0; ifreq<metaData.polarisations*metaData.freq_per_chan; ifreq++)
              {
                     visreals[inu] = data.visreal[ilocal*metaData.polarisations*metaData.freq_per_chan+ifreq];
                     visimgs[inu] = data.visimg[ilocal*metaData.polarisations*metaData.freq_per_chan+ifreq];
                    //if(visimgs[inu]>1e10 || visimgs[inu]<-1e10)printf("%f %f %ld %ld %d %ld %ld\n",visreals[inu],visimgs[inu],inu,Nvissec,rank,ilocal*metaData.polarisations*metaData.freq_per_chan+ifreq,metaData.Nvis);
                     inu++;
              }
              icount++;
              current = current->next;
         }

         clock_gettime(CLOCK_MONOTONIC, &finishk);
         endk = clock();
         timing.compose_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
         // Whole seconds are added exactly once, then the nanosecond remainder
         timing.compose_time1 += (finishk.tv_sec - begink.tv_sec);
         timing.compose_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;

         #ifndef USE_MPI
               // Per-sector extrema, reset for every sector
               double uumin = 1e20;
               double vvmin = 1e20;
               double uumax = -1e20;
               double vvmax = -1e20;

               for (long ipart=0; ipart<Nsec; ipart++)
               {
                     uumin = MIN(uumin,uus[ipart]);
                     uumax = MAX(uumax,uus[ipart]);
                     vvmin = MIN(vvmin,vvs[ipart]);
                     vvmax = MAX(vvmax,vvs[ipart]);

                    // NOTE(review): writes to file.pFile, while this function only
                    // opens file.pFile1 - confirm file.pFile is opened elsewhere.
                    if(ipart%10 == 0)fprintf (file.pFile, "%ld %f %f %f\n",isector,uus[ipart],vvs[ipart]+isector*shift,wws[ipart]);
              }

              printf("UU, VV, min, max = %f %f %f %f\n", uumin, uumax, vvmin, vvmax);
        #endif

       // Make convolution on the grid

       #ifdef VERBOSE
          printf("Processing sector %ld\n",isector);
       #endif
       clock_gettime(CLOCK_MONOTONIC, &begink);
       startk = clock();

       wstack(num_w_planes,
              Nsec,
              metaData.freq_per_chan,
              metaData.polarisations,
              uus,
              vvs,
              wws,
              visreals,
              visimgs,
              weightss,
              dx,
              dw,
              w_support,
              xaxis,
              yaxis,
              gridss,
              param.num_threads);

      /* int z =0 ;
       * #pragma omp target map(to:test_i_gpu) map(from:z)
       * {
       *   int x; // only accessible from accelerator
       *     x = 2;
       *       z = x + test_i_gpu;
       *       }*/

       clock_gettime(CLOCK_MONOTONIC, &finishk);
       endk = clock();
       timing.kernel_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
       timing.kernel_time1 += (finishk.tv_sec - begink.tv_sec);
       timing.kernel_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
       #ifdef VERBOSE
          printf("Processed sector %ld\n",isector);
       #endif
       clock_gettime(CLOCK_MONOTONIC, &begink);
       startk = clock();

       //for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)printf("--> %f\n",gridss[iii]);
    
       #ifndef USE_MPI
          // Copy this sector's grid into its section of the global grid
          long stride = isector*2*xaxis*yaxis*num_w_planes;
          for (long iii=0; iii<2*xaxis*yaxis*num_w_planes; iii++)gridtot[stride+iii] = gridss[iii];
       #endif

       // Write grid in the corresponding remote slab
       #ifdef USE_MPI
          // The sector number is used directly as the destination rank
          int target_rank = (int)isector;
          //int target_rank = (int)(size-isector-1);
          #ifdef ONE_SIDE
               printf("One Side communication active\n");
               MPI_Win_lock(MPI_LOCK_SHARED,target_rank,0,slabwin);
               MPI_Accumulate(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,MPI_SUM,slabwin);
               MPI_Win_unlock(target_rank,slabwin);
               //MPI_Put(gridss,size_of_grid,MPI_DOUBLE,target_rank,0,size_of_grid,MPI_DOUBLE,slabwin);
          #else
               MPI_Reduce(gridss,grid,size_of_grid,MPI_DOUBLE,MPI_SUM,target_rank,MPI_COMM_WORLD);
          #endif //ONE_SIDE
       #endif //USE_MPI

       clock_gettime(CLOCK_MONOTONIC, &finishk);
       endk = clock();
       timing.reduce_time += ((double) (endk - startk)) / CLOCKS_PER_SEC;
       timing.reduce_time1 += (finishk.tv_sec - begink.tv_sec);
       timing.reduce_time1 += (finishk.tv_nsec - begink.tv_nsec) / 1000000000.0;
       // Go to next sector
       for (long inull=0; inull<2*num_w_planes*xaxis*yaxis; inull++)gridss[inull] = 0.0;

       // Deallocate all sector arrays
       free(uus);
       free(vvs);
       free(wws);
       free(weightss);
       free(visreals);
       free(visimgs);
      // End of loop over sector    
    }
    // Finalize MPI communication
    #ifdef USE_MPI
       MPI_Win_fence(0,slabwin);
    #endif  

    #ifndef USE_MPI
        // fopen() above may have failed; fclose(NULL) is undefined behaviour
        if (file.pFile1 != NULL) fclose(file.pFile1);
    #endif

   #ifdef USE_MPI
        MPI_Barrier(MPI_COMM_WORLD);
   #endif
}

/*
 * Write the gridded data to two binary files: real parts to out.outfile2 and
 * imaginary parts to out.outfile3. Compiled to a no-op unless WRITE_DATA is
 * defined.
 *
 * Only rank 0 writes.
 *  - MPI build: each sector's slab is fetched from rank `isector` through
 *    slabwin into gridss, split into gridss_real / gridss_img, and written at
 *    the file offset of its (iw, iv + isector*yaxis) rows.
 *  - non-MPI build: gridtot is streamed out element by element in
 *    (iw, iv, iu) order.
 *
 * If either output file cannot be opened, an error is printed and nothing is
 * written. In MPI builds every rank still reaches the closing
 * MPI_Win_fence().
 */
void write_grided_data()
{

   #ifdef WRITE_DATA
     // Write results
     if (rank == 0)
     {
        printf("WRITING GRIDDED DATA\n");
        file.pFilereal = fopen (out.outfile2,"wb");
        file.pFileimg = fopen (out.outfile3,"wb");
        if (file.pFilereal == NULL || file.pFileimg == NULL)
        {
           // Do not seek/write on a NULL stream; release whichever file did open
           fprintf(stderr, "write_grided_data: cannot open '%s' and/or '%s' for writing\n", out.outfile2, out.outfile3);
           if (file.pFilereal != NULL) fclose(file.pFilereal);
           if (file.pFileimg != NULL) fclose(file.pFileimg);
        }
        else
        {
        #ifdef USE_MPI
           for (int isector=0; isector<nsectors; isector++)
           {
              // Fetch the slab held by rank `isector`
              MPI_Win_lock(MPI_LOCK_SHARED,isector,0,slabwin);
              MPI_Get(gridss,size_of_grid,MPI_DOUBLE,isector,0,size_of_grid,MPI_DOUBLE,slabwin);
              MPI_Win_unlock(isector,slabwin);
              // De-interleave (real, imaginary) pairs
              for (long i=0; i<size_of_grid/2; i++)
              {
                      gridss_real[i] = gridss[2*i];
                      gridss_img[i] = gridss[2*i+1];
              }
              if (num_w_planes > 1)
              {
                      // Consecutive iu are contiguous both in memory and in the
                      // file, so each row of xaxis values is written in one call.
                      for (int iw=0; iw<num_w_planes; iw++)
                        for (int iv=0; iv<yaxis; iv++)
                          {
                               // Offsets are computed in long to avoid int overflow on large grids
                               long row_offset = (((long)iv + (long)isector*yaxis)*xaxis + (long)iw*grid_size_x*grid_size_y)*(long)sizeof(double);
                               long row_start = (long)iv*xaxis + (long)iw*xaxis*yaxis;
                               fseek(file.pFilereal, row_offset, SEEK_SET);
                               fwrite(&gridss_real[row_start], sizeof(double), (size_t)xaxis, file.pFilereal);
                               fseek(file.pFileimg, row_offset, SEEK_SET);
                               fwrite(&gridss_img[row_start], sizeof(double), (size_t)xaxis, file.pFileimg);
                               //double v_norm = sqrt(gridss[index]*gridss[index]+gridss[index+1]*gridss[index+1]);
                               //fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,isector*yaxis+iv,iw,gridss[index],gridss[index+1],v_norm);
                          }

              }
              else
              {
                      // Single w-plane: the whole slab is written in one call per file
                      for (int iw=0; iw<num_w_planes; iw++)
                      {
                          long global_index = ((long)xaxis*isector*yaxis + (long)iw*grid_size_x*grid_size_y)*(long)sizeof(double);
                          long index = (long)iw*xaxis*yaxis;
                          fseek(file.pFilereal, global_index, SEEK_SET);
                          fwrite(&gridss_real[index], sizeof(double), (size_t)xaxis*yaxis, file.pFilereal);
                          fseek(file.pFileimg, global_index, SEEK_SET);
                          fwrite(&gridss_img[index], sizeof(double), (size_t)xaxis*yaxis, file.pFileimg);
                     }
              }
          }
       #else
          // NOTE(review): relies on a `gridtot` visible at this scope - confirm it is
          // the buffer filled by gridding_data().
          for (int iw=0; iw<num_w_planes; iw++)
             for (int iv=0; iv<grid_size_y; iv++)
               for (int iu=0; iu<grid_size_x; iu++)
                {
                      long index = 2*(iu + iv*grid_size_x + iw*grid_size_x*grid_size_y);
                      fwrite(&gridtot[index], 1, sizeof(double), file.pFilereal);
                      fwrite(&gridtot[index+1], 1, sizeof(double), file.pFileimg);
                      //double v_norm = sqrt(gridtot[index]*gridtot[index]+gridtot[index+1]*gridtot[index+1]);
                      //fprintf (file.pFile, "%d %d %d %f %f %f\n", iu,iv,iw,gridtot[index],gridtot[index+1],v_norm);
                 }
        #endif
        fclose(file.pFilereal);
        fclose(file.pFileimg);
        }
     }

     #ifdef USE_MPI
        MPI_Win_fence(0,slabwin);
     #endif
   #endif //WRITE_DATA 

}
