#ifdef OMP #include <omp.h> #endif #include <stdlib.h> #include <stdio.h> #include "mpi.h" //#include "pardef.h" #include "util.h" void aprod(int mode, long int m, long int n, double *vVect, double *knownTerms, double *systemMatrix,long int *matrixIndex, int *instrCol,int *instrConstrIlung, struct comData comlsqr,time_t *ompSec) { // Parallel definitions int myid,nproc; long int *mapNoss, *mapNcoeff; int nthreads, tid; long **mapForThread; /// struct comData *comlsqr; // FILE *fk,*fk0; double zero=0.0; double sum, yi; long int l1,l2; long int i, i1; long int l, j, k; int i2=0,j2=0,j3=0,na=0; int setBound[4]; double localSum; short nAstroPSolved=comlsqr.nAstroPSolved; long localAstro= comlsqr.VrIdAstroPDim*nAstroPSolved; long localAstroMax=comlsqr.VrIdAstroPDimMax*nAstroPSolved; // Initialize. myid=comlsqr.myid; nproc=comlsqr.nproc; mapNcoeff=comlsqr.mapNcoeff; mapNoss=comlsqr.mapNoss; nthreads=comlsqr.nthreads; mapForThread=comlsqr.mapForThread; int multMI=comlsqr.multMI; long nparam=comlsqr.parOss; short nAttAxes=comlsqr.nAttAxes; int numOfExtStar=comlsqr.numOfExtStar; int numOfBarStar=comlsqr.numOfBarStar; int numOfExtAttCol=comlsqr.numOfExtAttCol; long VrIdAstroPDimMax=comlsqr.VrIdAstroPDimMax; int startingAttColExtConstr=comlsqr.startingAttColExtConstr; int nOfElextObs=comlsqr.nOfElextObs; int nEqExtConstr=comlsqr.nEqExtConstr; int nOfElBarObs=comlsqr.nOfElBarObs; int nEqBarConstr=comlsqr.nEqBarConstr; int debugMode=comlsqr.debugMode; short nInstrPSolved=comlsqr.nInstrPSolved; int nOfInstrConstr=comlsqr.nOfInstrConstr; int nElemIC=comlsqr.nElemIC; short nAttP=comlsqr.nAttP; short nGlobP=comlsqr.nGlobP; setBound[0]=comlsqr.setBound[0]; setBound[1]=comlsqr.setBound[1]; setBound[2]=comlsqr.setBound[2]; setBound[3]=comlsqr.setBound[3]; long nDegFreedomAtt=comlsqr.nDegFreedomAtt; short nAttParAxis=comlsqr.nAttParAxis; long offsetAttParam=comlsqr.offsetAttParam; long offsetInstrParam=comlsqr.offsetInstrParam; long offsetGlobParam=comlsqr.offsetGlobParam; nthreads=1; tid=0; FILE *fp1,*fp2; // fp1=fopen("test1_aprod","w"); // fp2=fopen("test2_aprod","w"); if(mode!=1 && mode !=2) { printf("ERROR: Invalid mode=%d in aprod function\n",mode); exit(1); } l1=0; l2=0; myid=comlsqr.myid; if(mode==1) { time_t startTime=time(NULL); #pragma omp parallel private(myid, sum, k, l1, l2, l, j,tid,nthreads,i2,na) shared(mapNoss,instrCol,comlsqr,vVect,systemMatrix,matrixIndex,knownTerms,j2) { myid=comlsqr.myid; if(comlsqr.itn==1) { #ifdef OMP tid = omp_get_thread_num(); nthreads = omp_get_num_threads(); #endif } if(comlsqr.itn==1 && debugMode) printf("PE=%d Aprod1 OpenMP num of threads =%d from thread =%d icycle=%ld comlsqr.itn=%d\n",myid, nthreads,tid,i,comlsqr.itn); long miValAstro=0; long miValAtt=0; long jstartAtt=0; long jstartAstro=0; long lset=0; long offLocalAstro=0; long offLocalAtt=0; long offLocalInstr=0; //Offset on Instruments long ixInstr=0; int nInstrVal=0; offLocalInstr=offsetInstrParam+(localAstroMax-offsetAttParam); //Offset on Instruments nInstrVal=nAstroPSolved+nAttP; offLocalAstro=comlsqr.mapStar[myid][0]*nAstroPSolved; //Offset on my mstars offLocalAtt=localAstroMax-offsetAttParam; //Offset on attitude long offLocalGlob=offsetGlobParam+(localAstroMax-offsetAttParam); //Offset on GlobP int nGlobVal=nAstroPSolved+nAttP+nInstrPSolved; jstartAstro=miValAstro-offLocalAstro; #pragma omp for for(long ix=0;ix<mapNoss[myid];ix++){ sum=0.; ///////////////////////////////////////////////////// /// Mode 1 Astrometric Sect if (nAstroPSolved){ lset=ix*nparam; if(matrixIndex[multMI*ix]!= miValAstro){ miValAstro=matrixIndex[multMI*ix]; jstartAstro=miValAstro-offLocalAstro; } for(long jx=jstartAstro;jx<jstartAstro+nAstroPSolved;jx++){ sum=sum+systemMatrix[lset]*vVect[jx]; lset++; } } ////////////////////////////////////////////////////// /// Mode 1 Attitude Sect if (nAttP){ lset=ix*nparam+nAstroPSolved; miValAtt=matrixIndex[multMI*ix+(multMI-1)]; for(int nax=0;nax<nAttAxes;nax++){ jstartAtt=miValAtt+offLocalAtt+nax*nDegFreedomAtt; for(long inpax=jstartAtt;inpax<jstartAtt+nAttParAxis;inpax++) { sum=sum+systemMatrix[lset]*vVect[inpax]; lset++; } } } ////////////////////////////////////////////////////// /// Mode 1 Instrument Sect if (nInstrPSolved){ lset=ix*nparam+nInstrVal; long iiVal=ix*nInstrPSolved; for(int inInstr=0;inInstr<nInstrPSolved;inInstr++){ ixInstr=offLocalInstr+instrCol[iiVal+inInstr]; sum=sum+systemMatrix[lset]*vVect[ixInstr]; lset++; } } ////////////////////////////////////////////////////// /// Mode 1 Global sect if (nGlobP){ lset=ix*nparam+nGlobVal; for(long inGlob=offLocalGlob;inGlob<offLocalGlob+nGlobP;inGlob++){ sum=sum+systemMatrix[lset]*vVect[inGlob]; lset++; } } ////////////////////////////////////////////////////// knownTerms[ix]+=sum; } //for ix /// Mode 1 ExtConstr if(nEqExtConstr){ long offExtAtt; long offExtAttConstr=VrIdAstroPDimMax*nAstroPSolved+startingAttColExtConstr; long vVIx; long ktIx=mapNoss[myid]; long offExtConstr; for(int iexc=0;iexc<nEqExtConstr;iexc++ ){ sum=0.0; offExtConstr=mapNcoeff[myid]+iexc*nOfElextObs; #pragma omp for for(int j3=0;j3<numOfExtStar*nAstroPSolved;j3++) sum+=systemMatrix[offExtConstr+j3]*vVect[j3]; for(int nax=0;nax<nAttAxes;nax++){ offExtAtt=offExtConstr+numOfExtStar*nAstroPSolved+nax*numOfExtAttCol; vVIx=offExtAttConstr+nax*nDegFreedomAtt; #pragma omp for for(int j3=0;j3<numOfExtAttCol;j3++) sum+=systemMatrix[offExtAtt+j3]*vVect[vVIx+j3]; } #pragma omp atomic knownTerms[ktIx+iexc]+=sum; }//for iexc } ////////////////////////////////////////////////////// /// Mode 1 BarConstr if(nEqBarConstr){ long offBarConstr=mapNcoeff[myid]+nOfElextObs*nEqExtConstr; long offBarConstrIx; long ktIx=mapNoss[myid]+nEqExtConstr; for(int iexc=0;iexc<nEqBarConstr;iexc++ ){ sum=0.0; offBarConstrIx=offBarConstr+iexc*nOfElBarObs; #pragma omp for for(int j3=0;j3<numOfBarStar*nAstroPSolved;j3++) sum+=systemMatrix[offBarConstrIx+j3]*vVect[j3]; #pragma omp atomic knownTerms[ktIx+iexc]+=sum; }//for iexc } ////////////////////////////////////////////////////// /// Mode 1 InstrConstr if(nOfInstrConstr){ long offSetInstr=0; long offSetInstrInc=0; long offSetInstrInc1=0; long vVix=0; long offvV=0; long offSetInstrConstr=mapNcoeff[myid]+nOfElextObs*nEqExtConstr+nOfElBarObs*nEqBarConstr; long offSetInstrConstr1=VrIdAstroPDimMax*nAstroPSolved+nDegFreedomAtt*nAttAxes; long ktIx=mapNoss[myid]+nEqExtConstr+nEqBarConstr; for(int i1=myid;i1<nOfInstrConstr;i1=i1+nproc){ sum=0.0; offSetInstrInc=offSetInstrConstr; offSetInstr=0; for(int m=0;m<i1;m++){ offSetInstrInc+=instrConstrIlung[m]; offSetInstr+=instrConstrIlung[m]; } offvV=mapNoss[myid]*nInstrPSolved+offSetInstr; #pragma omp for for(int j3=0;j3<instrConstrIlung[i1];j3++){ vVix=instrCol[offvV+j3]; sum+=systemMatrix[offSetInstrInc+j3]*vVect[offSetInstrConstr1+vVix]; } #pragma omp atomic knownTerms[ktIx+i1]+=sum; } } ////////////////////////////////////////////////////// }//pragma *ompSec+=time(NULL)-startTime; if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1)) printf("PE=%d AprodTiming: mode=1 OmpSec=%ld\n",myid,time(NULL)-startTime); } else //mode==2 { //if(mode time_t startTime=time(NULL); #pragma omp parallel private(myid,yi, sum, k, l1, l2, l, j,localSum,i,tid,nthreads) shared(mapNoss,instrCol,comlsqr,vVect,systemMatrix,matrixIndex,knownTerms) { int count=0; myid=comlsqr.myid; #ifdef OMP tid = omp_get_thread_num(); nthreads = omp_get_num_threads(); #endif ///////////////////////////////////////////////////// /// Mode 2 Astrometric Sect if (nAstroPSolved){ long offLocalAstro=comlsqr.mapStar[myid][0]*nAstroPSolved; long jstartAstro=0; long lset=mapForThread[tid][0]*nparam; for(long ix=mapForThread[tid][0];ix<mapForThread[tid][2];ix++){ long miValAstro=0; if(matrixIndex[multMI*ix]!= miValAstro){ miValAstro=matrixIndex[multMI*ix]; jstartAstro=miValAstro-offLocalAstro; } for(long jx=jstartAstro;jx<jstartAstro+nAstroPSolved;jx++){ localSum=systemMatrix[lset]*knownTerms[ix]; vVect[jx]+=localSum; lset++; }//for jx lset+=nparam-nAstroPSolved; }//for ix } ///////////////////////////////////////////////////// } //pragma ///////////////////////////////////////////////////// /// Mode 2 Attitude Sect if (nAttP){ long lset; long mix; long offj=(localAstroMax-offsetAttParam); long vVix; ///#pragma omp for for(long ix=0;ix<mapNoss[myid];ix++){ lset=nparam*ix+nAstroPSolved; mix=matrixIndex[multMI*ix+(multMI-1)]+offj; for(int ly=0;ly<nAttAxes;ly++){ vVix=mix+ly*nDegFreedomAtt; for(int lx=0;lx<nAttParAxis;lx++){ localSum=systemMatrix[lset]*knownTerms[ix]; ///#pragma omp atomic vVect[vVix]+=localSum; lset++; vVix++; }//for lx } //for ly }//for ix } ///////////////////////////////////////////////////// /// Mode 2 Instrument Sect if(nOfInstrConstr){ long lset; int offLset=nAstroPSolved+nAttP; long iix; long offj=offsetInstrParam+(localAstroMax-offsetAttParam); long vVix; ///#pragma omp for for(long ix=0;ix<mapNoss[myid];ix++){ lset=nparam*ix+offLset; iix=ix*nInstrPSolved; for(int ly=0;ly<nInstrPSolved;ly++){ vVix=offj+instrCol[iix+ly]; localSum=systemMatrix[lset]*knownTerms[ix]; ///#pragma omp atomic vVect[vVix]+=localSum; lset++; }//for ly }//for ix } ///////////////////////////////////////////////////// ///} //pragma *ompSec+=time(NULL)-startTime; if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1) )printf("PE=%d AprodTiming: mode=2.1 OmpSec=%ld\n",myid,time(NULL)-startTime); ///////////////////////////////////////////////////// /// Mode 2 Global Sect if(nGlobP){ long lset; int offLset=nAstroPSolved+nAttP+nInstrPSolved; long offj=offsetGlobParam+(localAstroMax-offsetAttParam); long vVix; for(long ix=0;ix<mapNoss[myid];ix++){ lset=nparam*ix+offLset; for(long ly=0;ly<nGlobP;ly++){ vVix=offj+ly; localSum=systemMatrix[lset]*knownTerms[ix]; vVect[vVix]+=localSum; lset++; }//for ly }//for ix } //////////////////////////////////////////////////// startTime=time(NULL); #pragma omp parallel private(myid,yi,localSum,tid,nthreads,i2,j2,na) shared(mapNoss,vVect,systemMatrix,knownTerms,k,j3) { myid=comlsqr.myid; #ifdef OMP tid = omp_get_thread_num(); nthreads = omp_get_num_threads(); #endif localSum=0.0; ////////////////////////////////////////////////////// /// Mode 2 ExtConstr if(nEqExtConstr){ long offExtStarConstrEq; long off2; long off3; for(int ix=0;ix<nEqExtConstr;ix++ ){ //stars yi=knownTerms[mapNoss[myid]+ix]; offExtStarConstrEq=mapNcoeff[myid]+ix*nOfElextObs;//Star offset on the row ix (all the row) #pragma omp for for(long yx=0;yx<numOfExtStar;yx++){ off3=yx*nAstroPSolved; off2=offExtStarConstrEq+off3; for(int j2=0;j2<nAstroPSolved;j2++){ localSum= systemMatrix[off2+j2]*yi; vVect[j2+off3]+=localSum; } } #pragma omp barrier } //for ix long offExtAttConstrEq; long offExtUnk; long off1=VrIdAstroPDimMax*nAstroPSolved+startingAttColExtConstr; for(int ix=0;ix<nEqExtConstr;ix++ ){ //att yi=knownTerms[mapNoss[myid]+ix]; offExtAttConstrEq=mapNcoeff[myid]+ix*nOfElextObs; //Att offset on the row ix (all the row) offExtAttConstrEq+=numOfExtStar*nAstroPSolved; //Att offset inside ix row for(int nax=0;nax<nAttAxes;nax++){ offExtUnk=off1+nax*nDegFreedomAtt;// Att offset for Unk array on extConstr off2=offExtAttConstrEq+nax*numOfExtAttCol; #pragma omp for for(int jx=0;jx<numOfExtAttCol;jx++){ localSum= systemMatrix[off2+jx]*yi; vVect[offExtUnk+jx]+=localSum; } #pragma omp barrier } } } ////////////////////////////////////////////////////// /// Mode 2 BarConstr if(nEqBarConstr){ localSum=0.0; long off3; long off2; for(int ix=0;ix<nEqBarConstr;ix++ ){ //stars yi=knownTerms[mapNoss[myid]+nEqExtConstr+ix]; long offBarStarConstrEq=mapNcoeff[myid]+nEqExtConstr*nOfElextObs+ix*nOfElBarObs;//Star offset on the row i2 (all the row) #pragma omp for for(long yx=0;yx<numOfBarStar;yx++){ off3=yx*nAstroPSolved; off2=offBarStarConstrEq+off3; for(int j2=0;j2<nAstroPSolved;j2++){ localSum= systemMatrix[off2+j2]*yi; vVect[j2+off3]+=localSum; } } #pragma omp barrier } //for i2 } ////////////////////////////////////////////////////// /// Mode 2 InstrConstr if(nOfInstrConstr){ localSum=0.0; int offSetInstr; long off1; long offInstrUnk=VrIdAstroPDimMax*nAstroPSolved+nAttAxes*nDegFreedomAtt; long off2=mapNoss[myid]+nEqExtConstr+nEqBarConstr; long off3=mapNcoeff[myid]+nOfElextObs*nEqExtConstr+nOfElBarObs*nEqBarConstr; long off4=mapNoss[myid]*nInstrPSolved; long off5; long off6; for(int k1=myid;k1<nOfInstrConstr;k1=k1+nproc){ yi=knownTerms[off2+k1]; offSetInstr=0; for(int m=0;m<k1;m++) offSetInstr+=instrConstrIlung[m]; off1=off3+offSetInstr; off5=off4+offSetInstr; #pragma omp for for(int j=0;j<instrConstrIlung[k1];j++){ localSum=systemMatrix[off1+j]*yi; off6=offInstrUnk+instrCol[off5+j]; vVect[off6]+=localSum; } } } //////////// } //pragma *ompSec+=time(NULL)-startTime; if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1)) printf("PE=%d AprodTiming: mode=2.2 OmpSec=%ld\n",myid,time(NULL)-startTime); }// else if(mode==2) }