Skip to content
aprod.c 41.1 KiB
Newer Older
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
#ifdef OMP
#include <omp.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include "mpi.h"
//#include "pardef.h"
#include "util.h"
// NOTA: vedere come e cosa cambia aumentando i valori nAstroPSolved, nAttP e nInstrConst facendoli non usuali ma molto grossi
void aprod(int mode, long int m, long int n, double *vVect, double *knownTerms,
           double *systemMatrix, long int *matrixIndex, int *instrCol, int *instrConstrIlung, struct comData comlsqr, time_t *ompSec)
    
    // Parallel definitions
    int myid, nproc;
    long int *mapNoss, *mapNcoeff;
    int nthreads, tid, ntasks;
    long **mapForThread;
    ///    struct comData *comlsqr;
    //
    FILE *fk, *fk0;
    double zero = 0.0;
    double sum, yi;
    long int l1, l2;
    long int i, i1;
    long int l, j, k;
    int i2 = 0, j2 = 0, j3 = 0, na = 0;
    int setBound[4];
    double localSum;
    int nAstroPSolved = comlsqr.nAstroPSolved;

    long localAstro = comlsqr.VrIdAstroPDim * nAstroPSolved;
    long localAstroMax = comlsqr.VrIdAstroPDimMax * nAstroPSolved;
    //  Initialize.
    myid = comlsqr.myid;
    nproc = comlsqr.nproc;
    mapNcoeff = comlsqr.mapNcoeff;
    mapNoss = comlsqr.mapNoss;
    nthreads = comlsqr.nthreads;
    ntasks= comlsqr.ntasks;
    
    mapForThread = comlsqr.mapForThread;
    int multMI = comlsqr.multMI;
    long nparam = comlsqr.parOss;
    short nAttAxes = comlsqr.nAttAxes;
    int numOfExtStar = comlsqr.numOfExtStar;
    int numOfBarStar = comlsqr.numOfBarStar;
    int numOfExtAttCol = comlsqr.numOfExtAttCol;
    long VrIdAstroPDimMax = comlsqr.VrIdAstroPDimMax;
    int startingAttColExtConstr = comlsqr.startingAttColExtConstr;
    int nOfElextObs = comlsqr.nOfElextObs;
    int nEqExtConstr = comlsqr.nEqExtConstr;
    int nOfElBarObs = comlsqr.nOfElBarObs;
    int nEqBarConstr = comlsqr.nEqBarConstr;
    int debugMode = comlsqr.debugMode;
    int nInstrPSolved = comlsqr.nInstrPSolved;
    int nOfInstrConstr = comlsqr.nOfInstrConstr;
    int nElemIC = comlsqr.nElemIC;
    int nAttP = comlsqr.nAttP;
    int nGlobP = comlsqr.nGlobP;

    setBound[0] = comlsqr.setBound[0];
    setBound[1] = comlsqr.setBound[1];
    setBound[2] = comlsqr.setBound[2];
    setBound[3] = comlsqr.setBound[3];
    long nDegFreedomAtt = comlsqr.nDegFreedomAtt;
    int nAttParAxis = comlsqr.nAttParAxis;
    long offsetAttParam = comlsqr.offsetAttParam;
    long offsetInstrParam = comlsqr.offsetInstrParam;
    long offsetGlobParam = comlsqr.offsetGlobParam;

    
    /* for(int n=0;n<ntasks;n++)
        comlsqr->mapForThread[n]=(long *) calloc(3,sizeof(long));
*/
    //nthreads = 1;
    tid = 0;
    FILE *fp1, *fp2;
    //        fp1=fopen("test1_aprod","w");
    //        fp2=fopen("test2_aprod","w");
    if (mode != 1 && mode != 2)
        printf("ERROR: Invalid mode=%d in aprod function\n", mode);
        exit(1);
    l1 = 0;
    l2 = 0;
    myid = comlsqr.myid;
    if (mode == 1)
        
        time_t startTime = time(NULL);
////        #pragma omp parallel private(myid, sum, k, l1, l2, l, j, tid, nthreads, i2, na) shared(mapNoss, instrCol, comlsqr, vVect, systemMatrix, matrixIndex, knownTerms, j2)
            myid = comlsqr.myid;
            
            
            if (comlsqr.itn == 1 && debugMode)
                printf("PE=%d Aprod1 OpenMP num of threads =%d from thread =%d icycle=%ld  comlsqr.itn=%d\n", myid, nthreads, tid, i, comlsqr.itn);
            long miValAstro = 0;
            long miValAtt = 0;
            long jstartAtt = 0;
            long jstartAstro = 0;
            long lset = 0;
            long offLocalAstro = 0;
            long offLocalAtt = 0;
            long offLocalInstr = 0; //Offset on Instruments
            long ixInstr = 0;
            int nInstrVal = 0;
            offLocalInstr = offsetInstrParam + (localAstroMax - offsetAttParam); //Offset on Instruments
            nInstrVal = nAstroPSolved + nAttP;
            offLocalAstro = comlsqr.mapStar[myid][0] * nAstroPSolved;               //Offset on my mstars
            offLocalAtt = localAstroMax - offsetAttParam;                           //Offset on attitude
            long offLocalGlob = offsetGlobParam + (localAstroMax - offsetAttParam); //Offset on GlobP
            int nGlobVal = nAstroPSolved + nAttP + nInstrPSolved;
            jstartAstro = miValAstro - offLocalAstro;
            //FV_ EDIT ompSs
          
            
            for(int nt=0; nt < ntasks; nt++ )
            {
                #pragma omp task label(mode1)
                {
                    for (long ix = mapForThread[nt][0]; ix < mapForThread[nt][2]; ix++)  //FUNZIONE DA FARE
                    {
/*================= INIZIO
                        sum = 0.;   // FARE UNA FUNZIONE CHIAMATA NEL CICLO FOR
                        /////////////////////////////////////////////////////
                        /// Mode 1 Astrometric Sect
                        if (nAstroPSolved)
                        {
                            lset = ix * nparam;
                            if (matrixIndex[multMI * ix] != miValAstro)
                            {
                                miValAstro = matrixIndex[multMI * ix];
                                jstartAstro = miValAstro - offLocalAstro;
                            }
                            for (long jx = jstartAstro; jx < jstartAstro + nAstroPSolved; jx++)
                            {
                                sum = sum + systemMatrix[lset] * vVect[jx];
                                lset++;
                            }
                        }
                        //////////////////////////////////////////////////////
                        /// Mode 1 Attitude Sect
                        if (nAttP)
                        {
                            lset = ix * nparam + nAstroPSolved;
                            miValAtt = matrixIndex[multMI * ix + (multMI - 1)];
                            for (int nax = 0; nax < nAttAxes; nax++)
                            {
                                jstartAtt = miValAtt + offLocalAtt + nax * nDegFreedomAtt;
                                for (long inpax = jstartAtt; inpax < jstartAtt + nAttParAxis; inpax++)
                                {
                                    sum = sum + systemMatrix[lset] * vVect[inpax];
                                    lset++;
                                }
                            }
                        }
                        //////////////////////////////////////////////////////
                        /// Mode 1 Instrument Sect
                        if (nInstrPSolved)
                        {
                            lset = ix * nparam + nInstrVal;
                            long iiVal = ix * nInstrPSolved;
                            for (int inInstr = 0; inInstr < nInstrPSolved; inInstr++)
                            {
                                ixInstr = offLocalInstr + instrCol[iiVal + inInstr];
                                sum = sum + systemMatrix[lset] * vVect[ixInstr];
                                lset++;
                            }
                        }
                        //////////////////////////////////////////////////////
                        /// Mode 1 Global sect
                        if (nGlobP)
                        {
                            lset = ix * nparam + nGlobVal;
                            for (long inGlob = offLocalGlob; inGlob < offLocalGlob + nGlobP; inGlob++)
                            {
                                sum = sum + systemMatrix[lset] * vVect[inGlob];
                                lset++;
                            }
                        }
                        //////////////////////////////////////////////////////
                       // knownTerms[ix] += sum;
================ FINE */
                        knownTerms[ix] += aprodM1Obs(ix,comlsqr, vVect, systemMatrix, matrixIndex, instrCol);
                    } //for ix
                }
            /// Mode 1 ExtConstr
            if (nEqExtConstr)
            {
                long offExtAtt;
                long offExtAttConstr = VrIdAstroPDimMax * nAstroPSolved + startingAttColExtConstr;
                long vVIx;
                long ktIx = mapNoss[myid];
                long offExtConstr;
                for (int iexc = 0; iexc < nEqExtConstr; iexc++)
                {
                    sum = 0.0;
                    offExtConstr = mapNcoeff[myid] + iexc * nOfElextObs;
                    //FV_ EDIT ompSs
                    //#pragma omp for
                    for (int j3 = 0; j3 < numOfExtStar * nAstroPSolved; j3++)
                        sum += systemMatrix[offExtConstr + j3] * vVect[j3];
                    for (int nax = 0; nax < nAttAxes; nax++)
                    {
                        offExtAtt = offExtConstr + numOfExtStar * nAstroPSolved + nax * numOfExtAttCol;
                        vVIx = offExtAttConstr + nax * nDegFreedomAtt;
                        //FV_ EDIT ompSs
                        //#pragma omp for
                        for (int j3 = 0; j3 < numOfExtAttCol; j3++)
                            sum += systemMatrix[offExtAtt + j3] * vVect[vVIx + j3];
                    }
                    //FV_ EDIT ompSs
                    //#pragma omp atomic
                    knownTerms[ktIx + iexc] += sum;
                } //for iexc
            }
            //////////////////////////////////////////////////////
            /// Mode 1 BarConstr
            if (nEqBarConstr)
            {
                long offBarConstr = mapNcoeff[myid] + nOfElextObs * nEqExtConstr;
                long offBarConstrIx;
                long ktIx = mapNoss[myid] + nEqExtConstr;
                for (int iexc = 0; iexc < nEqBarConstr; iexc++)
                {
                    sum = 0.0;
                    offBarConstrIx = offBarConstr + iexc * nOfElBarObs;
                    //FV_ EDIT ompSs
                    //#pragma omp for
                    for (int j3 = 0; j3 < numOfBarStar * nAstroPSolved; j3++)
                        sum += systemMatrix[offBarConstrIx + j3] * vVect[j3];
                    //FV_ EDIT ompSs
                    //#pragma omp atomic
                    knownTerms[ktIx + iexc] += sum;
                } //for iexc
            }
            //////////////////////////////////////////////////////
            /// Mode 1 InstrConstr
            if (nOfInstrConstr)
            {
                long offSetInstr = 0;
                long offSetInstrInc = 0;
                long offSetInstrInc1 = 0;
                long vVix = 0;
                long offvV = 0;
                long offSetInstrConstr = mapNcoeff[myid] + nOfElextObs * nEqExtConstr + nOfElBarObs * nEqBarConstr;
                long offSetInstrConstr1 = VrIdAstroPDimMax * nAstroPSolved + nDegFreedomAtt * nAttAxes;
                long ktIx = mapNoss[myid] + nEqExtConstr + nEqBarConstr;
                for (int i1 = myid; i1 < nOfInstrConstr; i1 = i1 + nproc)
                {
                    sum = 0.0;
                    offSetInstrInc = offSetInstrConstr;
                    offSetInstr = 0;
                    for (int m = 0; m < i1; m++)
                    {
                        offSetInstrInc += instrConstrIlung[m];
                        offSetInstr += instrConstrIlung[m];
                    }
                    offvV = mapNoss[myid] * nInstrPSolved + offSetInstr;
                    //FV_ EDIT ompSs
                    //#pragma omp for
                    for (int j3 = 0; j3 < instrConstrIlung[i1]; j3++)
                    {
                        vVix = instrCol[offvV + j3];
                        sum += systemMatrix[offSetInstrInc + j3] * vVect[offSetInstrConstr1 + vVix];
                    }
                    //FV_ EDIT ompSs
                    //#pragma omp atomic
                    knownTerms[ktIx + i1] += sum;
                }
            }
            //////////////////////////////////////////////////////
        } //pragma
        *ompSec += time(NULL) - startTime;
        if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
            printf("PE=%d AprodTiming: mode=1  OmpSec=%ld\n", myid, time(NULL) - startTime);
             
        
        #pragma omp taskwait
    else //mode==2
    {    //if(mode
        
        time_t startTime = time(NULL);
      
        int count = 0;
        myid = comlsqr.myid;
        printf("vVect[1]: %f:\n", vVect[1] );

            //qui lavorare
            /*
            #ifdef OMP
            tid = omp_get_thread_num();
            nthreads = omp_get_num_threads();
            #endif
            */
            /////////////////////////////////////////////////////
            /// Mode 2 Astrometric Sect
            if (nAstroPSolved)
            {
                long offLocalAstro = comlsqr.mapStar[myid][0] * nAstroPSolved;
                //                for (long ix = mapForThread[tid][0]; ix < mapForThread[tid][2]; ix++)
                long lset;
                long jstartAstro;
                               
                for(int nt=0; nt < ntasks; nt++ )
                {
		            #pragma omp target device (fpga) copy_deps	
                    #pragma omp task label(vVect_nAstroPSolved) //out(vVect[jstartAstro : jstartAstro + nAstroPSolved-1]) 
                    {
                        lset = mapForThread[nt][0] * nparam;
                   
                        double taskLocalSum;
                        jstartAstro = 0;
                        for (long ix = mapForThread[nt][0]; ix < mapForThread[nt][2]; ix++)  //QUI e altrove in for(long ix..) richiama una funzione
                        {
//======== INIZIO
                            lset=ix*nparam;
                            long miValAstro = 0;
                            if (matrixIndex[multMI * ix] != miValAstro)
                            {
                                miValAstro = matrixIndex[multMI * ix];
                                jstartAstro = miValAstro - offLocalAstro;
                            }
                            for (long jx = jstartAstro; jx < jstartAstro + nAstroPSolved; jx++)
                            {
                                
                                taskLocalSum = systemMatrix[lset] * knownTerms[ix];
                                //vVect[jx] = 0.000001;
                                if(jx == 1 && false)
                                {
                                    printf("nAstroPSolved nt:%d ix:%ld vVect[jx]:%f\n\n",nt,ix,vVect[jx] );
                                }
                                vVect[jx] += taskLocalSum;
                                lset++;
                            } //for jx
                            //lset += nparam - nAstroPSolved;  //**QUI attenzio0ne che nel caso di "funzioni avviate in parallelo succede casino! è modificata!! in lste messa all'inizio!
 //========== FINE
                            aprodM2AstroP(ix, nt, comlsqr, vVect, systemMatrix, matrixIndex, knownTerms);
                        } //for ix 
                    }
                  //  #pragma omp taskwait
                }
        
              
            }
            /////////////////////////////////////////////////////
       
       
        /////////////////////////////////////////////////////
        /// Mode 2 Attitude Sect
        if (nAttP)
        {
            long mix;
            long offj = (localAstroMax - offsetAttParam);
            long vVix;
            long lset;
          //  printf("ntask: %d\n",ntasks);
            
            for(int nt=0; nt < ntasks; nt++ )
            {
                // #pragma omp task shared(nparam,nInstrPSolved,matrixIndex) in (nt) commutative(vVect)
//                #pragma omp task commutative(vVect)
                #pragma omp task label(nAttP) commutative(vVect)//out(vVect[mix : mix + (nAttAxes-1) * nDegFreedomAtt])
                {
                    for (long ix = mapForThread[nt][0]; ix < mapForThread[nt][2]; ix++)
                    {
 //============= INIZIO
/*
                        lset = nparam * ix + nAstroPSolved;
                        mix = matrixIndex[multMI * ix + (multMI - 1)] + offj;
                        for (int ly = 0; ly < nAttAxes; ly++)
                        {
                            vVix = mix + ly * nDegFreedomAtt;
                            if(vVix == 1)
                            {
                                printf("nAttP nt %d %ld\n\n",nt,ix);
                            }
                            for (int lx = 0; lx < nAttParAxis; lx++)
                            {
                                localSum = systemMatrix[lset] * knownTerms[ix];
                               // #pragma omp atomic
                              //  vVect[vVix] = 0.000001;
                                vVect[vVix] += localSum;
                                lset++;
                                vVix++;
                            } //for lx
                        }     //for ly
//========== FINE
*/
                        aprodM2AttP(ix, nt, comlsqr, vVect, systemMatrix, matrixIndex, knownTerms);
                    } //for ix
                }
           // long lset;
            ///#pragma omp for
            
            
       
        }
        /////////////////////////////////////////////////////
        /// Mode 2 Instrument Sect
        
        printf("\t\t\t\tnOfInstrConstr: %d", nOfInstrConstr);
        if (nOfInstrConstr )
        {
            
            int offLset = nAstroPSolved + nAttP;
            long iix;
            long offj = offsetInstrParam + (localAstroMax - offsetAttParam);
            long vVix;
            for(int nt=0; nt < ntasks; nt++ )
            {
                // #pragma omp task shared(vVect,nparam,offLset,nInstrPSolved) in (nt)
               // #pragma omp task commutative(vVect)
              #pragma omp task label(nOfInstrConstr) commutative(vVect) //out(vVect[mix : mix + (nAttAxes-1) * nDegFreedomAtt])
                {
                    long lset;
                    for (long ix = mapForThread[nt][0]; ix < mapForThread[nt][2]; ix++)
                    {
//========== INIZIO
/*
                        lset = nparam * ix + offLset;
                        iix = ix * nInstrPSolved;
                        for (int ly = 0; ly < nInstrPSolved; ly++)
                        {
                            vVix = offj + instrCol[iix + ly];
                            if(vVix == 1)
                            {
                                printf("nOfInstrConstr nt %d %ld\n\n",nt,ix);
                            }
                            localSum = systemMatrix[lset] * knownTerms[ix];
                            //#pragma omp atomic
                            //vVect[vVix] = 0.000001;
                            vVect[vVix] += localSum;
                            lset++;
                        } //for ly
 //========== FINE
 */
                        aprodM2InstrP(ix, nt, comlsqr, vVect, systemMatrix, instrCol, knownTerms);
                    }     //for ix
                }
              //  #pragma omp taskwait
            }
           
        }
        /////////////////////////////////////////////////////
        ///} //pragma
        *ompSec += time(NULL) - startTime;
        if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
            printf("PE=%d AprodTiming: mode=2.1  OmpSec=%ld\n", myid, time(NULL) - startTime);
        /////////////////////////////////////////////////////
        /// Mode 2 Global Sect
        if (nGlobP )
        {
            time_t startTime_nGlobP = time(NULL);
            long lset;
            int offLset = nAstroPSolved + nAttP + nInstrPSolved;
            long offj = offsetGlobParam + (localAstroMax - offsetAttParam);
            long vVix;
            for (long ix = 0; ix < mapNoss[myid]; ix++)
            {
// ==================== INIZIO
                /*
                lset = nparam * ix + offLset;
                for (long ly = 0; ly < nGlobP; ly++)
                {
                    vVix = offj + ly;
                    if(vVix == 1)
                    {
                        printf("nGlobP nt %ld\n\n",ix);
                    }
                    localSum = systemMatrix[lset] * knownTerms[ix];
                    vVect[vVix] += localSum;
                    lset++;
                } //for ly
                
// ============ FINE
*/
               aprodM2GlobP(ix, ntasks, comlsqr, vVect, systemMatrix, knownTerms);
            }     //for ix
            if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
                printf("\tPE=%d AprodTiming: nGlobP  sec=%ld\n", myid, time(NULL) - startTime_nGlobP);
        }
        ////////////////////////////////////////////////////
        startTime = time(NULL);
        //FV_ EDIT ompSs
        //  #pragma omp parallel private(myid, yi, localSum, tid, nthreads, i2, j2, na) shared(mapNoss, vVect, systemMatrix, knownTerms, k, j3)
       
        
        {
            myid = comlsqr.myid;
            //FV_ EDIT ompSs
            /*
            //#ifdef OMP
            tid = omp_get_thread_num();
            nthreads = omp_get_num_threads();
            #endif
*/
            localSum = 0.0;
            
            
            printf("\t\t\t\nEqExtConstr: %d nEqBarConstr: %d nOfInstrConstr: %d", nEqExtConstr,nEqBarConstr,nOfInstrConstr);
            #pragma omp taskwait
            
            //////////////////////////////////////////////////////
            /// Mode 2 ExtConstr
            if (nEqExtConstr && true)
            {
                time_t startTime_nEqExtConstr = time(NULL);
                long offExtStarConstrEq;
                long off2;
                long off3;
                for (int ix = 0; ix < nEqExtConstr; ix++)
                { //stars
                    yi = knownTerms[mapNoss[myid] + ix];
                    offExtStarConstrEq = mapNcoeff[myid] + ix * nOfElextObs; //Star offset on the row ix (all the row)
                    //FV_ EDIT ompSs
                    //#pragma omp for
                    for (long yx = 0; yx < numOfExtStar; yx++)
                    {
                        off3 = yx * nAstroPSolved;
                        off2 = offExtStarConstrEq + off3;
                        for (int j2 = 0; j2 < nAstroPSolved; j2++)
                        {
                            localSum = systemMatrix[off2 + j2] * yi;
                            vVect[j2 + off3] += localSum;
                        }
                    }
                    //FV_ EDIT ompSs
                    //#pragma omp barrier
                } //for ix

                long offExtAttConstrEq;
                long offExtUnk;
                long off1 = VrIdAstroPDimMax * nAstroPSolved + startingAttColExtConstr;
                for (int ix = 0; ix < nEqExtConstr; ix++)
                { //att
                    yi = knownTerms[mapNoss[myid] + ix];
                    offExtAttConstrEq = mapNcoeff[myid] + ix * nOfElextObs; //Att offset on the row ix (all the row)
                    offExtAttConstrEq += numOfExtStar * nAstroPSolved;      //Att offset inside ix row
                    for (int nax = 0; nax < nAttAxes; nax++)
                    {
                        offExtUnk = off1 + nax * nDegFreedomAtt; // Att offset for Unk array on extConstr
                        off2 = offExtAttConstrEq + nax * numOfExtAttCol;
                        //FV_ EDIT ompSs
                        //#pragma omp for
                        for (int jx = 0; jx < numOfExtAttCol; jx++)
                        {
                            localSum = systemMatrix[off2 + jx] * yi;
                            vVect[offExtUnk + jx] += localSum;
                        }
                        //FV_ EDIT ompSs
                        //#pragma omp barrier
                    }
                }
                if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
                    printf("\tPE=%d AprodTiming: nEqExtConstr  sec=%ld\n", myid, time(NULL) - startTime_nEqExtConstr);
            }
            //////////////////////////////////////////////////////
            /// Mode 2 BarConstr
            if (nEqBarConstr && true)
            {
                time_t startTime_nEqBarConstr = time(NULL);
                localSum = 0.0;
                long off3;
                long off2;
                for (int ix = 0; ix < nEqBarConstr; ix++)
                { //stars
                    yi = knownTerms[mapNoss[myid] + nEqExtConstr + ix];
                    long offBarStarConstrEq = mapNcoeff[myid] + nEqExtConstr * nOfElextObs + ix * nOfElBarObs; //Star offset on the row i2 (all the row)
                    //FV_ EDIT ompSs
                    //#pragma omp for
                    for (long yx = 0; yx < numOfBarStar; yx++)
                    {
                        off3 = yx * nAstroPSolved;
                        off2 = offBarStarConstrEq + off3;
                        for (int j2 = 0; j2 < nAstroPSolved; j2++)
                        {
                            localSum = systemMatrix[off2 + j2] * yi;
                            vVect[j2 + off3] += localSum;
                        }
                    }
                    //FV_ EDIT ompSs
                    //#pragma omp barrier
                } //for i2

                if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
                    printf("\tPE=%d AprodTiming: nEqBarConstr  sec=%ld\n", myid, time(NULL) - startTime_nEqBarConstr);

            }
            //////////////////////////////////////////////////////
            /// Mode 2 InstrConstr
            if (nOfInstrConstr && true)
            {
                time_t startTime_nOfInstrConstr = time(NULL);

                localSum = 0.0;
                int offSetInstr;
                long off1;
                long offInstrUnk = VrIdAstroPDimMax * nAstroPSolved + nAttAxes * nDegFreedomAtt;
                long off2 = mapNoss[myid] + nEqExtConstr + nEqBarConstr;
                long off3 = mapNcoeff[myid] + nOfElextObs * nEqExtConstr + nOfElBarObs * nEqBarConstr;
                long off4 = mapNoss[myid] * nInstrPSolved;
                long off5;
                long off6;
                for (int k1 = myid; k1 < nOfInstrConstr; k1 = k1 + nproc)
                {
                    yi = knownTerms[off2 + k1];
                    offSetInstr = 0;
                    for (int m = 0; m < k1; m++)
                        offSetInstr += instrConstrIlung[m];

                    off1 = off3 + offSetInstr;
                    off5 = off4 + offSetInstr;
                    //FV_ EDIT ompSs
                    //#pragma omp for
                    for (int j = 0; j < instrConstrIlung[k1]; j++)
                    {
                        localSum = systemMatrix[off1 + j] * yi;
                        off6 = offInstrUnk + instrCol[off5 + j];
                        vVect[off6] += localSum;
                    }
                }
                if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
                    printf("\tPE=%d AprodTiming: nOfInstrConstr  sec=%ld\n", myid, time(NULL) - startTime_nOfInstrConstr);
            }
            ////////////
        } //pragma
        *ompSec += time(NULL) - startTime;
        if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
            printf("PE=%d AprodTiming: mode=2.2  OmpSec=%ld\n", myid, time(NULL) - startTime);
    } // else if(mode==2)
    
    
    
    /*
    else //mode==2
    {    //if(mode
        
        double **vVect_aux_AttP;
        double **vVect_aux_InstP ;

        int nElements = (comlsqr.nAttParam + comlsqr.nInstrParam);
        vVect_aux_AttP =(double **) calloc(omp_get_num_threads(),sizeof(double *));
        if (!vVect_aux_AttP)
            exit(err_malloc("vVect_aux_AttP", myid));
        vVect_aux_InstP =(double **) calloc(omp_get_num_threads(),sizeof(double *));
        if (!vVect_aux_InstP)
            exit(err_malloc("vVect_aux_InstP", myid));



        for (int i=0;i<omp_get_num_threads();i++)
        {
            vVect_aux_AttP [i]= (double *)calloc(nElements , sizeof(double));
            if (!vVect_aux_AttP[i])
                exit(err_malloc("vVect_aux_AttP[i]", myid));
            vVect_aux_InstP [i]=(double *)calloc(nElements , sizeof(double));
            if (!vVect_aux_InstP[i])
                exit(err_malloc("vVect_aux_InstP[i]", myid));

        }



        // Compute and write the total memory allocated
        if (myid == 0)
        {

            printf("vVect_aux_* %ld MB of memory allocated on each task.\n", ( (omp_get_num_threads() * nElements * sizeof(double)) / (1024 * 1024)  ));
        }


        
        

            time_t startTime = time(NULL);

            int count = 0;            
            int nt=-1;
            
            #pragma omp task 
            {

                int taskid = -1;
                #pragma omp critical
                {
                    nt++;
                    taskid = nt;
                }
                
                printf("taskid: %d",taskid);
                
                long offLocalAstro = comlsqr.mapStar[myid][0] * nAstroPSolved;

                double taskLocalSum;
                long jstartAstro = 0;

                long lset_astro = mapForThread[taskid][0] * nparam;
                

                long mix;
                long offj = (localAstroMax - offsetAttParam);
                long vVix;

                long lset_AttP;
                
                int offLset = nAstroPSolved + nAttP;
                long iix;
                long offj_InstrConstr = offsetInstrParam + (localAstroMax - offsetAttParam);
                long vVix_InstrConstr;
                long lset_InstrConstr;


                
                for (long ix = mapForThread[taskid][0]; ix < mapForThread[taskid][2]; ix++)
                {

                    if (nAstroPSolved)
                    {
                        
                        
                        
                        long miValAstro = 0;

                        if (matrixIndex[multMI * ix] != miValAstro)
                        {
                            miValAstro = matrixIndex[multMI * ix];
                            jstartAstro = miValAstro - offLocalAstro;
                        }

                        for (long jx = jstartAstro; jx < jstartAstro + nAstroPSolved; jx++)
                        {
                            taskLocalSum = systemMatrix[lset_astro] * knownTerms[ix];
                            vVect[jx] += taskLocalSum;

                            lset_astro++;
                        } //for jx
                       
                        
                        
                       
                        lset_astro += nparam - nAstroPSolved;
                        
                           
                    }
                    
                    
                    ///
                    if (nAttP)
                    {
            
                        
                        lset_AttP = nparam * ix + nAstroPSolved;
                        mix = matrixIndex[multMI * ix + (multMI - 1)] + offj;
                        for (int ly = 0; ly < nAttAxes; ly++)
                        {
                            vVix = mix + ly * nDegFreedomAtt - offsetAttParam;
                            for (int lx = 0; lx < nAttParAxis; lx++)
                            {
                                localSum = systemMatrix[lset_AttP] * knownTerms[ix];
                                vVect_aux_AttP[taskid][vVix] += localSum;
                              
                                lset_AttP++;
                                vVix++;
                            } //for lx
                        }     //for ly
                        
                    }

                    
                    ///
                    
                    ///
                    
                    if (nOfInstrConstr)
                    {

                    
                        lset_InstrConstr = nparam * ix + offLset;
                        iix = ix * nInstrPSolved;
                        for (int ly = 0; ly < nInstrPSolved; ly++)
                        {
                            vVix_InstrConstr = offj + instrCol[iix + ly] - offsetAttParam;
                            localSum = systemMatrix[lset_InstrConstr] * knownTerms[ix];
                            vVect_aux_InstP[taskid][vVix] += localSum;
                            
                           
                            
                            lset_InstrConstr++;
                        } //for ly
                    }

                    ////
                    
                    
                }
        
            }
           #pragma omp taskwait

           // long int z=offsetAttParam;

            for(int i=0; i < omp_get_num_threads();i++)
            {
                for(j=0; j < nElements;j++)
                {
                    if(j==nElements/2)
                        printf("vVect_aux_AttP[%d][%d]=%d\n",i,j,vVect_aux_AttP[i][j]);
                    else if(j==(nElements/2)+1)
                        printf("vVect_aux_AttP[%d][%d]=%d\n",i,j,vVect_aux_AttP[i][j]);
                    vVect[j+offsetAttParam]+= vVect_aux_AttP[i][j]+vVect_aux_InstP[i][j];
                    //z++;
                }
            }
        
        //printf("z: %ld\n",z);
        

//    #pragma omp task in(vVect)
   {

        /////////////////////////////////////////////////////
        ///} //pragma
        *ompSec += time(NULL) - startTime;
        if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
            printf("PE=%d AprodTiming: mode=2.1  OmpSec=%ld\n", myid, time(NULL) - startTime);
        /////////////////////////////////////////////////////
        /// Mode 2 Global Sect
        if (nGlobP )
        {
            time_t startTime_nGlobP = time(NULL);
            long lset;
            int offLset = nAstroPSolved + nAttP + nInstrPSolved;
            long offj = offsetGlobParam + (localAstroMax - offsetAttParam);
            long vVix;
            for (long ix = 0; ix < mapNoss[myid]; ix++)
            {
                lset = nparam * ix + offLset;
                for (long ly = 0; ly < nGlobP; ly++)
                {
                    vVix = offj + ly;
                    localSum = systemMatrix[lset] * knownTerms[ix];
                    vVect[vVix] += localSum;
                    lset++;
                } //for ly
            }     //for ix
        
            if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
                printf("\tPE=%d AprodTiming: nGlobP  sec=%ld\n", myid, time(NULL) - startTime_nGlobP);
        }
        ////////////////////////////////////////////////////
        startTime = time(NULL);
        //FV_ EDIT ompSs
        //  #pragma omp parallel private(myid, yi, localSum, tid, nthreads, i2, j2, na) shared(mapNoss, vVect, systemMatrix, knownTerms, k, j3)
        {
            myid = comlsqr.myid;
   
            localSum = 0.0;
            //////////////////////////////////////////////////////
            /// Mode 2 ExtConstr
            if (nEqExtConstr )
            {
                time_t startTime_nEqExtConstr = time(NULL);
                long offExtStarConstrEq;
                long off2;
                long off3;
                for (int ix = 0; ix < nEqExtConstr; ix++)
                { //stars
                    yi = knownTerms[mapNoss[myid] + ix];
                    offExtStarConstrEq = mapNcoeff[myid] + ix * nOfElextObs; //Star offset on the row ix (all the row)
                    //FV_ EDIT ompSs
                    //#pragma omp for
                    for (long yx = 0; yx < numOfExtStar; yx++)
                    {
                        off3 = yx * nAstroPSolved;
                        off2 = offExtStarConstrEq + off3;
                        for (int j2 = 0; j2 < nAstroPSolved; j2++)
                        {
                            localSum = systemMatrix[off2 + j2] * yi;
                            vVect[j2 + off3] += localSum;
                        }
                    }
                    //FV_ EDIT ompSs
                    //#pragma omp barrier
                } //for ix

                long offExtAttConstrEq;
                long offExtUnk;
                long off1 = VrIdAstroPDimMax * nAstroPSolved + startingAttColExtConstr;
                for (int ix = 0; ix < nEqExtConstr; ix++)
                { //att
                    yi = knownTerms[mapNoss[myid] + ix];
                    offExtAttConstrEq = mapNcoeff[myid] + ix * nOfElextObs; //Att offset on the row ix (all the row)
                    offExtAttConstrEq += numOfExtStar * nAstroPSolved;      //Att offset inside ix row
                    for (int nax = 0; nax < nAttAxes; nax++)
                    {
                        offExtUnk = off1 + nax * nDegFreedomAtt; // Att offset for Unk array on extConstr
                        off2 = offExtAttConstrEq + nax * numOfExtAttCol;
                        //FV_ EDIT ompSs
                        //#pragma omp for
                        for (int jx = 0; jx < numOfExtAttCol; jx++)
                        {
                            localSum = systemMatrix[off2 + jx] * yi;
                            vVect[offExtUnk + jx] += localSum;
                        }
                        //FV_ EDIT ompSs
                        //#pragma omp barrier
                    }
                }
                if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
                    printf("\tPE=%d AprodTiming: nEqExtConstr  sec=%ld\n", myid, time(NULL) - startTime_nEqExtConstr);
            }
            //////////////////////////////////////////////////////
            /// Mode 2 BarConstr
            if (nEqBarConstr )
            {
                time_t startTime_nEqBarConstr = time(NULL);
                localSum = 0.0;
                long off3;
                long off2;
                for (int ix = 0; ix < nEqBarConstr; ix++)
                { //stars
                    yi = knownTerms[mapNoss[myid] + nEqExtConstr + ix];
                    long offBarStarConstrEq = mapNcoeff[myid] + nEqExtConstr * nOfElextObs + ix * nOfElBarObs; //Star offset on the row i2 (all the row)
                    //FV_ EDIT ompSs
                    //#pragma omp for
                    for (long yx = 0; yx < numOfBarStar; yx++)
                    {
                        off3 = yx * nAstroPSolved;
                        off2 = offBarStarConstrEq + off3;
                        for (int j2 = 0; j2 < nAstroPSolved; j2++)
                        {
                            localSum = systemMatrix[off2 + j2] * yi;
                            vVect[j2 + off3] += localSum;
                        }
                    }
                    //FV_ EDIT ompSs
                    //#pragma omp barrier
                } //for i2
        
                if (comlsqr.itn <= 2 && (myid == 0 || myid == nproc - 1 || debugMode == 1))
                    printf("\tPE=%d AprodTiming: nEqBarConstr  sec=%ld\n", myid, time(NULL) - startTime_nEqBarConstr);

            }
            //////////////////////////////////////////////////////