""" Yet Another extended csv manages

extended csv is csv with:

. comment lines
. parameter (constant parameters)
. name files
. declared separator
. BEGIN/END data marker

comment line:
^\s*#{.}\n

parameter
^\s*#!<name-variable>=<content>\n

BEGIN marker
^\s*#!BEGIN\s*\n

END marker
^\s*#!END\s*\n

standard fields used for formatting

declares the used separator
#!__sep__=<separator>

declares the used comment character
#!__comment__=<comment>

declares the python interpreter
#!__interpreter__=<python/c>

the separator can be a single character or a sequence
"""

from collections import OrderedDict

import pandas
class ya_extended_csv :
   """Reader/writer for "extended csv" files backed by pandas.

   An extended csv is a csv table preceded by comment lines (starting with
   ``tag_comment``) and variable lines (starting with ``tag_variable``) of
   the form ``<tag_variable><name><assign_operator><value>``.

   ``load`` parses the variable lines into :attr:`header`, reads the table
   with ``pandas.read_csv`` and copies the header variables into the
   ``attrs`` of the resulting DataFrame; ``save`` writes a DataFrame
   together with a generated header.
   """

   @property
   def header(self) :
      """OrderedDict of header variables filled by hdr_parser (None until then)."""
      return self._header
   #
   @property
   def skipUndef(self) :
      """If true, variable lines without the assign operator are ignored by hdr_parser."""
      return self._skipUndef
   @skipUndef.setter
   def skipUndef(self,this) :
      # accepts True, 1 or any value whose string form is 'true' (case insensitive)
      self._skipUndef= (this==True) or (this==1) or (str(this).lower().strip()=='true')
   #
   @property
   def sep(self) :
      """Default separator used by load and save when no explicit ``sep`` is given."""
      return self._sep
   #
   @sep.setter
   def sep(self,this) :
      self._sep=this
   #
   @property
   def reserved_tags(self) :
      """Variable names that save refuses to take from headerDict or pandas attrs."""
      return ['creator','created','creator_version','header_version','sep','tag_comment','tag_variable','filename','author'
              ,'!__index_label','!__sep__','!__tag_comment__','!__tag_variable__','!__assign_operator__','!__index_label__']
   #
   @property
   def doNotLoadTags(self) :
      """Header names that set_pandas_attrs does not copy into the DataFrame attrs."""
      return ['BEGIN','END','!__index_label','!__sep__','!__tag_comment__','!__tag_variable__','!__assign_operator__','!__index_label__']
   #
   def __init__(self,sep=',',tag_comment='#',tag_variable='#!',assign_operator='=',skipUndef=True,undefValue=None,index_column=0) :
      """
Instantiates an extended_csv manager with default syntax
syntax elements can be changed by modifying the parameters
   :keyword: sep separator, character, default ','
   :keyword: tag_comment, comment line marker, default '#'
   :keyword: tag_variable, variable line marker, default '#!'
   :keyword: assign_operator, marker for assignment, default '='
   :keyword: skipUndef, if True undefined variables are skipped (variables without the assign_operator), default True
   :keyword: undefValue, specifies value for undefined variables if skipUndef is False, default None
   :keyword: index_column, index or name of the column used for indexing the csv table, default 0
"""
      self.clear()
      # defined up front so that to_pandas() before any load/save returns None
      # instead of failing on a missing attribute
      self._csv=None
      self._fname=None
      self._sep=sep
      self._tag_comment=tag_comment
      self._tag_variable=tag_variable
      self._assign_operator=assign_operator
      self._skipUndef=skipUndef
      self._undefValue=undefValue
      self._index_column=index_column
      self.__formatter_message=' *** FORMATTED BY YA_EXTENDED_CSV - V 1.0 - M.Maris - 2020 Jan 20 - ***'
      self._verbose=False
   #
   def clear(self) :
      """Resets the parsed content and header to None."""
      self._content=None
      self._hdrl=None
      self._header=None
      return
   #
   @staticmethod
   def _as_lines(text) :
      """Splits a free-text block into lines.

      A string is split on newlines; any other iterable is taken as a list
      of strings, of which only the first line of each element is kept.
      None gives an empty list.
      """
      if text is None :
         return []
      if isinstance(text,str) :
         return text.split('\n')
      return [item.split('\n')[0] for item in text]
   #
   def _variable_line(self,name,value) :
      """Formats one header variable line: <tag_variable><name><assign_operator><value>."""
      return self._tag_variable+str(name)+self._assign_operator+str(value)
   #
   def hdr_parser(self) :
      """Parses the variable lines of the file self._fname into self._header.

      Every line starting (after stripping) with the variable tag is split at
      the FIRST assign operator into name and value, so values are allowed to
      contain the assign operator themselves. Lines without an assign operator
      are stored with the undefined value unless skipUndef is true.
      """
      self._header=OrderedDict()
      tag=self._tag_variable
      with open(self._fname,'r') as stream :
         for raw in stream :
            # padding keeps the comparison valid for lines shorter than the tag
            padded=raw.strip()+'    '
            if padded[0:len(tag)]!=tag :
               continue
            body=padded[len(tag):].strip()
            if self._assign_operator in body :
               name,_,value=body.partition(self._assign_operator)
               self._header[name.strip()]=value.strip()
            elif not self._skipUndef :
               self._header[body]=self._undefValue
   #
   def get_data(self,sep) :
      """Reads the table of self._fname into self._csv using pandas.read_csv.

      A one character separator is read with the 'c' engine, anything longer
      (treated as a regular expression by pandas) with the 'python' engine.
      Lines starting with the comment tag are skipped by pandas.
      """
      engine='c' if len(sep)==1  else 'python'
      if self._verbose : print(engine,sep)
      self._csv=pandas.read_csv(self._fname,sep=sep,comment=self._tag_comment,engine=engine,index_col=self._index_column)
   #
   def set_pandas_attrs(self) :
      """Copies the parsed header variables, except doNotLoadTags, into self._csv.attrs."""
      excluded=self.doNotLoadTags
      for name,value in self._header.items() :
         if name not in excluded :
            self._csv.attrs[name]=value
   #
   def to_pandas(self) :
      """Returns the current pandas table (None if nothing has been loaded or saved)."""
      return self._csv
   #
   def to_dict(self) :
      """Returns the current table as an OrderedDict mapping column name to its array of values."""
      out=OrderedDict()
      for k in self._csv.keys() :
         out[k]=self._csv[k].values
      return out
   #
   def load(self,fname,strip_spaces=True,verbose=False,sep=None) :
      r"""  load an extended csv into a pandas dataframe
           :parameter: fname is the filename
           :parameter: strip_spaces if True leading and trailing spaces in columns are removed (slow readout)
           :parameter: verbose if True verbose messages
           :parameter: sep separator, if None the separator of the manager is used

           When strip_spaces is True the separator is wrapped as \s*<sep>\s*
           and handed to pandas as a regular expression. A single character
           separator is escaped first, so that characters such as '|' or '.'
           keep their literal meaning; a longer separator is passed as it is.

           :return: the pandas dataframe, with header variables stored in attrs
      """
      import re
      self._verbose=verbose
      self._fname=fname
      self.hdr_parser()
      #
      _sep_used=self._sep if sep is None else sep
      if strip_spaces :
         if len(_sep_used)==1 :
            _sep_used=re.escape(_sep_used)
         _sep_used=r'\s*'+_sep_used+r'\s*'
      #
      self.get_data(_sep_used)
      self.set_pandas_attrs()
      return self.to_pandas()
   #
   def save(self,fname
            ,pdtbl
            ,sep=None
            ,creator=''
            ,creator_version=''
            ,header_version=''
            ,author=''
            ,content=None
            ,headerDict=None
            ,body=None
            ,add_pandas_attrs=True
            ,index_label='__index__') :
      r"""  saves a pandas dataframe into an extended csv

           :parameter: fname is the filename
           :parameter: pdtbl is the pandas table
           :parameter: sep separator, if None the separator of the manager is used
           :parameter: creator the code which created the file
           :parameter: creator_version the version of the code which created the file
           :parameter: header_version the version of the header
           :parameter: author the author of the file
           :parameter: content a string used as file content description (*)
           :parameter: headerDict a dictionary with variables to be stored in header (after conversion in string)
           :parameter: body a string to be placed at the end of the header  (*)
           :parameter: add_pandas_attrs if True content of pdtbl.attrs is converted to string and added to the header
           :parameter: index_label label for the index of the pandas table

           (*) it can be a single string, multilines content can be added either using a list of strings or dividing the string in lines by using the '\n' character
               (for a list of strings only the first line of each element is kept)

           Names listed in reserved_tags are not taken from headerDict nor from
           the pandas attrs. The whole file is assembled in memory and written
           to fname only at the end.
      """
      import time
      from io import StringIO
      self._csv=pdtbl
      cmt=self._tag_comment
      hdr=[cmt]
      #
      # the content is the initial, free text, part of the header
      hdr.extend(cmt+line for line in self._as_lines(content))
      #
      hdr.append(cmt)
      hdr.append(self._variable_line('filename',fname))
      #
      hdr.append(cmt)
      hdr.append(self._variable_line('author','' if author is None else author))
      #
      hdr.append(cmt)
      hdr.append(self._variable_line('creator','' if creator is None else creator))
      hdr.append(self._variable_line('creator_version','' if creator_version is None else creator_version))
      hdr.append(self._variable_line('created',time.asctime()))
      #
      hdr.append(cmt)
      hdr.append(self._variable_line('header_version','' if header_version is None else header_version))
      #
      hdr.append(cmt)
      reserved=self.reserved_tags
      if headerDict is not None :
         for k in headerDict.keys() :
            if k not in reserved :
               hdr.append(self._variable_line(k,headerDict[k]))
      #
      if add_pandas_attrs and len(self._csv.attrs.keys()) > 0 :
         hdr.append(cmt)
         hdr.append(cmt+' pandas attrs ')
         for k in self._csv.attrs.keys() :
            # attributes whose name contains the BEGIN/END words are dropped
            words=str(k).split()
            if k not in reserved and 'BEGIN' not in words and 'END' not in words :
               hdr.append(self._variable_line(k,self._csv.attrs[k]))
         hdr.append(cmt)
      #
      # the body is a free text placed at the end of the header
      hdr.extend(cmt+line for line in self._as_lines(body))
      #
      # surrounding blanks are removed from the separator, unless the separator
      # is made only of blanks (e.g. a tab), which would leave nothing to use
      _sep_given=self._sep if sep is None else sep
      _sep_used=_sep_given.strip()
      if _sep_used=='' :
         _sep_used=_sep_given
      #
      hdr.append(cmt)
      hdr.append(cmt+' ====================== ')
      hdr.append(cmt+' formatting description ')
      hdr.append(self._variable_line('!__index_label__',index_label))
      hdr.append(self._variable_line('!__sep__',_sep_used))
      hdr.append(self._variable_line('!__tag_comment__',self._tag_comment))
      hdr.append(self._variable_line('!__tag_variable__',self._tag_variable))
      hdr.append(self._variable_line('!__assign_operator__',self._assign_operator))
      hdr.append(cmt+' ====================== ')
      #
      hdr.append(cmt)
      hdr.append(cmt+self.__formatter_message)
      #
      hdr.append(cmt)
      # BEGIN/END markers follow the configured variable tag ('#!' by default)
      hdr.append(self._tag_variable+'BEGIN')
      #
      out=StringIO()
      out.write('\n'.join(hdr)+'\n')
      #
      pdtbl.to_csv(out,sep=_sep_used,index_label=index_label)
      out.write(self._tag_variable+'END\n')
      #
      with open(fname,'w') as stream :
         stream.write(out.getvalue())

if __name__=="__main__" :
   from collections import OrderedDict
   import numpy as np
   import sys

   print("CREATES A PANDAS TABLE TO BE SAVED")
   out=OrderedDict()
   out['a']=np.arange(10)
   out['b']=-np.arange(10)
   out['c']=np.array([' s'+str(k) for k in np.arange(10)])
   out['d']=np.array([' '+str(k) for k in np.arange(10)])

   out=pandas.DataFrame(out)

   print('===============')
   print('Simple csv file')
   ofile='/tmp/ya_extended_csv_simple.csv'
   print(ofile)

   ya_extended_csv().save(ofile,out)

   for k in open(ofile,'r') :
      print(k.strip())
   print('===============')

   print()
   print("Sets some attributes to the pandas table")
   out.attrs['mio']='mia'
   print()

   print('=======================')
   print('A more complex csv file')
   ofile='/tmp/ya_extended_csv.csv'
   print(ofile)

   ya_extended_csv().save(ofile,out,
                          creator=sys.argv[0],
                          creator_version='0.0 ',
                          author='M.Maris',
                          header_version='0.0',
                          headerDict={'pippo':10,'pallino':'ecco'},
                          content='Exaple of Table\nTo test Creation\n',
                          body='columns are a and b'
                          )

   for k in open(ofile,'r') :
      print(k.strip())
   print('=======================')

   print()

   print('==============================')
   print('Read the more complex csv file in a new pandas table')
   CSV=ya_extended_csv().load(ofile)

   print()
   print('The pandas table')
   print(CSV)

   print()
   print('The pandas table attributes from the csv file')
   print(CSV.attrs)
   print('==============================')

   print('=======================')
   print('A csv file using & as separator ')
   ofile='/tmp/ya_extended_csv_different.csv'
   ya_extended_csv().save(ofile,out,sep='&')
   print(ofile)

   for k in open(ofile,'r') :
      print(k.strip())

   CSV=ya_extended_csv().load(ofile,sep='&')
   print('=======================')

