"""
Simple stats
"""

import numpy as np

class CumulativeOfData:
   """Structure to handle the empirical cumulative distribution of 1d data.

   Non-finite samples (NaN, +/-inf) are discarded. The remaining ``N``
   samples are sorted and the i-th sorted sample (i = 0..N-1) is assigned
   the cumulative value ``i/(N-1)``, so the smallest sample maps to 0 and
   the largest to 1. Values in between are obtained by linear interpolation.
   """
   @property
   def CDF(self):
      """the cdf sampled at the sorted finite scores (same length as ``x``)"""
      return self._eff

   @property
   def x(self):
      """the sorted finite scores"""
      return self._x

   @property
   def N(self):
      """the number of finite samples"""
      return self._N

   @property
   def z(self):
      """normalized x: z(x) = (x-x.min())/(x.max()-x.min())"""
      # _x is sorted, so its first/last elements are the min/max.
      return (self._x - self._x[0]) / (self._x[-1] - self._x[0])

   def __init__(self, X):
      """:X: 1d array-like of scores; non-finite entries are ignored"""
      X = np.asarray(X)
      finite = X[np.isfinite(X)]
      self._Nin = len(X)
      self._N = len(finite)
      # Sort only the finite samples: keeping NaN/inf here would make _x
      # longer than _eff and break every interpolation done below.
      self._x = np.sort(finite)
      # max(..., 1) avoids 0/0 when there is a single finite sample
      # (its cumulative value is then 0); for N >= 2 this is i/(N-1).
      self._eff = np.arange(self._N) / max(self._N - 1, 1)

   def cdf(self, x):
      """returns the cdf at a given score

      :x: the score (scalar or array-like)

      Scores below the smallest sample give 0, above the largest give 1.
      """
      return np.interp(x, self._x, self._eff, left=0., right=1.)

   def sampled_pdf(self, x, method='hist', eps=1e-3):
      """returns the sampled pdf for a list of x
         output: hh, xx

         :x: either a monotonically increasing sequence of scores, or an
             integer n, in which case n equally spaced scores spanning
             [x.min(), x.max()] are used
         :method: 'hist' (default) or 'tan'
         :eps: step of the finite differences used by 'tan', expressed as
               a fraction of the data range x.max()-x.min()

         if method = 'hist' the pdf is calculated using an histogram like
            function: the cdf increment over each interval of xx divided by
            the interval width, so hh has one element less than xx
         if method = 'tan' the pdf is calculated using the local tangent to
            the cdf, estimated at each element of xx by finite differences,
            so hh has the same length as xx

         Raises ValueError if method is not one of the two above.
      """
      if method not in ('hist', 'tan'):
         raise ValueError("Error: method must be either 'hist' or 'tan'")
      #
      # the evaluation grid is needed by both methods
      if np.isscalar(x):
         _x = np.linspace(self._x[0], self._x[-1], int(x))
      else:
         _x = np.sort(x)
      #
      if method == 'hist':
         hh = np.diff(self.cdf(_x)) / np.diff(_x)
         return hh, _x
      #
      # method == 'tan': combine the centered differences of step h and h/2
      h = (self._x[-1] - self._x[0]) * eps
      ydotF = (self.cdf(_x + h) - self.cdf(_x - h)) / (2 * h)
      ydotH = (self.cdf(_x + h / 2) - self.cdf(_x - h / 2)) / h
      ydot = 2 * ydotH - ydotF
      #
      # if the lower limit is below the minimum value in the cdf the
      # centered difference would sample the flat left tail: use forward
      # differences for the first point instead
      if _x[0] - h < self._x[0]:
         ydotF = (self.cdf(_x[0] + h) - self.cdf(_x[0])) / h
         ydotH = (self.cdf(_x[0] + h / 2) - self.cdf(_x[0])) / (h / 2)
         ydot[0] = 2 * ydotH - ydotF
      #
      # if the upper limit is above the maximum value of the cdf use
      # backward differences for the last point
      if _x[-1] + h > self._x[-1]:
         ydotF = (self.cdf(_x[-1]) - self.cdf(_x[-1] - h)) / h
         ydotH = (self.cdf(_x[-1]) - self.cdf(_x[-1] - h / 2)) / (h / 2)
         ydot[-1] = 2 * ydotH - ydotF
      #
      return ydot, _x

   def percentile(self, eff):
      """computes the percentile of samples for which x<=percentile(eff)

      :eff: the required percentile [0,1]
      if eff<0 the result is -infty
      if eff>1 the result is +infty
      """
      # np.inf rather than the np.infty alias, which NumPy 2.0 removed
      return np.interp(eff, self._eff, self._x, left=-np.inf, right=np.inf)
   
class correlation_matrix:
   """Formatted correlation matrix for a 2D table.

   Correlation is computed between the columns of the table, each column
   being identified by its label.
   """
   def __init__(self, Tab, Lbl):
      """ Tab = 2D table, one variable per column
          Lbl = Name of each column"""
      # pandas is imported here because it is needed by this class only,
      # so the rest of the module stays usable without it.
      import pandas as pd
      # Lbl must be passed by keyword: the second positional argument of
      # DataFrame is the row index, not the column names.
      self._pd = pd.DataFrame(Tab, columns=Lbl)

   @property
   def corr(self):
      """the column-by-column correlation matrix, as a labelled DataFrame"""
      return self._pd.corr()
