diff --git a/README.md b/README.md index e48cf5e489c7957b6d60f63bc6114cf657606d5a..0a371d3230fba9317170bb0220bee74fa3896acd 100644 --- a/README.md +++ b/README.md @@ -1 +1,29 @@ -PY_ARTECS +Tools to handle ARTECS contents + +Dependencies: + numpy, scipy + pandas + pyvo + sudo pip install pyvo + +Example of session with TAP: +>import pyvo as vo +>tap_service = vo.dal.TAPService("http://archives.ia2.inaf.it/vo/tap/exo") +>tap_results = tap_service.search("SELECT top 1 exp_id, url FROM exo.EXO") +>print(tap_results) +>len(tap_results) +>tap_results.getrecord(0) + +To install pyvo: +>sudo pip install pyvo + +Example of session using Artecs.tap + +>import artecs +>atap=artecs.exop_pubblic_tap() +>atap.EXPLAIN() +>atap.keys() +>tab=atap.search('(0.7 <= SMA) and (SMA <=3.)') +>tab.FO_CONST.unique() +>tab.to_csv('/tmp/pippo.csv',sep=' ') +>MAP=atap.get_map(tab.URL[0]) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3748f4191c38406a851d61023a72c158e5200bae --- /dev/null +++ b/__init__.py @@ -0,0 +1,5 @@ +from .modelDb import * +from .artecs_map import * +from .tap import EXOP_TAP as exop_pubblic_tap + + diff --git a/artecs_map.py b/artecs_map.py new file mode 100644 index 0000000000000000000000000000000000000000..9da1df42910552eea6217b3a1b1dc9163568cf4e --- /dev/null +++ b/artecs_map.py @@ -0,0 +1,47 @@ +class artecs_map : + def __init__(self,filename) : + import numpy + try : + import pyfits + except : + from astropy.io import fits as pyfits + self.filename=filename + self.p=pyfits.open(filename) + self._key=[] + self._value=[] + self._descr=[] + mkd=True + for cc in self.p[0].header.cards : + if cc[0]!='COMMENT' : + self._key.append(cc[0]) + self._value.append(cc[1]) + self._descr.append(cc[2]) + else : + if mkd : + self._key.append(cc[0]) + self._value.append('\n'.join(self.p[0].header['COMMENT'])) + self._descr.append(None) + mkd=False + self.N=self.parameter('N') + self.NS=self.parameter('NS') + self.shape=(self.NS,self.N) + self.year=self.p[1].data['year'] + self.lat=self.p[1].data['lat'] + self.temp=self.p[1].data['temp'] + self.year.shape=self.shape + self.lat.shape=self.shape + self.temp.shape=self.shape + self.TMGLOB=self.temp.mean() + self.p.close() + self.lst_lat=self.lat[0] + self.lst_year=self.year[:,0].T + def keys(self) : + return self._key + def has_key(self,key) : + return key in self._key + def parameter(self,key) : + return self._value[self._key.index(key)] + def description(self,key) : + return self._descr[self._key.index(key)] + def bilinear_interpolation(self,lat,year) : + pass diff --git a/doc/py_artecs.tex b/doc/py_artecs.tex new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modelDb.py b/modelDb.py new file mode 100644 index 0000000000000000000000000000000000000000..822c3118027afd3d71d38a636dbf4a40b6326615 --- /dev/null +++ b/modelDb.py @@ -0,0 +1,326 @@ +class modelDb : + def __init__(self,project_name,project_path,csv_name,Verbose=True,csv_sep='!',csv_comment='#',csv_index_col='index',figures_path='png',filterBad=True,new=False,query=None) : + import time + from collections import OrderedDict + from matplotlib import pyplot as plt + import numpy as np + import pandas + import astropy + # + self.reset() + self.reset_plot_status() + # + self._project_name=project_name + self._project_path=project_path + self._csv_name=csv_name + self._figures_path=figures_path + # + if new : return + # + print() + print("Reading from csv file ",self._csv_name) + tic=time.time() + csv=pandas.read_csv(self._project_path+'/'+self._csv_name,sep=csv_sep,comment=csv_comment,index_col=csv_index_col) + if not 'TSTART' in csv.keys() : + csv['TSTART']=np.zeros(len(csv)) + print ("TSTART not found, replaced by a DUMMY") + csv['Natm']=csv['p_surface']/csv['GRAV'] + csv['iclass']=np.array([self.classification_string2numeric(k.strip().lower()) for k in csv.CLASS]) + csv['peri']=(1-csv.ECC)*csv.SMA + csv['aphe']=(1+csv.ECC)*csv.SMA + self.flagBad=np.array(csv['iclass']==self._string_class_notfound)*8+np.array(csv.iclass<0)*4+np.array(np.isnan(csv.TMGLOB))*2+np.array(csv.TSTART<0) + tic=time.time()-tic + print("%d lines in %e sec"%(len(csv),tic)) + # + if not 'SOURCE' in csv.keys() : + csv['SOURCE']=np.zeros(len(csv))-1 + # + if filterBad : + self.TABLE=csv[self.flagBad==0] + else : + self.TABLE=csv + # + if query != None and query!='' : + self.TABLE=self.TABLE.query(query) + # + if Verbose : + print() + print("In %s, containind %d models, the number of models with "%(csv_name,len(csv))) + print(" CLASS == UNKNOWN ",(csv.iclass==-200).sum()) + print(" CLASS == SNOWBALL ",(csv.iclass==1).sum()) + print(" CLASS == WATERBELT ",(csv.iclass==2).sum()) + print(" CLASS == WARM ",(csv.iclass==3).sum()) + print(" CLASS == WARM_HOT ",(csv.iclass==4).sum()) + print(" INVALID STRING IN CLASS ",((self.flagBad>=8)).sum()) + print() + for k in np.unique(csv['SOURCE']) : + print(" SOURCE == %d %d"%(k,(csv.SOURCE==0).sum())) + #print(" SOURCE == 1 ",(csv.SOURCE==1).sum()) + print() + print(" TMGLOB == NaN ",np.isnan(csv.TMGLOB).sum()) + print(" TSTART<0 ",(csv.TSTART<0).sum()) + print(" TMGLOB == NaN and TSTART<0 ",((csv.TSTART<0)&np.isnan(csv.TMGLOB)).sum()) + print(" TMGLOB == NaN and ZCD == NaN ",(np.isnan(csv.TMGLOB)&np.isnan(csv.zenithal_column_density)).sum()) + print(" TMGLOB == NaN and CLASS == UNDEFINED ",(np.isnan(csv.TMGLOB)&(csv.iclass==0)).sum()) + print() + # + del csv + def to_csv(self,outFile,COMMENT='') : + print("Writing csv into ",outFile) + open(outFile,'w').write('#\n#'+outFile+'\n#elaboration of '+self._csv_name+'\n#'+str(COMMENT)+'\n#!BEGIN\n') + open(outFile,'a').write(self.TABLE.to_csv(None,sep='!',index_column='index')) + open(outFile,'a').write('#!END\n') + print("written") + def reset(self) : + from collections import OrderedDict + for k in ['_project_name','_project_path','_csv_name','_figures_path','_project_name','_project_path','TABLE','flagBad','classTable'] : + self.__dict__[k]=None + self._numeric_classification=OrderedDict() + self._numeric_classification['---']=-400 + self._numeric_classification['unknown']=-200 + self._numeric_classification['snowball']=1 + self._numeric_classification['waterbelt']=2 + self._numeric_classification['warm']=3 + self._numeric_classification['warm_hot']=4 + self._string_class_notfound=-100000 + # + self._classification_labels=self._numeric_classification.keys() + self._ClassNames=['SNOWBALL','WATERBELT','WARM','WARM_HOT'] + self._ClassColor={'SNOWBALL':'b','WATERBELT':'g','WARM':'r','WARM_HOT':'y'} + self._ClassMarker={'SNOWBALL':'o:12','WATERBELT':'v:16','WARM':'d:8','WARM_HOT':'s:6'} + # + self.iceTOT_threshold=None + # + def __len__(self) : return len(self.TABLE) + # + def __getitem__(self,this) : return self.TABLE[this] + # + def __setitem__(self,this,that) : self.TABLE[this]=that + # + def copy(self) : + import copy + out=modelDb('slice of '+self._project_name,self._project_path,None,new=True) + out._project_name=self._project_name + out._project_path=self._project_path + out._csv_name=None + out._figures_path=self._figures_path + out.classTable=copy.deepcopy(self.classTable) + return out + # + def unique_values(self) : + import numpy as np + from collections import OrderedDict + import pandas + out=OrderedDict() + for k in self.keys() : + out[k]=np.unique(self.TABLE[k].values) + return out + # + def list_unique_values(self) : + out=self.unique_values() + for k in out.keys() : + print("%s (%d) ="%(k,len(out[k]))) + # + def classification_indexes(self,*arg,**karg) : + import numpy as np + from collections import OrderedDict + if len(arg) == 0 : + _arg=['CLASS','OBLIQ','ECC','SMA','PRESS','SOURCE','GEO','FO_CONST','PROT','P_CO2'] + else : + _arg=arg + ipfx=karg['index_pfx'] if karg.has_key('index_pfx') else 'i_' + for k in _arg : + quit=False + if not k in self.keys() : + print("%s not found "%k) + quit=True + if quit : + return + classTable=OrderedDict() + for k in arg : + u=self.unique(k) + iu=np.zeros(len(self)) + classTable[k]=OrderedDict() + for cu,u in enumerate(self.unique(k)) : + idx=np.where(np.array(self.TABLE[k]==u))[0] + iu[idx]=cu + classTable[k][u]=[cu,len(idx)] + self.TABLE[ipfx+k]=iu + self.classTable=classTable.copy() + return classTable + # + def show_classTable(self) : + for k in DB.classTable.keys() : + print('%10s : %3d = '%(k,len(DB.classTable[k])),end='') + for ij,j in enumerate(DB.classTable[k].keys()) : + if ij>0 : print ('|',end='') + print('%3d#'%ij,end='') + print('%12s'%str(j),':',end='') + print("%6d "%len(DB.query('i_'+k+'=='+str(ij))),end='') + print() + # + def unique(self,key) : + import numpy as np + return np.sort(self.TABLE[key].unique()) + # + def sort(self,sort_by) : + self.TABLE=self.TABLE.sort(sort_by) + # + def sorted_query(self,sort_by,qstr) : + out=self.copy() + if type(qstr) == type('') : + out.TABLE=self.TABLE.query(qstr) + else : + out.TABLE=self.TABLE[qstr] + out.TABLE=out.TABLE.sort(sort_by) + return out + # + def query(self,qstr) : + out=self.copy() + if type(qstr) == type('') : + out.TABLE=self.TABLE.query(qstr) + else : + out.TABLE=self.TABLE[qstr] + return out + # + def select_by_loc(self,loc_argument) : + out=self.copy() + out.TABLE=self.TABLE.loc[loc_argument] + return out + # + def select_by_iloc(self,iloc_argument) : + out=self.copy() + out.TABLE=self.TABLE.iloc[iloc_argument] + return out + # + def classification_string2numeric(self,strg) : + try : + return self._numeric_classification[strg.lower().strip()] + except : + print("bad strng ",strg) + return self._string_class_notfound + # + def calc_iclass(self,string_classification_array) : + return np.array([self._numeric_classification[k.strip().lower()] for k in string_classification_array]) + # + def keys(self) : return list(self.TABLE.keys()) + # + def has_key(self,this) : return (this in self.TABLE.keys()) + # + def __include__(self,this) : return (this in self.TABLE.keys()) + # + def reset_plot_status(self) : + """resets the self._last_plot dictionary where handles from the last plot generated are stored""" + from collections import OrderedDict + self._last_plot=OrderedDict() + self._last_plot['fig']=None + self._last_plot['xlabel']=None + self._last_plot['ylabel']=None + self._last_plot['axe']=None + self._last_plot['legend']=None + self._last_plot['curves']=[] + # + def plot2(self,x,y='MolecularDepth',ylog=True,xlog=False,newfig=True,legend_loc=3,xylim=None,savefig=None,one2one_line=False) : + """creates a 2 axis plot, the handles for the plot objects are stored in self._last_plot""" + from collections import OrderedDict + from matplotlib import pyplot as plt + import numpy as np + import pandas + import astropy + # + self.reset_plot_status() + # + if newfig : + self._last_plot['fig']=plt.figure() + else : + self._last_plot['fig']=plt.gcf() + # + for k in self._ClassNames : + smp=self.TABLE.query('CLASS=="%s"'%k) + self._last_plot['curves'].append(plt.plot(smp[x],smp[y],self._ClassColor[k]+self._ClassMarker[k][0],label=k,mew=0,markersize=int(self._ClassMarker[k][2:]))) + plt.gca().set_xscale('linear' if xlog==False else 'log') + plt.gca().set_yscale('linear' if ylog==False else 'log') + plt.title(self._project_name,fontsize=20) + if xylim != None : plt.axis(xylim) + # + if one2one_line==True : + a=plt.axis() + x121=np.arange(0,1+0.01,0.01) + x121=a[0]*x121+a[1]*(1-x121) + self._last_plot['1:1']=plt.plot(x121,x121,'k-',lw=1) + self._last_plot['1:1'][0].set_zorder(-10) + # + if x=='Natm' : + a=plt.axis() + self._last_plot['STARTNATMLINES']=[plt.plot(k*np.ones(2),[a[2],a[3]],'k-',lw=2) for k in np.unique(self.TABLE['PRESS']/self.TABLE['GRAV'])] + for k in self._last_plot['STARTNATMLINES'] : k[0].set_zorder(-10) + # + if x=='p_surface' : + a=plt.axis() + self._last_plot['PRESS_LINES']=[plt.plot(k*np.ones(2),[a[2],a[3]],'k-',lw=2) for k in np.unique(self.TABLE['PRESS'])] + for k in self._last_plot['PRESS_LINES'] : k[0].set_zorder(-10) + # + self._last_plot['axe']=plt.gca() + if legend_loc > 0 : + self._last_plot['legend']=plt.legend(loc=legend_loc) + self._last_plot['xlabel']=plt.xlabel(x,fontsize=20) + self._last_plot['ylabel']=plt.ylabel(y,fontsize=20) + plt.gcf().set_figwidth(18.) + plt.gcf().set_figheight(plt.gcf().get_figwidth()*3./4.) + if savefig != None and savefig != False: + if type(savefig)==type('') and savefig!='' : + _savefig=savefig + else : + _savefig=self._project_name+'/'+self._figures_path+'/'+y+'_vz_'+x+'.pdf' + print("Saving in ",_savefig) + plt.savefig(_savefig,dpi=plt.gcf().get_dpi()) + plt.show() + # + def set_iceTOT_threshold(self,iceTOT_threshold) : + """ set iceTOT_threshold : the ice coverage over which the planet is considered a snowball +suggested values 0.95 or 0.99 + """ + self.iceTOT_threshold=iceTOT_threshold + # + def flag_class_warm(self) : + """returns Murante's classifications for warm calculated from scratch +iceTOT_threshold is the ice coverage over which the planet is considered a snowball +suggested values 0.95 or 0.99 + """ + import numpy as np + if not self.iceTOT_threshold() : return + return (self.TABLE['TMGLOB'] > self.TABLE['Tice']) * (self.TABLE['TMGLOB'] < self.TABLE['Tvap']) + + def flag_class_warm_hot(self) : + """returns Murante's classifications for warm_hot calculated from scratch +iceTOT_threshold is the ice coverage over which the planet is considered a snowball +suggested values 0.95 or 0.99 + """ + import numpy as np + if not self.iceTOT_threshold() : return + return (self.TABLE['TMGLOB'] >= self.TABLE['Tvap']) + + def flag_class_snowball(self) : + """returns Murante's classifications for warm_hot calculated from scratch +iceTOT_threshold is the ice coverage over which the planet is considered a snowball +suggested values 0.95 or 0.99 + """ + import numpy as np + if not self.iceTOT_threshold() : return + return (self.TABLE['ICE'] > self.iceTOT_threshold) + + def flag_class_waterbelt(self) : + """returns Murante's classifications for warm_hot calculated from scratch +iceTOT_threshold is the ice coverage over which the planet is considered a snowball +suggested values 0.95 or 0.99 + """ + import numpy as np + if not self.iceTOT_threshold() : return + return (self.TABLE['TMGLOB'] <= self.TABLE['Tice'])*(self.TABLE['ICE'] <= self.iceTOT_threshold) + + def _testIceTot(self) : + if self.iceTOT_threshold == None : + raise Exception("iceTOT_threshold not set, Set iceTOT_threshold with .set_iceTOT_threshold(x) before") + return False + return True + diff --git a/tap.py b/tap.py new file mode 100644 index 0000000000000000000000000000000000000000..3f5b691cd8bd3441329100dcf6b0c807d678db21 --- /dev/null +++ b/tap.py @@ -0,0 +1,265 @@ +__DESCRIPTION__=""" +By M.Maris 1.1 - 2019 Nov 15 - + +Ported to python3 +Needs pyvo + +Tap service for artecs + + +Example: +import artecs +atap=artecs.exop_pubblic_tap() + +atap.EXPLAIN() +atap.keys() + + +tab=atap.search('(0.7 <= SMA) and (SMA <=3.)') + +tab.FO_CONST.unique() + +tab.to_csv('/tmp/pippo.csv',sep=' ') + +MAP=atap.get_map(tab.URL[0]) + + + +""" + +try : + import pyvo + BAD=False +except : + print("PYVO not found, to install pyvo : \n\t>sudo pip install pyvo\n%s EXOP_TAP class will not work properly"%("/".join(__file__.split('/')[-2:]))) + BAD=True + +#if BAD : + #import sys + #sys.exit(1) +#del BAD + +class EXOP_TAP : + def __init__(self,tap_url="http://archives.ia2.inaf.it/vo/tap/exo",table_name="exo.EXO",temporary_files_path='/tmp') : + import pyvo + # + #creates empty object + self._empty() + # + self._temporary_files_path=temporary_files_path+'/' if temporary_files_path!='' and temporary_files_path!=None else '' + # + #connects to db + self._url=tap_url + self._table_name=table_name + # + #template for query_string, two fields: + self._template_query="SELECT %s %s FROM "+self._table_name + # + try : + self._tap_service = pyvo.dal.TAPService(tap_url) + except : + print ("Error, unable to connect to TAP_URL '"+tap_url+"'") + # + #from db gets fields description + self._get_field_description() + # + def _empty(self) : + self._url=None + self._table_name=None + self._tap_service = None + self._elapsed_time=0. + self._search_result=None + self._field_description=None + self._field_names=None + self._search_success=None + self._template_query=None + self._download_success=None + # + def _get_field_description(self) : + self.adql_search('SELECT TOP 1 * FROM '+self._table_name) + self._field_description=self._search_result.fielddescs + self._field_names=self._search_result.fieldnames + self.clean() + # + def keys(self) : + """list of fields in the database""" + return self._field_names + # + def has_key(self,this) : + """has_key""" + return this in self._field_names + # + def __include__(self,this) : + """this in object""" + return this in self._field_names + # + def success(self) : + """returns True if last search was successfull""" + return self._search_success==True + # + def clean(self) : + """cleans information from last search""" + self._search_success=None + self._search_result=None + # + def adql_search(self,adql_string) : + """search on database using ADQL string """ + import time + self.clean() + self._search_success=False + self._elapsed_time=time.time() + self._search_result=self._tap_service.search(adql_string) + self._elapsed_time=time.time()-self._elapsed_time + self._search_success=True + # + def search_string(self,SELECTION,TOP,FIELDS,WHERE,SORT) : + """creates a properly formatted adql query_string""" + adql='SELECT' + adql+='' if type(SELECTION)==type(None) else ' '+SELECT + adql+=' TOP %d'%TOP if type(TOP)==type(1) else '' + adql+=' *' if type(FIELDS)==type(None) else ' '+FIELDS + adql+=' FROM '+self._table_name + adql+='' if WHERE=='' or WHERE==None else ' WHERE '+WHERE + adql+='' if SORT=='' or SORT==None else ' SORT BY '+SORT + return adql + # + def search(self,WHERE,SELECTION=None,FIELDS=None,TOP=None,SORT=None,as_astropy=False,as_votable=False) : + """download a table from the database + search('') or search(WHERE='') returns all the data in the database + + the table can be returned as : + pandas dataframe (default) + astropy table (as_astropy = True) + votable (as_votable=True) + """ + import time + # + adql=self.search_string(SELECTION,TOP,FIELDS,WHERE,SORT) + self.adql_search(adql) + # + if as_astropy : + return self._search_result.table + elif as_votable : + return self._search_result.votable + else : + return self._search_result.table.to_pandas() + # + def get_map(self,_URL,outfile=None) : + """gets a map """ + import numpy as np + import time + from .artecs_map import artecs_map + try : + from urllib2 import urlopen + except : + from urllib.request import urlopen + import requests + import gzip + import os + try : + from StringIO import StringIO + except : + from io import StringIO + if type(_URL) == type("") : + URL=_URL+'' + else : + URL=_URL.decode("utf-8") + # + if type(outfile) != type('') : + #if out file not specified creates a unique temporary name and reserves it + flag = True + while flag : + tmp=URL+' '+time.asctime() + tmp=tmp+' '+str(np.random.randint(1000000)) + tmp=abs(hash(tmp)) + tmp=hex( tmp )[2:] + _o=self._temporary_files_path+'artecs_download_file_'+tmp+'.fits' + flag=os.path.isfile(_o) + open(_o,'w').write('a') #this is to reserve the file name + else : + _o=outfile + # + tic=time.time() + request=requests.Request(URL) + #response = urlopen(request) + try : + response = urlopen(URL) + except : + print("URL %s not found"%URL) + if type(outfile) != type('') : + os.remove(_o) + self._download_success=False + return + #buf = StringIO(response.read()) + buf = response + f = gzip.GzipFile(fileobj=buf) + data=f.read() + open(_o,'wb').write(data) + tic=time.time()-tic + # + amap=artecs_map(_o) + amap.url=URL + if type(outfile) != type('') : + os.remove(_o) + self._download_success=True + return amap + # + def download_map(self,URL,outfile=None,path=None) : + """download the fits file corresponding at a given URL + if outfile is not specified the fits file name from the URL is used + if path is not specified the current file is stored in the current path + """ + _path='' if type(path) != type('') else path+'/' + if type(outfile)!=type('') : + _o=URL.split('/')[-1].split('.gz')[0] + _o=_path+_o + else : + _o=outfile + self.get_map(URL,outfile=_o) + # + def EXPLAIN(self) : + """print a short introduction on the query language""" + print (""" +ADQL = Astronomical Data Query Language + +A query is a string +- SELECTION set of data to be selected +- HOW_MANY elements to select +- WHICH_FIELDS fields to select +- FROM which table +- WHERE condition for selection is true + +Example: + +1. Download all the data + SELECT * FROM exo.EXO + +2. Download only the first 10 elements + SELECT TOP 10 * FROM exo.EXO + +3. Download only the first 10 elements with SMA in the range 0.9 to 1.1 + SELECT TOP 10 * FROM exo.EXO WHERE SMA BETWEEN 0.9 AND 1.1 + + alternativelly + + SELECT TOP 10 * FROM exo.EXO WHERE (0.9 <= SMA) AND (SMA <= 1.1) + +4. Download only the first 10 elements with SMA in the range 0.9 to 1.1 and CONTHAB>=0.5 + SELECT TOP 10 * FROM exo.EXO WHERE SMA BETWEEN 0.9 AND 1.1 AND CONTHAB>=0.5 + +5. Arithmetic calculations are allowed if needed for selection so to download the first 10 elements with SMA^1.5 > 0.87 + SELECT TOP 10 * FROM exo.EXO WHERE power(SMA,1.5)> 0.87 + +6. returns just columns SMA and CONTHAB from previous example: + SELECT TOP 10 SMA,CONTHAB FROM exo.EXO WHERE power(SMA,1.5)> 0.87 + +Note that the query is not sensitive to uppercase or lowercase. + +Tutorials: + http://www.g-vo.org/tutorials/gaia-mock-tap.pdf + http://www.ivoa.net/documents/REC/ADQL/ADQL-20081030.pdf + http://www.ivoa.net/documents/ + +""") + +