Skip to content
Snippets Groups Projects
Commit ab31ee09 authored by Michele Maris's avatar Michele Maris
Browse files

u added files

parent 622ff989
No related branches found
No related tags found
No related merge requests found
PY_ARTECS Tools to handle ARTECS contents
Dependencies:
numpy, scipy
pandas
pyvo
sudo pip install pyvo
Example of session with TAP:
>import pyvo as vo
>tap_service = vo.dal.TAPService("http://archives.ia2.inaf.it/vo/tap/exo")
>tap_results = tap_service.search("SELECT top 1 exp_id, url FROM exo.EXO")
>print(tap_results)
>len(tap_results)
>tap_results.getrecord(0)
To install pyvo:
>sudo pip install pyvo
Example of session using Artecs.tap
>import artecs
>atap=artecs.exop_pubblic_tap()
>atap.EXPLAIN()
>atap.keys()
>tab=atap.search('(0.7 <= SMA) and (SMA <=3.)')
>tab.FO_CONST.unique()
>tab.to_csv('/tmp/pippo.csv',sep=' ')
>MAP=atap.get_map(tab.URL[0])
from .modelDb import *
from .artecs_map import *
from .tap import EXOP_TAP as exop_pubblic_tap
class artecs_map :
def __init__(self,filename) :
import numpy
try :
import pyfits
except :
from astropy.io import fits as pyfits
self.filename=filename
self.p=pyfits.open(filename)
self._key=[]
self._value=[]
self._descr=[]
mkd=True
for cc in self.p[0].header.cards :
if cc[0]!='COMMENT' :
self._key.append(cc[0])
self._value.append(cc[1])
self._descr.append(cc[2])
else :
if mkd :
self._key.append(cc[0])
self._value.append('\n'.join(self.p[0].header['COMMENT']))
self._descr.append(None)
mkd=False
self.N=self.parameter('N')
self.NS=self.parameter('NS')
self.shape=(self.NS,self.N)
self.year=self.p[1].data['year']
self.lat=self.p[1].data['lat']
self.temp=self.p[1].data['temp']
self.year.shape=self.shape
self.lat.shape=self.shape
self.temp.shape=self.shape
self.TMGLOB=self.temp.mean()
self.p.close()
self.lst_lat=self.lat[0]
self.lst_year=self.year[:,0].T
def keys(self) :
return self._key
def has_key(self,key) :
return key in self._key
def parameter(self,key) :
return self._value[self._key.index(key)]
def description(self,key) :
return self._descr[self._key.index(key)]
def bilinear_interpolation(self,lat,year) :
pass
class modelDb :
def __init__(self,project_name,project_path,csv_name,Verbose=True,csv_sep='!',csv_comment='#',csv_index_col='index',figures_path='png',filterBad=True,new=False,query=None) :
import time
from collections import OrderedDict
from matplotlib import pyplot as plt
import numpy as np
import pandas
import astropy
#
self.reset()
self.reset_plot_status()
#
self._project_name=project_name
self._project_path=project_path
self._csv_name=csv_name
self._figures_path=figures_path
#
if new : return
#
print()
print("Reading from csv file ",self._csv_name)
tic=time.time()
csv=pandas.read_csv(self._project_path+'/'+self._csv_name,sep=csv_sep,comment=csv_comment,index_col=csv_index_col)
if not 'TSTART' in csv.keys() :
csv['TSTART']=np.zeros(len(csv))
print ("TSTART not found, replaced by a DUMMY")
csv['Natm']=csv['p_surface']/csv['GRAV']
csv['iclass']=np.array([self.classification_string2numeric(k.strip().lower()) for k in csv.CLASS])
csv['peri']=(1-csv.ECC)*csv.SMA
csv['aphe']=(1+csv.ECC)*csv.SMA
self.flagBad=np.array(csv['iclass']==self._string_class_notfound)*8+np.array(csv.iclass<0)*4+np.array(np.isnan(csv.TMGLOB))*2+np.array(csv.TSTART<0)
tic=time.time()-tic
print("%d lines in %e sec"%(len(csv),tic))
#
if not 'SOURCE' in csv.keys() :
csv['SOURCE']=np.zeros(len(csv))-1
#
if filterBad :
self.TABLE=csv[self.flagBad==0]
else :
self.TABLE=csv
#
if query != None and query!='' :
self.TABLE=self.TABLE.query(query)
#
if Verbose :
print()
print("In %s, containind %d models, the number of models with "%(csv_name,len(csv)))
print(" CLASS == UNKNOWN ",(csv.iclass==-200).sum())
print(" CLASS == SNOWBALL ",(csv.iclass==1).sum())
print(" CLASS == WATERBELT ",(csv.iclass==2).sum())
print(" CLASS == WARM ",(csv.iclass==3).sum())
print(" CLASS == WARM_HOT ",(csv.iclass==4).sum())
print(" INVALID STRING IN CLASS ",((self.flagBad>=8)).sum())
print()
for k in np.unique(csv['SOURCE']) :
print(" SOURCE == %d %d"%(k,(csv.SOURCE==0).sum()))
#print(" SOURCE == 1 ",(csv.SOURCE==1).sum())
print()
print(" TMGLOB == NaN ",np.isnan(csv.TMGLOB).sum())
print(" TSTART<0 ",(csv.TSTART<0).sum())
print(" TMGLOB == NaN and TSTART<0 ",((csv.TSTART<0)&np.isnan(csv.TMGLOB)).sum())
print(" TMGLOB == NaN and ZCD == NaN ",(np.isnan(csv.TMGLOB)&np.isnan(csv.zenithal_column_density)).sum())
print(" TMGLOB == NaN and CLASS == UNDEFINED ",(np.isnan(csv.TMGLOB)&(csv.iclass==0)).sum())
print()
#
del csv
def to_csv(self,outFile,COMMENT='') :
print("Writing csv into ",outFile)
open(outFile,'w').write('#\n#'+outFile+'\n#elaboration of '+self._csv_name+'\n#'+str(COMMENT)+'\n#!BEGIN\n')
open(outFile,'a').write(self.TABLE.to_csv(None,sep='!',index_column='index'))
open(outFile,'a').write('#!END\n')
print("written")
def reset(self) :
from collections import OrderedDict
for k in ['_project_name','_project_path','_csv_name','_figures_path','_project_name','_project_path','TABLE','flagBad','classTable'] :
self.__dict__[k]=None
self._numeric_classification=OrderedDict()
self._numeric_classification['---']=-400
self._numeric_classification['unknown']=-200
self._numeric_classification['snowball']=1
self._numeric_classification['waterbelt']=2
self._numeric_classification['warm']=3
self._numeric_classification['warm_hot']=4
self._string_class_notfound=-100000
#
self._classification_labels=self._numeric_classification.keys()
self._ClassNames=['SNOWBALL','WATERBELT','WARM','WARM_HOT']
self._ClassColor={'SNOWBALL':'b','WATERBELT':'g','WARM':'r','WARM_HOT':'y'}
self._ClassMarker={'SNOWBALL':'o:12','WATERBELT':'v:16','WARM':'d:8','WARM_HOT':'s:6'}
#
self.iceTOT_threshold=None
#
def __len__(self) : return len(self.TABLE)
#
def __getitem__(self,this) : return self.TABLE[this]
#
def __setitem__(self,this,that) : self.TABLE[this]=that
#
def copy(self) :
import copy
out=modelDb('slice of '+self._project_name,self._project_path,None,new=True)
out._project_name=self._project_name
out._project_path=self._project_path
out._csv_name=None
out._figures_path=self._figures_path
out.classTable=copy.deepcopy(self.classTable)
return out
#
def unique_values(self) :
import numpy as np
from collections import OrderedDict
import pandas
out=OrderedDict()
for k in self.keys() :
out[k]=np.unique(self.TABLE[k].values)
return out
#
def list_unique_values(self) :
out=self.unique_values()
for k in out.keys() :
print("%s (%d) ="%(k,len(out[k])))
#
def classification_indexes(self,*arg,**karg) :
import numpy as np
from collections import OrderedDict
if len(arg) == 0 :
_arg=['CLASS','OBLIQ','ECC','SMA','PRESS','SOURCE','GEO','FO_CONST','PROT','P_CO2']
else :
_arg=arg
ipfx=karg['index_pfx'] if karg.has_key('index_pfx') else 'i_'
for k in _arg :
quit=False
if not k in self.keys() :
print("%s not found "%k)
quit=True
if quit :
return
classTable=OrderedDict()
for k in arg :
u=self.unique(k)
iu=np.zeros(len(self))
classTable[k]=OrderedDict()
for cu,u in enumerate(self.unique(k)) :
idx=np.where(np.array(self.TABLE[k]==u))[0]
iu[idx]=cu
classTable[k][u]=[cu,len(idx)]
self.TABLE[ipfx+k]=iu
self.classTable=classTable.copy()
return classTable
#
def show_classTable(self) :
for k in DB.classTable.keys() :
print('%10s : %3d = '%(k,len(DB.classTable[k])),end='')
for ij,j in enumerate(DB.classTable[k].keys()) :
if ij>0 : print ('|',end='')
print('%3d#'%ij,end='')
print('%12s'%str(j),':',end='')
print("%6d "%len(DB.query('i_'+k+'=='+str(ij))),end='')
print()
#
def unique(self,key) :
import numpy as np
return np.sort(self.TABLE[key].unique())
#
def sort(self,sort_by) :
self.TABLE=self.TABLE.sort(sort_by)
#
def sorted_query(self,sort_by,qstr) :
out=self.copy()
if type(qstr) == type('') :
out.TABLE=self.TABLE.query(qstr)
else :
out.TABLE=self.TABLE[qstr]
out.TABLE=out.TABLE.sort(sort_by)
return out
#
def query(self,qstr) :
out=self.copy()
if type(qstr) == type('') :
out.TABLE=self.TABLE.query(qstr)
else :
out.TABLE=self.TABLE[qstr]
return out
#
def select_by_loc(self,loc_argument) :
out=self.copy()
out.TABLE=self.TABLE.loc[loc_argument]
return out
#
def select_by_iloc(self,iloc_argument) :
out=self.copy()
out.TABLE=self.TABLE.iloc[iloc_argument]
return out
#
def classification_string2numeric(self,strg) :
try :
return self._numeric_classification[strg.lower().strip()]
except :
print("bad strng ",strg)
return self._string_class_notfound
#
def calc_iclass(self,string_classification_array) :
return np.array([self._numeric_classification[k.strip().lower()] for k in string_classification_array])
#
def keys(self) : return list(self.TABLE.keys())
#
def has_key(self,this) : return (this in self.TABLE.keys())
#
def __include__(self,this) : return (this in self.TABLE.keys())
#
def reset_plot_status(self) :
"""resets the self._last_plot dictionary where handles from the last plot generated are stored"""
from collections import OrderedDict
self._last_plot=OrderedDict()
self._last_plot['fig']=None
self._last_plot['xlabel']=None
self._last_plot['ylabel']=None
self._last_plot['axe']=None
self._last_plot['legend']=None
self._last_plot['curves']=[]
#
def plot2(self,x,y='MolecularDepth',ylog=True,xlog=False,newfig=True,legend_loc=3,xylim=None,savefig=None,one2one_line=False) :
"""creates a 2 axis plot, the handles for the plot objects are stored in self._last_plot"""
from collections import OrderedDict
from matplotlib import pyplot as plt
import numpy as np
import pandas
import astropy
#
self.reset_plot_status()
#
if newfig :
self._last_plot['fig']=plt.figure()
else :
self._last_plot['fig']=plt.gcf()
#
for k in self._ClassNames :
smp=self.TABLE.query('CLASS=="%s"'%k)
self._last_plot['curves'].append(plt.plot(smp[x],smp[y],self._ClassColor[k]+self._ClassMarker[k][0],label=k,mew=0,markersize=int(self._ClassMarker[k][2:])))
plt.gca().set_xscale('linear' if xlog==False else 'log')
plt.gca().set_yscale('linear' if ylog==False else 'log')
plt.title(self._project_name,fontsize=20)
if xylim != None : plt.axis(xylim)
#
if one2one_line==True :
a=plt.axis()
x121=np.arange(0,1+0.01,0.01)
x121=a[0]*x121+a[1]*(1-x121)
self._last_plot['1:1']=plt.plot(x121,x121,'k-',lw=1)
self._last_plot['1:1'][0].set_zorder(-10)
#
if x=='Natm' :
a=plt.axis()
self._last_plot['STARTNATMLINES']=[plt.plot(k*np.ones(2),[a[2],a[3]],'k-',lw=2) for k in np.unique(self.TABLE['PRESS']/self.TABLE['GRAV'])]
for k in self._last_plot['STARTNATMLINES'] : k[0].set_zorder(-10)
#
if x=='p_surface' :
a=plt.axis()
self._last_plot['PRESS_LINES']=[plt.plot(k*np.ones(2),[a[2],a[3]],'k-',lw=2) for k in np.unique(self.TABLE['PRESS'])]
for k in self._last_plot['PRESS_LINES'] : k[0].set_zorder(-10)
#
self._last_plot['axe']=plt.gca()
if legend_loc > 0 :
self._last_plot['legend']=plt.legend(loc=legend_loc)
self._last_plot['xlabel']=plt.xlabel(x,fontsize=20)
self._last_plot['ylabel']=plt.ylabel(y,fontsize=20)
plt.gcf().set_figwidth(18.)
plt.gcf().set_figheight(plt.gcf().get_figwidth()*3./4.)
if savefig != None and savefig != False:
if type(savefig)==type('') and savefig!='' :
_savefig=savefig
else :
_savefig=self._project_name+'/'+self._figures_path+'/'+y+'_vz_'+x+'.pdf'
print("Saving in ",_savefig)
plt.savefig(_savefig,dpi=plt.gcf().get_dpi())
plt.show()
#
def set_iceTOT_threshold(self,iceTOT_threshold) :
""" set iceTOT_threshold : the ice coverage over which the planet is considered a snowball
suggested values 0.95 or 0.99
"""
self.iceTOT_threshold=iceTOT_threshold
#
def flag_class_warm(self) :
"""returns Murante's classifications for warm calculated from scratch
iceTOT_threshold is the ice coverage over which the planet is considered a snowball
suggested values 0.95 or 0.99
"""
import numpy as np
if not self.iceTOT_threshold() : return
return (self.TABLE['TMGLOB'] > self.TABLE['Tice']) * (self.TABLE['TMGLOB'] < self.TABLE['Tvap'])
def flag_class_warm_hot(self) :
"""returns Murante's classifications for warm_hot calculated from scratch
iceTOT_threshold is the ice coverage over which the planet is considered a snowball
suggested values 0.95 or 0.99
"""
import numpy as np
if not self.iceTOT_threshold() : return
return (self.TABLE['TMGLOB'] >= self.TABLE['Tvap'])
def flag_class_snowball(self) :
"""returns Murante's classifications for warm_hot calculated from scratch
iceTOT_threshold is the ice coverage over which the planet is considered a snowball
suggested values 0.95 or 0.99
"""
import numpy as np
if not self.iceTOT_threshold() : return
return (self.TABLE['ICE'] > self.iceTOT_threshold)
def flag_class_waterbelt(self) :
"""returns Murante's classifications for warm_hot calculated from scratch
iceTOT_threshold is the ice coverage over which the planet is considered a snowball
suggested values 0.95 or 0.99
"""
import numpy as np
if not self.iceTOT_threshold() : return
return (self.TABLE['TMGLOB'] <= self.TABLE['Tice'])*(self.TABLE['ICE'] <= self.iceTOT_threshold)
def _testIceTot(self) :
if self.iceTOT_threshold == None :
raise Exception("iceTOT_threshold not set, Set iceTOT_threshold with .set_iceTOT_threshold(x) before")
return False
return True
tap.py 0 → 100644
__DESCRIPTION__="""
By M.Maris 1.1 - 2019 Nov 15 -
Ported to python3
Needs pyvo
Tap service for artecs
Example:
import artecs
atap=artecs.exop_pubblic_tap()
atap.EXPLAIN()
atap.keys()
tab=atap.search('(0.7 <= SMA) and (SMA <=3.)')
tab.FO_CONST.unique()
tab.to_csv('/tmp/pippo.csv',sep=' ')
MAP=atap.get_map(tab.URL[0])
"""
try :
import pyvo
BAD=False
except :
print("PYVO not found, to install pyvo : \n\t>sudo pip install pyvo\n%s EXOP_TAP class will not work properly"%("/".join(__file__.split('/')[-2:])))
BAD=True
#if BAD :
#import sys
#sys.exit(1)
#del BAD
class EXOP_TAP :
def __init__(self,tap_url="http://archives.ia2.inaf.it/vo/tap/exo",table_name="exo.EXO",temporary_files_path='/tmp') :
import pyvo
#
#creates empty object
self._empty()
#
self._temporary_files_path=temporary_files_path+'/' if temporary_files_path!='' and temporary_files_path!=None else ''
#
#connects to db
self._url=tap_url
self._table_name=table_name
#
#template for query_string, two fields: <QUERY> <FIELDS>
self._template_query="SELECT %s %s FROM "+self._table_name
#
try :
self._tap_service = pyvo.dal.TAPService(tap_url)
except :
print ("Error, unable to connect to TAP_URL '"+tap_url+"'")
#
#from db gets fields description
self._get_field_description()
#
def _empty(self) :
self._url=None
self._table_name=None
self._tap_service = None
self._elapsed_time=0.
self._search_result=None
self._field_description=None
self._field_names=None
self._search_success=None
self._template_query=None
self._download_success=None
#
def _get_field_description(self) :
self.adql_search('SELECT TOP 1 * FROM '+self._table_name)
self._field_description=self._search_result.fielddescs
self._field_names=self._search_result.fieldnames
self.clean()
#
def keys(self) :
"""list of fields in the database"""
return self._field_names
#
def has_key(self,this) :
"""has_key"""
return this in self._field_names
#
def __include__(self,this) :
"""this in object"""
return this in self._field_names
#
def success(self) :
"""returns True if last search was successfull"""
return self._search_success==True
#
def clean(self) :
"""cleans information from last search"""
self._search_success=None
self._search_result=None
#
def adql_search(self,adql_string) :
"""search on database using ADQL string """
import time
self.clean()
self._search_success=False
self._elapsed_time=time.time()
self._search_result=self._tap_service.search(adql_string)
self._elapsed_time=time.time()-self._elapsed_time
self._search_success=True
#
def search_string(self,SELECTION,TOP,FIELDS,WHERE,SORT) :
"""creates a properly formatted adql query_string"""
adql='SELECT'
adql+='' if type(SELECTION)==type(None) else ' '+SELECT
adql+=' TOP %d'%TOP if type(TOP)==type(1) else ''
adql+=' *' if type(FIELDS)==type(None) else ' '+FIELDS
adql+=' FROM '+self._table_name
adql+='' if WHERE=='' or WHERE==None else ' WHERE '+WHERE
adql+='' if SORT=='' or SORT==None else ' SORT BY '+SORT
return adql
#
def search(self,WHERE,SELECTION=None,FIELDS=None,TOP=None,SORT=None,as_astropy=False,as_votable=False) :
"""download a table from the database
search('') or search(WHERE='') returns all the data in the database
the table can be returned as :
pandas dataframe (default)
astropy table (as_astropy = True)
votable (as_votable=True)
"""
import time
#
adql=self.search_string(SELECTION,TOP,FIELDS,WHERE,SORT)
self.adql_search(adql)
#
if as_astropy :
return self._search_result.table
elif as_votable :
return self._search_result.votable
else :
return self._search_result.table.to_pandas()
#
def get_map(self,_URL,outfile=None) :
"""gets a map """
import numpy as np
import time
from .artecs_map import artecs_map
try :
from urllib2 import urlopen
except :
from urllib.request import urlopen
import requests
import gzip
import os
try :
from StringIO import StringIO
except :
from io import StringIO
if type(_URL) == type("") :
URL=_URL+''
else :
URL=_URL.decode("utf-8")
#
if type(outfile) != type('') :
#if out file not specified creates a unique temporary name and reserves it
flag = True
while flag :
tmp=URL+' '+time.asctime()
tmp=tmp+' '+str(np.random.randint(1000000))
tmp=abs(hash(tmp))
tmp=hex( tmp )[2:]
_o=self._temporary_files_path+'artecs_download_file_'+tmp+'.fits'
flag=os.path.isfile(_o)
open(_o,'w').write('a') #this is to reserve the file name
else :
_o=outfile
#
tic=time.time()
request=requests.Request(URL)
#response = urlopen(request)
try :
response = urlopen(URL)
except :
print("URL %s not found"%URL)
if type(outfile) != type('') :
os.remove(_o)
self._download_success=False
return
#buf = StringIO(response.read())
buf = response
f = gzip.GzipFile(fileobj=buf)
data=f.read()
open(_o,'wb').write(data)
tic=time.time()-tic
#
amap=artecs_map(_o)
amap.url=URL
if type(outfile) != type('') :
os.remove(_o)
self._download_success=True
return amap
#
def download_map(self,URL,outfile=None,path=None) :
"""download the fits file corresponding at a given URL
if outfile is not specified the fits file name from the URL is used
if path is not specified the current file is stored in the current path
"""
_path='' if type(path) != type('') else path+'/'
if type(outfile)!=type('') :
_o=URL.split('/')[-1].split('.gz')[0]
_o=_path+_o
else :
_o=outfile
self.get_map(URL,outfile=_o)
#
def EXPLAIN(self) :
"""print a short introduction on the query language"""
print ("""
ADQL = Astronomical Data Query Language
A query is a string
- SELECTION set of data to be selected
- HOW_MANY elements to select
- WHICH_FIELDS fields to select
- FROM which table
- WHERE condition for selection is true
Example:
1. Download all the data
SELECT * FROM exo.EXO
2. Download only the first 10 elements
SELECT TOP 10 * FROM exo.EXO
3. Download only the first 10 elements with SMA in the range 0.9 to 1.1
SELECT TOP 10 * FROM exo.EXO WHERE SMA BETWEEN 0.9 AND 1.1
alternativelly
SELECT TOP 10 * FROM exo.EXO WHERE (0.9 <= SMA) AND (SMA <= 1.1)
4. Download only the first 10 elements with SMA in the range 0.9 to 1.1 and CONTHAB>=0.5
SELECT TOP 10 * FROM exo.EXO WHERE SMA BETWEEN 0.9 AND 1.1 AND CONTHAB>=0.5
5. Arithmetic calculations are allowed if needed for selection so to download the first 10 elements with SMA^1.5 > 0.87
SELECT TOP 10 * FROM exo.EXO WHERE power(SMA,1.5)> 0.87
6. returns just columns SMA and CONTHAB from previous example:
SELECT TOP 10 SMA,CONTHAB FROM exo.EXO WHERE power(SMA,1.5)> 0.87
Note that the query is not sensitive to uppercase or lowercase.
Tutorials:
http://www.g-vo.org/tutorials/gaia-mock-tap.pdf
http://www.ivoa.net/documents/REC/ADQL/ADQL-20081030.pdf
http://www.ivoa.net/documents/
""")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment