From fae3e6d4cb4b65e78d15d06e3b6afb3e835bb9c3 Mon Sep 17 00:00:00 2001
From: jlaura <jlaura@asu.edu>
Date: Mon, 18 Jul 2016 15:21:05 -0700
Subject: [PATCH] Amended setup.py to include necessary data elements.

---
 plio/examples/__init__.py |   4 +-
 plio/io_krc.py            | 487 +++++++++++++++++++-------------
 setup.py                  |  21 +-
 3 files changed, 268 insertions(+), 244 deletions(-)

diff --git a/plio/examples/__init__.py b/plio/examples/__init__.py
index 1f6b05e..405e9c6 100644
--- a/plio/examples/__init__.py
+++ b/plio/examples/__init__.py
@@ -1,7 +1,7 @@
 import os
 import plio
 
-__all__ = ['get_path']
+__all__ = ['available', 'get_path']
 
 #Used largely unmodififed from:
 # https://github.com/pysal/pysal/blob/master/pysal/examples/__init__.py
@@ -44,7 +44,7 @@ def get_path(example_name):  # pragma: no cover
 
 def available(directory='', verbose=False):  # pragma: no cover
     """
-    List available datasets in autocnet.examples
+    List available datasets in plio.examples
 
     Parameters
     ==========
diff --git a/plio/io_krc.py b/plio/io_krc.py
index 7d3e518..978ae8e 100644
--- a/plio/io_krc.py
+++ b/plio/io_krc.py
@@ -5,14 +5,6 @@ import warnings
 import numpy as np
 import pandas as pd
 
-try:
-    import krc
-except:
-    import os
-    import sys
-    sys.path.append(os.path.basename('..'))
-    import krc
-
 idl2np_dtype = {1: np.byte, 2: np.int16, 3: np.int32,
                 4: np.float32, 5: np.float64}
 idl2struct = {4: 'f', 5:'d'}
@@ -24,86 +16,89 @@ class ReadBin52(object):
     """
     Class to read a bin 52 and organize the output
 
-    Parameters
-    ----------
-
-    filename (str) The file to read
 
     Attributes
     ----------
-
-    filename : str
-            The file that was parsed
-
-    header : str
-             The header from the bin52 file
 
     bin52data : ndarray
-                RAW bin52 binary read into an n-dim array
+                The raw, n-dimensional KRC data
 
-    ncases : int
-             The number of available cases (1 based)
+    casesize : int
+               The number of bytes in each KRC case
 
-    nlat : int
-           The number of latitudes
+    date : bytes
+           The date the read file was created
 
-    nhours : int
-             The number of hours
+    ddd : ndarray
+          Raw temperature data
 
-    nseasons : int
-               The number of seasons
+    ddd_pd : dataframe
+             A pandas dataframe of surface temperature data
 
-    ntau : int
-           The number of opacity (tau) steps
+    filename : str
+               The name of the KRC file that was input
 
-    nalbedos : int
-               The number of albedo steps
+    header : list
+             The KRC header unpacked to a list of ints
 
-    nslope : int
-             The number of slope steps
+    headerlength : int
+                   The length, in bytes, of the KRC header
 
-    transferedlayers : int
-                       The number of transfered layers in the model
+    ncases : int
+             The number of different cases the model was run for
 
-    tlabels : list
-              Labels for the hourly tabel
+    ndim : int
+           TBD
 
-    latelv : ndarray
-             Array of model latitudes and elevations
+    ndx : int
+          The number of indices for a single KRC run
 
-    pd_latelv : dataframe
-                Pandas dataframe reshape of latelv
+    nhours : int
+             The number of hour bins per 24 Mars hours
 
-    ttt : ndarray
-          Hourly parameters as a vector
+    nlats : int
+            The number of valid, non-zero latitude bins
 
-    pd_ttt : dataframe
-             Pandas dataframe of the reshape ttt
+    nseasons : int
+               The number of seasons the model was run for
 
-    seasitems : ndarray
-                Array of seasonal parameters (DJU5, Ls, PZREF, TAUD, SUMF)
+    nvariables : int
+                 The number of variables contained within the KRC
+                 lookup table
 
-    pd_seasitems : dataframe
-                   Pandas dataframe reshape of seasitems
+    structdtype : str
+                  Describing endianness and data type
 
-    ddd : ndarray
-          Model temperatures by layer
+    temperature_data : dataframe
+                       A multi-indexed dataframe of surface temperatures
 
-    pd_ddd : dataframe
-             Pandas dataframe reshape of ddd
+    transferedlayers : int
+                       The number of KRC transferred layers
+
+    version : list
+              The KRC version used to create the file in the form
+              [major, minor, release]
 
-    pd_itemg : dataframe
-               Pandas dataframe of ggg
+    words_per_krc : int
+                    The number of bytes per krc entry
     """
 
     def __init__(self, filename, headerlen=512):
+        """
+        Parameters
+        ----------
+        filename : str
+                   The file to read
+
+        headerlen : int
+                    The length, in bytes, of the text header.
+                    Default: 512
+        """
         # Get or setup the logging object
         self.logger = logging.getLogger(__name__)
 
         self.filename = filename
         self.readbin5(headerlen)
-
+        print(self.ncases)
         assert(self.ncases == self.bin52data.shape[0])
 
     def definekrc(self, what='KRC', endianness='<'):
@@ -129,7 +124,7 @@ class ReadBin52(object):
 
         #Define the structure of the KRC file
         if self.structdtype == '<f':
-            self._krcstructure = np.dtype([('fd','{}{}f'.format(e, numfd)),
+            self._krcstructure= np.dtype([('fd','{}{}f'.format(e, numfd)),
                                           ('id','{}{}i'.format(e, numid)),
                                           ('ld','{}{}i'.format(e, numld)),
                                           ('title','{}{}a'.format(e, 4 * numtit)),
@@ -167,7 +162,7 @@ class ReadBin52(object):
             self.headerlength = header[8]
             self.ncases = header[0 + self.ndim]
             self.header = header
-
+            print(self.header)
             # Compute how large each case is
             self.casesize = self.nhours
             if self.ndim > 1:
@@ -189,12 +184,39 @@ class ReadBin52(object):
             indices = arraysize[1: -1]
             self.bin52data = self.bin52data.reshape(indices[: : -1])
 
+        def _read_metadata():
+            if self.structdtype == '<f':
+                j = self.headerlength + 16  # Skip header plus 1 16bit entry
+            elif self.structdtype == '<d':
+                j = self.headerlength + 32  # Skip header plus 1 32bit entry
+
+            bin5.seek(j)
+            self.definekrc('KRC')
+            structarr = np.fromfile(bin5, dtype=self._krcstructure, count=1)
+
+            if self.structdtype == '<f':
+                self.structarr = {'fd': structarr[0][0],
+                                  'id': structarr[0][1],
+                                  'ld': structarr[0][2],
+                                  'title': structarr[0][3],
+                                  'date': structarr[0][4],
+                                  'alat': structarr[0][5],
+                                  'elevation': structarr[0][6]
+                                  }
+            elif self.structdtype == '<d':
+                self.structarr = {'fd': structarr[0][0],
+                                  'alat': structarr[0][1],
+                                  'elevation': structarr[0][2],
+                                  'id': structarr[0][3],
+                                  'ld': structarr[0][4],
+                                  'title': structarr[0][5],
+                                  'date':structarr[0][6]}
+
         def _get_version():
             ver = re.findall(b'\d+', head)
             ver = list(map(int, ver))
             self.version = ver[: 3]
 
-        #Add smarter filename handling
         with open(self.filename, 'rb') as bin5:
 
             #To handle endianness and architectures
@@ -203,7 +225,7 @@ class ReadBin52(object):
             fullheader = bin5.read(headerlen)
 
             _parse_header()
-
+            print(self.header)
             arraysize = self.header[0: self.ndim + 2]
             arraydtypecode = arraysize[arraysize[0] + 1]
             try:
@@ -238,211 +260,202 @@ class ReadBin52(object):
             _get_version()
             _parse_front()
             _read_data()
+            _read_metadata()
 
-    def readcase(self, case=1):
+
+    def readcase(self, case=0):
         """
         Read a single dimension (case) from a bin52 file.
 
         Parameters
         -----------
-        case (int) The case to be extracted
-
-        Returns
-        -------
-        casearr (ndarray) The extracted KRC model for
-                the case.
+        case : int
+               The case to be extracted
         """
-        if self.structdtype == '<f':
-            j = 512 + (1 - 1) * 4 * self.casesize + 16
-        elif self.structdtype == '<d':
-            j = 512 + (1 - 1) * 4 * self.casesize + 32
-        with open(self.filename, 'r') as krc:
-            # Skip to the correct case
-            krc.seek(j)
-            self.definekrc('KRC')
-            structarr = np.fromfile(krc, dtype=self._krcstructure, count=1)
-        if self.structdtype == '<f':
-            self.structarr = {'fd': structarr[0][0],
-                              'id': structarr[0][1],
-                              'ld': structarr[0][2],
-                              'title': structarr[0][3],
-                              'date': structarr[0][4],
-                              'alat': structarr[0][5],
-                              'elevation': structarr[0][6]
-                              }
-        elif self.structdtype == '<d':
-            self.structarr = {'fd': structarr[0][0],
-                              'alat': structarr[0][1],
-                              'elevation': structarr[0][2],
-                              'id': structarr[0][3],
-                              'ld': structarr[0][4],
-                              'title': structarr[0][5],
-                              'date':structarr[0][6]}
-
-        self.nlayers = nlayers = self.structarr['id'][0]
-        itemt = ['Final Hourly Surface Temp.',
-                 'Final Hourly Planetary Temp.',
-                 'Final Hourly Atmospheric Temp.',
-                 'Hourly net downward solar flux [W/m^2]',
-                 'Hourly net downward thermal flux [W/m^2]']
-
-        itemu = ['Lat', 'Elev']
-        itemv = ['Current Julian date (offset from J2000.0)',
+        def latitems2dataframe():
+            """
+            Converts Latitude items to a dataframe
+            """
+            columns = ['# Days to Compute Soln.',
+                       'RMS Temp. Change on Last Day',
+                       'Predicted Final Atmospheric Temp.',
+                       'Predicted frost amount, [kg/m^2]',
+                       'Frost albedo (at the last time step)',
+                       'Mean upward heat flow into soil surface on last day, [W/m^2]']
+
+            # Grab the correct slice from the data cube and reshape
+            latitems = layeritems[: ,: ,: ,: ,0: 3].reshape(self.ncases,
+                                                            self.nseasons,
+                                                            self.nlats, len(columns))
+
+            # Multi-index generation
+            idxcount = self.nseasons * self.nlats * self.ncases
+            idxpercase = self.nseasons * self.nlats
+            caseidx = np.empty(idxcount)
+            for c in range(self.ncases):
+                start = c * idxpercase
+                caseidx[start:start+idxpercase] = np.repeat(c, idxpercase)
+            nseasvect = np.arange(self.nseasons)
+            seasonidx = np.repeat(np.arange(self.nseasons), self.nlats)
+            latidx = np.tile(self.latitudes.values.ravel(), self.nseasons)
+
+            # Pack the dataframe
+            self.latitude = pd.DataFrame(latitems.reshape(self.nseasons * self.nlats, -1),
+                                         index=[caseidx, seasonidx, latidx],
+                                         columns=columns)
+
+            self.latitude.index.names = ['Case','Season' ,'Latitude']
+
+        def layer2dataframe():
+            """
+            Converts layeritems into a dataframe
+            """
+            columns = ['Tmin', 'Tmax']
+
+            ddd = layeritems[: ,: ,: ,: ,3: 3 + self.transferedlayers].reshape(self.ncases,
+                                                                               self.nseasons,
+                                                                               self.nlats,
+                                                                               len(columns),
+                                                                               self.transferedlayers)
+
+            idxcount = self.nseasons * self.nlats * self.transferedlayers * self.ncases
+            caseidx = np.empty(idxcount)
+            idxpercase = self.nseasons * self.nlats * self.transferedlayers
+            for c in range(self.ncases):
+                start = c * idxpercase
+                caseidx[start:start + idxpercase] = np.repeat(c, idxpercase)
+
+            seasonidx = np.repeat(np.arange(self.nseasons), idxcount / self.nseasons / self.ncases)
+            nlatidx = np.repeat(self.latitudes.values.ravel(), idxcount / self.transferedlayers / self.ncases)
+            tranlayeridx = np.tile(np.repeat(np.arange(self.transferedlayers), self.nlats), self.nseasons)
+            self.layers = pd.DataFrame(ddd.reshape(idxcount, -1),
+                                       columns=columns,
+                                       index=[caseidx, seasonidx, nlatidx, tranlayeridx])
+            self.layers.index.names = ['Case', 'Season', 'Latitude', 'Layer']
+
+        def latelv2dataframes():
+            """
+            Convert the latitude and elevation arrays to dataframes
+            """
+            #All latitudes
+            #Hugh made some change to the krcc format, but I cannot find documentation...
+            if self.structdtype == '<f':
+                alllats = krcc[:,prelatwords:].reshape(2, nlat_include_null, self.ncases)
+            elif self.structdtype == '<d':
+                alllats = krcc[:,96:170].reshape(2, nlat_include_null, self.ncases)
+            #Latitudes and elevations for each case
+            latelv = alllats[: ,0: nlat]
+            if latelv.shape[-1] == 1:
+                latelv = latelv[:,:,0]
+            self.latitudes = pd.DataFrame(latelv[0], columns=['Latitude'])
+            self.elevations = pd.DataFrame(latelv[1], columns=['Elevation'])
+
+        def season2dataframe():
+            columns = ['Current Julian date (offset from J2000.0)',
                    'Seasonal longitude of Sun (in degrees)',
                    'Current surface pressure at 0 elevation (in Pascals)',
                    'Mean visible opacity of dust, solar wavelengths',
                    'Global average columnar mass of frost [kg /m^2]']
-        itemd = ['Tmin', 'Tmax']
-        itemg = ['# Days to Compute Soln.',
-                 'RMS Temp. Change on Last Day',
-                 'Predicted Final Atmospheric Temp.',
-                 'Predicted frost amount, [kg/m^2]',
-                 'Frost albedo (at the last time step)',
-                 'Mean upward heat flow into soil surface on last day, [W/m^2]']
-
-        self.glabels = itemg
-        self.tlabels = itemt
-        self.ulabels = itemu
-        self.vlabels = itemv
-        self.dlabels = itemd
+            # Build a dataframe of the seasonal information
+            seasitems = header[:, 4 + self.words_per_krc: k ].reshape(self.ncases,
+                                                                      len(columns),
+                                                                      self.nseasons)
+            caseidx = np.repeat(np.arange(self.ncases), self.nseasons)
+            seasonidx = np.repeat(np.arange(self.nseasons), self.ncases)
+            flt_seasitems = seasitems.reshape(len(self.vlabels),
+                                              self.ncases * self.nseasons)
+            self.seasons = pd.DataFrame(flt_seasitems,
+                                        index=[caseidx,seasonidx],
+                                        columns=columns)
+            self.seasons.index.names = ['Case', 'Season']
+
+        def hourly2dataframe():
+            """
+            Converts the hourly 'ttt' vector to a
+            labelled Pandas dataframe.
+            """
+            columns = ['Final Hourly Surface Temp.',
+                       'Final Hourly Planetary Temp.',
+                       'Final Hourly Atmospheric Temp.',
+                       'Hourly net downward solar flux [W/m^2]',
+                       'Hourly net downward thermal flux [W/m^2]']
+            ttt = self.bin52data[: ,self.ndx: ,: ,0: len(columns),: ].reshape(self.ncases,
                                                                               self.nseasons,
                                                                               self.nlats,
                                                                               len(columns),
                                                                               self.nhours)
+
+            reshapettt = np.swapaxes(ttt.reshape(self.ncases * self.nseasons * self.nlats,
                                                  len(columns),
                                                  self.nhours),1,2)
+            shp = reshapettt.shape
+            reshapettt = reshapettt.reshape((shp[0] * shp[1], shp[2])).T
+            #Indices
+            caseidx = np.repeat(np.arange(self.ncases), self.nseasons * self.nlats * self.nhours)
+            seasonidx = np.tile(np.repeat(np.arange(self.nseasons), self.nlats * self.nhours), self.ncases)
+            latidx = np.tile(np.repeat(self.latitudes.values.ravel(), self.nhours), self.nseasons)
+            houridx = np.tile(np.tile(np.tile(np.arange(self.nhours), self.nlats), self.nseasons), self.ncases)
+
+            #DataFrame
+            self.temperatures = pd.DataFrame(reshapettt.T,
                                              index=[caseidx, seasonidx, latidx, houridx],
                                              columns=columns)
+            self.temperatures.index.names = ['Case', 'Season', 'Latitude', 'Hour']
+
+
+
+        self.nlayers = nlayers = self.structarr['id'][0]
         nlat = len(self.structarr['alat'])
-        transferedlayers = nlayers - 1
-        if self.nhours - 3 < transferedlayers:
-            transferedlayers = self.nhours - 3
+        self.transferedlayers = nlayers - 1
+        if self.nhours - 3 < self.transferedlayers:
+            self.transferedlayers = self.nhours - 3
+
         wordsperlat = self.nhours * self.nvariables
         wordsperseason = wordsperlat * self.nlats
-        if self.nvariables != len(itemt) + len(itemd):
-            self.logger.error("Error! Size mismatch")
-
         # Each case has a header that must be extracted:
         header = self.bin52data[:,0:self.ndx,: ,: ,: ].reshape(self.ncases,
                                                                wordsperseason * self.ndx)
-        #krccomm
         k = 4 + self.words_per_krc + 5 * self.nseasons
         krcc = header[:, 4: 4 + self.words_per_krc]
-        # Build a dataframe of the seasonal information
-        seasitems = header[:, 4 + self.words_per_krc: k ].reshape(self.ncases,
-                                                                  len(itemv),
-                                                                  self.nseasons)
-        # Good to HERE.
-        caseidx = np.repeat(np.arange(self.ncases), self.nseasons)
-        seasonidx = np.repeat(np.arange(self.nseasons), self.ncases)
-        flt_seasitems = seasitems.reshape(len(self.vlabels),
-                                          self.ncases * self.nseasons)
-        self.seasitems = pd.DataFrame(flt_seasitems,
-                                      columns=[caseidx,seasonidx],
-                                      index=[self.vlabels])
-        self.seasitems.columns.names = ['Case', 'Season']
-
-        # TODO: Can nlat_include_null be replaced with self.nlats?
         nlat_include_null = len(self.structarr['alat'])
         prelatwords = self.words_per_krc - 2 * nlat_include_null
-        #All latitudes
-        #Hugh made some change to the krcc format, but I cannot find documentation...
-        if self.structdtype == '<f':
-            alllats = krcc[:,prelatwords:].reshape(2, nlat_include_null, ncases)
-        elif self.structdtype == '<d':
-            alllats = krcc[:,96:170].reshape(2, nlat_include_null, self.ncases)
-        self.alllats = alllats
-        #Latitudes and elevations for each case
-        self.latelv = alllats[: ,0: nlat]
-        if self.latelv.shape[-1] == 1:
-            self.latelv = self.latelv[:,:,0]
-        self.pd_latelv = pd.DataFrame(self.latelv.T, columns=['Latitude', 'Elevation'])
-        wordspercase = wordsperseason * self.nseasons # arrshp[1] = nseasons
-        jword = [4,wordspercase, self.ncases]
-
-
-        self.ttt = self.bin52data[: ,self.ndx: ,: ,0: len(itemt),: ].reshape(self.ncases,
-                                                                             self.nseasons,
-                                                                             self.nlats,
-                                                                             len(itemt),
-                                                                             self.nhours)
-        self.hourly2dataframe()
+        wordspercase = wordsperseason * self.nseasons
+
+        # Extract the latitude and elevation metadata
+        latelv2dataframes()
+
+        #Extract the hourly temperature data
+        hourly2dataframe()
+
+        # Extract by layer data from the data cube
         layeritems = self.bin52data[: , self.ndx: , : , 5: 7, : ]
-        self.latitems = layeritems[: ,: ,: ,: ,0: 3].reshape(self.ncases,
-                                                              self.nseasons,
-                                                              self.nlats, len(itemg))
-        self.latitems2dataframe()
-
-        self.ddd = layeritems[: ,: ,: ,: ,3: 3 + transferedlayers].reshape(self.ncases,
-                                                                           self.nseasons,
-                                                                           self.nlats,
-                                                                           len(itemd),
-                                                                           transferedlayers)
-        self.transferedlayers = transferedlayers
-        self.layer2dataframe()
-
-    def latitems2dataframe(self):
-        """
-        Converts Latitude items to a dataframe
-        """
-        idxcount = self.nseasons * self.nlats * self.ncases
-        idxpercase = self.nseasons * self.nlats
-        caseidx = np.empty(idxcount)
-        for c in range(self.ncases):
-            start = c * idxpercase
-            caseidx[start:start + idxpercase] = np.repeat(c, idxpercase)
-        nseasvect = np.arange(self.nseasons)
-        seasonidx = np.repeat(np.arange(self.nseasons), self.nlats)
-        latidx = np.tile(self.latelv[0].ravel(), self.nseasons)
-
-        self.pd_itemg = pd.DataFrame(self.latitems.reshape(self.nseasons * self.nlats, -1).T,
-                                     columns=[caseidx, seasonidx, latidx], index=self.glabels)
-        self.pd_itemg.columns.names = ['Case','Season', 'Latitude']
-
-    def layer2dataframe(self):
-        """
-        Converts layeritems into
-        """
-        self.logger.debug((self.nseasons, self.nlats, self.transferedlayers, self.ncases))
-        idxcount = self.nseasons * self.nlats * self.transferedlayers * self.ncases
-        caseidx = np.empty(idxcount)
-        idxpercase = self.nseasons * self.nlats * self.transferedlayers
-        for c in range(self.ncases):
-            start = c * idxpercase
-            caseidx[start:start + idxpercase] = np.repeat(c, idxpercase)
-
-        seasonidx = np.repeat(np.arange(self.nseasons), idxcount / self.nseasons / self.ncases)
-        nlatidx = np.repeat(self.latelv[0].ravel(), idxcount / self.transferedlayers / self.ncases)
-        tranlayeridx = np.tile(np.repeat(np.arange(self.transferedlayers), self.nlats), self.nseasons)
-        self.pd_ddd = pd.DataFrame(self.ddd.reshape(idxcount, -1).T, index=[self.dlabels], columns=[caseidx, seasonidx, nlatidx, tranlayeridx])
-        self.pd_ddd.columns.names = ['Case', 'Season', 'Latitude', 'Layer']
-
-    def seas2dataframe(self):
-        """
-        converts seasitems 'vvv' vector to a
-        labelled Pandas dataframe
-        """
-
-    def hourly2dataframe(self):
-        """
-        Converts the hourly 'ttt' vector to a
-        labelled Pandas dataframe.
-        """
-        reshapettt = np.swapaxes(self.ttt.reshape(self.ncases * self.nseasons * self.nlats, len(self.tlabels), self.nhours),1,2)
-        shp = reshapettt.shape
-        reshapettt = reshapettt.reshape((shp[0] * shp[1], shp[2])).T
-        #Indices
-        caseidx = np.repeat(np.arange(self.ncases), self.nseasons * self.nlats * self.nhours)
-        seasonidx = np.tile(np.repeat(np.arange(self.nseasons), self.nlats * self.nhours), self.ncases)
-        latidx = np.tile(np.repeat(self.latelv[0].ravel(), self.nhours), self.nseasons)
-        houridx = np.tile(np.tile(np.tile(np.arange(self.nhours), self.nlats), self.nseasons), self.ncases)
-
-        #DataFrame
-        self.pd_ttt = pd.DataFrame(reshapettt.T,
-                                   index=[caseidx, seasonidx, latidx, houridx],
-                                   columns=self.tlabels)
-        self.pd_ttt.index.names = ['Case', 'Season', 'Latitude', 'Hour']
-        self.temperature_data = self.pd_ttt
-        #self.pd_ttt = self.pd_ttt.T
-
-class ReadTds(object):
-    def __init__(self):
-        pass
\ No newline at end of file
+        latitems2dataframe()
+        layer2dataframe()
+
+
+class ReadTds(object):
+
+    def __init__(self, filename, headerlength=512):
+        """
+        Parameters
+        ----------
+        filename : str
+                   The file to read
+
+        headerlength : int
+                       The length, in bytes, of the text header.
+                       Default: 512
+        """
+        # Get or setup the logging object
+        self.logger = logging.getLogger(__name__)
+
+        self.filename = filename
+        self.readbin5(headerlength)
+        print(self.ncases)
+        assert(self.ncases == self.bin52data.shape[0])
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 5ff21ec..61e417d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,7 @@
+import os
 from setuptools import setup, find_packages
 import plio
+from plio.examples import available
 #Grab the README.md for the long description
 with open('README.rst', 'r') as f:
     long_description = f.read()
@@ -8,10 +10,17 @@ with open('README.rst', 'r') as f:
 VERSION = plio.__version__
 
 def setup_package():
-
-    #import plio
-    #print(plio.examples.available())
-
+    examples = set()
+    for i in available():
+        if not os.path.isdir('plio/examples/' + i):
+            if '.' in i:
+                glob_name = 'examples/*.' + i.split('.')[-1]
+            else:
+                glob_name = 'examples/' + i
+        else:
+            glob_name = 'examples/' + i + '/*'
+        examples.add(glob_name)
+
     setup(
         name = "plio",
         version = VERSION,
@@ -24,6 +33,8 @@ def setup_package():
         url = "http://packages.python.org/plio",
         packages=find_packages(),
         include_package_data=True,
+        package_data={'plio' : list(examples) + ['data/*.db', 'data/*.py'] +\
+                               ['sqlalchemy_json/*.py', 'sqlalchemy_json/LICENSE']},
         zip_safe=False,
         install_requires=[
             'gdal>=2',
@@ -46,4 +57,4 @@ def setup_package():
     )
 
 if __name__ == '__main__':
-    setup_package()
\ No newline at end of file
+    setup_package()
--
GitLab
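
Reviewer note: a minimal usage sketch of the pieces this patch touches. It assumes plio is installed with these changes so the example data declared in `package_data` ships with the package; the `my_run.bin52` path is a placeholder for a local KRC bin 52 output file, not something bundled with plio.

```python
import plio
from plio.examples import available, get_path
from plio.io_krc import ReadBin52

# List the example datasets picked up by the new package_data globs in setup.py
print(available())

# get_path() resolves the installed location of a bundled example by name,
# e.g. get_path('some_example_file')  # name is hypothetical

# Read a KRC bin 52 file and build the labelled dataframes via readcase()
kb = ReadBin52('my_run.bin52')   # placeholder path to a local KRC output
kb.readcase(case=0)
print(kb.temperatures.head())    # hourly surface/planetary/atmospheric temperatures
print(kb.latitudes)              # latitude bins for the case
```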