From d10dd5c65c1b2474017cc3273696266699fa8134 Mon Sep 17 00:00:00 2001 From: jlaura <jlaura@asu.edu> Date: Wed, 22 Jun 2016 05:04:08 -0700 Subject: [PATCH] Adds a KRC bin52 reader --- plio/__init__.py | 3 +- plio/io_krc.py | 448 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 450 insertions(+), 1 deletion(-) create mode 100644 plio/io_krc.py diff --git a/plio/__init__.py b/plio/__init__.py index 3431520..7deb7d9 100755 --- a/plio/__init__.py +++ b/plio/__init__.py @@ -5,9 +5,10 @@ from . import isis_serial_number from . import io_controlnetwork from . import io_gdal from . import io_json +from . import io_krc from . import io_yaml from . import io_db from . import io_hdf from . import utils from . import examples -from . import data \ No newline at end of file +from . import data diff --git a/plio/io_krc.py b/plio/io_krc.py new file mode 100644 index 0000000..7d3e518 --- /dev/null +++ b/plio/io_krc.py @@ -0,0 +1,448 @@ +import logging +import re +import warnings + +import numpy as np +import pandas as pd + +try: + import krc +except: + import os + import sys + sys.path.append(os.path.basename('..')) + import krc + +idl2np_dtype = {1: np.byte, 2: np.int16, 3: np.int32, 4: np.float32, + 5: np.float64} +idl2struct = {4: 'f', 5:'d'} +archtype2struct={'sparc': None, 'bigend': '>', 'litend': '<', + 'alpha': None, 'ppc': None, 'x86': None, 'x86_64': None} + + +class ReadBin52(object): + """ + Class to read a bin 52 and organize the output + + Parameters + ---------- + + filename (str) The file to read + + Attributes + ---------- + + filename : str + The file that was parsed + + header : str + The header from the bin52 file + + bin52data : ndarray + RAW bin52 binary read into an n-dim array + + ncases : int + The number of available cases (1 based) + + nlat : int + The number of latitudes + + nhours : int + The number of hours + + nseasons : int + The number of seasons + + ntau : int + The number of opacity (tau) steps + + nalbedos : int + The number of albedo steps + + nslope : int + The number of slope steps + + transferedlayers : int + The number of transfered layers in the model + + tlabels : list + Labels for the hourly tabel + + latelv : ndarray + Array of model latitudes and elevations + + pd_latelv : dataframe + Pandas dataframe reshape of latelv + + ttt : ndarray + Hourly parameters as a vector + + pd_ttt : dataframe + Pandas dataframe of the reshape ttt + + seasitems : ndarray + Array of seasonal parameters (DJU5, Ls, PZREF, TAUD, SUMF) + + pd_seasitems : dataframe + Pandas dataframe reshape of seasitems + + ddd : ndarray + Model temperatures by layer + + pd_ddd : dataframe + Pandas dataframe reshape of ddd + + pd_itemg : dataframe + Pandas dataframe of ggg + """ + + def __init__(self, filename, headerlen=512): + + # Get or setup the logging object + self.logger = logging.getLogger(__name__) + + self.filename = filename + self.readbin5(headerlen) + + assert(self.ncases == self.bin52data.shape[0]) + + def definekrc(self, what='KRC', endianness='<'): + """ + Defines a custom binary data structure for the KRC files. + """ + if what == 'KRC': + numfd = 96 # Size of floats of T-dependent materials + numid = 40 # size of " " integers + numld = 20 # size of " " logicals + maxn1 = 30 # dimension of layers + maxn2 = 384 * 4 # dimensions of times of day + maxn3 = 16 # dimensions of iterations days + maxn4 = self.nlats * 2 - 1 # dimensions of latitudes + maxn5 = 161 # dimensions of seasons + maxn6 = 6 # dimensions of saved years + maxnh = self.nhours # dimensions of saved times of day + maxbot = 6 # dimensions of time divisions + numtit = 20 # number of 4-byte words in TITLE + numday = 5 # number of 4-byte words in DAY + e = endianness + self.logger.debug(self.structdtype) + + #Define the structure of the KRC file + if self.structdtype == '<f': + self._krcstructure = np.dtype([('fd','{}{}f'.format(e, numfd)), + ('id','{}{}i'.format(e, numid)), + ('ld','{}{}i'.format(e, numld)), + ('title','{}{}a'.format(e, 4 * numtit)), + ('daytime','{}{}a'.format(e, 4 * numday)), + ('alat','{}{}f4'.format(e, maxn4)), + ('elev','{}{}f4'.format(e,maxn4))]) + elif self.structdtype == '<d': + self._krcstructure = np.dtype([('fd','{}{}d'.format(e, numfd)), + ('alat','{}{}d'.format(e, maxn4)), + ('elev','{}{}d'.format(e,maxn4) ), + ('id','{}{}i'.format(e, numid)), + ('ld','{}{}i'.format(e, numld)), + ('title','{}{}a'.format(e, 4 * numtit) ), + ('daytime','{}{}a'.format(e, 4 * numday))]) + + def readbin5(self, headerlen): + """ + Reads the type 52 file containing KRC output. + + Tested with KRC version 2.2.2. Note that the output format + can change + + Parameters + ---------- + filename (str) Full PATH to the file + """ + def _parse_header(): + header = re.findall(b'\d+', fullheader.split(b'<<')[0]) + header = list(map(int, header)) + self.ndim = header[0] + self.nhours = header[1] + self.nvariables = header[2] + self.nlats = header[3] + self.nseasons = header[4] - 1 + self.headerlength = header[8] + self.ncases = header[0 + self.ndim] + self.header = header + + # Compute how large each case is + self.casesize = self.nhours + if self.ndim > 1: + for k in range(1, self.ndim - 1): + self.casesize *= header[k + 1] + + def _parse_front(): + # Read the front matter + front = np.fromfile(bin5, dtype=self.structdtype, count=4).astype(np.int) + self.words_per_krc = front[0] + self.ndx = front[2] + + def _read_data(): + bin5.seek(self.headerlength) + self.bin52data = np.fromfile(bin5, + dtype=self.structdtype) + self.logger.debug(len(self.bin52data)) + + indices = arraysize[1: -1] + self.bin52data = self.bin52data.reshape(indices[: : -1]) + + def _get_version(): + ver = re.findall(b'\d+', head) + ver = list(map(int, ver)) + self.version = ver[: 3] + + #Add smarter filename handling + with open(self.filename, 'rb') as bin5: + + #To handle endianness and architectures + archbytes = 8 + c_end = 5 + + fullheader = bin5.read(headerlen) + _parse_header() + + arraysize = self.header[0: self.ndim + 2] + arraydtypecode = arraysize[arraysize[0] + 1] + try: + arraydtype = idl2np_dtype[arraydtypecode] + self.logger.debug("Dtype: ", arraydtype) + except KeyError: + self.logger.error("Unable to determine input datatype.") + + assert(len(self.header) == self.ndim + 4) + + if self.headerlength > 512: + warnings.Warn('Expected header to be 512 bytes, is {} bytes'.format(self.headerlength)) + return + + #Get the endianness of the input file and the data type (32 or 64 bit) + archstart = self.headerlength - (archbytes + c_end) + archend = self.headerlength - c_end + encodingarch = fullheader[archstart: archend].rstrip() + encodingarch = encodingarch.decode() + + self._endianness = archtype2struct[encodingarch] + self.structdtype = self._endianness + idl2struct[arraydtypecode] + + #Get the date and head debug + idx2 = fullheader.find(b'>>') + idx1 = idx2 - 21 + self.date = fullheader[idx1: idx1 + 20] + head = fullheader[idx2 + 2: self.headerlength - (archbytes + c_end) - 3 - idx2] + head = head.rstrip() + + # Parse the header + _get_version() + _parse_front() + _read_data() + + def readcase(self, case=1): + """ + Read a single dimension (case) from a bin52 file. + + Parameters + ----------- + case (int) The case to be extracted + + Returns + ------- + casearr (ndarray) The extracted KRC model for + the case. + """ + + if self.structdtype == '<f': + j = 512 + (1 - 1) * 4 * self.casesize + 16 + elif self.structdtype == '<d': + j = 512 + (1 - 1) * 4 * self.casesize + 32 + with open(self.filename, 'r') as krc: + # Skip to the correct case + krc.seek(j) + self.definekrc('KRC') + structarr = np.fromfile(krc, dtype=self._krcstructure, count=1) + if self.structdtype == '<f': + self.structarr = {'fd': structarr[0][0], + 'id': structarr[0][1], + 'ld': structarr[0][2], + 'title': structarr[0][3], + 'date': structarr[0][4], + 'alat': structarr[0][5], + 'elevation': structarr[0][6] + } + elif self.structdtype == '<d': + self.structarr = {'fd': structarr[0][0], + 'alat': structarr[0][1], + 'elevation': structarr[0][2], + 'id': structarr[0][3], + 'ld': structarr[0][4], + 'title': structarr[0][5], + 'date':structarr[0][6]} + + self.nlayers = nlayers = self.structarr['id'][0] + itemt = ['Final Hourly Surface Temp.', + 'Final Hourly Planetary Temp.', + 'Final Hourly Atmospheric Temp.', + 'Hourly net downward solar flux [W/m^2]', + 'Hourly net downward thermal flux [W/m^2]'] + + itemu = ['Lat', 'Elev'] + itemv = ['Current Julian date (offset from J2000.0)', + 'Seasonal longitude of Sun (in degrees)', + 'Current surface pressure at 0 elevation (in Pascals)', + 'Mean visible opacity of dust, solar wavelengths', + 'Global average columnar mass of frost [kg /m^2]'] + itemd = ['Tmin', 'Tmax'] + itemg = ['# Days to Compute Soln.', + 'RMS Temp. Change on Last Day', + 'Predicted Final Atmospheric Temp.', + 'Predicted frost amount, [kg/m^2]', + 'Frost albedo (at the last time step)', + 'Mean upward heat flow into soil surface on last day, [W/m^2]'] + + self.glabels = itemg + self.tlabels = itemt + self.ulabels = itemu + self.vlabels = itemv + self.dlabels = itemd + + nlat = len(self.structarr['alat']) + transferedlayers = nlayers - 1 + if self.nhours - 3 < transferedlayers: + transferedlayers = self.nhours - 3 + wordsperlat = self.nhours * self.nvariables + wordsperseason = wordsperlat * self.nlats + + if self.nvariables != len(itemt) + len(itemd): + self.logger.error("Error! Size mismatch") + + # Each case has a header that must be extracted: + header = self.bin52data[:,0:self.ndx,: ,: ,: ].reshape(self.ncases, + wordsperseason * self.ndx) + #krccomm + k = 4 + self.words_per_krc + 5 * self.nseasons + krcc = header[:, 4: 4 + self.words_per_krc] + + # Build a dataframe of the seasonal information + seasitems = header[:, 4 + self.words_per_krc: k ].reshape(self.ncases, + len(itemv), + self.nseasons) + # Good to HERE. + caseidx = np.repeat(np.arange(self.ncases), self.nseasons) + seasonidx = np.repeat(np.arange(self.nseasons), self.ncases) + flt_seasitems = seasitems.reshape(len(self.vlabels), + self.ncases * self.nseasons) + self.seasitems = pd.DataFrame(flt_seasitems, + columns=[caseidx,seasonidx], + index=[self.vlabels]) + self.seasitems.columns.names = ['Case', 'Season'] + + # TODO: Can nlat_include_null be replaced with self.nlats? + nlat_include_null = len(self.structarr['alat']) + prelatwords = self.words_per_krc - 2 * nlat_include_null + #All latitudes + #Hugh made some change to the krcc format, but I cannot find documentation... + if self.structdtype == '<f': + alllats = krcc[:,prelatwords:].reshape(2, nlat_include_null, ncases) + elif self.structdtype == '<d': + alllats = krcc[:,96:170].reshape(2, nlat_include_null, self.ncases) + self.alllats = alllats + #Latitudes and elevations for each case + self.latelv = alllats[: ,0: nlat] + if self.latelv.shape[-1] == 1: + self.latelv = self.latelv[:,:,0] + self.pd_latelv = pd.DataFrame(self.latelv.T, columns=['Latitude', 'Elevation']) + wordspercase = wordsperseason * self.nseasons # arrshp[1] = nseasons + jword = [4,wordspercase, self.ncases] + + + self.ttt = self.bin52data[: ,self.ndx: ,: ,0: len(itemt),: ].reshape(self.ncases, + self.nseasons, + self.nlats, + len(itemt), + self.nhours) + self.hourly2dataframe() + + layeritems = self.bin52data[: , self.ndx: , : , 5: 7, : ] + self.latitems = layeritems[: ,: ,: ,: ,0: 3].reshape(self.ncases, + self.nseasons, + self.nlats, len(itemg)) + self.latitems2dataframe() + + self.ddd = layeritems[: ,: ,: ,: ,3: 3 + transferedlayers].reshape(self.ncases, + self.nseasons, + self.nlats, + len(itemd), + transferedlayers) + self.transferedlayers = transferedlayers + self.layer2dataframe() + + def latitems2dataframe(self): + """ + Converts Latitude items to a dataframe + """ + idxcount = self.nseasons * self.nlats * self.ncases + idxpercase = self.nseasons * self.nlats + caseidx = np.empty(idxcount) + for c in range(self.ncases): + start = c * idxpercase + caseidx[start:start+idxpercase] = np.repeat(c, idxpercase) + nseasvect = np.arange(self.nseasons) + seasonidx = np.repeat(np.arange(self.nseasons), self.nlats) + latidx = np.tile(self.latelv[0].ravel(), self.nseasons) + + self.pd_itemg = pd.DataFrame(self.latitems.reshape(self.nseasons * self.nlats, -1).T, + columns=[caseidx, seasonidx, latidx], index=self.glabels) + self.pd_itemg.columns.names = ['Case','Season', 'Latitude'] + + def layer2dataframe(self): + """ + Converts layeritems into + """ + self.logger.debug((self.nseasons, self.nlats, self.transferedlayers, self.ncases)) + idxcount = self.nseasons * self.nlats * self.transferedlayers * self.ncases + caseidx = np.empty(idxcount) + idxpercase = self.nseasons * self.nlats * self.transferedlayers + for c in range(self.ncases): + start = c * idxpercase + caseidx[start:start + idxpercase] = np.repeat(c, idxpercase) + + seasonidx = np.repeat(np.arange(self.nseasons), idxcount / self.nseasons / self.ncases) + nlatidx = np.repeat(self.latelv[0].ravel(), idxcount / self.transferedlayers / self.ncases) + tranlayeridx = np.tile(np.repeat(np.arange(self.transferedlayers), self.nlats), self.nseasons) + self.pd_ddd = pd.DataFrame(self.ddd.reshape(idxcount, -1).T, index=[self.dlabels], columns=[caseidx, seasonidx, nlatidx, tranlayeridx]) + self.pd_ddd.columns.names = ['Case', 'Season', 'Latitude', 'Layer'] + + def seas2dataframe(self): + """ + converts seasitems 'vvv' vector to a + labelled Pandas dataframe + """ + + + def hourly2dataframe(self): + """ + Converts the hourly 'ttt' vector to a + labelled Pandas dataframe. + """ + reshapettt = np.swapaxes(self.ttt.reshape(self.ncases * self.nseasons * self.nlats, len(self.tlabels), self.nhours),1,2) + shp = reshapettt.shape + reshapettt = reshapettt.reshape((shp[0] * shp[1], shp[2])).T + #Indices + caseidx = np.repeat(np.arange(self.ncases), self.nseasons * self.nlats * self.nhours) + seasonidx = np.tile(np.repeat(np.arange(self.nseasons), self.nlats * self.nhours), self.ncases) + latidx = np.tile(np.repeat(self.latelv[0].ravel(), self.nhours), self.nseasons) + houridx = np.tile(np.tile(np.tile(np.arange(self.nhours), self.nlats), self.nseasons), self.ncases) + + #DataFrame + self.pd_ttt = pd.DataFrame(reshapettt.T, + index=[caseidx, seasonidx, latidx, houridx], + columns=self.tlabels) + self.pd_ttt.index.names = ['Case', 'Season', 'Latitude', 'Hour'] + self.temperature_data = self.pd_ttt + #self.pd_ttt = self.pd_ttt.T + +class ReadTds(object): + def __init__(self): + pass \ No newline at end of file -- GitLab