Skip to content
Snippets Groups Projects
Commit e97b9581 authored by Adam Paquette's avatar Adam Paquette
Browse files

Added new plio io modules from PySAT.

parent a6b9d17b
No related branches found
No related tags found
No related merge requests found
File added
......@@ -3,7 +3,7 @@ import plio
__all__ = ['available', 'get_path']
#Used largely unmodified from:
# Used largely unmodified from:
# https://github.com/pysal/pysal/blob/master/pysal/examples/__init__.py
base = os.path.split(plio.__file__)[0]
......
import os
import numpy as np
import pandas as pd
def _read_label_line(handle):
    """Return the next label line as text with CR/LF characters removed.

    Raises
    ------
    ValueError
        If the end of the file has been reached, so that a label lacking the
        expected terminator cannot trap the caller in an endless loop.
    """
    raw = handle.readline()
    if not raw:
        raise ValueError('Reached the end of the file before the end of the CCAM_LIBS_TABLE object.')
    return str(raw, 'utf-8').replace('\r', '').replace('\n', '')


def EDR(input_file, n_channels=6444):
    """
    Read an EDR file (text label followed by binary spectra) into a DataFrame.

    The label is scanned line by line for the keywords RECORD_BYTES,
    LABEL_RECORDS, SPACECRAFT_CLOCK_START_COUNT, SEQUENCE_ID,
    INSTRUMENT_FOCUS_DISTANCE, INSTRUMENT_TEMPERATURE (plus the name list that
    follows it) and the CCAM_LIBS_DATA_CONTAINER entry, until the line
    ``END_OBJECT = CCAM_LIBS_TABLE`` is found.  The spectra are then read as
    unsigned big-endian 16-bit integers.

    Parameters
    ----------
    input_file : str
        Path of the EDR file to read.
    n_channels : int
        Number of 16-bit values stored per shot (default 6444).

    Returns
    -------
    sp : pandas.DataFrame
        One row per shot (index starting at 1).  Columns are a two level
        MultiIndex: ``('channel', 1..n_channels)`` holds the spectra and
        ``('meta', ...)`` holds EDR_file, Spacecraft_Clock, Shot, SeqID,
        Focus_Distance and one ``<name>_temp`` column per instrument
        temperature.

    Raises
    ------
    ValueError
        If the file ends before the label terminator is found, or if it holds
        fewer data bytes than the label announces.
    """
    # Binary mode: the text label is followed by binary data in the same file.
    with open(input_file, 'rb') as f:
        while True:
            line = _read_label_line(f).split('=')  # [keyword, value] when '=' is present
            keyword = line[0]
            if 'RECORD_BYTES' in keyword:
                rbytes = int(line[1])
            if 'LABEL_RECORDS' in keyword:
                lrecs = int(line[1])
            if 'SPACECRAFT_CLOCK_START_COUNT' in keyword:
                # keep only the part of the clock count before the '.'
                sclock = int(line[1].replace('"', '').split('.')[0])
            if 'SEQUENCE_ID' in keyword:
                seqID = line[1].replace('"', '')
            if 'INSTRUMENT_FOCUS_DISTANCE' in keyword:
                focus_dist = int(line[1])
            if 'INSTRUMENT_TEMPERATURE' in keyword:
                # The temperature list continues over the next three lines.
                temps_text = line[1] + ''.join(_read_label_line(f) for _ in range(3))
                for junk in ('<degC>', '(', ')', ' '):
                    temps_text = temps_text.replace(junk, '')
                instrument_temps = [float(i) for i in temps_text.split(',')]
                # The matching name list starts on the next line and continues
                # over four more lines.
                names_text = _read_label_line(f).split('=')[1]
                names_text += ''.join(_read_label_line(f) for _ in range(4))
                for junk in (' ', '(', ')', '"'):
                    names_text = names_text.replace(junk, '')
                instrument_temps_name = names_text.split(',')
                f.readline()  # one further label line is skipped unread
            try:
                if 'CCAM_LIBS_DATA_CONTAINER' in line[1]:
                    # The two lines after the container entry give the number
                    # of shots and the (1-based) start byte of the data.
                    nshots = int(_read_label_line(f).split('=')[1])
                    start_byte = int(_read_label_line(f).split('=')[1])
                if 'END_OBJECT' in keyword and 'CCAM_LIBS_TABLE' in line[1]:
                    break
            except (IndexError, ValueError):
                # Lines without '=' (or with a non-numeric value) are not the
                # ones we are looking for; keep scanning.
                continue

        # Skip the label records, then move to the first data byte.
        f.seek(lrecs * rbytes + start_byte - 1, 0)
        n_bytes = 2 * nshots * n_channels
        raw = f.read(n_bytes)

    if len(raw) != n_bytes:
        raise ValueError('Expected %d bytes of spectra but only %d could be read.' % (n_bytes, len(raw)))

    # Unsigned big-endian 16-bit values, one row per shot.
    spectra = np.frombuffer(raw, dtype='>u2').reshape(nshots, n_channels).astype('int')

    cols = pd.MultiIndex.from_tuples([('channel', i) for i in range(1, n_channels + 1)])
    sp = pd.DataFrame(spectra, columns=cols, index=np.arange(1, nshots + 1))
    sp[('meta', 'EDR_file')] = os.path.basename(input_file)
    sp[('meta', 'Spacecraft_Clock')] = sclock
    sp[('meta', 'Shot')] = sp.index
    sp[('meta', 'SeqID')] = seqID
    sp[('meta', 'Focus_Distance')] = focus_dist
    for ind, name in enumerate(instrument_temps_name):
        sp[('meta', name + '_temp')] = instrument_temps[ind]
    # NOTE(review): this writes 'test.csv' into the current working directory
    # on every call; it looks like a debugging leftover -- confirm and remove.
    sp.to_csv('test.csv')
    return sp
import numpy as np
from osgeo import gdal
def openm3(input_data):
    """
    Open an image with GDAL and return its wavelengths, first band and dataset.

    Parameters
    ----------
    input_data : str
        Path to the image.  A path ending in ``.hdr`` is redirected to the
        ``.img`` file with the same base name before it is opened.

    Returns
    -------
    wv_array : ndarray
        Sorted values parsed from the dataset metadata by ``metadatatoband``.
    ref_array : ndarray
        Contents of raster band 1.
    ds : object
        The dataset returned by ``gdal.Open``.
    """
    import os  # local import so the block does not depend on file-level imports

    # GDAL wants the img, but many users aim at the .hdr.  splitext only
    # strips the final extension, so dots elsewhere in the path are preserved.
    root, ext = os.path.splitext(input_data)
    if ext == '.hdr':
        input_data = root + '.img'

    ds = gdal.Open(input_data)
    ref_array = ds.GetRasterBand(1).ReadAsArray()
    metadata = ds.GetMetadata()
    wv_array = metadatatoband(metadata)
    return wv_array, ref_array, ds
def metadatatoband(metadata):
    """
    Convert the values of a metadata mapping into a sorted array of floats.

    Parameters
    ----------
    metadata : dict
        Mapping whose values are either directly convertible to float or
        strings whose last space-separated token has the form ``(<number>)``.

    Returns
    -------
    ndarray
        The parsed values in ascending order.
    """
    wv2band = []
    for v in metadata.values():
        try:
            wv2band.append(float(v))
        except ValueError:
            # Not a bare number: take the text between the parentheses of the
            # last space-separated token.
            v = v.split(" ")[-1].split("(")[1].split(")")[0]
            wv2band.append(float(v))
    # Sort on the full float value; sorting on int(value) would leave values
    # that share an integer part in arbitrary order.
    wv2band.sort()
    return np.asarray(wv2band)
import numpy as np
from osgeo import gdal
def openmi(input_data):
    """
    Open an image with GDAL and return a subsampled view of its first band.

    Parameters
    ----------
    input_data : str
        Path passed to ``gdal.Open``.

    Returns
    -------
    wv_array : None
        Always None; no wavelength information is read here.
    ref_array : ndarray
        Raster band 1, keeping every third row and every third column.
    ds : object
        The dataset returned by ``gdal.Open``.
    """
    ds = gdal.Open(input_data)
    ref_array = ds.GetRasterBand(1).ReadAsArray()
    wv_array = None
    return wv_array, ref_array[::3, ::3], ds
def getspectra(x, y, ds):
    """
    Read a single-pixel value from every band and merge bands 5 and 6.

    Parameters
    ----------
    x, y : int
        Pixel offsets; they are handed to ``ReadAsArray`` as ``(y, x, 1, 1)``.
    ds : object
        Dataset exposing ``RasterCount`` and ``GetRasterBand``.

    Returns
    -------
    mergedref : ndarray
        Array of length ``RasterCount - 1``: the first four band values, the
        mean of the fifth and sixth, then the remaining band values.
    """
    nbands = ds.RasterCount
    reflectance = np.empty(nbands)
    for b in range(1, nbands + 1):
        pixel = ds.GetRasterBand(b).ReadAsArray(y, x, 1, 1)
        # Extract the single value explicitly instead of relying on NumPy's
        # deprecated implicit conversion of a (1, 1) array to a scalar.
        reflectance[b - 1] = np.asarray(pixel).item()

    mergedref = np.empty(nbands - 1)
    mergedref[:4] = reflectance[:4]
    mergedref[4] = (reflectance[4] + reflectance[5]) / 2
    mergedref[5:] = reflectance[6:]
    return mergedref
try:
import yaml
except:
print('YAML package not installed, disabling yaml_io module')
import yaml
def read_yaml(inputfile):
......@@ -21,6 +18,6 @@ def read_yaml(inputfile):
try:
with open(inputfile, 'r') as f:
ydict = yaml.load(f)
except: # pragma: no cover
except: # pragma: no cover
raise IOError('Unable to load YAML file.')
return ydict
import os
import sys
import unittest

# Put the parent of the current working directory at the front of the import
# path (presumably so the package resolves when the tests are run from inside
# the tests directory -- confirm).
sys.path.insert(0, os.path.abspath('..'))

from plio.examples import get_path
from plio.io import io_edr
class Test_Tes_IO(unittest.TestCase):
    """Smoke test: the example EDR file can be read by ``io_edr.EDR``."""

    # Need different test data or need to modify the current code

    def setUp(self):
        """Resolve the path of the bundled example file."""
        example_name = 'cl5_398736801edr_f0030004ccam01014m1.dat'
        self.examplefile = get_path(example_name)

    def test_open(self):
        """Reading the example file must not raise."""
        io_edr.EDR(self.examplefile)


# Run the tests when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()
import unittest
from .. import io_json
from .. import io_yaml
from plio.io import io_json
from plio.io import io_yaml
try:
import yaml
......
......@@ -5,6 +5,7 @@ import os
import fnmatch
import shutil
import tempfile
import pandas as pd
def create_dir(basedir=''):
......@@ -164,3 +165,31 @@ def xstr(s):
if s is None:
return ''
return str(s)
def _read_lookup_csv(path, sep, skiprows):
    """Read one lookup table, skipping malformed rows on any pandas version."""
    try:
        return pd.read_csv(path, sep=sep, skiprows=skiprows, on_bad_lines='warn')
    except TypeError:
        # Older pandas does not know ``on_bad_lines``; use the legacy keyword.
        return pd.read_csv(path, sep=sep, skiprows=skiprows, error_bad_lines=False)


def lookup(df, lookupfile=None, lookupdf=None, sep=',', skiprows=1, left_on='sclock', right_on='Spacecraft Clock'):
    """
    Add columns from one or more lookup tables to the 'meta' columns of df.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame with two-level columns that include a 'meta' group.
    lookupfile : str or iterable of str, optional
        Path(s) of CSV lookup tables.  Several tables are concatenated
        (mostly to handle the three different master lists for chemcam).
    lookupdf : pandas.DataFrame, optional
        Lookup table that is already loaded.  If ``lookupfile`` is given as
        well, the tables read from file are appended to it.
    sep : str
        Field separator of the CSV files.
    skiprows : int
        Number of leading rows to skip in each CSV file.
    left_on : str
        Column of ``df['meta']`` to match on.
    right_on : str
        Column of the lookup table to match on.

    Returns
    -------
    pandas.DataFrame
        The new ('meta', ...) columns followed by the columns of ``df``.
    """
    # TODO: automatically determine the number of rows to skip to handle ccam internal master list and PDS "official" master list formats
    if lookupfile is not None:
        if isinstance(lookupfile, str):
            # A bare path would otherwise be iterated character by character.
            lookupfile = [lookupfile]
        frames = [] if lookupdf is None else [lookupdf]
        frames.extend(_read_lookup_csv(path, sep, skiprows) for path in lookupfile)
        if frames:
            lookupdf = pd.concat(frames)

    existing = set(df['meta'].columns.values)
    metadata = df['meta'].merge(lookupdf, left_on=left_on, right_on=right_on, how='left')

    # Remove metadata columns that already exist in the data frame to avoid
    # non-unique columns; the merged column order is kept so the result is
    # deterministic.
    meta_cols_keep = [col for col in metadata.columns if col not in existing]
    metadata = metadata[meta_cols_keep]

    # make metadata into a multiindex
    metadata.columns = pd.MultiIndex.from_arrays([['meta'] * len(meta_cols_keep), meta_cols_keep])
    # Give it the same indices as the df.
    # NOTE(review): this requires the merge to return exactly one row per row
    # of df, i.e. no duplicated keys in the lookup table -- confirm.
    metadata.index = df.index
    # combine the df and the new metadata
    return pd.concat([metadata, df], axis=1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment