From 7353260bb0ee766535ca1fb692f8b9d5cca32b7b Mon Sep 17 00:00:00 2001 From: jlaura <jlaura@usgs.gov> Date: Wed, 18 Apr 2018 19:48:19 -0700 Subject: [PATCH] Moves PySAT data structures into plio (#31) * Makes gdal an optional dependency * Refactors all bae I/O into a single namespace. * typos * Fixes the gdal dependency test * Adds tests for the JSON * Tests fail without test data * Updates for comments from @thareUSGS * Adds ISIS3 V2 ControlNetwork Reader * Adds hcube mixin object --- plio/io/hcube.py | 75 ++++++++++++++++++++++++++++ plio/io/io_crism.py | 32 ++++++++++++ plio/io/io_moon_minerology_mapper.py | 41 ++++++++------- plio/utils/indexing.py | 73 +++++++++++++++++++++++++++ plio/utils/utils.py | 12 +++++ 5 files changed, 216 insertions(+), 17 deletions(-) create mode 100644 plio/io/hcube.py create mode 100644 plio/io/io_crism.py create mode 100644 plio/utils/indexing.py diff --git a/plio/io/hcube.py b/plio/io/hcube.py new file mode 100644 index 0000000..9b41a71 --- /dev/null +++ b/plio/io/hcube.py @@ -0,0 +1,75 @@ +import numpy as np +import gdal + +class HCube(object): + """ + A Mixin class for use with the io_gdal.GeoDataset class + to optionally add support for spectral labels, label + based indexing, and lazy loading for reads. + """ + + @property + def wavelengths(self): + if not hasattr(self, '_wavelengths'): + try: + info = gdal.Info(self.file_name, format='json') + wavelengths = [float(j) for i, j in sorted(info['metadata'][''].items(), + key=lambda x: float(x[0].split('_')[-1]))] + self._original_wavelengths = wavelengths + self._wavelengths = np.round(wavelengths, self.tolerance) + except: + self._wavelengths = [] + return self._wavelengths + + @property + def tolerance(self): + return getattr(self, '_tolerance', 2) + + @tolerance.setter + def tolerance(self, val): + wv = self.wavelengths + if isinstance(val, int): + self._tolerance = val + self._reindex() + else: + raise TypeError + + def _reindex(self): + if self._original_wavelengths is not None: + self._wavelengths = np.round(self._original_wavelengths, decimals=self.tolerance) + + def __getitem__(self, key): + return _iLocIndexer(self) + + @property + def loc(self): + return _LocIndexer(self) + + @property + def iloc(self): + return _iLocIndexer(self) + + def _read(self, key): + ifnone = lambda a, b: b if a is None else a + + y = key[1] + x = key[2] + + if isinstance(x, slice): + xstart = ifnone(x.start,0) + xstop = ifnone(x.stop,self.raster_size[0]) + xstep = xstop - xstart + else: + raise TypeError("Loc style access elements must be slices, e.g., [:] or [10:100]") + if isinstance(y, slice): + ystart = ifnone(y.start, 0) + ystop = ifnone(y.stop, self.raster_size[1]) + ystep = ystop - ystart + else: + raise TypeError("Loc style access elements must be slices, e.g., [:] or [10:100]") + + pixels = (xstart, ystart, xstop, ystop) + arrs = [] + for b in key[0]: + arrs.append(self.read_array(band=int(b+1), pixels=pixels)) + return np.stack(arrs) \ No newline at end of file diff --git a/plio/io/io_crism.py b/plio/io/io_crism.py new file mode 100644 index 0000000..415ec47 --- /dev/null +++ b/plio/io/io_crism.py @@ -0,0 +1,32 @@ +import os +import numpy as np +from .io_gdal import GeoDataset +from .hcube import HCube +import gdal + + +class Crism(GeoDataset, HCube): + """ + An M3 specific reader with the spectral mixin. + """ + @property + def wavelengths(self): + if not hasattr(self, '_wavelengths'): + try: + info = gdal.Info(self.file_name, format='json') + wv = dict((k,v) for (k,v) in info['metadata'][''].items() if 'Band' in k) # Only get the 'Band_###' keys + wavelengths = [float(j.split(" ")[0]) for i, j in sorted(wv.items(), + key=lambda x: int(x[0].split('_')[-1]))] + self._original_wavelengths = wavelengths + self._wavelengths = np.round(wavelengths, self.tolerance) + except: + self._wavelengths = [] + return self._wavelengths + +def open(input_data): + if os.path.splitext(input_data)[-1] == 'hdr': + # GDAL wants the img, but many users aim at the .hdr + input_data = os.path.splitext(input_data)[:-1] + '.img' + ds = Crism(input_data) + + return ds \ No newline at end of file diff --git a/plio/io/io_moon_minerology_mapper.py b/plio/io/io_moon_minerology_mapper.py index 6a8ad1e..3093c2c 100644 --- a/plio/io/io_moon_minerology_mapper.py +++ b/plio/io/io_moon_minerology_mapper.py @@ -1,24 +1,31 @@ +import os import numpy as np from .io_gdal import GeoDataset +from .hcube import HCube +import gdal +class M3(GeoDataset, HCube): + """ + An M3 specific reader with the spectral mixin. + """ + @property + def wavelengths(self): + if not hasattr(self, '_wavelengths'): + try: + info = gdal.Info(self.file_name, format='json') + wavelengths = [float(j) for i, j in sorted(info['metadata'][''].items(), + key=lambda x: float(x[0].split('_')[-1]))] + self._original_wavelengths = wavelengths + self._wavelengths = np.round(wavelengths, self.tolerance) + except: + self._wavelengths = [] + return self._wavelengths + def open(input_data): - if input_data.split('.')[-1] == 'hdr': + if os.path.splitext(input_data)[-1] == 'hdr': # GDAL wants the img, but many users aim at the .hdr - input_data = input_data.split('.')[0] + '.img' - ds = GeoDataSet(input_data) - ref_array = ds.read_array() - metadata = ds.metadata - wv_array = metadatatoband(metadata) - return wv_array, ref_array, ds + input_data = os.path.splitext(input_data)[:-1] + '.img' + ds = M3(input_data) -def metadatatoband(metadata): - wv2band = [] - for k, v in metadata.items(): - try: - wv2band.append(float(v)) - except: - v = v.split(" ")[-1].split("(")[1].split(")")[0] - wv2band.append(float(v)) - wv2band.sort(key=int) - return np.asarray(wv2band) + return ds diff --git a/plio/utils/indexing.py b/plio/utils/indexing.py new file mode 100644 index 0000000..50fa17a --- /dev/null +++ b/plio/utils/indexing.py @@ -0,0 +1,73 @@ +import warnings +import numpy as np + +def is_dict_like(value): + return hasattr(value, 'keys') and hasattr(value, '__getitem__') + +def expanded_indexer(key, ndim): + """Given a key for indexing an ndarray, return an equivalent key which is a + tuple with length equal to the number of dimensions. + The expansion is done by replacing all `Ellipsis` items with the right + number of full slices and then padding the key with full slices so that it + reaches the appropriate dimensionality. + """ + if not isinstance(key, tuple): + # numpy treats non-tuple keys equivalent to tuples of length 1 + key = (key,) + new_key = [] + # handling Ellipsis right is a little tricky, see: + # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing + found_ellipsis = False + for k in key: + if k is Ellipsis: + if not found_ellipsis: + new_key.extend((ndim + 1 - len(key)) * [slice(None)]) + found_ellipsis = True + else: + new_key.append(slice(None)) + else: + new_key.append(k) + if len(new_key) > ndim: + raise IndexError('too many indices') + new_key.extend((ndim - len(new_key)) * [slice(None)]) + return tuple(new_key) + +class _LocIndexer(object): + def __init__(self, data_array): + self.data_array = data_array + + def __getitem__(self, key): + # expand the indexer so we can handle Ellipsis + key = expanded_indexer(key, 3) + sl = key[0] + ifnone = lambda a, b: b if a is None else a + if isinstance(sl, slice): + sl = list(range(ifnone(sl.start, 0), self.data_array.nbands, ifnone(sl.step, 1))) + + idx = [self._get_idx(s) for s in sl] + key = (idx, key[1], key[2]) + return self.data_array._read(key) + + def _get_idx(self, value, tolerance=2): + vals = np.abs(self.data_array.wavelengths-value) + minidx = np.argmin(vals) + if vals[minidx] >= tolerance: + warning.warn("Absolute difference between requested value and found values is {}".format(vals[minidx])) + return minidx + +class _iLocIndexer(object): + def __init__(self, data_array): + self.data_array = data_array + + def __getitem__(self, key): + # expand the indexer so we can handle Ellipsis + key = expanded_indexer(key, 3) + sl = key[0] + ifnone = lambda a, b: b if a is None else a + if isinstance(sl, slice): + sl = list(range(ifnone(sl.start, 0), + ifnone(sl.stop, self.data_array.nbands), + ifnone(sl.step, 1))) + + key = (key[0], key[1], key[2]) + return self.data_array._read(key) \ No newline at end of file diff --git a/plio/utils/utils.py b/plio/utils/utils.py index 1cec0df..2215f75 100644 --- a/plio/utils/utils.py +++ b/plio/utils/utils.py @@ -7,6 +7,18 @@ import shutil import tempfile import pandas as pd +import numpy as np + +def metadatatoband(metadata): + wv2band = [] + for k, v in metadata.items(): + try: + wv2band.append(float(v)) + except: + v = v.split(" ")[-1].split("(")[1].split(")")[0] + wv2band.append(float(v)) + wv2band.sort(key=int) + return np.asarray(wv2band) def create_dir(basedir=''): """ -- GitLab