diff --git a/.travis.yml b/.travis.yml index 6d32e5b97e30cb8e7fe3027e34e79295a8f57f93..55ccfa9d41cc9f728e700d949665db6ab1152bc5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ branches: os: - linux - osx - + env: - PYTHON_VERSION=3.5 - PYTHON_VERSION=3.6 @@ -30,27 +30,26 @@ before_install: # Create the env - conda create -q -n test python=$PYTHON_VERSION - source activate test - + # https://github.com/travis-ci/travis-ci/issues/8982 + - python -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)" + install: - conda config --add channels conda-forge - - conda config --add channels jlaura - - conda install -c conda-forge gdal h5py - - conda install pandas sqlalchemy pyyaml networkx affine protobuf - - pip install pvl + - conda install -q gdal h5py pandas sqlalchemy pyyaml networkx affine protobuf scipy pvl # Development installation - - conda install pytest pytest-cov sh anaconda-client - + - conda install -q pytest pytest-cov sh + script: - pytest --cov=plio after_success: - coveralls # Need to do the build in the root - - source deactivate - - conda install conda-build anaconda-client + - source deactivate + - conda install -q conda-build anaconda-client - conda config --set anaconda_upload yes - - conda build --token $CONDA_UPLOAD_TOKEN --python $PYTHON_VERSION recipe + - conda build --token $CONDA_UPLOAD_TOKEN --python $PYTHON_VERSION recipe -q notifications: webhooks: diff --git a/appveyor.yml b/appveyor.yml index 33c1b9993c7c0969e82a690c9b30c4037462256b..c3ad46ae265e837379b7870c58c43f0c54dbec56 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,73 +1,43 @@ -environment: - - CONDA_INSTALL_LOCN: "C:\\conda" +branches: + only: + - master - # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the - # /E:ON and /V:ON options are not enabled in the batch script intepreter - # See: http://stackoverflow.com/a/13751649/163740 - CMD_IN_ENV: "cmd /E:ON /V:ON /C obvci_appveyor_python_build_env.cmd" - - # We set a default Python version for the miniconda that is to be installed. This can be - # overridden in the matrix definition where appropriate. - CONDA_PY: "27" +version: '0.1.0.{build}' +environment: matrix: + - PYTHON: "C:\\Miniconda35-x64\\Scripts\\activate.bat" + PYTHON_VERSION: 3.5 + - PYTHON: "C:\\Miniconda36-x64\\Scripts\\activate.bat" + PYTHON_VERSION: 3.6 - - TARGET_ARCH: x64 - CONDA_PY: 35 - -# We always use a 64-bit machine, but can build x86 distributions -# with the TARGET_ARCH variable. platform: - - x64 - -install: - # If there is a newer build queued for the same PR, cancel this one. - # The AppVeyor 'rollout builds' option is supposed to serve the same - # purpose but it is problematic because it tends to cancel builds pushed - # directly to master instead of just PR builds (or the converse). - # credits: JuliaLang developers. - - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` - https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` - Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` - throw "There are newer queued builds for this pull request, failing early." } + - x64 - # Cywing's git breaks conda-build. (See https://github.com/conda-forge/conda-smithy-feedstock/pull/2.) 
- - cmd: rmdir C:\cygwin /s /q - - appveyor DownloadFile "https://raw.githubusercontent.com/pelson/Obvious-CI/master/bootstrap-obvious-ci-and-miniconda.py" - - cmd: python bootstrap-obvious-ci-and-miniconda.py %CONDA_INSTALL_LOCN% %TARGET_ARCH% %CONDA_PY:~0,1% --without-obvci - - cmd: set PATH=%CONDA_INSTALL_LOCN%;%CONDA_INSTALL_LOCN%\scripts;%PATH% - - cmd: set PYTHONUNBUFFERED=1 - - - cmd: conda config --set show_channel_urls true - - cmd: conda install --yes python=3.5 - - cmd: conda install -c pelson/channel/development --yes --quiet obvious-ci - - cmd: conda config --add channels conda-forge - - cmd: conda info - - cmd: conda install -n root --quiet --yes conda-build anaconda-client jinja2 setuptools - # Workaround for Python 3.4 and x64 bug in latest conda-build. - # FIXME: Remove once there is a release that fixes the upstream issue - # ( https://github.com/conda/conda-build/issues/895 ). - - cmd: if "%TARGET_ARCH%" == "x64" if "%CONDA_PY%" == "34" conda install conda-build=1.20.0 --yes - - # Now install the pacakge dependencies - - cmd: conda config --add channels conda-forge - - cmd: conda config --add channels jlaura - - cmd: conda install --yes -c conda-forge gdal h5py - - cmd: conda install --yes pandas sqlalchemy pyyaml networkx affine - - cmd: conda install --yes -c jlaura protobuf pvl - - # Development installation - - cmd: conda install --yes pytest pytest-cov - - cmd: pip install coveralls - -# Skip .NET project specific build phase. -build: off +configuration: + - Release +install: + - cmd: call %PYTHON% + - cmd: conda config --set always_yes yes --set changeps1 no + - cmd: conda update -q conda + - cmd: conda install conda-build anaconda-client + - cmd: conda create -q -n test_env python=%PYTHON_VERSION% + - cmd: activate test_env + - cmd: conda config --add channels conda-forge + - cmd: conda install -c conda-forge pvl protobuf gdal numpy pandas sqlalchemy pyyaml networkx affine h5py scipy + - cmd: conda install pytest-cov + # https://pythonhosted.org/CodeChat/appveyor.yml.html + - cmd: python -m pip install -U pip + - cmd: python -m easy_install -U setuptools + +build_script: + - cmd: python setup.py install + test_script: - - cmd: pytest --cov=plio --ignore=plio/examples - - "%CMD_IN_ENV% conda build conda --quiet" - -deploy_script: + - cmd: pytest plio/ - - 'python ci_support\upload_or_check_non_existence.py .\conda jlaura --channel=main' +on_success: + - cmd: deactivate + - cmd: conda config --set anaconda_upload yes + - cmd: conda build --token %CONDA_UPLOAD_TOKEN% . 
diff --git a/plio/examples/__init__.py b/plio/examples/__init__.py
index 405e9c68f4fc296fa3748d681b97f9bbd1ce6eea..68f159dfefaa1a775abba91ce5c61128ca9b448b 100644
--- a/plio/examples/__init__.py
+++ b/plio/examples/__init__.py
@@ -3,7 +3,7 @@ import plio
 
 __all__ = ['available', 'get_path']
 
-#Used largely unmodififed from:
+# Used largely unmodified from:
 # https://github.com/pysal/pysal/blob/master/pysal/examples/__init__.py
 
 base = os.path.split(plio.__file__)[0]
diff --git a/plio/io/io_ccam_pds.py b/plio/io/io_ccam_pds.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a9a7679058af74d79a53518942aef03ae519e9c
--- /dev/null
+++ b/plio/io/io_ccam_pds.py
@@ -0,0 +1,201 @@
+# This code is used to read individual ChemCam files
+# Header data is stored as attributes of the data frame
+# White space is stripped from the column names
+import os
+
+import numpy as np
+import pandas as pd
+import scipy.io as io
+
+from plio.utils.utils import lookup
+from plio.utils.utils import file_search
+
+
+def CCAM_CSV(input_data, ave=True):
+    try:
+        df = pd.read_csv(input_data, header=14, engine='c')
+        cols = list(df.columns.values)
+        df.columns = [i.strip().replace('# ', '') for i in cols]  # strip whitespace from column names
+        df.set_index(['wave'], inplace=True)  # use wavelengths as indices
+        # read the file header and put information into the dataframe as new columns
+        metadata = pd.read_csv(input_data, sep='=', nrows=14, comment=',', engine='c', index_col=0, header=None)
+    except:
+        try:  # handle files with an extra header row containing temperature
+            df = pd.read_csv(input_data, header=15, engine='c')
+            cols = list(df.columns.values)
+            df.columns = [i.strip().replace('# ', '') for i in cols]  # strip whitespace from column names
+            df.set_index(['wave'], inplace=True)  # use wavelengths as indices
+            # read the file header and put information into the dataframe as new columns
+            metadata = pd.read_csv(input_data, sep='=', nrows=15, comment=',', engine='c', index_col=0, header=None)
+        except:  # handle files with an extra header row containing temperature and target name
+            df = pd.read_csv(input_data, header=16, engine='c')
+            cols = list(df.columns.values)
+            df.columns = [i.strip().replace('# ', '') for i in cols]  # strip whitespace from column names
+            df.set_index(['wave'], inplace=True)  # use wavelengths as indices
+            # read the file header and put information into the dataframe as new columns
+            metadata = pd.read_csv(input_data, sep='=', nrows=16, comment=',', engine='c', index_col=0, header=None)
+
+    if ave:
+        df = pd.DataFrame(df['mean'])
+    else:
+        df = df.drop(['mean', 'median'], axis=1)
+    df.index = [['wvl'] * len(df.index),
+                df.index.values.round(4)]  # create multiindex so spectra can be easily extracted with a single key
+    df = df.T  # transpose so that each spectrum is a row
+
+    # remove extraneous stuff from the metadata indices
+    metadata.index = [i.strip().strip('# ').replace(' FLOAT', '').lower() for i in metadata.index.values]
+    metadata = metadata.T
+
+    # extract info from the file name
+    fname = os.path.basename(input_data)
+    metadata['sclock'] = fname[4:13]
+    metadata['seqid'] = fname[25:34].upper()
+    metadata['Pversion'] = fname[34:36]
+
+    # duplicate the metadata for each row in the df
+    if not ave:
+        metadata = metadata.append([metadata] * (len(df.index) - 1), ignore_index=True)
+    metadata.index = df.index  # make the indices match
+    metadata.columns = [['meta'] * len(metadata.columns), metadata.columns.values]  # make the columns into multiindex
+    df = 
pd.concat([metadata, df], axis=1) # combine the spectra with the metadata + return df + + +def CCAM_SAV(input_data, ave=True): + # read the IDL .SAV file + + data = io.readsav(input_data, python_dict=True) + + # put the spectra into data frames and combine them + df_UV = pd.DataFrame(data['uv'], index=data['defuv']) + df_VIS = pd.DataFrame(data['vis'], index=data['defvis']) + df_VNIR = pd.DataFrame(data['vnir'], index=data['defvnir']) + df_spect = pd.concat([df_UV, df_VIS, df_VNIR]) + df_spect.columns = ['shot' + str(i + 1) for i in + df_spect.columns] # add 1 to the columns so they correspond to shot number + + df_aUV = pd.DataFrame(data['auv'], index=data['defuv'], columns=['average']) + df_aVIS = pd.DataFrame(data['avis'], index=data['defvis'], columns=['average']) + df_aVNIR = pd.DataFrame(data['avnir'], index=data['defvnir'], columns=['average']) + df_ave = pd.concat([df_aUV, df_aVIS, df_aVNIR]) + + df_mUV = pd.DataFrame(data['muv'], index=data['defuv'], columns=['median']) + df_mVIS = pd.DataFrame(data['mvis'], index=data['defvis'], columns=['median']) + df_mVNIR = pd.DataFrame(data['mvnir'], index=data['defvnir'], columns=['median']) + df_med = pd.concat([df_mUV, df_mVIS, df_mVNIR]) + + df = pd.concat([df_spect, df_ave, df_med], axis=1) + # create multiindex to access wavelength values + # also, round the wavlength values to a more reasonable level of precision + df.index = [['wvl'] * len(df.index), df.index.values.round(4)] + # transpose so that spectra are rows rather than columns + df = df.T + + # extract metadata from the file name and add it to the data frame + # use the multiindex label "meta" for all metadata + + fname = os.path.basename(input_data) + + # for some reason, some ChemCam files have the 'darkname' key, others call it 'darkspect' + # this try-except pair converts to 'darkname' when needed + try: + data['darkname'] + except: + data['darkname'] = data['darkspec'] + + metadata = [fname, + fname[4:13], + fname[25:34].upper(), + fname[34:36], + data['continuumvismin'], + data['continuumvnirmin'], + data['continuumuvmin'], + data['continuumvnirend'], + data['distt'], + data['darkname'], + data['nshots'], + data['dnoiseiter'], + data['dnoisesig'], + data['matchedfilter']] + metadata = np.tile(metadata, (len(df.index), 1)) + metadata_cols = list(zip(['meta'] * len(df.index), ['file', + 'sclock', + 'seqid', + 'Pversion', + 'continuumvismin', + 'continuumvnirmin', + 'continuumuvmin', + 'continuumvnirend', + 'distt', + 'dark', + 'nshots', + 'dnoiseiter', + 'dnoisesig', + 'matchedfilter'])) + metadata = pd.DataFrame(metadata, columns=pd.MultiIndex.from_tuples(metadata_cols), index=df.index) + + df = pd.concat([metadata, df], axis=1) + if ave == True: + df = df.loc['average'] + df = df.to_frame().T + else: + pass + + return df + + +def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None): + # Determine if the file is a .csv or .SAV + if '.sav' in searchstring.lower(): + is_sav = True + else: + is_sav = False + filelist = file_search(directory, searchstring) + basenames = np.zeros_like(filelist) + sclocks = np.zeros_like(filelist) + P_version = np.zeros_like(filelist, dtype='int') + + # Extract the sclock and version for each file and ensure that only one + # file per sclock is being read, and that it is the one with the highest version number + for i, name in enumerate(filelist): + basenames[i] = os.path.basename(name) + sclocks[i] = basenames[i][4:13] # extract the sclock + P_version[i] = basenames[i][-5:-4] # extract the 
version + + sclocks_unique = np.unique(sclocks) # find unique sclocks + filelist_new = np.array([], dtype='str') + for i in sclocks_unique: + match = (sclocks == i) # find all instances with matching sclocks + maxP = P_version[match] == max(P_version[match]) # find the highest version among these files + filelist_new = np.append(filelist_new, filelist[match][maxP]) # keep only the file with thei highest version + + filelist = filelist_new + # Should add a progress bar for importing large numbers of files + dt = [] + + for i, file in enumerate(filelist): + print(file) + if is_sav: + tmp = CCAM_SAV(file, ave=ave) + else: + tmp = CCAM_CSV(file, ave=ave) + if i == 0: + combined = tmp + else: + # This ensures that rounding errors are not causing mismatches in columns + cols1 = list(combined['wvl'].columns) + cols2 = list(tmp['wvl'].columns) + if set(cols1) == set(cols2): + combined = pd.concat([combined, tmp]) + else: + print("Wavelengths don't match!") + + combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')]) + + if lookupfile is not None: + + combined = lookup(combined, lookupfile=lookupfile.replace('[','').replace(']','').replace("'",'').replace(' ','').split(',')) + if to_csv is not None: + combined.to_csv(to_csv) + return combined diff --git a/plio/io/io_edr.py b/plio/io/io_edr.py new file mode 100644 index 0000000000000000000000000000000000000000..caa9d6d396b5917b04c22e22632f93c3a775112d --- /dev/null +++ b/plio/io/io_edr.py @@ -0,0 +1,80 @@ +import os + +import numpy as np +import pandas as pd + + +def EDR(input_file): + f = open(input_file, 'rb') # read as bytes so python won't complain about the binary part of the file + + # read lines of the header until reaching the end of the libs table (collecting other metadata along the way) + end_of_libs_table = False + while end_of_libs_table is False: + line = str(f.readline(), 'utf-8').replace('\r', '').replace('\n', + '') # convert the current line to a string and get rid of newline characters + line = line.split('=') # split the line on equals sign if present + # look for the name of the value we want, if the current line has it, then set the value + if 'RECORD_BYTES' in line[0]: + rbytes = int(line[1]) + if 'LABEL_RECORDS' in line[0]: + lrecs = int(line[1]) + if 'SPACECRAFT_CLOCK_START_COUNT' in line[0]: + sclock = int(line[1].replace('"', '').split('.')[0]) + if 'SEQUENCE_ID' in line[0]: + seqID = line[1].replace('"', '') + if 'INSTRUMENT_FOCUS_DISTANCE' in line[0]: + focus_dist = int(line[1]) + + if 'INSTRUMENT_TEMPERATURE' in line[0]: + instrument_temps = line[1] \ + + str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '') \ + + str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '') \ + + str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '') + instrument_temps = [float(i) for i in + instrument_temps.replace('<degC>', '').replace('(', '').replace(')', '').replace(' ', + '').split( + ',')] + instrument_temps_name = str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '') + instrument_temps_name = instrument_temps_name.split('=')[1] \ + + str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '') \ + + str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '') \ + + str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '') \ + + str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '') + instrument_temps_name = instrument_temps_name.replace(' ', '').replace('(', '').replace(')', '').replace( + '"', '').split(',') + f.readline() + pass + try: + if 
'CCAM_LIBS_DATA_CONTAINER' in line[1]: + nshots = int(str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '').split('=')[1]) + start_byte = int(str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '').split('=')[1]) + if 'END_OBJECT' in line[0] and 'CCAM_LIBS_TABLE' in line[1]: + end_of_libs_table = True + except: + pass + + f.close() + header_skip = lrecs * rbytes # calculate the number of header bytes to skip to get to the real data + + with open(input_file, "rb") as f: + f.seek(header_skip + start_byte - 1, 0) + spectra = [] + while spectra.__len__() < nshots: + spectrum = [] + while spectrum.__len__() < 6444: + spectrum.append(int.from_bytes(f.read(2), byteorder='big', signed=False)) + spectra.append(spectrum) + spectra = np.array(spectra, dtype='int') + cols = np.array(list(range(spectra.shape[1]))) + 1 + cols = [('channel', i) for i in cols] + inds = np.array(list(range(spectra.shape[0]))) + 1 + sp = pd.DataFrame(spectra, columns=pd.MultiIndex.from_tuples(cols), index=inds) + sp[('meta', 'EDR_file')] = os.path.basename(input_file) + sp[('meta', 'Spacecraft_Clock')] = sclock + sp[('meta', 'Shot')] = sp.index + sp[('meta', 'SeqID')] = seqID + sp[('meta', 'Focus_Distance')] = focus_dist + for ind, name in enumerate(instrument_temps_name): + sp[('meta', name + '_temp')] = instrument_temps[ind] + sp.to_csv('test.csv') + return sp diff --git a/plio/io/io_jsc.py b/plio/io/io_jsc.py new file mode 100644 index 0000000000000000000000000000000000000000..00bf7336f548d4579ac6fd2b638fdcc8ff47f2b9 --- /dev/null +++ b/plio/io/io_jsc.py @@ -0,0 +1,202 @@ +import os + +import numpy as np +import pandas as pd +from pandas.core.common import array_equivalent + +from plio.utils.utils import file_search + + +# This function reads the lookup tables used to expand metadata from the file names +# This is separated from parsing the filenames so that for large lists of files the +# lookup tables don't need to be read over and over +# +# Info in the tables is stored in a dict of dataframes so that only one variable +# (the dict) needs to be passed between functions +def read_refdata(LUT_files): + ID_info = pd.read_csv(LUT_files['ID'], index_col=0) + spectrometer_info = pd.read_csv(LUT_files['spect'], index_col=0) + # spectrometer_info.reset_index(inplace=True) + laser_info = pd.read_csv(LUT_files['laser'], index_col=0) + # laser_info.reset_index(inplace=True) + exp_info = pd.read_csv(LUT_files['exp'], index_col=0) + # exp_info.reset_index(inplace=True) + sample_info = pd.read_csv(LUT_files['sample'], index_col=0) + # sample_info.reset_index(inplace=True) + refdata = {'spect': spectrometer_info, 'laser': laser_info, 'exp': exp_info, 'sample': sample_info, 'ID': ID_info} + return refdata + + +# This function parses the file names to record metadata related to the observation +def jsc_filename_parse(filename, refdata): + filename = os.path.basename(filename) # strip the path off of the file name + filename = filename.split('_') # split the file name on underscores + libs_ID = filename[0] + laserID = filename[4][0] + expID = filename[5] + spectID = filename[6] + + try: + sampleID = refdata['ID'].loc[libs_ID].values[0] + file_info = pd.DataFrame(refdata['sample'].loc[sampleID]) + if file_info.columns.shape[0] < file_info.index.shape[0]: + file_info = file_info.T + if file_info.index.shape[0] > 1: + print('More than one matching row for ' + sampleID + '!') + tempID = 'Unknown' + file_info = pd.DataFrame(refdata['sample'].loc[tempID]) + if file_info.columns.shape[0] < file_info.index.shape[0]: + file_info 
= file_info.T + + + except: + sampleID = 'Unknown' + file_info = pd.DataFrame(refdata['sample'].loc[sampleID]) + if file_info.columns.shape[0] < file_info.index.shape[0]: + file_info = file_info.T + + file_info['Sample ID'] = sampleID + file_info['LIBS ID'] = libs_ID + file_info.reset_index(level=0, inplace=True, drop=True) + file_info['loc'] = int(filename[1]) + file_info['lab'] = filename[2] + file_info['gas'] = filename[3][0] + file_info['pressure'] = float(filename[3][1:]) + + if laserID in refdata['laser'].index: + laser_info = pd.DataFrame(refdata['laser'].loc[laserID]).T + laser_info.index.name = 'Laser Identifier' + laser_info.reset_index(level=0, inplace=True) + file_info = pd.concat([file_info, laser_info], axis=1) + + file_info['laser_power'] = float(filename[4][1:]) + if expID in refdata['exp'].index: + exp_info = pd.DataFrame(refdata['exp'].loc[expID]).T + exp_info.index.name = 'Exp Identifier' + exp_info.reset_index(level=0, inplace=True) + file_info = pd.concat([file_info, exp_info], axis=1) + + file_info['spectrometer'] = spectID + if spectID in refdata['spect'].index: + temp = refdata['spect'].loc[spectID] + temp = [temp[2], temp[4:]] + spect_info = pd.DataFrame(refdata['spect'].loc[spectID]).T + spect_info.index.name = 'Spectrometer Identifier' + spect_info.reset_index(level=0, inplace=True) + file_info = pd.concat([file_info, spect_info], axis=1) + + return file_info + + +def JSC(input_files, refdata): + try: + # read the first file + data = pd.read_csv(input_files[0], skiprows=14, sep='\t', engine='c') + data = data.rename(columns={data.columns[0]: 'time1', data.columns[1]: 'time2'}) + metadata = pd.concat([jsc_filename_parse(input_files[0], refdata)] * len(data.index)) + metadata.drop('spectrometer', axis=1, inplace=True) + + # read the next files and merge them with the first + for file in input_files[1:]: + datatemp = pd.read_csv(file, skiprows=14, sep='\t', engine='c') + datatemp = datatemp.rename(columns={datatemp.columns[0]: 'time1', datatemp.columns[1]: 'time2'}) + data = data.merge(datatemp) + + time = data[['time1', 'time2']] # split the two time columns from the data frame + data.drop(['time1', 'time2'], axis=1, inplace=True) # trim the data frame so it is just the spectra + + # make a multiindex for each wavlength column so they can be easily isolated from metadata later + data.columns = [['wvl'] * len(data.columns), np.array(data.columns.values, dtype='float').round(4)] + + metadata.index = data.index + metadata = pd.concat([metadata, time], axis=1) + compcols = ['SiO2', 'TiO2', 'Al2O3', 'Cr2O3', 'Fe2O3T', 'MnO', 'MgO', 'CaO', 'Na2O', 'K2O', 'P2O5', + 'SO3 LOI Residue', 'Total', 'Total Includes', '%LOI', 'FeO', + 'Fe2O3', 'SO3 Actual', 'Fe(3+)/Fe(Total)', 'Rb (ug/g)', 'Sr (ug/g)', 'Y (ug/g)', 'Zr (ug/g)', + 'V (ug/g)', 'Ni (ug/g)', 'Cr (ug/g)', + 'Nb (ug/g)', 'Ga (ug/g)', 'Cu (ug/g)', 'Zn (ug/g)', 'Co (ug/g)', 'Ba (ug/g)', 'La (ug/g)', + 'Ce (ug/g)', 'U (ug/g)', 'Th (ug/g)', 'Sc (ug/g)', + 'Pb (ug/g)', 'Ge (ug/g)', 'As (ug/g)', 'Cl (ug/g)'] + compdata = metadata[compcols] + metadata.drop(compcols, axis=1, inplace=True) + metadata.columns = [['meta'] * len(metadata.columns), metadata.columns.values] + compdata.columns = [['comp'] * len(compdata.columns), compdata.columns.values] + data = pd.concat([data, metadata, compdata], axis=1) + + data[('meta', 'Scan #')] = data.index + data.set_index(('meta', 'time2'), drop=False, inplace=True) + + return data + except: + print('Problem reading:' + input_file) + print('Moving to Problem_Files') + os.rename(input_file, 
+ r"Problem_Files\\" + os.path.basename( + input_file)) + return None + + +def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None): + # Read in the lookup tables to expand filename metadata + refdata = read_refdata(LUT_files) + # get the list of files that match the search string in the given directory + filelist = file_search(directory, searchstring) + spectIDs = [] # create an empty list to hold the spectrometer IDs + libsIDs = [] + timestamps = [] + locs = [] + for file in filelist: + filesplit = os.path.basename(file).split('_') + spectIDs.append(filesplit[6]) # get the spectrometer IDs for each file in the list + libsIDs.append(filesplit[0]) + timestamps.append(filesplit[-1].split('.')[0]) + locs.append(filesplit[1]) + spectIDs_unique = np.unique(spectIDs) # get the unique spectrometer IDs + libsIDs_unique = np.unique(libsIDs) + dfs = [] # create an empty list to hold the data frames for each spectrometer + + # loop through each LIBS ID + alldata = [] + for ID in libsIDs_unique: + print('Working on : ' + str(ID)) + sublist = filelist[np.in1d(libsIDs, ID)] + locs = [] + for file in sublist: + locs.append(os.path.basename(file).split('_')[1]) + locs_unique = np.unique(locs) + # loop through each location for that libs ID + for loc in locs_unique: + print(loc) + sub_sublist = sublist[np.in1d(locs, loc)] # get the files for that LIBSID and location + data = JSC(sub_sublist, refdata) + alldata.append(data) + pass + + combined = pd.concat(alldata) + if to_csv is not None: + print('Writing combined data to: ' + to_csv) + combined.to_csv(to_csv) + return combined + + +# got this function from stack overflow: http://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns +# it's slow but doesn't crash python like combined.T.drop_duplicates().T does in some cases with very large sets of data +def duplicate_columns(frame): + groups = frame.columns.to_series().groupby(frame.dtypes).groups + dups = [] + + for t, v in groups.items(): + + cs = frame[v].columns + vs = frame[v] + lcs = len(cs) + + for i in range(lcs): + ia = vs.iloc[:, i].values + for j in range(i + 1, lcs): + ja = vs.iloc[:, j].values + if array_equivalent(ia, ja): + dups.append(cs[i]) + break + + return dups diff --git a/plio/io/io_json.py b/plio/io/io_json.py index 943e4ee1e5ebd02967c03a3dc601a673962bc1c7..a463e6cf7006c4d71b0ca93e0597e9f8534d6591 100644 --- a/plio/io/io_json.py +++ b/plio/io/io_json.py @@ -14,13 +14,6 @@ def read_json(inputfile): ======= jobs : dict returns a dictionary - - >>> inputs = readinputfile('testfiles/sampleinput.json') - >>> k = inputs.keys() - >>> k.sort() - >>> print k - [u'ancillarydata', u'bands', u'force', u'images', u'latlon', u'name', u'outputformat', u'processing_pipeline', u'projection', u'resolution', u'rtilt', u'tesatm', u'uddw'] - """ with open(inputfile, 'r') as f: try: diff --git a/plio/io/io_moon_minerology_mapper.py b/plio/io/io_moon_minerology_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..cce772febecedefce31c1303af06e249e02ba22a --- /dev/null +++ b/plio/io/io_moon_minerology_mapper.py @@ -0,0 +1,25 @@ +import numpy as np +from osgeo import gdal + + +def openm3(input_data): + if input_data.split('.')[-1] == 'hdr': + # GDAL wants the img, but many users aim at the .hdr + input_data = input_data.split('.')[0] + '.img' + ds = gdal.Open(input_data) + ref_array = ds.GetRasterBand(1).ReadAsArray() + metadata = ds.GetMetadata() + wv_array = metadatatoband(metadata) + return wv_array, ref_array, ds + + +def 
metadatatoband(metadata):
+    wv2band = []
+    for k, v in metadata.items():
+        try:
+            wv2band.append(float(v))
+        except:
+            v = v.split(" ")[-1].split("(")[1].split(")")[0]
+            wv2band.append(float(v))
+    wv2band.sort(key=int)
+    return np.asarray(wv2band)
diff --git a/plio/io/io_multibandimager.py b/plio/io/io_multibandimager.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e5ba2627a240fdc980c9c25a184d93d22168e80
--- /dev/null
+++ b/plio/io/io_multibandimager.py
@@ -0,0 +1,28 @@
+import numpy as np
+from osgeo import gdal
+
+
+def openmi(input_data):
+    ds = gdal.Open(input_data)
+    band_pointers = []
+    nbands = ds.RasterCount
+
+    for b in range(1, nbands + 1):
+        band_pointers.append(ds.GetRasterBand(b))
+
+    ref_array = ds.GetRasterBand(1).ReadAsArray()
+    wv_array = None
+    return wv_array, ref_array[::3, ::3], ds
+
+
+def getspectra(x, y, ds):
+    nbands = ds.RasterCount
+    reflectance = np.empty(nbands)
+    for b in range(1, nbands + 1):
+        reflectance[b - 1] = ds.GetRasterBand(b).ReadAsArray(y, x, 1, 1)
+
+    mergedref = np.empty(nbands - 1)
+    mergedref[:4] = reflectance[:4]
+    mergedref[4] = (reflectance[4] + reflectance[5]) / 2
+    mergedref[5:] = reflectance[6:]
+    return mergedref
diff --git a/plio/io/io_yaml.py b/plio/io/io_yaml.py
index 38c787ae91d697aa1efe20ead1387edc56aeaa41..59d1a70d9d24fc43f8a68b658826b27a81c069fb 100644
--- a/plio/io/io_yaml.py
+++ b/plio/io/io_yaml.py
@@ -1,7 +1,4 @@
-try:
-    import yaml
-except:
-    print('YAML package not installed, disabling yaml_io module')
+import yaml
 
 
 def read_yaml(inputfile):
@@ -21,6 +18,6 @@ def read_yaml(inputfile):
     try:
         with open(inputfile, 'r') as f:
             ydict = yaml.load(f)
-    except: # pragma: no cover
+    except:  # pragma: no cover
         raise IOError('Unable to load YAML file.')
     return ydict
diff --git a/plio/io/tests/test_io_ccam_pds.py b/plio/io/tests/test_io_ccam_pds.py
new file mode 100644
index 0000000000000000000000000000000000000000..03fec02c2d278597aa632a9f9ffe8450e51f92ca
--- /dev/null
+++ b/plio/io/tests/test_io_ccam_pds.py
@@ -0,0 +1,19 @@
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.abspath('..'))
+
+from plio.examples import get_path
+from plio.io import io_ccam_pds
+
+class Test_CCAM_IO(unittest.TestCase):
+
+    def setUp(self):
+        self.examplefile = get_path('CL5_398645626CCS_F0030004CCAM02013P3.csv')
+
+    def test_14_item_header_csv(self):
+        io_ccam_pds.CCAM_CSV(self.examplefile)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/plio/io/tests/test_io_controlnetwork.py b/plio/io/tests/test_io_controlnetwork.py
index 186967d6abf9e83ace6c8fb3db75293fe1448077..82c9858cd5cec755dc364648573af66ea0cf797f 100644
--- a/plio/io/tests/test_io_controlnetwork.py
+++ b/plio/io/tests/test_io_controlnetwork.py
@@ -6,8 +6,8 @@ from time import strftime, gmtime
 
 import pandas as pd
 import pvl
 
-from .. import io_controlnetwork
-from .. 
import ControlNetFileV0002_pb2 as cnf +from plio.io import io_controlnetwork +from plio.io import ControlNetFileV0002_pb2 as cnf from plio.utils.utils import find_in_dict sys.path.insert(0, os.path.abspath('..')) diff --git a/plio/io/tests/test_io_edr.py b/plio/io/tests/test_io_edr.py new file mode 100644 index 0000000000000000000000000000000000000000..7e3637a7ce53a31fdf6d6e61b0a3bb40dd91ddf0 --- /dev/null +++ b/plio/io/tests/test_io_edr.py @@ -0,0 +1,17 @@ +import unittest + +from plio.examples import get_path +from plio.io import io_edr + + +# class Test_Tes_IO(unittest.TestCase): +# +# # Need different test data or need to modify the current code +# def setUp(self): +# self.examplefile = get_path('cl5_398736801edr_f0030004ccam01014m1.dat') +# # +# def test_open(self): +# ds = io_edr.EDR(self.examplefile) +# +# if __name__ == '__main__': +# unittest.main() diff --git a/plio/io/tests/test_structured_io.py b/plio/io/tests/test_structured_io.py index 13a7c4195abfcd2f67cd2816d4b0a641c354befd..84e67b8b59225845c96f98c20766a4c47416cce8 100644 --- a/plio/io/tests/test_structured_io.py +++ b/plio/io/tests/test_structured_io.py @@ -1,7 +1,7 @@ import unittest -from .. import io_json -from .. import io_yaml +from plio.io import io_json +from plio.io import io_yaml try: import yaml diff --git a/plio/utils/utils.py b/plio/utils/utils.py index 761f70d28416f338b896d10f1daff6c4d7eb3ad2..1cec0dfd11c353a1aeab1ef7a675f8aa66fb1814 100644 --- a/plio/utils/utils.py +++ b/plio/utils/utils.py @@ -5,6 +5,7 @@ import os import fnmatch import shutil import tempfile +import pandas as pd def create_dir(basedir=''): @@ -50,30 +51,6 @@ def file_to_list(file): return list(file_list) -def create_dir(basedir=''): - """ - Create a unique, temporary directory in /tmp where processing will occur - - Parameters - ---------- - basedir : str - The PATH to create the temporary directory in. 
- """ - return tempfile.mkdtemp(dir=basedir) - - -def delete_dir(dir): - """ - Delete a directory - - Parameters - ---------- - dir : str - Remove a directory - """ - shutil.rmtree(dir) - - def file_search(searchdir,searchstring): """ Recursively search for files in the specified directory @@ -164,3 +141,31 @@ def xstr(s): if s is None: return '' return str(s) + +def lookup(df, lookupfile=None, lookupdf=None, sep=',', skiprows=1, left_on='sclock', right_on='Spacecraft Clock'): +#TODO: automatically determine the number of rows to skip to handle ccam internal master list and PDS "official" master list formats + if lookupfile is not None: + # this loop concatenates together multiple lookup files if provided + # (mostly to handle the three different master lists for chemcam) + for x in lookupfile: + try: + tmp = pd.read_csv(x, sep=sep, skiprows=skiprows, error_bad_lines=False) + lookupdf = pd.concat([lookupdf, tmp]) + except: + lookupdf = pd.read_csv(x, sep=sep, skiprows=skiprows, error_bad_lines=False) + metadata = df['meta'] + + metadata = metadata.merge(lookupdf, left_on=left_on, right_on=right_on, how='left') + + # remove metadata columns that already exist in the data frame to avoid non-unique columns + meta_cols = set(metadata.columns.values) + meta_cols_keep = list(meta_cols - set(df['meta'].columns.values)) + metadata = metadata[meta_cols_keep] + + # make metadata into a multiindex + metadata.columns = [['meta'] * len(metadata.columns), metadata.columns.values] + # give it the same indices as the df + metadata.index = df.index + # combine the df and the new metadata + df = pd.concat([metadata, df], axis=1) + return df diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000000000000000000000000000000000..25e6c3f6cea698576ccc78fef2f1f7dcc8a1d3c1 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +addopts = --cov-report term-missing --cov=plio +norecursedirs = examples diff --git a/recipe/bld.bat b/recipe/bld.bat new file mode 100644 index 0000000000000000000000000000000000000000..bece631e73bb4565e17caa69b8ec33f65e5a7915 --- /dev/null +++ b/recipe/bld.bat @@ -0,0 +1,3 @@ +python setup.py install --single-version-externally-managed --record=record.txt + +if errorlevel 1 exit 1 diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 1cffd14ba592b1d9882549dd5b94fe298dbe62ad..2ce618b95b11fc9104a157469d0935aec48d114d 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -6,9 +6,7 @@ source: git_url: https://github.com/USGS-Astrogeology/plio build: - number: 0 - skip: true #[win] - script: python setup.py install --single-version-externally-managed --record=record.txt + number: 1 requirements: build: @@ -25,6 +23,8 @@ requirements: - pyyaml - affine - networkx + - scipy + - certifi run: - python - setuptools @@ -39,6 +39,8 @@ requirements: - pyyaml - affine - networkx + - scipy + - certifi test: imports: diff --git a/setup.py b/setup.py index 9a3ee825e049d9fd13ec70ab06674c764e7ed2b8..0431bad1067c6fdb5d4a085d2896076ace9cd3d6 100644 --- a/setup.py +++ b/setup.py @@ -40,12 +40,14 @@ def setup_package(): 'gdal', 'numpy', 'pvl', - 'protobuf==3.0.0b2', + 'protobuf', 'h5py', 'pandas', 'sqlalchemy', 'pyyaml', - 'networkx'], + 'networkx', + 'affine', + 'scipy'], classifiers=[ "Development Status :: 3 - Alpha", "Topic :: Utilities",