# Post-patch code reconstructed from the collapsed diff, one section per
# file touched by the patch.

import os


# ---------------------------------------------------------------------------
# plio/utils/utils.py
# ---------------------------------------------------------------------------

def is_number(s):
    """
    Check if an argument is convertible to a number

    Parameters
    ----------
    s : object
        The argument to check for conversion

    Returns
    -------
     : bool
       True if conversion is possible, otherwise False.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # float() raises ValueError for unparsable strings and TypeError for
        # objects that are neither strings nor numbers (e.g. None).
        return False
    return True


def convert_string_to_float(s):
    """
    Attempt to convert a string to a float.

    Parameters
    ----------
    s : str
        The string to convert

    Returns
    -------
     : float / str
       If successful, the converted value, else the argument is passed back
       out.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # A non-numeric string raises ValueError (not TypeError), so both
        # must be caught for the argument to be passed back unchanged.
        return s


# ---------------------------------------------------------------------------
# plio/io/io_bae.py
# (in the package, is_number and convert_string_to_float are imported from
# plio.utils.utils)
# ---------------------------------------------------------------------------

def _keyword_lines(source):
    """Return the lines of ``source``, read from disk if it is an existing path."""
    if os.path.exists(source):
        with open(source, 'r') as f:
            return f.read().splitlines()
    return source.splitlines()


def socetset_keywords_to_dict(keywords, ell=None):
    """
    Convert a SocetSet keywords.list file to a dictionary

    Each keyword starts a new entry. A keyword followed by exactly one value
    maps to that value; a keyword followed by several values, or by values
    on the following lines, maps to a list. Values that can be parsed as
    floats are converted, all others are kept as strings.

    Parameters
    ----------
    keywords : str
               Path to the socetset keywords.list file or a raw string that
               will be split into lines and parsed.

    ell : str
          Optional path to the ellipsoid keywords.list file or a raw string
          that will be split into lines and parsed

    Returns
    -------
    data : dict
           A dictionary containing the socet keywords parsed.

    Raises
    ------
    ValueError
        If a keyword occurs more than once, or if numeric values appear
        before any keyword has been read.
    """
    data = {}

    def parse(lines):
        key = None
        for line in lines:
            elems = line.split()
            if not elems:
                # Blank or whitespace-only line
                continue

            if not is_number(elems[0]):
                # A non-numeric leading token starts a new keyword.
                key = elems[0]
                if key in data:
                    raise ValueError('Duplicate dictionary key: {}'.format(key))
                values = elems[1:]
                if len(values) == 1:
                    data[key] = convert_string_to_float(values[0])
                else:
                    # Zero values (they follow on later lines) or several.
                    data[key] = [convert_string_to_float(e) for e in values]
            else:
                # Continuation line: values belong to the most recent keyword.
                if key is None:
                    raise ValueError(
                        'Numeric values found before any keyword: {}'.format(line.strip()))
                if not isinstance(data[key], list):
                    # The keyword line carried a single value that was stored
                    # as a scalar; promote it so the new values can be added.
                    data[key] = [data[key]]
                data[key] += [convert_string_to_float(e) for e in elems]

    parse(_keyword_lines(keywords))
    if ell:
        parse(_keyword_lines(ell))

    return data


# ---------------------------------------------------------------------------
# plio/io/tests/test_io_bae.py
# ---------------------------------------------------------------------------

class TestISDFromSocetLis():

    def test_parse_with_empty_newlines(self):
        # Ensure all keys read when whitespace present
        empty_newlines = r"""T0_QUAT 1.0000000000000000000000000e-01

T1_QUAT 1.0000000000000000000000000e-01"""
        data = socetset_keywords_to_dict(empty_newlines)
        assert len(data.keys()) == 2

    def test_duplicate_key_check(self):
        import pytest
        duplicate_keys = r"""T 1
T 1"""
        with pytest.raises(ValueError):
            socetset_keywords_to_dict(duplicate_keys)

    def test_multiple_per_line(self):
        multiple_per_line = r"""T 1 1 1"""
        data = socetset_keywords_to_dict(multiple_per_line)
        assert len(data['T']) == 3

    def test_key_on_different_line(self):
        key_on_different_line = r"""A
0.0 1.00000000000000e+00 2.00000000000000e+00
3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
        data = socetset_keywords_to_dict(key_on_different_line)
        assert len(data['A']) == 6
        assert data['A'] == [0, 1, 2, 3, 4, 5]

        assert len(data['B']) == 3
        assert data['B'] == [0.1, 2, 3]

    def test_key_on_different_line_whitespace(self):
        key_on_different_line_whitespace = r"""A
    0.0 1.00000000000000e+00 2.00000000000000e+00
    3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
        data = socetset_keywords_to_dict(key_on_different_line_whitespace)
        assert len(data['A']) == 6
        assert data['A'] == [0, 1, 2, 3, 4, 5]

        assert len(data['B']) == 3
        assert data['B'] == [0.1, 2, 3]

    def test_string_value(self):
        # Non-numeric values are passed back as strings
        data = socetset_keywords_to_dict(r"""NAME foo""")
        assert data['NAME'] == 'foo'

    def test_single_value_then_continuation(self):
        # A scalar on the keyword line is promoted to a list when more
        # values follow on the next line
        single_then_more = r"""A 0.0
1.0 2.0"""
        data = socetset_keywords_to_dict(single_then_more)
        assert data['A'] == [0, 1, 2]

    def test_values_before_key(self):
        import pytest
        with pytest.raises(ValueError):
            socetset_keywords_to_dict(r"""1.0 2.0""")