socet read without regex

13e31075 · jay · e403b3f9 · 13e31075 · 13e31075 · 13e31075
Commit 13e31075 authored 6 years ago by jay
--- a/plio/io/io_bae.py
+++ b/plio/io/io_bae.py
@@ -7,53 +7,66 @@ from functools import singledispatch
 import numpy as np
 import pandas as pd
-def socetset_keywords_to_json(keywords, ell=None):
+from plio.utils.utils import is_number, convert_string_to_float
+def socetset_keywords_to_dict(keywords, ell=None):
    """
    Convert a SocetCet keywords.list file to JSON
    Parameters
    ----------
    keywords : str
-               Path to the socetset keywords.list file
+               Path to the socetset keywords.list file or a raw string that
+               will be split on '\n' and parsed.
+    ell : str
+          Optional path to the ellipsoid keywords.list file or a raw string 
+          that will be split on '\n' and parsed
    Returns
    -------
-     : str
+     data : dict 
-       The serialized JSON string.
+            A dictionary mapping each keyword to its parsed value(s): a
+            single value when exactly one token follows the keyword on its
+            line, otherwise a list (empty if no values are found).
+    Raises
+    ------
+    ValueError
+        If the same keyword appears more than once in the parsed input.
    """
-    matcher = re.compile(r'\b(?!\d)\w+\b')
+    data = {}
-    numeric_matcher = re.compile(r'\W?-?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?')
-    stream = {}
+    def parse(lines):
+        for l in lines:
-    def parse(fi):
+            l = l.strip()
-        with open(fi, 'r') as f:
-            for l in f:
-                l = l.rstrip()
            if not l:
                continue
-                matches = matcher.findall(l)
+            elems = l.split()
-                if matches:
+            # A non-numeric first token starts a new keyword entry
+            if is_number(elems[0]) is False:
-                    key = matches[0]
+                key = elems[0]
-                    stream[key] = []
+                if key in data.keys():
-                    # Case where the kw are strings after the key
+                    raise ValueError('Duplicate dictionary key: {}'.format(key))
-                    if len(matches) > 1:
+                data[key] = []
-                        stream[key] = matches[1:]
+                if len(elems) == 1:
-                    # Case where the kw are numeric types after the key
+                    continue
-                    else:
+                if len(elems) == 2:
-                        nums = numeric_matcher.findall(l)
+                    data[key] = convert_string_to_float(elems[1])
-                        if len(nums) == 1:
-                            stream[key] = float(nums[0])
                else:
-                            stream[key] += map(float, nums)
+                    data[key] += [convert_string_to_float(e) for e in elems[1:]]
            else:
-                    # Case where the values are on a newline after the key
+                # Numeric first token: the values continue the most recent keyword
+                data[key] += [convert_string_to_float(e) for e in elems]
-                    nums = numeric_matcher.findall(l)
-                    stream[key] += map(float, nums)
+    # An existing path is read from disk; anything else is treated as raw text
+    if os.path.exists(keywords):
+        with open(keywords, 'r') as f:
+            keywords = f.readlines()
+    else:
+        keywords = keywords.split('\n')
    parse(keywords)
    if ell:
+        if os.path.exists(ell):
+            with open(ell, 'r') as f:
+                ell = f.readlines()
+        else:
+            ell = ell.split('\n')
        parse(ell)
-    return json.dumps(stream)
+    return data
 @singledispatch
 def read_ipf(arg): # pragma: no cover

--- a/plio/io/tests/test_io_bae.py
+++ b/plio/io/tests/test_io_bae.py
@@ -5,7 +5,7 @@ import numpy as np
 import pandas as pd
 from pandas.util.testing import assert_frame_equal
-from plio.io.io_bae import socetset_keywords_to_json, read_gpf, save_gpf, read_ipf, save_ipf
+from plio.io.io_bae import socetset_keywords_to_dict, read_gpf, save_gpf, read_ipf, save_ipf
 from plio.examples import get_path
 import pytest
@@ -89,13 +89,47 @@ def test_write_gpf(gpf, file):
    # np.testing.assert_array_almost_equal(truth_arr, test_arr)
-def test_create_from_socet_lis():
+class TestISDFromSocetLis():
-    socetlis = get_path('socet_isd.lis')
-    socetell = get_path('ellipsoid.ell')
+    def test_parse_with_empty_newlines(self):
-    js = json.loads(socetset_keywords_to_json(socetlis))
+        # Ensure all keys read when whitespace present
-    assert isinstance(js, dict)  # This is essentially a JSON linter
+        empty_newlines = r"""T0_QUAT 1.0000000000000000000000000e-01
-    # Manually validated
-    assert 'RECTIFICATION_TERMS' in js.keys()
+T1_QUAT 1.0000000000000000000000000e-01"""
-    assert 'SEMI_MAJOR_AXIS' in js.keys()  # From ellipsoid file
+        data = socetset_keywords_to_dict(empty_newlines)
-    assert 'NUMBER_OF_EPHEM' in js.keys()
+        assert len(data.keys()) == 2
-    assert len(js['EPHEM_PTS']) / 3 == js['NUMBER_OF_EPHEM']
+    # Repeating a keyword must raise ValueError
+    def test_duplicate_key_check(self):
+        duplicate_keys = r"""T 1
+T 1"""
+        with pytest.raises(ValueError):
+            data = socetset_keywords_to_dict(duplicate_keys)
+    # Several values on the keyword's own line are collected into a list
+    def test_multiple_per_line(self):
+        multiple_per_line = r"""T 1 1 1"""
+        data = socetset_keywords_to_dict(multiple_per_line)
+        assert len(data['T']) == 3
+    # Values on the lines following a bare keyword are appended to that keyword
+    def test_key_on_different_line(self):
+        key_on_different_line = r"""A
+0.0 1.00000000000000e+00 2.00000000000000e+00
+3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
+B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
+        data = socetset_keywords_to_dict(key_on_different_line)
+        assert len(data['A']) == 6
+        assert data['A'] == [0, 1, 2, 3, 4, 5]
+        assert len(data['B']) == 3
+        assert data['B'] == [0.1, 2, 3]
+    # Same as above, but with leading whitespace on the continuation lines
+    def test_key_on_different_line_whitespace(self):
+        key_on_different_line_whitespace = r"""A
+    0.0 1.00000000000000e+00 2.00000000000000e+00
+    3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
+B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
+        data = socetset_keywords_to_dict(key_on_different_line_whitespace)
+        assert len(data['A']) == 6
+        assert data['A'] == [0, 1, 2, 3, 4, 5]
+        assert len(data['B']) == 3
+        assert data['B'] == [0.1, 2, 3]
--- a/plio/utils/utils.py
+++ b/plio/utils/utils.py
@@ -9,6 +9,48 @@ import pandas as pd
 import numpy as np
+def is_number(s):
+    """
+    Check if an argument is convertible to a number
+    Parameters
+    ----------
+    s : object
+        The argument to check for conversion
+    Returns
+    -------
+     : bool
+       True if float(s) succeeds, otherwise False.
+    """
+    try:
+        float(s)
+    except (TypeError, ValueError):
+        # float() raises ValueError for unparseable strings and TypeError
+        # for objects such as None or lists that it cannot convert at all.
+        return False
+    return True
+def convert_string_to_float(s):
+    """
+    Attempt to convert a string to a float.
+    Parameters
+    ----------
+    s : str
+        The string to convert
+    Returns
+    -------
+    : float / str
+      If successful, the converted value, else the argument is passed back
+      out.
+    """
+    try:
+        return float(s)
+    except (TypeError, ValueError):
+        # A non-numeric string makes float() raise ValueError, not TypeError;
+        # both are caught so the argument is handed back unchanged.
+        return s
 def metadatatoband(metadata):
    wv2band = []
    for k, v in metadata.items():