Skip to content
Snippets Groups Projects
Commit 13e31075 authored by jay's avatar jay
Browse files

socet read without regex

parent e403b3f9
No related branches found
No related tags found
No related merge requests found
...@@ -7,53 +7,66 @@ from functools import singledispatch ...@@ -7,53 +7,66 @@ from functools import singledispatch
import numpy as np import numpy as np
import pandas as pd import pandas as pd
def socetset_keywords_to_json(keywords, ell=None): from plio.utils.utils import is_number, convert_string_to_float
def socetset_keywords_to_dict(keywords, ell=None):
""" """
Convert a SocetCet keywords.list file to JSON Convert a SocetCet keywords.list file to JSON
Parameters Parameters
---------- ----------
keywords : str keywords : str
Path to the socetset keywords.list file Path to the socetset keywords.list file or a raw string that
will be split on '\n' and parsed.
ell : str
Optional path to the ellipsoid keywords.list file or a raw string
that will be split on '\n' and parsed
Returns Returns
------- -------
: str data : dict
The serialized JSON string. A dictionary containing the socet keywords parsed.
""" """
matcher = re.compile(r'\b(?!\d)\w+\b') data = {}
numeric_matcher = re.compile(r'\W?-?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?')
stream = {} def parse(lines):
for l in lines:
def parse(fi): l = l.strip()
with open(fi, 'r') as f:
for l in f:
l = l.rstrip()
if not l: if not l:
continue continue
matches = matcher.findall(l) elems = l.split()
if matches: if is_number(elems[0]) is False:
key = matches[0] key = elems[0]
stream[key] = [] if key in data.keys():
# Case where the kw are strings after the key raise ValueError('Duplicate dictionary key: {}'.format(key))
if len(matches) > 1: data[key] = []
stream[key] = matches[1:] if len(elems) == 1:
# Case where the kw are numeric types after the key continue
else: if len(elems) == 2:
nums = numeric_matcher.findall(l) data[key] = convert_string_to_float(elems[1])
if len(nums) == 1:
stream[key] = float(nums[0])
else: else:
stream[key] += map(float, nums) data[key] += [convert_string_to_float(e) for e in elems[1:]]
else: else:
# Case where the values are on a newline after the key data[key] += [convert_string_to_float(e) for e in elems]
nums = numeric_matcher.findall(l)
stream[key] += map(float, nums)
if os.path.exists(keywords):
with open(keywords, 'r') as f:
keywords = f.readlines()
else:
keywords = keywords.split('\n')
parse(keywords) parse(keywords)
if ell: if ell:
if os.path.exists(ell):
with open(ell, 'r') as f:
ell = f.readlines()
else:
ell = ell.split('\n')
parse(ell) parse(ell)
return json.dumps(stream)
return data
@singledispatch @singledispatch
def read_ipf(arg): # pragma: no cover def read_ipf(arg): # pragma: no cover
......
...@@ -5,7 +5,7 @@ import numpy as np ...@@ -5,7 +5,7 @@ import numpy as np
import pandas as pd import pandas as pd
from pandas.util.testing import assert_frame_equal from pandas.util.testing import assert_frame_equal
from plio.io.io_bae import socetset_keywords_to_json, read_gpf, save_gpf, read_ipf, save_ipf from plio.io.io_bae import socetset_keywords_to_dict, read_gpf, save_gpf, read_ipf, save_ipf
from plio.examples import get_path from plio.examples import get_path
import pytest import pytest
...@@ -89,13 +89,47 @@ def test_write_gpf(gpf, file): ...@@ -89,13 +89,47 @@ def test_write_gpf(gpf, file):
# np.testing.assert_array_almost_equal(truth_arr, test_arr) # np.testing.assert_array_almost_equal(truth_arr, test_arr)
def test_create_from_socet_lis(): class TestISDFromSocetLis():
socetlis = get_path('socet_isd.lis')
socetell = get_path('ellipsoid.ell') def test_parse_with_empty_newlines(self):
js = json.loads(socetset_keywords_to_json(socetlis)) # Ensure all keys read when whitespace present
assert isinstance(js, dict) # This is essentially a JSON linter empty_newlines = r"""T0_QUAT 1.0000000000000000000000000e-01
# Manually validated
assert 'RECTIFICATION_TERMS' in js.keys() T1_QUAT 1.0000000000000000000000000e-01"""
assert 'SEMI_MAJOR_AXIS' in js.keys() # From ellipsoid file data = socetset_keywords_to_dict(empty_newlines)
assert 'NUMBER_OF_EPHEM' in js.keys() assert len(data.keys()) == 2
assert len(js['EPHEM_PTS']) / 3 == js['NUMBER_OF_EPHEM']
def test_duplicate_key_check(self):
duplicate_keys = r"""T 1
T 1"""
with pytest.raises(ValueError):
data = socetset_keywords_to_dict(duplicate_keys)
def test_multiple_per_line(self):
multiple_per_line = r"""T 1 1 1"""
data = socetset_keywords_to_dict(multiple_per_line)
assert len(data['T']) == 3
def test_key_on_different_line(self):
key_on_different_line = r"""A
0.0 1.00000000000000e+00 2.00000000000000e+00
3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
data = socetset_keywords_to_dict(key_on_different_line)
assert len(data['A']) == 6
assert data['A'] == [0, 1, 2, 3, 4, 5]
assert len(data['B']) == 3
assert data['B'] == [0.1, 2, 3]
def test_key_on_different_line_whitespace(self):
key_on_different_line_whitespace = r"""A
0.0 1.00000000000000e+00 2.00000000000000e+00
3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
data = socetset_keywords_to_dict(key_on_different_line_whitespace)
assert len(data['A']) == 6
assert data['A'] == [0, 1, 2, 3, 4, 5]
assert len(data['B']) == 3
assert data['B'] == [0.1, 2, 3]
...@@ -9,6 +9,48 @@ import pandas as pd ...@@ -9,6 +9,48 @@ import pandas as pd
import numpy as np import numpy as np
def is_number(s):
    """
    Check if an argument is convertible to a number

    The check is performed by attempting ``float(s)``. Note that this means
    strings accepted by ``float`` such as ``'nan'`` or ``'inf'`` are also
    reported as numbers.

    Parameters
    ----------
    s : object
        The argument to check for conversion

    Returns
    -------
     : bool
       True if conversion is possible, otherwise False.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: a string that does not spell a number (e.g. 'T0_QUAT').
        # TypeError: an object float() cannot accept at all (e.g. None, a
        # list); the documented contract is to answer False, not to raise.
        return False
    return True
def convert_string_to_float(s):
    """
    Attempt to convert a string to a float.

    Parameters
    ----------
    s : str
        The string to convert

    Returns
    -------
     : float / str
       If successful, the converted value, else the argument is passed back
       out.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # float() raises ValueError for a non-numeric string and TypeError
        # for an unsupported object; in both cases the input is handed back
        # unchanged so non-numeric keyword values survive as-is.
        return s
def metadatatoband(metadata): def metadatatoband(metadata):
wv2band = [] wv2band = []
for k, v in metadata.items(): for k, v in metadata.items():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment