Skip to content
Snippets Groups Projects
Commit c1afb880 authored by David P. Mayer's avatar David P. Mayer Committed by jlaura
Browse files

io_bae.py: Explicitly Set GPF/IPF Column data types on read (#72)

* Add pyproj to list of run requirements in meta.yaml

* io_bae.py: explicitly set data types in GPF/IPF cols on read

* Typo fix

* Add tests for GPF/IPF data types, and making sure GPF/IPF point IDs are written correctly

* typo fix in test_io_bae.py that was breaking tests

* file name typo fix

* correct variable and change file names test_io_bae.py

* correct test_write_str_id_ipf, insert required newline in example IPF file

* Change GPF/IPF int and float lengths to 32 bits

* Increase size of GPF lat/lon/ht to float64

* Change GPF/IPF float lengths back to 64 bits to be consistent with BAE spec

* typo fix

* test_io_bae.py: Specify correct dtypes when reading expecting IPF/GPFs with read_csv

* Add python version to appveyor.yml
parent 160c1ae0
No related branches found
No related tags found
No related merge requests found
...@@ -17,6 +17,7 @@ environment: ...@@ -17,6 +17,7 @@ environment:
- PYTHON: "C:\\Miniconda35\\Scripts\\activate.bat" - PYTHON: "C:\\Miniconda35\\Scripts\\activate.bat"
PYTHON_VERSION: 3.5 PYTHON_VERSION: 3.5
- PYTHON: "C:\\Miniconda36\\Scripts\\activate.bat" - PYTHON: "C:\\Miniconda36\\Scripts\\activate.bat"
PYTHON_VERSION: 3.6
matrix: matrix:
exclude: exclude:
......
GROUND POINT FILE
3
point_id,stat,known,lat_Y_North,long_X_East,ht,sig(3),res(3)
1 1 1
0.08538133869187 2.38013146162178 -2697.23744694649990
0.000000 0.000000 1.707214
16.348041 -13.917623 -0.151001
02 1 1
0.08508542650082 2.38081358557147 -2702.00000000000000
0.000000 0.000000 1.000000
15.983286 -16.869508 0.103341
3 1 1
0.08626480095809 2.38708383926110 -2714.16003756809000
0.000000 0.000000 3.828854
12.831839 -34.360639 -2.205846
IMAGE POINT FILE
3
pt_id,val,fid_val,no_obs,l.,s.,sig_l,sig_s,res_l,res_s,fid_x,fid_y
1 1 0 0
-4058.982422 -2318.010742
0.000000 0.000000
-0.062556 -0.214713
0.000000 0.000000
02 1 0 0
-3969.065186 -606.849243
0.000000 0.000000
0.228660 0.105249
0.000000 0.000000
3 1 0 0
-1019.739014 -2300.877197
0.000000 0.000000
-0.025129 -0.002447
0.000000 0.000000
...@@ -110,8 +110,14 @@ def read_ipf_str(input_data): ...@@ -110,8 +110,14 @@ def read_ipf_str(input_data):
assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df)) assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df))
# Soft conversion of numeric types to numerics, allows str in first col for point_id # List of data types for columns in Socet set IPF file
df = df.apply(pd.to_numeric, errors='ignore') col_dtype = ['str','int32','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64']
# Build dict of column names and their data types
dtype_dict = dict(zip(columns, col_dtype))
# Hard conversion of data types to ensure 'pt_id' is treated as string, 'val', 'fid_val', 'no_obs' flags treated as int
df = df.astype(dtype_dict)
return df return df
...@@ -141,7 +147,7 @@ def read_ipf_list(input_data_list): ...@@ -141,7 +147,7 @@ def read_ipf_list(input_data_list):
def save_ipf(df, output_path): def save_ipf(df, output_path):
""" """
Write a socet gpf file from a gpf-defined pandas dataframe Write a socet ipf file from an ipf-defined pandas dataframe
Parameters Parameters
---------- ----------
...@@ -235,8 +241,14 @@ def read_gpf(input_data): ...@@ -235,8 +241,14 @@ def read_gpf(input_data):
df = pd.DataFrame(d, columns=columns) df = pd.DataFrame(d, columns=columns)
# Soft conversion of numeric types to numerics, allows str in first col for point_id # List of data types for columns in Socet set GPF file
df = df.apply(pd.to_numeric, errors='ignore') col_dtype = ['str','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64','float64']
# Build dict of column names and their data types
dtype_dict = dict(zip(columns, col_dtype))
# Hard conversion of data types to ensure 'point_id' is treated as string and 'stat' and 'known' flags treated as int
df = df.astype(dtype_dict)
# Validate the read data with the header point count # Validate the read data with the header point count
assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df)) assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df))
......
...@@ -14,25 +14,57 @@ import pytest ...@@ -14,25 +14,57 @@ import pytest
def insight_gpf(): def insight_gpf():
return get_path('InSightE08_XW.gpf') return get_path('InSightE08_XW.gpf')
@pytest.fixture
def example_str_id_gpf():
return get_path('InSightE08_string_id.gpf')
@pytest.fixture() @pytest.fixture()
def insight_expected_gpf(): def insight_expected_gpf():
return pd.read_csv(get_path('InSightE08_XW.csv')) dtype_dict = {'point_id': 'str',
'stat': 'int32',
'known': 'int32',
'lat_Y_North': 'float64',
'long_X_East': 'float64',
'ht': 'float64',
'sig0': 'float64',
'sig1': 'float64',
'sig2': 'float64',
'res0': 'float64',
'res1': 'float64',
'res2': 'float64'}
return pd.read_csv(get_path('InSightE08_XW.csv'), dtype=dtype_dict)
@pytest.fixture @pytest.fixture
def insight_ipf(): def insight_ipf():
return get_path('P20_008845_1894_XN_09N203W.ipf') return get_path('P20_008845_1894_XN_09N203W.ipf')
@pytest.fixture
def example_str_id_ipf():
return get_path('example_string_id_ipf.ipf')
@pytest.fixture() @pytest.fixture()
def insight_expected_ipf(): def insight_expected_ipf():
return pd.read_csv(get_path('P20_008845_1894_XN_09N203W.csv')) dtype_dict = {'pt_id': 'str',
'val': 'int32',
'fid_val': 'int32',
'no_obs': 'int32',
'l.': 'float64',
's.': 'float64',
'sig_l': 'float64',
'sig_s': 'float64',
'res_l': 'float64',
'res_s': 'float64',
'fid_x': 'float64',
'fid_y': 'float64'}
return pd.read_csv(get_path('P20_008845_1894_XN_09N203W.csv'), dtype=dtype_dict)
@pytest.mark.parametrize('ipf, expected', [([insight_ipf()],insight_expected_ipf())]) @pytest.mark.parametrize('ipf, expected', [([insight_ipf()],insight_expected_ipf())])
def test_read_ifp(ipf, expected): def test_read_ipf(ipf, expected):
df = read_ipf(ipf) df = read_ipf(ipf)
assert_frame_equal(df, expected) assert_frame_equal(df, expected)
@pytest.mark.parametrize('gpf, expected', [(insight_gpf(),insight_expected_gpf())]) @pytest.mark.parametrize('gpf, expected', [(insight_gpf(),insight_expected_gpf())])
def test_read_gfp(gpf, expected): def test_read_gpf(gpf, expected):
df = read_gpf(gpf) df = read_gpf(gpf)
assert_frame_equal(df, expected) assert_frame_equal(df, expected)
...@@ -63,6 +95,26 @@ def test_write_ipf(ipf, file): ...@@ -63,6 +95,26 @@ def test_write_ipf(ipf, file):
assert (truth_arr == test_arr).all() assert (truth_arr == test_arr).all()
@pytest.mark.parametrize('ipf, file', [(example_str_id_ipf(), 'plio/io/tests/temp')])
def test_write_str_id_ipf(ipf, file):
df = read_ipf(ipf)
save_ipf(df, file)
file = os.path.join(file, 'example_string_id_ipf.ipf')
with open(ipf) as f:
fl = f.readlines()
with open(file) as f:
fs = f.readlines()
# Quick check to make sure that length of IPF files matches
# otherwise, the test that follows will be invalid
assert len(fl) == len(fs)
# Test that every 6th line (the lines containing the point ID and integer flags) matches
for i in range(3,len(fs),6):
assert fs[i] == fl[i]
@pytest.mark.parametrize('gpf, file', [(insight_gpf(), 'out.gpf')]) @pytest.mark.parametrize('gpf, file', [(insight_gpf(), 'out.gpf')])
def test_write_gpf(gpf, file): def test_write_gpf(gpf, file):
""" """
...@@ -89,6 +141,65 @@ def test_write_gpf(gpf, file): ...@@ -89,6 +141,65 @@ def test_write_gpf(gpf, file):
# np.testing.assert_array_almost_equal(truth_arr, test_arr) # np.testing.assert_array_almost_equal(truth_arr, test_arr)
@pytest.mark.parametrize('gpf, file', [(example_str_id_gpf(), 'out.gpf')])
def test_write_str_id_gpf(gpf, file):
"""
This test makes sure that the point IDs of a GPF that contain only numbers
are written correctly when saving to disk
"""
df = read_gpf(gpf)
save_gpf(df, file)
with open(gpf) as f:
fl = f.readlines()
with open(file) as f:
fs = f.readlines()
# Quick check to make sure that length of GPF files matches
# otherwise, the test that follows will be invalid
assert len(fl) == len(fs)
# Test that every 5th line (the lines containing the point ID and integer flags) matches
for i in range(3,len(fs),5):
assert fs[i] == fl[i]
@pytest.mark.parametrize('gpf', [(example_str_id_gpf())])
def test_gpf_dtypes(gpf):
"""
This test makes sure that the point IDs of a GPF that contain only numbers
are always treated as strings after they're read in.
"""
# Read the GPF file under test into a pandas dataframe
df = read_gpf(gpf)
# Truth list of column data types
truth_dtypes = ['O','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64','float64']
# Test list of column data types
test_dtypes = list(df.dtypes)
# Check that the type of each column matches the truth list
assert truth_dtypes == test_dtypes
@pytest.mark.parametrize('ipf', [(example_str_id_ipf())])
def test_ipf_dtypes(ipf):
"""
This test makes sure that the point IDs of an IPF that contain only numbers
are always treated as strings after they're read in.
"""
# Read the IPF file under test into a pandas dataframe
df = read_ipf(ipf)
# Truth list of column data types
truth_dtypes = ['O','int32','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64', 'O']
# Test list of column data types
test_dtypes = list(df.dtypes)
# Check that the type of each column matches the truth list
assert truth_dtypes == test_dtypes
class TestISDFromSocetLis(): class TestISDFromSocetLis():
def test_parse_with_empty_newlines(self): def test_parse_with_empty_newlines(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment