diff --git a/appveyor.yml b/appveyor.yml index 4f9ecaafb3c880b6f535611f627012bad2bafd6b..8fadf13c88ed3cd517ba91686f701ca664c01771 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -17,6 +17,7 @@ environment: - PYTHON: "C:\\Miniconda35\\Scripts\\activate.bat" PYTHON_VERSION: 3.5 - PYTHON: "C:\\Miniconda36\\Scripts\\activate.bat" + PYTHON_VERSION: 3.6 matrix: exclude: diff --git a/plio/examples/SocetGXP/InSightE08_string_id.gpf b/plio/examples/SocetGXP/InSightE08_string_id.gpf new file mode 100644 index 0000000000000000000000000000000000000000..8c8df028b349b0c2df86e3a6fd26ffdefd80fdcb --- /dev/null +++ b/plio/examples/SocetGXP/InSightE08_string_id.gpf @@ -0,0 +1,18 @@ +GROUND POINT FILE +3 +point_id,stat,known,lat_Y_North,long_X_East,ht,sig(3),res(3) +1 1 1 +0.08538133869187 2.38013146162178 -2697.23744694649990 +0.000000 0.000000 1.707214 +16.348041 -13.917623 -0.151001 + +02 1 1 +0.08508542650082 2.38081358557147 -2702.00000000000000 +0.000000 0.000000 1.000000 +15.983286 -16.869508 0.103341 + +3 1 1 +0.08626480095809 2.38708383926110 -2714.16003756809000 +0.000000 0.000000 3.828854 +12.831839 -34.360639 -2.205846 + diff --git a/plio/examples/SocetSet/example_string_id_ipf.ipf b/plio/examples/SocetSet/example_string_id_ipf.ipf new file mode 100644 index 0000000000000000000000000000000000000000..38e0d537fe8308832fd52b0bc063d5ba76162da2 --- /dev/null +++ b/plio/examples/SocetSet/example_string_id_ipf.ipf @@ -0,0 +1,21 @@ +IMAGE POINT FILE +3 +pt_id,val,fid_val,no_obs,l.,s.,sig_l,sig_s,res_l,res_s,fid_x,fid_y +1 1 0 0 +-4058.982422 -2318.010742 +0.000000 0.000000 +-0.062556 -0.214713 +0.000000 0.000000 + +02 1 0 0 +-3969.065186 -606.849243 +0.000000 0.000000 +0.228660 0.105249 +0.000000 0.000000 + +3 1 0 0 +-1019.739014 -2300.877197 +0.000000 0.000000 +-0.025129 -0.002447 +0.000000 0.000000 + diff --git a/plio/io/io_bae.py b/plio/io/io_bae.py index c0c132de7fa769e54539c2f705c488f75b98cbcc..55d1832aa55c1e5b210ae68adc9f4e60a43554ec 100644 --- a/plio/io/io_bae.py +++ 
b/plio/io/io_bae.py @@ -110,8 +110,14 @@ def read_ipf_str(input_data): assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df)) - # Soft conversion of numeric types to numerics, allows str in first col for point_id - df = df.apply(pd.to_numeric, errors='ignore') + # List of data types for columns in Socet set IPF file + col_dtype = ['str','int32','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64'] + + # Build dict of column names and their data types + dtype_dict = dict(zip(columns, col_dtype)) + + # Hard conversion of data types to ensure 'pt_id' is treated as string, 'val', 'fid_val', 'no_obs' flags treated as int + df = df.astype(dtype_dict) return df @@ -141,7 +147,7 @@ def read_ipf_list(input_data_list): def save_ipf(df, output_path): """ - Write a socet gpf file from a gpf-defined pandas dataframe + Write a socet ipf file from an ipf-defined pandas dataframe Parameters ---------- @@ -235,8 +241,14 @@ def read_gpf(input_data): df = pd.DataFrame(d, columns=columns) - # Soft conversion of numeric types to numerics, allows str in first col for point_id - df = df.apply(pd.to_numeric, errors='ignore') + # List of data types for columns in Socet set GPF file + col_dtype = ['str','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64','float64'] + + # Build dict of column names and their data types + dtype_dict = dict(zip(columns, col_dtype)) + + # Hard conversion of data types to ensure 'point_id' is treated as string and 'stat' and 'known' flags treated as int + df = df.astype(dtype_dict) # Validate the read data with the header point count assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df)) diff --git a/plio/io/tests/test_io_bae.py b/plio/io/tests/test_io_bae.py index aacd851c370d34c2fba0b06212e627e63335cb45..91966fa4f0e15f590e0109e9a29deb9c00546021 100644 --- 
a/plio/io/tests/test_io_bae.py +++ b/plio/io/tests/test_io_bae.py @@ -14,25 +14,57 @@ import pytest def insight_gpf(): return get_path('InSightE08_XW.gpf') +@pytest.fixture +def example_str_id_gpf(): + return get_path('InSightE08_string_id.gpf') + @pytest.fixture() def insight_expected_gpf(): - return pd.read_csv(get_path('InSightE08_XW.csv')) + dtype_dict = {'point_id': 'str', + 'stat': 'int32', + 'known': 'int32', + 'lat_Y_North': 'float64', + 'long_X_East': 'float64', + 'ht': 'float64', + 'sig0': 'float64', + 'sig1': 'float64', + 'sig2': 'float64', + 'res0': 'float64', + 'res1': 'float64', + 'res2': 'float64'} + return pd.read_csv(get_path('InSightE08_XW.csv'), dtype=dtype_dict) @pytest.fixture def insight_ipf(): return get_path('P20_008845_1894_XN_09N203W.ipf') +@pytest.fixture +def example_str_id_ipf(): + return get_path('example_string_id_ipf.ipf') + @pytest.fixture() def insight_expected_ipf(): - return pd.read_csv(get_path('P20_008845_1894_XN_09N203W.csv')) + dtype_dict = {'pt_id': 'str', + 'val': 'int32', + 'fid_val': 'int32', + 'no_obs': 'int32', + 'l.': 'float64', + 's.': 'float64', + 'sig_l': 'float64', + 'sig_s': 'float64', + 'res_l': 'float64', + 'res_s': 'float64', + 'fid_x': 'float64', + 'fid_y': 'float64'} + return pd.read_csv(get_path('P20_008845_1894_XN_09N203W.csv'), dtype=dtype_dict) @pytest.mark.parametrize('ipf, expected', [([insight_ipf()],insight_expected_ipf())]) -def test_read_ifp(ipf, expected): +def test_read_ipf(ipf, expected): df = read_ipf(ipf) assert_frame_equal(df, expected) @pytest.mark.parametrize('gpf, expected', [(insight_gpf(),insight_expected_gpf())]) -def test_read_gfp(gpf, expected): +def test_read_gpf(gpf, expected): df = read_gpf(gpf) assert_frame_equal(df, expected) @@ -63,6 +95,26 @@ def test_write_ipf(ipf, file): assert (truth_arr == test_arr).all() +@pytest.mark.parametrize('ipf, file', [(example_str_id_ipf(), 'plio/io/tests/temp')]) +def test_write_str_id_ipf(ipf, file): + df = read_ipf(ipf) + save_ipf(df, file) + 
file = os.path.join(file, 'example_string_id_ipf.ipf') + + with open(ipf) as f: + fl = f.readlines() + + with open(file) as f: + fs = f.readlines() + + # Quick check to make sure that length of IPF files matches + # otherwise, the test that follows will be invalid + assert len(fl) == len(fs) + + # Test that every 6th line (the lines containing the point ID and integer flags) matches + for i in range(3,len(fs),6): + assert fs[i] == fl[i] + @pytest.mark.parametrize('gpf, file', [(insight_gpf(), 'out.gpf')]) def test_write_gpf(gpf, file): """ @@ -89,6 +141,65 @@ def test_write_gpf(gpf, file): # np.testing.assert_array_almost_equal(truth_arr, test_arr) +@pytest.mark.parametrize('gpf, file', [(example_str_id_gpf(), 'out.gpf')]) +def test_write_str_id_gpf(gpf, file): + """ + This test makes sure that the point IDs of a GPF whose point IDs only contain numbers, + are written correctly when saving to disk + """ + df = read_gpf(gpf) + save_gpf(df, file) + + with open(gpf) as f: + fl = f.readlines() + + with open(file) as f: + fs = f.readlines() + + # Quick check to make sure that length of GPF files matches + # otherwise, the test that follows will be invalid + assert len(fl) == len(fs) + + # Test that every 5th line (the lines containing the point ID and integer flags) matches + for i in range(3,len(fs),5): + assert fs[i] == fl[i] + +@pytest.mark.parametrize('gpf', [(example_str_id_gpf())]) +def test_gpf_dtypes(gpf): + """ + This test makes sure that a GPF whose point IDs only contain numbers + are always treated as strings after they're read in. 
+ """ + # Read the GPF file under test into a pandas dataframe + df = read_gpf(gpf) + + # Truth list of column data types + truth_dtypes = ['O','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64','float64'] + + # Test list of column data types + test_dtypes = list(df.dtypes) + + # Check that the type of each column matches the truth list + assert truth_dtypes == test_dtypes + +@pytest.mark.parametrize('ipf', [(example_str_id_ipf())]) +def test_ipf_dtypes(ipf): + """ + This test makes sure that an IPF whose point IDs only contain numbers + are always treated as strings after they're read in. + """ + # Read the IPF file under test into a pandas dataframe + df = read_ipf(ipf) + + # Truth list of column data types + truth_dtypes = ['O','int32','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64', 'O'] + + # Test list of column data types + test_dtypes = list(df.dtypes) + + # Check that the type of each column matches the truth list + assert truth_dtypes == test_dtypes + class TestISDFromSocetLis(): def test_parse_with_empty_newlines(self):