diff --git a/notebooks/Socet2ISIS.ipynb b/notebooks/Socet2ISIS.ipynb index 04b6b2c273ff071d3d8d461331c9114267aeadfe..636ba38621d6a47b887c5d420d54b1af792bbf95 100644 --- a/notebooks/Socet2ISIS.ipynb +++ b/notebooks/Socet2ISIS.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -15,12 +15,12 @@ "import numpy as np\n", "\n", "from plio.examples import get_path\n", - "from plio.io.io_bae import read_gpf" + "from plio.io.io_bae import read_gpf, read_ipf" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -69,80 +69,7 @@ " # Sets the value of PATH to the path of the ATF file\n", " files_dict['PATH'] = os.path.dirname(os.path.abspath(atf_file))\n", " \n", - " return files_dict\n", - " \n", - "@singledispatch\n", - "def read_ipf(arg):\n", - " return str(arg)\n", - "\n", - "@read_ipf.register(str)\n", - "def read_ipf_str(input_data):\n", - " \"\"\"\n", - " Read a socet ipf file into a pandas data frame\n", - "\n", - " Parameters\n", - " ----------\n", - " input_data : str\n", - " path to the an input data file\n", - "\n", - " Returns\n", - " -------\n", - " df : pd.DataFrame\n", - " containing the ipf data with appropriate column names and indices\n", - " \"\"\"\n", - "\n", - " # Check that the number of rows is matching the expected number\n", - " with open(input_data, 'r') as f:\n", - " for i, l in enumerate(f):\n", - " if i == 1:\n", - " cnt = int(l)\n", - " elif i == 2:\n", - " col = l\n", - " break\n", - " \n", - " columns = np.genfromtxt(input_data, skip_header=2, dtype='unicode',\n", - " max_rows = 1, delimiter = ',')\n", - "\n", - " # TODO: Add unicode conversion\n", - " d = [line.split() for line in open(input_data, 'r')]\n", - " d = np.hstack(np.array(d[3:]))\n", - " \n", - " d = d.reshape(-1, 12)\n", - " \n", - " df = pd.DataFrame(d, columns=columns)\n", - " file = os.path.split(os.path.splitext(input_data)[0])[1]\n", - " df['ipf_file'] = pd.Series(np.full((len(df['pt_id'])), file), index = df.index)\n", - "\n", - " assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df))\n", - " \n", - " # Soft conversion of numeric types to numerics, allows str in first col for point_id\n", - " df = df.apply(pd.to_numeric, errors='ignore')\n", - "\n", - " return df\n", - "\n", - "@read_ipf.register(list)\n", - "def read_ipf_list(input_data_list):\n", - " \"\"\"\n", - " Read a socet ipf file into a pandas data frame\n", - "\n", - " Parameters\n", - " ----------\n", - " input_data_list : list\n", - " list of paths to the a set of input data files\n", - "\n", - " Returns\n", - " -------\n", - " df : pd.DataFrame\n", - " containing the ipf data with appropriate column names and indices\n", - " \"\"\"\n", - " frames = []\n", - "\n", - " for input_file in input_data_list:\n", - " frames.append(read_ipf(input_file))\n", - "\n", - " df = pd.concat(frames)\n", - "\n", - " return df" + " return files_dict" ] }, { @@ -2019,6 +1946,106 @@ "new_df" ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "@singledispatch\n", + "def read_ipf(arg):\n", + " return str(arg)\n", + "\n", + "@read_ipf.register(str)\n", + "def read_ipf_str(input_data):\n", + " \"\"\"\n", + " Read a socet ipf file into a pandas data frame\n", + "\n", + " Parameters\n", + " ----------\n", + " input_data : str\n", + " path to the an input data file\n", + "\n", + " Returns\n", + " -------\n", + " df : pd.DataFrame\n", + " containing the ipf data with appropriate column names and indices\n", + " \"\"\"\n", + "\n", + " # Check that the number of rows is matching the expected number\n", + " with open(input_data, 'r') as f:\n", + " for i, l in enumerate(f):\n", + " if i == 1:\n", + " cnt = int(l)\n", + " elif i == 2:\n", + " col = l\n", + " break\n", + " \n", + " columns = np.genfromtxt(input_data, skip_header=2, dtype='unicode',\n", + " max_rows = 1, delimiter = ',')\n", + "\n", + " # TODO: Add unicode conversion\n", + " d = [line.split() for line in open(input_data, 'r')]\n", + " d = np.hstack(np.array(d[3:]))\n", + " \n", + " d = d.reshape(-1, 12)\n", + " \n", + " df = pd.DataFrame(d, columns=columns)\n", + " file = os.path.split(os.path.splitext(input_data)[0])[1]\n", + " df['ipf_file'] = pd.Series(np.full((len(df['pt_id'])), file), index = df.index)\n", + "\n", + " assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df))\n", + " \n", + " # Soft conversion of numeric types to numerics, allows str in first col for point_id\n", + " df = df.apply(pd.to_numeric, errors='ignore')\n", + "\n", + " return df\n", + "\n", + "@read_ipf.register(list)\n", + "def read_ipf_list(input_data_list):\n", + " \"\"\"\n", + " Read a socet ipf file into a pandas data frame\n", + "\n", + " Parameters\n", + " ----------\n", + " input_data_list : list\n", + " list of paths to the a set of input data files\n", + "\n", + " Returns\n", + " -------\n", + " df : pd.DataFrame\n", + " containing the ipf data with appropriate column names and indices\n", + " \"\"\"\n", + " frames = []\n", + "\n", + " for input_file in input_data_list:\n", + " frames.append(read_ipf(input_file))\n", + "\n", + " df = pd.concat(frames)\n", + "\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "x = np.array(['1', '2', '3'])\n", + "y = np.array(['1', '2', '3'])\n", + "\n", + "print((x == y).all())" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/plio/io/io_bae.py b/plio/io/io_bae.py index c69ddc84a3c65f5217b4265b69757aefe1fdfd36..37e5abc4345c77c09d9dfecffef6abfc2ecd6df0 100644 --- a/plio/io/io_bae.py +++ b/plio/io/io_bae.py @@ -85,11 +85,10 @@ def read_ipf_str(input_data): columns = np.genfromtxt(input_data, skip_header=2, dtype='unicode', max_rows = 1, delimiter = ',') - # TODO: Add unicode conversion d = [line.split() for line in open(input_data, 'r')] d = np.hstack(np.array(d[3:])) - d = d.reshape(-1, 12) + d = d.reshape(-1, 12).astype('unicode') df = pd.DataFrame(d, columns=columns) file = os.path.split(os.path.splitext(input_data)[0])[1] @@ -126,6 +125,54 @@ def read_ipf_list(input_data_list): return df +def save_ipf(df, output_path): + """ + Write a socet gpf file from a gpf-defined pandas dataframe + + Parameters + ---------- + df : pd.DataFrame + Pandas DataFrame + + output_file : str + path to the output data file + + Returns + ------- + int : success value + 0 = success, 1 = errors + """ + + for ipf_file, ipf_df in df.groupby('ipf_file'): + + output_file = os.path.join(output_path, ipf_file + '.ipf') + + # Check that file can be opened + try: + outIPF = open(output_file, 'w', newline='\r\n') + except: + print('Unable to open output ipf file: {0}'.format(output_file)) + return 1 + + #grab number of rows in pandas dataframe ipf group + numpts = len(ipf_df) + + #Output ipf header + outIPF.write('IMAGE POINT FILE\n') + outIPF.write('{0}\n'.format(numpts)) + outIPF.write('pt_id,val,fid_val,no_obs,l.,s.,sig_l,sig_s,res_l,res_s,fid_x,fid_y\n') + + for index, row in ipf_df.iterrows(): + #Output coordinates to ipf file + outIPF.write('{0} {1} {2} {3}\n'.format(row['pt_id'], row['val'], row['fid_val'], row['no_obs'])) + outIPF.write('{:0.6f} {:0.6f}\n'.format(row['l.'], row['s.'])) + outIPF.write('{:0.6f} {:0.6f}\n'.format(row['sig_l'], row['sig_s'])) + outIPF.write('{:0.6f} {:0.6f}\n'.format(row['res_l'], row['res_s'])) + outIPF.write('{:0.6f} {:0.6f}\n\n'.format(row['fid_x'], row['fid_y'])) + + outIPF.close() + return + def read_gpf(input_data): """ Read a socet gpf file into a pandas data frame diff --git a/plio/io/tests/test_io_bae.py b/plio/io/tests/test_io_bae.py index 105680bf6116b8923955777f113617fb87f498b3..edf866e52e78a9575dfcaa9561ae7554cb92e3ba 100644 --- a/plio/io/tests/test_io_bae.py +++ b/plio/io/tests/test_io_bae.py @@ -1,10 +1,11 @@ import json +import os import numpy as np import pandas as pd from pandas.util.testing import assert_frame_equal -from plio.io.io_bae import socetset_keywords_to_json, read_gpf, save_gpf, read_ipf +from plio.io.io_bae import socetset_keywords_to_json, read_gpf, save_gpf, read_ipf, save_ipf from plio.examples import get_path import pytest @@ -25,15 +26,42 @@ def insight_ipf(): def insight_expected_ipf(): return pd.read_csv(get_path('P20_008845_1894_XN_09N203W.csv')) +@pytest.mark.parametrize('ipf, expected', [([insight_ipf()],insight_expected_ipf())]) +def test_read_ifp(ipf, expected): + df = read_ipf(ipf) + assert_frame_equal(df, expected) + @pytest.mark.parametrize('gpf, expected', [(insight_gpf(),insight_expected_gpf())]) def test_read_gfp(gpf, expected): df = read_gpf(gpf) assert_frame_equal(df, expected) -@pytest.mark.parametrize('ipf, expected', [([insight_ipf()],insight_expected_ipf())]) -def test_read_ifp(ipf, expected): +@pytest.mark.parametrize('ipf, file', [(insight_ipf(), 'plio/io/tests/temp')]) +def test_write_ipf(ipf, file): df = read_ipf(ipf) - assert_frame_equal(df, expected) + save_ipf(df, file) + + file = os.path.join(file, 'P20_008845_1894_XN_09N203W.ipf') + + with open(ipf) as f: + fl = f.readlines() + + with open(file) as f: + fs = f.readlines() + + # Check that the header is the same + for i in range(3): + assert fl[i] == fs[i] + + truth_arr = [line.split() for line in open(ipf, 'r')][3:] + truth_arr = np.hstack(np.array(truth_arr)) + truth_arr = truth_arr.reshape(-1, 12) + + test_arr = [line.split() for line in open(file, 'r')][3:] + test_arr = np.hstack(np.array(test_arr)) + test_arr = test_arr.reshape(-1, 12) + + (truth_arr==test_arr).all() @pytest.mark.parametrize('gpf, file', [(insight_gpf(), 'out.gpf')]) def test_write_gpf(gpf, file): @@ -56,7 +84,10 @@ def test_write_gpf(gpf, file): truth_arr = np.genfromtxt(gpf, skip_header=3) test_arr = np.genfromtxt(file, skip_header=3) - np.testing.assert_array_almost_equal(truth_arr, test_arr) + + (truth_arr==test_arr).all() + + # np.testing.assert_array_almost_equal(truth_arr, test_arr) def test_create_from_socet_lis(): socetlis = get_path('socet_isd.lis')