In [2]:

import os
import sys
sys.path.insert(0, os.path.abspath('/home/tthatcher/Desktop/Projects/Plio/plio'))

import warnings
import pandas as pd
import numpy as np

from functools import singledispatch
from plio.examples import get_path
from plio.io.io_bae import read_gpf

  from ._conv import register_converters as _register_converters


In [106]:
def read_atf(atf_file):
    
    with open(atf_file) as f:
        """
        Read an .atf file and return a dict with .sup, .ipf, .prj, .gpf and
        the path to the directory containing the files.

        Parameters
        ----------
        input_data : file
                     .atf file

        Returns
        -------
        files : Python Dictionary
             containing lists of the files mentioned above
        """
        
        from collections import defaultdict

        files = defaultdict(list)

        for line in f:
            filename = os.path.splitext(line)[0]
            ext = os.path.splitext(line)[1]
            
            # Grabs all the IPF and appends them to files['GP_FILE]
            if(ext == '.ipf\n'):
                files['IMAGE_IPF'].append(filename.split(' ')[1] + ext.strip())
                
            # Grabs all the GPF and appends them to files['GP_FILE]
            if ext == '.gpf\n':
                files['GP_FILE'].append(filename.split(' ')[1] + ext.strip())
            
            # Grabs all the PRJ and appends them to files['GP_FILE]
            if ext == '.prj\n':
                files['PROJECT'].append(filename.split(' ')[1] + ext.strip())
            
            # Grabs all the SUP and appends them to files['GP_FILE]
            if ext == '.sup\n':
                files['IMAGE_SUP'].append(filename.split(' ')[1] + ext.strip())
        
        # Gets the filepath of the ATF file and stores it in files['basepath]
        files['basepath'] = os.path.dirname(os.path.abspath(atf_file))
        
        return files

@singledispatch
def read_ipf(arg):
    return str(arg)

@read_ipf.register(str)
def read_ipf_str(input_data):
    """
    Read a socet ipf file into a pandas data frame

    Parameters
    ----------
    input_data : str
                 path to the an input data file

    Returns
    -------
    df : pd.DataFrame
         containing the ipf data with appropriate column names and indices
    """

    # Check that the number of rows is matching the expected number
    with open(input_data, 'r') as f:
        for i, l in enumerate(f):
            if i == 1:
                cnt = int(l)
            elif i == 2:
                col = l
                break
                
    columns = np.genfromtxt(input_data, skip_header=2, dtype='unicode',
                            max_rows = 1, delimiter = ',')

    # TODO: Add unicode conversion
    d = [line.split() for line in open(input_data, 'r')]
    d = np.hstack(np.array(d[3:]))
    
    d = d.reshape(-1, 12)
    
    df = pd.DataFrame(d, columns=columns)
    df['ipf_file'] = pd.Series(np.full((len(df['pt_id'])), input_data), index = df.index)
    print(df['ipf_file'])

    assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df))
    
    # Soft conversion of numeric types to numerics, allows str in first col for point_id
    df = df.apply(pd.to_numeric, errors='ignore')

    return df

@read_ipf.register(list)
def read_ipf_list(input_data_list):
    """
    Read a socet ipf file into a pandas data frame

    Parameters
    ----------
    input_data_list : list
                 list of paths to the a set of input data files

    Returns
    -------
    df : pd.DataFrame
         containing the ipf data with appropriate column names and indices
    """
    frames = []

    for input_file in input_data_list:
        frames.append(read_ipf(input_file))

    df = pd.concat(frames)

    return df

In [107]:
atf_dict = read_atf(get_path('CTX_Athabasca_Middle_step0.atf'))

print(atf_dict['basepath'])
gpf_file = os.path.join(atf_dict['basepath'], atf_dict['GP_FILE'][0])
ipf_list = [os.path.join(atf_dict['basepath'], i) for i in atf_dict['IMAGE_IPF']]

gpf_df = read_gpf(gpf_file).set_index('point_id')
ipf_df = read_ipf(ipf_list).set_index('pt_id')

point_diff = ipf_df.index.difference(gpf_df.index)

if len(point_diff) != 0:
    warnings.warn("The following points found in ipf files missing from gpf file: \n\n{}. \
                  \n\nContinuing, but these points will be missing from the control network".format(list(point_diff)))

new_df = ipf_df.merge(gpf_df, left_index=True, right_index=True)
list(new_df)

/home/tthatcher/Desktop/Projects/Plio/plio/plio/examples/SocetSet
0      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
1      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
2      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
3      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
4      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
5      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
6      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
7      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
8      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
9      /home/tthatcher/Desktop/Projects/Plio/plio/pli...
10     /home/tthatcher/Desktop/Projects/Plio/plio/pli...
11     /home/tthatcher/Desktop/Projects/Plio/plio/pli...
12     /home/tthatcher/Desktop/Projects/Plio/plio/pli...
13     /home/tthatcher/Desktop/Projects/Plio/plio/pli...
14     /home/tthatcher/Desktop/Projects/Plio/plio/pli...
15     /home/tthatcher/Desktop/Projects/Plio/plio/pli...
16     /home/tthatcher


['P03_002226_1895_XI_09N203W_15', 'P03_002226_1895_XI_09N203W_16', 'P03_002226_1895_XI_09N203W_17', 'P03_002226_1895_XI_09N203W_18', 'P03_002226_1895_XI_09N203W_19', 'P03_002226_1895_XI_09N203W_20', 'P03_002226_1895_XI_09N203W_21', 'P03_002226_1895_XI_09N203W_22', 'P03_002226_1895_XI_09N203W_24', 'P03_002226_1895_XI_09N203W_26', 'P03_002226_1895_XI_09N203W_30', 'P03_002226_1895_XI_09N203W_31', 'P03_002226_1895_XI_09N203W_32', 'P03_002226_1895_XI_09N203W_34', 'P03_002226_1895_XI_09N203W_36', 'P03_002226_1895_XI_09N203W_37', 'P03_002226_1895_XI_09N203W_44', 'P03_002226_1895_XI_09N203W_48', 'P03_002226_1895_XI_09N203W_49', 'P03_002226_1895_XI_09N203W_56', 'P03_002226_1895_XI_09N203W_57', 'P03_002226_1895_XI_09N203W_61', 'P03_002226_1895_XI_09N203W_62', 'P03_002226_1895_XI_09N203W_63', 'P03_002226_1895_XI_09N203W_65', 'P19_008344_1894_XN_09N203W_4', 'P20_008845_1894_XN_09N203W_15'].                   

Continuing, but these points will be missing from the control network
  del sys.path[0]

['val',
 'fid_val',
 'no_obs',
 'l.',
 's.',
 'sig_l',
 'sig_s',
 'res_l',
 'res_s',
 'fid_x',
 'fid_y',
 'ipf_file',
 'stat',
 'known',
 'lat_Y_North',
 'long_X_East',
 'ht',
 'sig0',
 'sig1',
 'sig2',
 'res0',
 'res1',
 'res2']

In [4]:
from collections import defaultdict

files = defaultdict(list)

for line in lines:
    ext = os.path.splitext(line)[-1]
    files[ext].append(line)

files['basepath'] = os.path.dirname(os.path.abspath(atf_file))

NameError: name 'lines' is not defined