From 19ba13cc0ac720985c5bdf2102f5467af117c02b Mon Sep 17 00:00:00 2001 From: Lauren Adoram-Kershner <42873279+ladoramkershner@users.noreply.github.com> Date: Fri, 24 Sep 2021 10:40:27 -0700 Subject: [PATCH] Improving place_points_from_cnet speed (#603) * updating from_cnet; tests still needed * reverting transform/spatial reproject change * adding a test to ensure df is formatted with Model columns * addressing comment round one * addressing comments round two * Change log and bug fixes --- CHANGELOG.md | 4 + autocnet/graph/network.py | 143 +++++++++++++++------ autocnet/graph/tests/test_network_graph.py | 93 ++++++++++---- autocnet/io/db/controlnetwork.py | 46 +++---- autocnet/io/db/model.py | 18 +++ autocnet/transformation/spatial.py | 23 ++-- 6 files changed, 228 insertions(+), 99 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7394a61..3df73f76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,9 +36,11 @@ release. ### Added - Added a mutual information matcher [#559](https://github.com/USGS-Astrogeology/autocnet/pull/559) +- Added residual column information to the Points model ### Changed - `geom_match_simple` defaults to a 3rd order warp for interpolation +- Speed improvements for place_points_from_cnet dependent on COPY method instead of ORM update ### Fixed - `update_from_jigsaw` failures due to stale code. Now uses a conntext on the engine to ensure closure @@ -59,3 +61,5 @@ release. 
- Image to ground to support multiple input types with proper output type handling #580 - Support for ISIS special pixels in image data #577 - Fix for no correlation map returned from `geom_match_simple` #556 + + diff --git a/autocnet/graph/network.py b/autocnet/graph/network.py index d29c9b46..2398ed9a 100644 --- a/autocnet/graph/network.py +++ b/autocnet/graph/network.py @@ -2222,50 +2222,115 @@ class NetworkCandidateGraph(CandidateGraph): except Exception as e: warnings.warn(f'Failed to reset primary id sequence for table {t}') - def place_points_from_cnet(self, cnet): + def cnet_to_db(self, cnet): + """ + Splits an isis control network into two subsets mirroring the points and measures + database table formats. + + Parameters + ---------- + cnet: str or IsisControlNetwork + The ISIS control network or path to the ISIS control network to be loaded. + + Returns + ------- + points: IsisControlNetwork + Subset of the ISIS controlnetwork formatted as io.db.model.Points table + + measures: IsisControlNetwork + Subset of the Isis controlnetwork formatted as io.db.model.Measures table + """ + semi_major, semi_minor = self.config["spatial"]["semimajor_rad"], self.config["spatial"]["semiminor_rad"] + if isinstance(cnet, str): + cnet = from_isis(cnet) + cnet = cnet.rename(columns={'id':'identifier', + 'measureChoosername': 'ChooserName', + 'sampleResidual':'sampler', + 'lineResidual': 'liner'}) + + points = cnet.copy(deep=True) # this prevents Pandas value being set on copy of slice warnings + points.drop_duplicates(subset=['identifier'], inplace=True) + points.insert(0, 'id', list(range(1,len(points)+1))) + points[['overlapid','residuals', 'maxResidual']] = None + points[['cam_type']] = 'isis' + + points['apriori'] = [geoalchemy2.shape.from_shape(shapely.geometry.Point(x,y,z)) for x,y,z in zip(points['aprioriX'].values, points['aprioriY'].values, points['aprioriZ'].values)] + if (points['adjustedX'] == 0).all(): + points['adjusted'] = points['apriori'] + xyz_data = 
[points['aprioriX'].values, points['aprioriY'].values, points['aprioriZ'].values] + else: + points[['adjusted']] = [geoalchemy2.shape.from_shape(shapely.geometry.Point(x,y,z)) for x,y,z in zip(points['adjustedX'].values, points['adjustedY'].values, points['adjustedZ'].values)] + xyz_data = [points['adjustedX'].values, points['adjustedY'].values, points['adjustedZ'].values] + + og = reproject(xyz_data, semi_major, semi_minor, 'geocent', 'latlon') + oc = og2oc(og[0], og[1], semi_major, semi_minor) + points['geom'] = [geoalchemy2.shape.from_shape(shapely.geometry.Point(lon, lat), srid=self.config['spatial']['latitudinal_srid']) for lon, lat in zip(oc[0], oc[1])] + + cnet.insert(0, 'id', list(range(1,len(cnet)+1))) + pid_map = {ident: pid for ident, pid in zip(points['identifier'], points['id'])} + cnet['pointid'] = cnet.apply(lambda row: pid_map[row['identifier']], axis=1) + + with self.session_scope() as session: + imgs = session.query(Images.serial, Images.id).all() + iid_map = {ii[0]: ii[1] for ii in imgs} + cnet['imageid'] = cnet.apply(lambda row: iid_map[row['serialnumber']], axis=1) + + def GoodnessOfFit_value_extract(row): + mlog = row['measureLog'] + if mlog: + for m in mlog: + if m.messagetype.name == "GoodnessOfFit": + return m.value + return None + + cnet['templateMetric'] = cnet.apply(GoodnessOfFit_value_extract, axis=1) + cnet['templateShift'] = cnet.apply(lambda row: np.sqrt((row['line']-row['aprioriline'])**2 + (row['sample']-row['apriorisample'])**2) if row['ChooserName'] != row['pointChoosername'] else 0, axis=1) + cnet['residual'] = np.sqrt(cnet['liner']**2+cnet['sampler']**2) + cnet['rms'] = np.sqrt(np.mean([cnet['liner']**2, cnet['sampler']**2], axis=0)) + + cnet[['phaseError','phaseDiff','phaseShift']] = None + cnet['weight'] = None + + point_columns = Points.__table__.columns.keys() + measure_columns = Measures.__table__.columns.keys() + points = points[point_columns] + measures = cnet[measure_columns] + + return points, measures + + def 
place_points_from_cnet(self, cnet, clear_tables=True): + """ + Loads points from an ISIS control network into an AutoCNet formatted database. + + Parameters + ---------- + cnet: str or IsisControlNetwork + The ISIS control network or path to the ISIS control network to be loaded. + + clear_tables: boolean + Clears entries out of the points and measures database tables if True. + Appends the control network points and measures onto the current points + and measures database tables if False. + """ + if isinstance(cnet, str): cnet = from_isis(cnet) - cnetpoints = cnet.groupby('id') - session = self.Session() - - for id, cnetpoint in cnetpoints: - def get_measures(row): - res = session.query(Images).filter(Images.serial == row.serialnumber).one() - return Measures(pointid=id, - imageid=int(res.id), # Need to grab this - measuretype=int(row.measureType), - serial=row.serialnumber, - sample=float(row['sample']), - line=float(row['line']), - sampler=float(row.sampleResidual), - liner=float(row.lineResidual), - ignore=row.measureIgnore, - jigreject=row.measureJigsawRejected, - aprioriline=float(row.aprioriline), - apriorisample=float(row.apriorisample), - linesigma=float(row.linesigma), - samplesigma=float(row.samplesigma)) - - measures = cnetpoint.apply(get_measures, axis=1) - - row = cnetpoint.iloc[0] - x,y,z= row.adjustedX, row.adjustedY, row.adjustedZ - lon_og, lat_og, alt = reproject([x, y, z], semi_major, semi_minor, 'geocent', 'latlon') - lon, lat = og2oc(lon_og, lat_og, semi_major, semi_minor) - - point = Points(identifier=id, - ignore=row.pointIgnore, - apriori= shapely.geometry.Point(float(row.aprioriX), float(row.aprioriY), float(row.aprioriZ)), - adjusted= shapely.geometry.Point(float(row.adjustedX),float(row.adjustedY),float(row.adjustedZ)), - pointtype=float(row.pointType)) - - point.measures = list(measures) - session.add(point) - session.commit() - session.close() + points, measures = self.cnet_to_db(cnet) + + engine = self.engine + with engine.connect() as 
connection: + # Execute an SQL COPY from a CSV buffer into the DB + + if engine.dialect.has_table(engine.connect(), 'points', schema='public') and clear_tables: + connection.execute('DROP TABLE measures, points;') + Points.__table__.create(bind=engine, checkfirst=True) + Measures.__table__.create(bind=engine, checkfirst=True) + + points.to_sql('points', connection, schema='public', if_exists='append', index=False, method=io_controlnetwork.copy_from_method) + measures.to_sql('measures', connection, schema='public', if_exists='append', index=False, method=io_controlnetwork.copy_from_method) @classmethod def from_cnet(cls, cnet, filelist, config): diff --git a/autocnet/graph/tests/test_network_graph.py b/autocnet/graph/tests/test_network_graph.py index 658ae521..d56ce7cb 100644 --- a/autocnet/graph/tests/test_network_graph.py +++ b/autocnet/graph/tests/test_network_graph.py @@ -3,6 +3,7 @@ import pytest import sys import pandas as pd +from plio.io.io_controlnetwork import IsisControlNetwork from autocnet.io.db import model from autocnet.graph.network import NetworkCandidateGraph @@ -14,31 +15,50 @@ if sys.platform.startswith("darwin"): @pytest.fixture() def cnet(): - return pd.DataFrame.from_dict({ - 'id' : [1], - 'pointType' : 2, - 'serialnumber' : ['BRUH'], - 'measureJigsawRejected': [False], - 'sampleResidual' : [0.1], - 'pointIgnore' : [False], - 'pointJigsawRejected': [False], - 'lineResidual' : [0.1], - 'linesigma' : [0], - 'samplesigma': [0], - 'adjustedCovar' : [[]], - 'apriorisample' : [0], - 'aprioriline' : [0], - 'line' : [1], - 'sample' : [2], - 'measureIgnore': [False], - 'adjustedX' : [0], - 'adjustedY' : [0], - 'adjustedZ' : [0], - 'aprioriX' : [0], - 'aprioriY' : [0], - 'aprioriZ' : [0], - 'measureType' : [1] - }) + return IsisControlNetwork.from_dict({ + 'id' : [1, 2, 3], + 'pointType' : [2]*3, + 'pointChoosername' : ['findfeatures']*3, + 'pointDatetime' : ['YYYY-MM-DDT00:00:00']*3, + 'pointEditLock': [False]*3, + 'pointIgnore' : [False]*3, + 
'pointJigsawRejected': [False]*3, + 'referenceIndex' : [0]*3, + 'aprioriSurfPointSource': ['ground']*3, + 'aprioriSurfPointSourceFile' : ['ground.file']*3, + 'aprioriRadiusSource' : ['radius']*3, + 'aprioriRadiusSourceFile' : ['radius.file']*3, + 'latitudeConstrained' : [False]*3, + 'longitudeConstrained' : [False]*3, + 'radiusConstrained' : [False]*3, + 'aprioriX' : [1017046.81161667, -1402345.22133465, 103571.17894436], + 'aprioriY' : [1017046.81161667, -1402345.22133465, 103571.17894436], + 'aprioriZ' : [1014022.55349016, -1404707.80219809, 101009.09763132], + 'aprioriCovar' : [[]]*3, + 'adjustedX' : [0]*3, + 'adjustedY' : [0]*3, + 'adjustedZ' : [0]*3, + 'adjustedCovar' : [[]]*3, + 'pointLog' : [[]]*3, + 'serialnumber' : ['SN1345', 'SN2348', 'SN9730'], + 'measureType' : [1]*3, + 'sample' : [2]*3, + 'line' : [1]*3, + 'sampleResidual' : [0.1]*3, + 'lineResidual' : [0.1]*3, + 'measureChoosername' : ['pointreg']*3, + 'measureDatetime' : ['YYYY-MM-DDT00:00:00']*3, + 'measureEditLock' : [False]*3, + 'measureIgnore': [False]*3, + 'measureJigsawRejected': [False]*3, + 'diameter' : [1000]*3, + 'apriorisample' : [0]*3, + 'aprioriline' : [0]*3, + 'samplesigma': [0]*3, + 'linesigma' : [0]*3, + 'measureLog' : [[]]*3 + }) + """@pytest.mark.parametrize("image_data, expected_npoints", [({'id':1, 'serial': 'BRUH'}, 1)]) def test_place_points_from_cnet(cnet, image_data, expected_npoints, ncg): @@ -111,4 +131,25 @@ def test_selective_clear_db(ncg): res = session.query(model.Images).all() assert len(res) == 1 res = session.query(model.Points).all() - assert len(res) == 0 \ No newline at end of file + assert len(res) == 0 + +def test_cnet_to_db(ncg, cnet): + # check that the resulting DB DFs have same columns as corresponding Models + imgs = [model.Images(name='foo1', serial=cnet.iloc[0]['serialnumber']), + model.Images(name='foo2', serial=cnet.iloc[1]['serialnumber']), + model.Images(name='foo3', serial=cnet.iloc[2]['serialnumber'])] + + with ncg.session_scope() as session: + 
session.add_all(imgs) + + p_df, m_df = ncg.cnet_to_db(cnet) + + point_columns = model.Points.__table__.columns.keys() + measure_columns = model.Measures.__table__.columns.keys() + + for key in point_columns: + assert key in p_df.columns, f"column \'{key}\' not in points dataframe" + for key in measure_columns: + assert key in m_df.columns, f"column \'{key}\' not in measures dataframe" + +# TO DO: test the clear tables functionality on ncg.place_points_from_cnet diff --git a/autocnet/io/db/controlnetwork.py b/autocnet/io/db/controlnetwork.py index 8282a677..1ac902ab 100644 --- a/autocnet/io/db/controlnetwork.py +++ b/autocnet/io/db/controlnetwork.py @@ -97,6 +97,29 @@ ORDER BY measures."pointid", measures."id"; return df +def copy_from_method(table, conn, keys, data_iter, pre_truncate=False, fatal_failure=False): + """ + Custom method for pandas.DataFrame.to_sql that will use COPY FROM + From: https://stackoverflow.com/questions/24084710/to-sql-sqlalchemy-copy-from-postgresql-engine + + This follows the API specified by pandas. + """ + + dbapi_conn = conn.connection + cur = dbapi_conn.cursor() + + s_buf = StringIO() + writer = csv_writer(s_buf, quoting=QUOTE_MINIMAL) + writer.writerows(data_iter) + s_buf.seek(0) + + columns = ', '.join('"{}"'.format(k) for k in keys) + table_name = '{}.{}'.format( + table.schema, table.name) if table.schema else table.name + + sql_query = 'COPY %s (%s) FROM STDIN WITH CSV' % (table_name, columns) + cur.copy_expert(sql=sql_query, file=s_buf) + return cur.rowcount def update_from_jigsaw(cnet, measures, engine, pointid_func=None): """ @@ -131,29 +154,6 @@ def update_from_jigsaw(cnet, measures, engine, pointid_func=None): numeric ID back. This callable is used to unmunge the id. 
""" - def copy_from_method(table, conn, keys, data_iter, pre_truncate=False, fatal_failure=False): - """ - Custom method for pandas.DataFrame.to_sql that will use COPY FROM - From: https://stackoverflow.com/questions/24084710/to-sql-sqlalchemy-copy-from-postgresql-engine - - This is follows the API specified by pandas. - """ - - dbapi_conn = conn.connection - cur = dbapi_conn.cursor() - - s_buf = StringIO() - writer = csv_writer(s_buf, quoting=QUOTE_MINIMAL) - writer.writerows(data_iter) - s_buf.seek(0) - - columns = ', '.join('"{}"'.format(k) for k in keys) - table_name = '{}.{}'.format( - table.schema, table.name) if table.schema else table.name - - sql_query = 'COPY %s (%s) FROM STDIN WITH CSV' % (table_name, columns) - cur.copy_expert(sql=sql_query, file=s_buf) - return cur.rowcount # Get the PID back from the id. if pointid_func: diff --git a/autocnet/io/db/model.py b/autocnet/io/db/model.py index d06b5ec2..82c7076a 100644 --- a/autocnet/io/db/model.py +++ b/autocnet/io/db/model.py @@ -428,6 +428,8 @@ class Points(Base, BaseMixin): order_by="asc(Measures.id)", backref=backref('point', lazy='joined')) reference_index = Column("referenceIndex", Integer, default=0) + _residuals = Column("residuals", ARRAY(Float)) + _maxresidual = Column("maxResidual", Float) _default_fields = [ "pointtype", @@ -501,6 +503,22 @@ class Points(Base, BaseMixin): v = PointType(v) self._pointtype = v + @hybrid_property + def residuals(self): + return self._residuals + + @residuals.setter + def residuals(self, v): + self._residuals = v + + @hybrid_property + def maxresidual(self): + return self._maxresidual + + @maxresidual.setter + def maxresidual(self, max_res): + self._maxresidual = max_res + #def subpixel_register(self, Session, pointid, **kwargs): # subpixel.subpixel_register_point(args=(Session, pointid), **kwargs) diff --git a/autocnet/transformation/spatial.py b/autocnet/transformation/spatial.py index ea35a7a1..a05df56c 100644 --- a/autocnet/transformation/spatial.py +++ 
b/autocnet/transformation/spatial.py @@ -7,10 +7,10 @@ def og2oc(lon, lat, semi_major, semi_minor): Parameters ---------- - lon : float + lon : float or np.array longitude 0 to 360 domain (in degrees) - lat : float + lat : float or np.array planetographic latitude (in degrees) semi_major : float @@ -21,10 +21,10 @@ def og2oc(lon, lat, semi_major, semi_minor): Returns ------- - lon: float + lon: float or np.array longitude (in degrees) - lat: float + lat: float or np.array planetocentric latitude (in degrees) """ @@ -42,10 +42,10 @@ def oc2og(lon, lat, semi_major, semi_minor): Parameters ---------- - lon : float + lon : float or np.array longitude 0 to 360 domain (in degrees) - lat : float + lat : float or np.array planetocentric latitude (in degrees) semi_major : float @@ -56,10 +56,10 @@ def oc2og(lon, lat, semi_major, semi_minor): Returns ------- - lon : float + lon : float or np.array longitude (in degrees) - lat : float + lat : float or np.array planetographic latitude (in degrees) """ @@ -86,8 +86,9 @@ def reproject(record, semi_major, semi_minor, source_proj, dest_proj, **kwargs): Parameters ---------- - record : object - Pandas series object + record : array of np.array + Array containing the coordinates to reproject. + Formatted like [[x1, x2, ..., xn], [y1, y2, ..., yn], [z1, z2, ..., zn]] semi_major : float Radius from the center of the body to the equater @@ -103,7 +104,7 @@ def reproject(record, semi_major, semi_minor, source_proj, dest_proj, **kwargs): Returns ------- - : list + : np.arrays Transformed coordinates as y, x, z """ -- GitLab