JDBCConnection.java

package tap.db;

/*
 * This file is part of TAPLibrary.
 * 
 * TAPLibrary is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * TAPLibrary is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with TAPLibrary.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * Copyright 2012,2014 - UDS/Centre de Données astronomiques de Strasbourg (CDS),
 *                       Astronomishes Rechen Institut (ARI)
 */

import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import tap.data.DataReadException;
import tap.data.ResultSetTableIterator;
import tap.data.TableIterator;
import tap.log.TAPLog;
import tap.metadata.TAPColumn;
import tap.metadata.TAPForeignKey;
import tap.metadata.TAPMetadata;
import tap.metadata.TAPMetadata.STDSchema;
import tap.metadata.TAPMetadata.STDTable;
import tap.metadata.TAPSchema;
import tap.metadata.TAPTable;
import tap.metadata.TAPTable.TableType;
import tap.metadata.TAPType;
import tap.metadata.TAPType.TAPDatatype;
import uws.service.log.UWSLog.LogLevel;
import adql.query.ADQLQuery;
import adql.query.IdentifierField;
import adql.translator.ADQLTranslator;
import adql.translator.JDBCTranslator;
import adql.translator.TranslationException;

/**
 * <p>This {@link DBConnection} implementation is theoretically able to deal with any DBMS JDBC connection.</p>
 * 
 * <p><i>Note:
 * 	"Theoretically", because its design has been done using information about Postgres, SQLite, Oracle, MySQL and Java DB (Derby).
 * 	Then it has been really tested successfully with Postgres and SQLite.
 * </i></p>
 * 
 * <h3>Deal with different DBMS features</h3>
 * 
 * <p>Update queries are taking into account whether the following features are supported by the DBMS:</p>
 * <ul>
 * 	<li><b>data definition</b>: when not supported, no update operation will be possible.
 * 	                            All corresponding functions will then throw a {@link DBException} ;
 * 	                            only {@link #executeQuery(ADQLQuery)} will be possibly called.</li>
 * 
 * 	<li><b>transactions</b>: when not supported, no transaction is started or merely used.
 * 	                         It means that in case of update failure, no rollback will be possible
 * 	                         and that already done modification will remain in the database.</li>
 * 
 * 	<li><b>schemas</b>: when the DBMS does not have the notion of schema (like SQLite), no schema creation or dropping will be obviously processed.
 * 	                    Besides, if not already done, database name of all tables will be prefixed by the schema name.
 * 	                    The prefix to apply is returned by {@link #getTablePrefix(String)}.</li>
 * 
 * 	<li><b>batch updates</b>: when not supported, updates will just be done, "normally, one by one.
 * 	                          In one word, there will be merely no optimization.
 * 	                          Anyway, this feature concerns only the insertions into tables.</li>
 * 
 * 	<li><b>case sensitivity of identifiers</b>: the case sensitivity of quoted identifier varies from the used DBMS. This {@link DBConnection}
 * 	                                            implementation is able to adapt itself in function of the way identifiers are stored and
 * 	                                            researched in the database. How the case sensitivity is managed by the DBMS is the problem
 * 	                                            of only one function (which can be overwritten if needed): {@link #equals(String, String, boolean)}.</li>
 * </ul>
 * 
 * <p><i><b>Warning</b>:
 * 	All these features have no impact at all on ADQL query executions ({@link #executeQuery(ADQLQuery)}).
 * </i></p>
 * 
 * <h3>Datatypes</h3>
 * 
 * <p>Column types are converted from DBMS to TAP types with {@link #getTAPType(String)} and from TAP to DBMS types with {@link #getDBMSDatatype(TAPType)}.</p>
 * 
 * <p>
 * 	All typical DBMS datatypes are taken into account, <b>EXCEPT the geometrical types</b> (POINT and REGION). For these types, the only object having this
 * 	information is the translator thanks to {@link JDBCTranslator#isPointType(String)}, {@link JDBCTranslator#isRegionType(String)},
 * 	{@link JDBCTranslator#getPointType()} and {@link JDBCTranslator#getRegionType()}. The two first functions are used to identify a DBMS type as a point or
 * 	a region (note: several DBMS datatypes may be identified as a geometry type). The two others provide the DBMS type corresponding the best to the TAP types
 * 	POINT and REGION.
 * </p>
 * 
 * <p><i><b>Warning:</b>
 * 	The TAP type REGION can be either a circle, a box or a polygon. Since several DBMS types correspond to one TAP type, {@link JDBCTranslator#getRegionType()}
 * 	MUST return a type covering all these region datatypes. Generally, it will be a VARCHAR whose the values would be STC-S expressions.
 * 	Note that this function is used ONLY WHEN tables with a geometrical value is uploaded. On the contrary, {@link JDBCTranslator#isRegionType(String)}
 * 	is used much more often: in order to write the metadata part of a query result.
 * </i></p>
 * 
 * @author Gr&eacute;gory Mantelet (CDS;ARI)
 * @version 2.0 (09/2014)
 * @since 2.0
 */
public class JDBCConnection implements DBConnection {

	/** DBMS name of PostgreSQL used in the database URL. */
	protected final static String DBMS_POSTGRES = "postgresql";

	/** DBMS name of SQLite used in the database URL. */
	protected final static String DBMS_SQLITE = "sqlite";

	/** DBMS name of MySQL used in the database URL. */
	protected final static String DBMS_MYSQL = "mysql";

	/** DBMS name of Oracle used in the database URL. */
	protected final static String DBMS_ORACLE = "oracle";

	/** Connection ID (typically, the job ID). It lets identify the DB errors linked to the Job execution in the logs. */
	protected final String ID;

	/** JDBC connection (created and initialized at the creation of this {@link JDBCConnection} instance). */
	protected final Connection connection;

	/** The translator this connection must use to translate ADQL into SQL. It is also used to get information about the case sensitivity of all types of identifier (schema, table, column). */
	protected final JDBCTranslator translator;

	/** Object to use if any message needs to be logged. <i>note: this logger may be NULL. If NULL, messages will never be printed.</i> */
	protected final TAPLog logger;

	/* JDBC URL MANAGEMENT */

	/** JDBC prefix of any database URL (for instance: jdbc:postgresql://127.0.0.1/myDB or jdbc:postgresql:myDB). */
	public final static String JDBC_PREFIX = "jdbc";

	/** Name (in lower-case) of the DBMS with which the connection is linked. */
	protected final String dbms;

	/* DBMS SUPPORTED FEATURES */

	/** Indicate whether the DBMS supports transactions (start, commit, rollback and end). <i>note: If no transaction is possible, no transaction will be used, but then, it will never possible to cancel modifications in case of error.</i> */
	protected boolean supportsTransaction;

	/** Indicate whether the DBMS supports the definition of data (create, update, drop, insert into schemas and tables). <i>note: If not supported, it will never possible to create TAP_SCHEMA from given metadata (see {@link #setTAPSchema(TAPMetadata)}) and to upload/drop tables (see {@link #addUploadedTable(TAPTable, TableIterator)} and {@link #dropUploadedTable(TAPTable)}).</i> */
	protected boolean supportsDataDefinition;

	/** Indicate whether the DBMS supports several updates in once (using {@link Statement#addBatch(String)} and {@link Statement#executeBatch()}). <i>note: If not supported, every updates will be done one by one. So it is not really a problem, but just a loss of optimization.</i> */
	protected boolean supportsBatchUpdates;

	/** Indicate whether the DBMS has the notion of SCHEMA. Most of the DBMS has it, but not SQLite for instance. <i>note: If not supported, the DB table name will be prefixed by the DB schema name followed by the character "_". Nevertheless, if the DB schema name is NULL, the DB table name will never be prefixed.</i> */
	protected boolean supportsSchema;

	/* CASE SENSITIVITY SUPPORT */

	/** Indicate whether UNquoted identifiers will be considered as case INsensitive and stored in mixed case by the DBMS. <i>note: If FALSE, unquoted identifiers will still be considered as case insensitive for the researches, but will be stored in lower or upper case (in function of {@link #lowerCaseUnquoted} and {@link #upperCaseUnquoted}). If none of these two flags is TRUE, the storage case will be though considered as mixed.</i> */
	protected boolean supportsMixedCaseUnquotedIdentifier;
	/** Indicate whether the unquoted identifiers are stored in lower case in the DBMS. */
	protected boolean lowerCaseUnquoted;
	/** Indicate whether the unquoted identifiers are stored in upper case in the DBMS. */
	protected boolean upperCaseUnquoted;

	/** Indicate whether quoted identifiers will be considered as case INsensitive and stored in mixed case by the DBMS. <i>note: If FALSE, quoted identifiers will be considered as case sensitive and will be stored either in lower, upper or in mixed case (in function of {@link #lowerCaseQuoted}, {@link #upperCaseQuoted} and {@link #mixedCaseQuoted}). If none of these three flags is TRUE, the storage case will be mixed case.</i> */
	protected boolean supportsMixedCaseQuotedIdentifier;
	/** Indicate whether the quoted identifiers are stored in lower case in the DBMS. */
	protected boolean lowerCaseQuoted;
	/** Indicate whether the quoted identifiers are stored in mixed case in the DBMS. */
	protected boolean mixedCaseQuoted;
	/** Indicate whether the quoted identifiers are stored in upper case in the DBMS. */
	protected boolean upperCaseQuoted;

	/**
	 * <p>Creates a JDBC connection to the specified database and with the specified JDBC driver.
	 * This connection is established using the given user name and password.<p>
	 * 
	 * <p><i><u>note:</u> the JDBC driver is loaded using <pre>Class.forName(driverPath)</pre> and the connection is created with <pre>DriverManager.getConnection(dbUrl, dbUser, dbPassword)</pre>.</i></p>
	 * 
	 * <p><i><b>Warning:</b>
	 * 	This constructor really creates a new SQL connection. Creating a SQL connection is time consuming!
	 * 	That's why it is recommended to use a pool of connections. When doing so, you should use the other constructor of this class
	 * 	({@link #JDBCConnection(Connection, String, TAPLog)}).
	 * </i></p>
	 * 
	 * @param driverPath	Full class name of the JDBC driver.
	 * @param dbUrl			URL to the database. <i><u>note</u> This URL may not be prefixed by "jdbc:". If not, the prefix will be automatically added.</i>
	 * @param dbUser		Name of the database user.
	 * @param dbPassword	Password of the given database user.
	 * @param translator	{@link ADQLTranslator} to use in order to get SQL from an ADQL query and to get qualified DB table names.