Skip to content
Snippets Groups Projects
Commit 3e65794d authored by Grégory Mantelet's avatar Grégory Mantelet
Browse files

[TAP] Fix memory issues for the 'text/plain' format.

The previous text formatting process was storing the entire table in memory....
hence OutOfMemoryError when dealing with large table.

Now, this process is done entirely in memory only for a table having less than
1000 lines. For a larger table, its content is stored in a temporary file. This
file is deleted after usage or in case of error.

This formatting process has been tested under JVM monitoring (both JConsole and
VisualVM) and tables larger than 3,000,000 rows, with success.

This commit fixes the GitHub issue #40
parent f912c20a
No related branches found
No related tags found
No related merge requests found
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
<property name="adqlJunitReportsDir" value="reports/adql" /> <property name="adqlJunitReportsDir" value="reports/adql" />
<property name="uwsJunitReportsDir" value="reports/uws" /> <property name="uwsJunitReportsDir" value="reports/uws" />
<property name="tapJunitReportsDir" value="reports/tap" /> <property name="tapJunitReportsDir" value="reports/tap" />
<property name="cdsJunitReportsDir" value="reports/cds" />
<fail message="The property POSTGRES must be set! It provides the path toward a directory or a JAR which contains all classes inside org.postgresql."> <fail message="The property POSTGRES must be set! It provides the path toward a directory or a JAR which contains all classes inside org.postgresql.">
<condition><not><isset property="POSTGRES"/></not></condition> <condition><not><isset property="POSTGRES"/></not></condition>
...@@ -114,12 +115,14 @@ ...@@ -114,12 +115,14 @@
<delete dir="${adqlJunitReportsDir}" failonerror="false" /> <delete dir="${adqlJunitReportsDir}" failonerror="false" />
<delete dir="${uwsJunitReportsDir}" failonerror="false" /> <delete dir="${uwsJunitReportsDir}" failonerror="false" />
<delete dir="${tapJunitReportsDir}" failonerror="false" /> <delete dir="${tapJunitReportsDir}" failonerror="false" />
<delete dir="${cdsJunitReportsDir}" failonerror="false" />
</target> </target>
<target name="junitValidation" depends="cleanJUnitReports,compileJUnit" description="Executes all JUnit tests before building the library and stop ANT at any error."> <target name="junitValidation" depends="cleanJUnitReports,compileJUnit" description="Executes all JUnit tests before building the library and stop ANT at any error.">
<mkdir dir="${adqlJunitReportsDir}"/> <mkdir dir="${adqlJunitReportsDir}"/>
<mkdir dir="${uwsJunitReportsDir}"/> <mkdir dir="${uwsJunitReportsDir}"/>
<mkdir dir="${tapJunitReportsDir}"/> <mkdir dir="${tapJunitReportsDir}"/>
<mkdir dir="${cdsJunitReportsDir}"/>
<junit fork="true" errorproperty="testsFailure" failureproperty="testsFailure"> <junit fork="true" errorproperty="testsFailure" failureproperty="testsFailure">
<classpath refid="junit.class.path" /> <classpath refid="junit.class.path" />
<classpath> <classpath>
...@@ -136,6 +139,9 @@ ...@@ -136,6 +139,9 @@
<batchtest todir="${tapJunitReportsDir}"> <batchtest todir="${tapJunitReportsDir}">
<fileset dir="${testsDir}" includes="tap/**/Test*.java" /> <fileset dir="${testsDir}" includes="tap/**/Test*.java" />
</batchtest> </batchtest>
<batchtest todir="${cdsJunitReportsDir}">
<fileset dir="${testsDir}" includes="cds/**/Test*.java" />
</batchtest>
</junit> </junit>
<delete dir="${junitBuildDir}" failonerror="false" /> <delete dir="${junitBuildDir}" failonerror="false" />
<fail if="${testsFailure}" message="Failed JUnit validation for ADQL, UWS or TAP Lib.!" /> <fail if="${testsFailure}" message="Failed JUnit validation for ADQL, UWS or TAP Lib.!" />
......
...@@ -35,7 +35,12 @@ import java.util.ArrayList; ...@@ -35,7 +35,12 @@ import java.util.ArrayList;
* @version 1.2 Jun 2008 MW Add a toString method (items aligned) ; * @version 1.2 Jun 2008 MW Add a toString method (items aligned) ;
* Fix a bug in align() when the last line is not full * Fix a bug in align() when the last line is not full
* @version 1.3 Nov 2018 GM Make the alignment process - align() - interruptible * @version 1.3 Nov 2018 GM Make the alignment process - align() - interruptible
*
* @deprecated This class suffers a lot of memory issues when dealing with
* large tables. {@link LargeAsciiTable} should be
* used instead.
*/ */
@Deprecated
public class AsciiTable { public class AsciiTable {
public static final int LEFT = 0; public static final int LEFT = 0;
public static final int CENTER = 1; public static final int CENTER = 1;
...@@ -178,7 +183,11 @@ public class AsciiTable { ...@@ -178,7 +183,11 @@ public class AsciiTable {
* <em>This interruption is useful when this * <em>This interruption is useful when this
* alignment operation becomes time and memory * alignment operation becomes time and memory
* consuming.</em> * consuming.</em>
*
* @deprecated Deprecated because of memory issues. Instead, you should use
* {@link LargeAsciiTable#streamAligned(cds.util.LargeAsciiTable.LineProcessor, int[])}.
*/ */
@Deprecated
public String[] displayAligned(int[] pos) throws InterruptedException{ public String[] displayAligned(int[] pos) throws InterruptedException{
return align(pos, '\0', null); return align(pos, '\0', null);
} }
...@@ -203,7 +212,11 @@ public class AsciiTable { ...@@ -203,7 +212,11 @@ public class AsciiTable {
* <em>This interruption is useful when this * <em>This interruption is useful when this
* alignment operation becomes time and memory * alignment operation becomes time and memory
* consuming.</em> * consuming.</em>
*
* @deprecated Deprecated because of memory issues. Instead, you should use
* {@link LargeAsciiTable#streamAligned(cds.util.LargeAsciiTable.LineProcessor, int[], char)}.
*/ */
@Deprecated
public String[] displayAligned(int[] pos, char newsep) throws InterruptedException{ public String[] displayAligned(int[] pos, char newsep) throws InterruptedException{
return displayAligned(pos, newsep, null); return displayAligned(pos, newsep, null);
} }
...@@ -213,8 +226,8 @@ public class AsciiTable { ...@@ -213,8 +226,8 @@ public class AsciiTable {
* *
* <p><strong>IMPORTANT:</strong> * <p><strong>IMPORTANT:</strong>
* The array must have as many columns as the table has. Each column can * The array must have as many columns as the table has. Each column can
* contain either {@link AsciiTable#LEFT}, {@link AsciiTable#CENTER} or * contain either {@link #LEFT}, {@link #CENTER} or
* {@link AsciiTable#RIGHT}. If the array contains ONE item, it will be * {@link #RIGHT}. If the array contains ONE item, it will be
* used for every column. * used for every column.
* </p> * </p>
* *
...@@ -230,7 +243,11 @@ public class AsciiTable { ...@@ -230,7 +243,11 @@ public class AsciiTable {
* <em>This interruption is useful when this * <em>This interruption is useful when this
* alignment operation becomes time and memory * alignment operation becomes time and memory
* consuming.</em> * consuming.</em>
*
* @deprecated Deprecated because of memory issues. Instead, you should use
* {@link LargeAsciiTable#streamAligned(cds.util.LargeAsciiTable.LineProcessor, int[], char, Thread)}.
*/ */
@Deprecated
public String[] displayAligned(int[] pos, char newsep, final Thread thread) throws InterruptedException{ public String[] displayAligned(int[] pos, char newsep, final Thread thread) throws InterruptedException{
if (newsep == csep) if (newsep == csep)
newsep = '\0'; newsep = '\0';
......
This diff is collapsed.
...@@ -26,15 +26,18 @@ import java.io.OutputStream; ...@@ -26,15 +26,18 @@ import java.io.OutputStream;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
import adql.db.DBColumn; import adql.db.DBColumn;
import cds.util.AsciiTable; import cds.util.LargeAsciiTable;
import cds.util.LargeAsciiTable.LineProcessor;
import cds.util.LargeAsciiTable.LineProcessorException;
import tap.ServiceConnection; import tap.ServiceConnection;
import tap.TAPException; import tap.TAPException;
import tap.TAPExecutionReport; import tap.TAPExecutionReport;
import tap.data.TableIterator; import tap.data.TableIterator;
/** /**
* Format any given query (table) result into a simple table ASCII representation * Format any given query (table) result into a simple table ASCII
* (columns' width are adjusted so that all columns are well aligned and of the same width). * representation (columns' width are adjusted so that all columns are well
* aligned and of the same width).
* *
* @author Gr&eacute;gory Mantelet (CDS;ARI) * @author Gr&eacute;gory Mantelet (CDS;ARI)
* @version 2.3 (11/2018) * @version 2.3 (11/2018)
...@@ -46,21 +49,50 @@ public class TextFormat implements OutputFormat { ...@@ -46,21 +49,50 @@ public class TextFormat implements OutputFormat {
* @since 2.0 */ * @since 2.0 */
protected static final char COL_SEP = '\u25c6'; protected static final char COL_SEP = '\u25c6';
/** The {@link ServiceConnection} to use (for the log and to have some information about the service (particularly: name, description). */ /** How all columns must be aligned.
* @see LargeAsciiTable#streamAligned(LineProcessor, int[])
* @since 2.3 */
protected int[] alignment = new int[]{ LargeAsciiTable.LEFT };
/** The {@link ServiceConnection} to use (for the log and to have some
* information about the service (particularly: name, description). */
protected final ServiceConnection service; protected final ServiceConnection service;
/** /**
* Build a {@link TextFormat}. * Build a {@link TextFormat}.
* *
* <p><em><strong>Note:</strong>
* All columns values will be aligned on the left.
* To change this default alignment, use
* {@link #TextFormat(ServiceConnection, int[])} instead.
* </em></p>
*
* @param service Description of the TAP service. * @param service Description of the TAP service.
* *
* @throws NullPointerException If the given service connection is <code>null</code>. * @throws NullPointerException If the given service connection is NULL.
*/ */
public TextFormat(final ServiceConnection service) throws NullPointerException{ public TextFormat(final ServiceConnection service) throws NullPointerException{
this(service, new int[]{ LargeAsciiTable.LEFT });
}
/**
* Build a {@link TextFormat}.
*
* @param service Description of the TAP service.
* @param customAlignment How columns must be aligned.
* <em>(see {@link LargeAsciiTable#streamAligned(LineProcessor, int[])}
* to know the rules about this array)</em>
*
* @throws NullPointerException If the given service connection is NULL.
*
* @since 2.3
*/
public TextFormat(final ServiceConnection service, final int[] customAlignment) throws NullPointerException{
if (service == null) if (service == null)
throw new NullPointerException("The given service connection is NULL!"); throw new NullPointerException("The given service connection is NULL!");
this.service = service; this.service = service;
this.alignment = customAlignment;
} }
@Override @Override
...@@ -86,12 +118,11 @@ public class TextFormat implements OutputFormat { ...@@ -86,12 +118,11 @@ public class TextFormat implements OutputFormat {
@Override @Override
public void writeResult(TableIterator result, OutputStream output, TAPExecutionReport execReport, Thread thread) throws TAPException, IOException, InterruptedException{ public void writeResult(TableIterator result, OutputStream output, TAPExecutionReport execReport, Thread thread) throws TAPException, IOException, InterruptedException{
// Prepare the formatting of the whole output: // Prepare the formatting of the whole output:
AsciiTable asciiTable = new AsciiTable(COL_SEP); try(LargeAsciiTable asciiTable = new LargeAsciiTable(COL_SEP)){
// Write header: // Write header:
String headerLine = getHeader(result, execReport, thread); String headerLine = getHeader(result, execReport, thread);
asciiTable.addHeaderLine(headerLine); asciiTable.addHeaderLine(headerLine);
asciiTable.endHeaderLine();
if (thread.isInterrupted()) if (thread.isInterrupted())
throw new InterruptedException(); throw new InterruptedException();
...@@ -101,21 +132,10 @@ public class TextFormat implements OutputFormat { ...@@ -101,21 +132,10 @@ public class TextFormat implements OutputFormat {
// Finally write the formatted ASCII table (header + data) in the output stream: // Finally write the formatted ASCII table (header + data) in the output stream:
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output)); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output));
String[] lines = asciiTable.displayAligned(new int[]{ AsciiTable.LEFT }, '|', thread);
execReport.nbRows = 0; execReport.nbRows = 0;
for(String l : lines){ try{
// stop right now the formatting if the job has been aborted/cancelled/interrupted: // Align and write the result:
if (thread.isInterrupted()) execReport.nbRows = asciiTable.streamAligned(new LineFormatter(writer), alignment, '|', thread);
throw new InterruptedException();
// write the line:
writer.write(l);
writer.newLine();
// update the counter of written lines:
execReport.nbRows++;
// flush the writer every 30 lines:
if (execReport.nbRows % 30 == 0)
writer.flush();
}
// Add a line in case of an OVERFLOW: // Add a line in case of an OVERFLOW:
if (overflow){ if (overflow){
...@@ -124,14 +144,57 @@ public class TextFormat implements OutputFormat { ...@@ -124,14 +144,57 @@ public class TextFormat implements OutputFormat {
} }
writer.flush(); writer.flush();
}catch(LineProcessorException lpe){
throw new TAPException("Unexpected error while formatting a result line!", lpe);
}
}
}
/**
* Lets format a line and then write it in the given {@link BufferedWriter}.
*
* @author Gr&eacute;gory Mantelet (CDS)
* @version 2.3 (11/2018)
* @since 2.3
*/
protected static class LineFormatter implements LineProcessor {
private final BufferedWriter writer;
private long nbFormattedLines = 0;
public LineFormatter(final BufferedWriter writer){
this.writer = writer;
}
@Override
public boolean process(String line) throws LineProcessorException{
try{
// write the line:
writer.write(line);
writer.newLine();
// update the counter of written lines:
//execReport.nbRows++;
nbFormattedLines++;
// flush the writer every 30 lines:
if (nbFormattedLines % 30 == 0)
writer.flush();
return true;
}catch(IOException ioe){
throw new LineProcessorException("Impossible to write the given result line!", ioe);
}
}
} }
/** /**
* Get the whole header (one row whose columns are just the columns' name). * Get the whole header (one row whose columns are just the columns' name).
* *
* @param result Result to write later (but it contains also metadata that was extracted from the result itself). * @param result Result to write later (but it contains also metadata
* @param execReport Execution report (which contains the metadata extracted/guessed from the ADQL query). * that was extracted from the result itself).
* @param thread Thread which has asked for this formatting (it must be used in order to test the {@link Thread#isInterrupted()} flag and so interrupt everything if need). * @param execReport Execution report (which contains the metadata
* extracted/guessed from the ADQL query).
* @param thread Thread which has asked for this formatting (it must
* be used in order to test the
* {@link Thread#isInterrupted()} flag and so interrupt
* everything if need).
* *
* @return All the written metadata. * @return All the written metadata.
* *
...@@ -158,19 +221,25 @@ public class TextFormat implements OutputFormat { ...@@ -158,19 +221,25 @@ public class TextFormat implements OutputFormat {
} }
/** /**
* Write all the data rows into the given {@link AsciiTable} object. * Write all the data rows into the given {@link LargeAsciiTable} object.
* *
* @param queryResult Result to write. * @param queryResult Result to write.
* @param asciiTable Output in which the rows (as string) must be written. * @param asciiTable Output in which the rows (as string) must be
* @param execReport Execution report (which contains the maximum allowed number of records to output). * written.
* @param thread Thread which has asked for this formatting (it must be used in order to test the {@link Thread#isInterrupted()} flag and so interrupt everything if need). * @param execReport Execution report (which contains the maximum allowed
* number of records to output).
* @param thread Thread which has asked for this formatting (it must
* be used in order to test the
* {@link Thread#isInterrupted()} flag and so interrupt
* everything if need).
* *
* @return <i>true</i> if an overflow (i.e. nbDBRows > MAXREC) is detected, <i>false</i> otherwise. * @return <i>true</i> if an overflow (i.e. nbDBRows > MAXREC) is detected,
* <i>false</i> otherwise.
* *
* @throws InterruptedException If the thread has been interrupted. * @throws InterruptedException If the thread has been interrupted.
* @throws TAPException If any other error occurs. * @throws TAPException If any other error occurs.
*/ */
protected boolean writeData(final TableIterator queryResult, final AsciiTable asciiTable, final TAPExecutionReport execReport, final Thread thread) throws TAPException, InterruptedException{ protected boolean writeData(final TableIterator queryResult, final LargeAsciiTable asciiTable, final TAPExecutionReport execReport, final Thread thread) throws IOException, TAPException, InterruptedException{
execReport.nbRows = 0; execReport.nbRows = 0;
boolean overflow = false; boolean overflow = false;
...@@ -210,6 +279,9 @@ public class TextFormat implements OutputFormat { ...@@ -210,6 +279,9 @@ public class TextFormat implements OutputFormat {
execReport.nbRows++; execReport.nbRows++;
} }
// Declare the table as complete:
asciiTable.endTable();
return overflow; return overflow;
} }
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment