From a382b2518e2f7a9246f43669c5a022651d6b98d3 Mon Sep 17 00:00:00 2001 From: gmantele <gmantele@ari.uni-heidelberg.de> Date: Fri, 8 Sep 2017 14:57:04 +0200 Subject: [PATCH] [ADQL] Throwing a ParseException instead of an Error when an incorrect character that can not be interpreted by the JavaCC Token Manager is encountered. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Actually, the TokenMgrError thrown by JavaCC is caught by all ADQLParser.parseQuery(...) functions, wrapped inside a ParseException which is finally thrown instead of the TokenMgrError. In this way, ADQL-Lib users just have to care about a single Throwable: ParseException. Besides the error message has been slightly modified from: > Lexical error at line 1, column 10. Encountered: "\u00e9" (233), after : \"\" to: > Incorrect character encountered at l.1, c.10: \"\\u00e9\" ('é'), after : \"\" Thus, the error is more user-friendly, more easy to understand by users. Additionally, the incorrect character is displayed, as before, in its unicode expression, but also in its character form (instead of an integer value that nobody can really understand). This commit fixes the GitHub issue #17 --- src/adql/parser/ADQLParser.java | 172 +++++++++++---------- src/adql/parser/ParseException.java | 13 +- src/adql/parser/ParseException.java.backup | 13 +- src/adql/parser/TokenMgrError.java | 20 ++- src/adql/parser/TokenMgrError.java.backup | 18 ++- src/adql/parser/adqlGrammar.jj | 22 ++- test/adql/parser/TestADQLParser.java | 21 +++ 7 files changed, 166 insertions(+), 113 deletions(-) diff --git a/src/adql/parser/ADQLParser.java b/src/adql/parser/ADQLParser.java index ce4f24d..704d43b 100644 --- a/src/adql/parser/ADQLParser.java +++ b/src/adql/parser/ADQLParser.java @@ -67,7 +67,7 @@ import adql.translator.TranslationException; * @see ADQLQueryFactory * * @author Grégory Mantelet (CDS;ARI) - gmantele@ari.uni-heidelberg.de -* @version 1.4 (04/2017) +* @version 1.4 (09/2017) */ public class ADQLParser implements ADQLParserConstants { @@ -295,7 +295,11 @@ public class ADQLParser implements ADQLParserConstants { public final ADQLQuery parseQuery() throws ParseException{ stackQuery.clear(); query = null; - return Query(); + try{ + return Query(); + }catch(TokenMgrError tme){ + throw new ParseException(tme.getMessage(), new TextPosition(tme.getErrorLine(), tme.getErrorColumn())); + } } /** @@ -313,7 +317,11 @@ public class ADQLParser implements ADQLParserConstants { stackQuery.clear(); query = null; ReInit(new java.io.ByteArrayInputStream(q.getBytes())); - return Query(); + try{ + return Query(); + }catch(TokenMgrError tme){ + throw new ParseException(tme.getMessage(), new TextPosition(tme.getErrorLine(), tme.getErrorColumn())); + } } /** @@ -331,7 +339,11 @@ public class ADQLParser implements ADQLParserConstants { stackQuery.clear(); query = null; ReInit(stream); - return Query(); + try{ + return Query(); + }catch(TokenMgrError tme){ + throw new ParseException(tme.getMessage(), new TextPosition(tme.getErrorLine(), tme.getErrorColumn())); + } } public final void setDebug(boolean debug){ @@ -3912,82 +3924,6 @@ public class ADQLParser implements ADQLParserConstants { } } - private boolean jj_3R_59(){ - if (jj_3R_24()) - return true; - return false; - } - - private boolean jj_3R_58(){ - if (jj_scan_token(LEFT_PAR)) - return true; - if (jj_3R_42()) - return true; - if (jj_scan_token(RIGHT_PAR)) - return true; - return false; - } - - private boolean jj_3R_57(){ - if (jj_3R_27()) - return true; - return false; - } - - private boolean jj_3R_124(){ - if (jj_scan_token(CIRCLE)) - return true; - if (jj_scan_token(LEFT_PAR)) - return true; - if (jj_3R_134()) - return true; - if (jj_scan_token(COMMA)) - return true; - if (jj_3R_135()) - return true; - if (jj_scan_token(COMMA)) - return true; - if (jj_3R_101()) - return true; - if (jj_scan_token(RIGHT_PAR)) - return true; - return false; - } - - private boolean jj_3R_44(){ - if (jj_scan_token(SELECT)) - return true; - return false; - } - - private boolean jj_3R_56(){ - if (jj_3R_101()) - return true; - return false; - } - - private boolean jj_3R_123(){ - if (jj_scan_token(CENTROID)) - return true; - if (jj_scan_token(LEFT_PAR)) - return true; - if (jj_3R_113()) - return true; - if (jj_scan_token(RIGHT_PAR)) - return true; - return false; - } - - private boolean jj_3R_117(){ - if (jj_scan_token(LEFT_PAR)) - return true; - if (jj_3R_27()) - return true; - if (jj_scan_token(RIGHT_PAR)) - return true; - return false; - } - private boolean jj_3R_16(){ if (jj_scan_token(LEFT_PAR)) return true; @@ -5612,6 +5548,82 @@ public class ADQLParser implements ADQLParserConstants { return false; } + private boolean jj_3R_59(){ + if (jj_3R_24()) + return true; + return false; + } + + private boolean jj_3R_58(){ + if (jj_scan_token(LEFT_PAR)) + return true; + if (jj_3R_42()) + return true; + if (jj_scan_token(RIGHT_PAR)) + return true; + return false; + } + + private boolean jj_3R_57(){ + if (jj_3R_27()) + return true; + return false; + } + + private boolean jj_3R_124(){ + if (jj_scan_token(CIRCLE)) + return true; + if (jj_scan_token(LEFT_PAR)) + return true; + if (jj_3R_134()) + return true; + if (jj_scan_token(COMMA)) + return true; + if (jj_3R_135()) + return true; + if (jj_scan_token(COMMA)) + return true; + if (jj_3R_101()) + return true; + if (jj_scan_token(RIGHT_PAR)) + return true; + return false; + } + + private boolean jj_3R_44(){ + if (jj_scan_token(SELECT)) + return true; + return false; + } + + private boolean jj_3R_56(){ + if (jj_3R_101()) + return true; + return false; + } + + private boolean jj_3R_123(){ + if (jj_scan_token(CENTROID)) + return true; + if (jj_scan_token(LEFT_PAR)) + return true; + if (jj_3R_113()) + return true; + if (jj_scan_token(RIGHT_PAR)) + return true; + return false; + } + + private boolean jj_3R_117(){ + if (jj_scan_token(LEFT_PAR)) + return true; + if (jj_3R_27()) + return true; + if (jj_scan_token(RIGHT_PAR)) + return true; + return false; + } + /** Generated Token Manager. */ public ADQLParserTokenManager token_source; SimpleCharStream jj_input_stream; diff --git a/src/adql/parser/ParseException.java b/src/adql/parser/ParseException.java index 108993a..2751dcd 100644 --- a/src/adql/parser/ParseException.java +++ b/src/adql/parser/ParseException.java @@ -2,12 +2,13 @@ /* JavaCCOptions:KEEP_LINE_COL=null * * Modified by Grégory Mantelet (CDS), on March 2017 - * Modification: several small modifications. - * /!\ DO NOT RE-GENERATE THIS FILE /!\ - * In case of re-generation, replace it by - * ParseException.java.backup (but maybe after a diff - * in case of significant modifications have been done - * by a new version of JavaCC). + * Modifications: + * - several small modifications. + * + * /!\ DO NOT RE-GENERATE THIS FILE /!\ + * In case of re-generation, replace it by ParseException.java.backup (but maybe + * after a diff in case of significant modifications have been done by a new + * version of JavaCC). */ package adql.parser; diff --git a/src/adql/parser/ParseException.java.backup b/src/adql/parser/ParseException.java.backup index 108993a..2751dcd 100644 --- a/src/adql/parser/ParseException.java.backup +++ b/src/adql/parser/ParseException.java.backup @@ -2,12 +2,13 @@ /* JavaCCOptions:KEEP_LINE_COL=null * * Modified by Grégory Mantelet (CDS), on March 2017 - * Modification: several small modifications. - * /!\ DO NOT RE-GENERATE THIS FILE /!\ - * In case of re-generation, replace it by - * ParseException.java.backup (but maybe after a diff - * in case of significant modifications have been done - * by a new version of JavaCC). + * Modifications: + * - several small modifications. + * + * /!\ DO NOT RE-GENERATE THIS FILE /!\ + * In case of re-generation, replace it by ParseException.java.backup (but maybe + * after a diff in case of significant modifications have been done by a new + * version of JavaCC). */ package adql.parser; diff --git a/src/adql/parser/TokenMgrError.java b/src/adql/parser/TokenMgrError.java index 60cb468..fa5d8aa 100644 --- a/src/adql/parser/TokenMgrError.java +++ b/src/adql/parser/TokenMgrError.java @@ -1,13 +1,17 @@ /* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 6.0 */ /* JavaCCOptions: * - * Modified by Grégory Mantelet (CDS), on April 2017 - * Modification: addition of position (line and column) in the original text. - * /!\ DO NOT RE-GENERATE THIS FILE /!\ - * In case of re-generation, replace it by - * TokenMgrError.java.backup (but maybe after a diff - * in case of significant modifications have been done - * by a new version of JavaCC). + * Modified by Grégory Mantelet (CDS), on Sept. 2017 + * Modifications: + * - addition of position (line and column) in the original text + * - adapt the error message so that being more explicit for humans + * and display the incorrect character as a character instead of an + * integer value + * + * /!\ DO NOT RE-GENERATE THIS FILE /!\ + * In case of re-generation, replace it by TokenMgrError.java.backup (but maybe + * after a diff in case of significant modifications have been done by a new + * version of JavaCC). */ package adql.parser; @@ -118,7 +122,7 @@ public class TokenMgrError extends Error { * Note: You can customize the lexical error message by modifying this method. */ protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar){ - return ("Lexical error at line " + errorLine + ", column " + errorColumn + ". Encountered: " + (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + "after : \"" + addEscapes(errorAfter) + "\""); + return ("Incorrect character encountered at l." + errorLine + ", c." + errorColumn + ": " + (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " ('" + curChar + "'), ") + "after : \"" + addEscapes(errorAfter) + "\""); } /** diff --git a/src/adql/parser/TokenMgrError.java.backup b/src/adql/parser/TokenMgrError.java.backup index 60cb468..61b4a9c 100644 --- a/src/adql/parser/TokenMgrError.java.backup +++ b/src/adql/parser/TokenMgrError.java.backup @@ -1,13 +1,15 @@ /* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 6.0 */ /* JavaCCOptions: * - * Modified by Grégory Mantelet (CDS), on April 2017 - * Modification: addition of position (line and column) in the original text. - * /!\ DO NOT RE-GENERATE THIS FILE /!\ - * In case of re-generation, replace it by - * TokenMgrError.java.backup (but maybe after a diff - * in case of significant modifications have been done - * by a new version of JavaCC). + * Modified by Grégory Mantelet (CDS), on Sept. 2017 + * Modifications: + * - addition of position (line and column) in the original text + * - display the incorrect character as a character instead of an integer + * + * /!\ DO NOT RE-GENERATE THIS FILE /!\ + * In case of re-generation, replace it by TokenMgrError.java.backup (but maybe + * after a diff in case of significant modifications have been done by a new + * version of JavaCC). */ package adql.parser; @@ -118,7 +120,7 @@ public class TokenMgrError extends Error { * Note: You can customize the lexical error message by modifying this method. */ protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar){ - return ("Lexical error at line " + errorLine + ", column " + errorColumn + ". Encountered: " + (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + "after : \"" + addEscapes(errorAfter) + "\""); + return ("Incorrect character encountered at l." + errorLine + ", c." + errorColumn + ": " + (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " ('" + curChar + "'), ") + "after : \"" + addEscapes(errorAfter) + "\""); } /** diff --git a/src/adql/parser/adqlGrammar.jj b/src/adql/parser/adqlGrammar.jj index a4d2cb4..1de71d3 100644 --- a/src/adql/parser/adqlGrammar.jj +++ b/src/adql/parser/adqlGrammar.jj @@ -26,7 +26,7 @@ * If the syntax is not conform to the ADQL definition an error message is printed else it will be the message "Correct syntax". * * Author: Grégory Mantelet (CDS;ARI) - gmantele@ari.uni-heidelberg.de -* Version: 1.4 (04/2017) +* Version: 1.4 (09/2017) */ /* ########### */ @@ -89,7 +89,7 @@ import adql.translator.TranslationException; * @see ADQLQueryFactory * * @author Grégory Mantelet (CDS;ARI) - gmantele@ari.uni-heidelberg.de -* @version 1.4 (04/2017) +* @version 1.4 (09/2017) */ public class ADQLParser { @@ -317,7 +317,11 @@ public class ADQLParser { public final ADQLQuery parseQuery() throws ParseException { stackQuery.clear(); query = null; - return Query(); + try { + return Query(); + }catch(TokenMgrError tme) { + throw new ParseException(tme.getMessage(), new TextPosition(tme.getErrorLine(), tme.getErrorColumn())); + } } /** @@ -335,7 +339,11 @@ public class ADQLParser { stackQuery.clear(); query = null; ReInit(new java.io.ByteArrayInputStream(q.getBytes())); - return Query(); + try { + return Query(); + }catch(TokenMgrError tme) { + throw new ParseException(tme.getMessage(), new TextPosition(tme.getErrorLine(), tme.getErrorColumn())); + } } /** @@ -353,7 +361,11 @@ public class ADQLParser { stackQuery.clear(); query = null; ReInit(stream); - return Query(); + try { + return Query(); + }catch(TokenMgrError tme) { + throw new ParseException(tme.getMessage(), new TextPosition(tme.getErrorLine(), tme.getErrorColumn())); + } } public final void setDebug(boolean debug){ diff --git a/test/adql/parser/TestADQLParser.java b/test/adql/parser/TestADQLParser.java index 9e6a016..4549cb8 100644 --- a/test/adql/parser/TestADQLParser.java +++ b/test/adql/parser/TestADQLParser.java @@ -158,4 +158,25 @@ public class TestADQLParser { } } + @Test + public void testIncorrectCharacter(){ + /* An identifier must be written only with digits, an underscore or + * regular latin characters: */ + try{ + (new ADQLParser()).parseQuery("select grégory FROM aTable"); + }catch(Throwable t){ + assertEquals(ParseException.class, t.getClass()); + assertEquals("Incorrect character encountered at l.1, c.10: \"\\u00e9\" ('é'), after : \"\"", t.getMessage()); + } + + // But in a string, delimited identifier or a comment, it is fine: + try{ + (new ADQLParser()).parseQuery("select 'grégory' FROM aTable"); + (new ADQLParser()).parseQuery("select \"grégory\" FROM aTable"); + (new ADQLParser()).parseQuery("select * FROM aTable -- a comment by Grégory"); + }catch(Throwable t){ + fail("This error should never occurs because all these queries have an accentuated character but at a correct place."); + } + } + } -- GitLab