From a36cbf22a9fef46f12cd24ab31b43e66e2572733 Mon Sep 17 00:00:00 2001 From: Khaled Noaman <knoaman@apache.org> Date: Tue, 4 Mar 2003 16:36:17 +0000 Subject: [PATCH] RegEx: fix for character category escape git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@174824 13f79535-47bb-0310-9956-ffa450edef68 --- src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml | 2 +- .../util/MsgLoaders/ICU/resources/en_US.txt | 2 +- .../InMemory/XercesMessages_en_US.hpp | 4 +- .../MsgCatalog/XercesMessages_en_US.Msg | 2 +- src/xercesc/util/Platforms/Win32/Version.rc | 3 +- src/xercesc/util/regx/RegxParser.cpp | 61 +++++-------------- 6 files changed, 22 insertions(+), 52 deletions(-) diff --git a/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml b/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml index 46e817714..c101b4ded 100644 --- a/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml +++ b/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml @@ -545,7 +545,7 @@ <Message Id="Parser_Factor5" Text="A back reference or an anchor or a lookahead or a lookbehind is expected in a conditional pattern"/> <Message Id="Parser_Factor6" Text="There are more than three choices in a conditional group"/> <Message Id="Parser_Atom1" Text="A character in U+0040-U+005f must follow \c"/> - <Message Id="Parser_Atom2" Text="A Category character or '{' is required"/> + <Message Id="Parser_Atom2" Text="A '{' is required before a category character."/> <Message Id="Parser_Atom3" Text="A property name is not closed by '}'"/> <Message Id="Parser_Atom4" Text="Unexpected meta character"/> <Message Id="Parser_Atom5" Text="Unknown property"/> diff --git a/src/xercesc/util/MsgLoaders/ICU/resources/en_US.txt b/src/xercesc/util/MsgLoaders/ICU/resources/en_US.txt index f5f8754a4..7b62eec82 100644 --- a/src/xercesc/util/MsgLoaders/ICU/resources/en_US.txt +++ b/src/xercesc/util/MsgLoaders/ICU/resources/en_US.txt @@ -546,7 +546,7 @@ en_US { "A back reference or an anchor or a lookahead or a lookbehind is expected in a conditional pattern " , "There are more than three choices in a conditional group " , "A character in U+0040-U+005f must follow \c " , - "A Category character or '{' is required " , + "A '{' is required before a category character. " , "A property name is not closed by '}' " , "Unexpected meta character " , "Unknown property " , diff --git a/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp b/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp index a50ab0d86..12002aa6d 100644 --- a/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp +++ b/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp @@ -1092,8 +1092,8 @@ const XMLCh gXMLExceptArray[][128] = 0x006E,0x0020,0x0061,0x0020,0x0063,0x006F,0x006E,0x0064,0x0069,0x0074,0x0069,0x006F,0x006E,0x0061,0x006C,0x0020,0x0067,0x0072,0x006F,0x0075,0x0070,0x00 } , { 0x0041,0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x0020,0x0069,0x006E,0x0020,0x0055,0x002B,0x0030,0x0030,0x0034,0x0030,0x002D,0x0055,0x002B,0x0030,0x0030,0x0035,0x0066,0x0020,0x006D,0x0075,0x0073,0x0074,0x0020,0x0066, 0x006F,0x006C,0x006C,0x006F,0x0077,0x0020,0x005C,0x0063,0x00 } - , { 0x0041,0x0020,0x0043,0x0061,0x0074,0x0065,0x0067,0x006F,0x0072,0x0079,0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x0020,0x006F,0x0072,0x0020,0x0027,0x007B,0x0027,0x0020,0x0069,0x0073,0x0020,0x0072,0x0065,0x0071,0x0075, - 0x0069,0x0072,0x0065,0x0064,0x00 } + , { 0x0041,0x0020,0x0027,0x007B,0x0027,0x0020,0x0069,0x0073,0x0020,0x0072,0x0065,0x0071,0x0075,0x0069,0x0072,0x0065,0x0064,0x0020,0x0062,0x0065,0x0066,0x006F,0x0072,0x0065,0x0020,0x0061,0x0020,0x0063,0x0061,0x0074,0x0065,0x0067,0x006F,0x0072,0x0079, + 0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x002E,0x00 } , { 0x0041,0x0020,0x0070,0x0072,0x006F,0x0070,0x0065,0x0072,0x0074,0x0079,0x0020,0x006E,0x0061,0x006D,0x0065,0x0020,0x0069,0x0073,0x0020,0x006E,0x006F,0x0074,0x0020,0x0063,0x006C,0x006F,0x0073,0x0065,0x0064,0x0020,0x0062,0x0079,0x0020,0x0027,0x007D, 0x0027,0x00 } , { 0x0055,0x006E,0x0065,0x0078,0x0070,0x0065,0x0063,0x0074,0x0065,0x0064,0x0020,0x006D,0x0065,0x0074,0x0061,0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x00 } diff --git a/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg b/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg index 00e212a1a..b8df4b03c 100644 --- a/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg +++ b/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg @@ -529,7 +529,7 @@ $set 3 122 A back reference or an anchor or a lookahead or a lookbehind is expected in a conditional pattern 123 There are more than three choices in a conditional group 124 A character in U+0040-U+005f must follow \c -125 A Category character or '{' is required +125 A '{' is required before a category character. 126 A property name is not closed by '}' 127 Unexpected meta character 128 Unknown property diff --git a/src/xercesc/util/Platforms/Win32/Version.rc b/src/xercesc/util/Platforms/Win32/Version.rc index 7503eaebf..7beb0e912 100644 --- a/src/xercesc/util/Platforms/Win32/Version.rc +++ b/src/xercesc/util/Platforms/Win32/Version.rc @@ -632,7 +632,7 @@ BEGIN 8314 L"\x0041\x0020\x0062\x0061\x0063\x006B\x0020\x0072\x0065\x0066\x0065\x0072\x0065\x006E\x0063\x0065\x0020\x006F\x0072\x0020\x0061\x006E\x0020\x0061\x006E\x0063\x0068\x006F\x0072\x0020\x006F\x0072\x0020\x0061\x0020\x006C\x006F\x006F\x006B\x0061\x0068\x0065\x0061\x0064\x0020\x006F\x0072\x0020\x0061\x0020\x006C\x006F\x006F\x006B\x0062\x0065\x0068\x0069\x006E\x0064\x0020\x0069\x0073\x0020\x0065\x0078\x0070\x0065\x0063\x0074\x0065\x0064\x0020\x0069\x006E\x0020\x0061\x0020\x0063\x006F\x006E\x0064\x0069\x0074\x0069\x006F\x006E\x0061\x006C\x0020\x0070\x0061\x0074\x0074\x0065\x0072\x006E\x00" 8315 L"\x0054\x0068\x0065\x0072\x0065\x0020\x0061\x0072\x0065\x0020\x006D\x006F\x0072\x0065\x0020\x0074\x0068\x0061\x006E\x0020\x0074\x0068\x0072\x0065\x0065\x0020\x0063\x0068\x006F\x0069\x0063\x0065\x0073\x0020\x0069\x006E\x0020\x0061\x0020\x0063\x006F\x006E\x0064\x0069\x0074\x0069\x006F\x006E\x0061\x006C\x0020\x0067\x0072\x006F\x0075\x0070\x00" 8316 L"\x0041\x0020\x0063\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x0020\x0069\x006E\x0020\x0055\x002B\x0030\x0030\x0034\x0030\x002D\x0055\x002B\x0030\x0030\x0035\x0066\x0020\x006D\x0075\x0073\x0074\x0020\x0066\x006F\x006C\x006C\x006F\x0077\x0020\x005C\x0063\x00" - 8317 L"\x0041\x0020\x0043\x0061\x0074\x0065\x0067\x006F\x0072\x0079\x0020\x0063\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x0020\x006F\x0072\x0020\x0027\x007B\x0027\x0020\x0069\x0073\x0020\x0072\x0065\x0071\x0075\x0069\x0072\x0065\x0064\x00" + 8317 L"\x0041\x0020\x0027\x007B\x0027\x0020\x0069\x0073\x0020\x0072\x0065\x0071\x0075\x0069\x0072\x0065\x0064\x0020\x0062\x0065\x0066\x006F\x0072\x0065\x0020\x0061\x0020\x0063\x0061\x0074\x0065\x0067\x006F\x0072\x0079\x0020\x0063\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x002E\x00" 8318 L"\x0041\x0020\x0070\x0072\x006F\x0070\x0065\x0072\x0074\x0079\x0020\x006E\x0061\x006D\x0065\x0020\x0069\x0073\x0020\x006E\x006F\x0074\x0020\x0063\x006C\x006F\x0073\x0065\x0064\x0020\x0062\x0079\x0020\x0027\x007D\x0027\x00" 8319 L"\x0055\x006E\x0065\x0078\x0070\x0065\x0063\x0074\x0065\x0064\x0020\x006D\x0065\x0074\x0061\x0020\x0063\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x00" 8320 L"\x0055\x006E\x006B\x006E\x006F\x0077\x006E\x0020\x0070\x0072\x006F\x0070\x0065\x0072\x0074\x0079\x00" @@ -901,6 +901,7 @@ BEGIN 24600 L"\x0055\x006E\x0072\x0065\x0063\x006F\x0067\x006E\x0069\x007A\x0065\x0064\x0020\x004E\x006F\x0064\x0065\x0020\x0054\x0079\x0070\x0065\x00" END + #endif // English (U.S.) resources ///////////////////////////////////////////////////////////////////////////// diff --git a/src/xercesc/util/regx/RegxParser.cpp b/src/xercesc/util/regx/RegxParser.cpp index 753f3f89b..e6f027fcc 100644 --- a/src/xercesc/util/regx/RegxParser.cpp +++ b/src/xercesc/util/regx/RegxParser.cpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.5 2003/03/04 16:36:17 knoaman + * RegEx: fix for character category escape + * * Revision 1.4 2003/01/13 19:02:23 knoaman * [Bug 14390] C++ Indentifier collision with Python. * @@ -1085,57 +1088,23 @@ Token* RegxParser::parseAtom() { RangeToken* RegxParser::processBacksolidus_pP(const XMLInt32 ch) { - bool positive = (ch == chLatin_p); - processNext(); - if (fState != REGX_T_CHAR) - ThrowXML(ParseException,XMLExcepts::Parser_Atom2); - - RangeToken* tok = 0; - - switch(fCharData) { - case chLatin_L: - tok = fTokenFactory->getRange(fgUniLetter, !positive); - break; - case chLatin_M: - tok = fTokenFactory->getRange(fgUniMark, !positive); - break; - case chLatin_N: - tok = fTokenFactory->getRange(fgUniNumber, !positive); - break; - case chLatin_Z: - tok = fTokenFactory->getRange(fgUniSeparator, !positive); - break; - case chLatin_C: - tok = fTokenFactory->getRange(fgUniControl, !positive); - break; - case chLatin_P: - tok = fTokenFactory->getRange(fgUniPunctuation, !positive); - break; - case chLatin_S: - tok = fTokenFactory->getRange(fgUniSymbol, !positive); - break; - case chOpenCurly: - { - int nameStart = fOffset; - int nameEnd = XMLString::indexOf(fString,chCloseCurly,nameStart); + if (fState != REGX_T_CHAR || fCharData != chOpenCurly) + ThrowXML(ParseException,XMLExcepts::Parser_Atom2); - if (nameEnd < 0) - ThrowXML(ParseException,XMLExcepts::Parser_Atom3); + int nameStart = fOffset; + int nameEnd = XMLString::indexOf(fString,chCloseCurly,nameStart); - fOffset = nameEnd + 1; - XMLCh* rangeName = new XMLCh[(nameEnd - nameStart) + 1]; - ArrayJanitor<XMLCh> janRangeName(rangeName); - XMLString::subString(rangeName, fString, nameStart, nameEnd); - tok = fTokenFactory->getRange(rangeName, !positive); - } - break; - default: - ThrowXML(ParseException,XMLExcepts::Parser_Atom2); - } + if (nameEnd < 0) + ThrowXML(ParseException,XMLExcepts::Parser_Atom3); + + fOffset = nameEnd + 1; + XMLCh* rangeName = new XMLCh[(nameEnd - nameStart) + 1]; + ArrayJanitor<XMLCh> janRangeName(rangeName); + XMLString::subString(rangeName, fString, nameStart, nameEnd); - return tok; + return fTokenFactory->getRange(rangeName, !(ch == chLatin_p)); } -- GitLab