From a36cbf22a9fef46f12cd24ab31b43e66e2572733 Mon Sep 17 00:00:00 2001
From: Khaled Noaman <knoaman@apache.org>
Date: Tue, 4 Mar 2003 16:36:17 +0000
Subject: [PATCH] RegEx: fix for character category escape

git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@174824 13f79535-47bb-0310-9956-ffa450edef68
---
 src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml    |  2 +-
 .../util/MsgLoaders/ICU/resources/en_US.txt   |  2 +-
 .../InMemory/XercesMessages_en_US.hpp         |  4 +-
 .../MsgCatalog/XercesMessages_en_US.Msg       |  2 +-
 src/xercesc/util/Platforms/Win32/Version.rc   |  3 +-
 src/xercesc/util/regx/RegxParser.cpp          | 61 +++++--------------
 6 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml b/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml
index 46e817714..c101b4ded 100644
--- a/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml
+++ b/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml
@@ -545,7 +545,7 @@
             <Message Id="Parser_Factor5" Text="A back reference or an anchor or a lookahead or a lookbehind is expected in a conditional pattern"/>
             <Message Id="Parser_Factor6" Text="There are more than three choices in a conditional group"/>
             <Message Id="Parser_Atom1" Text="A character in U+0040-U+005f must follow \c"/>
-            <Message Id="Parser_Atom2" Text="A Category character or '{' is required"/>
+            <Message Id="Parser_Atom2" Text="A '{' is required before a category character."/>
             <Message Id="Parser_Atom3" Text="A property name is not closed by '}'"/>
             <Message Id="Parser_Atom4" Text="Unexpected meta character"/>
             <Message Id="Parser_Atom5" Text="Unknown property"/>
diff --git a/src/xercesc/util/MsgLoaders/ICU/resources/en_US.txt b/src/xercesc/util/MsgLoaders/ICU/resources/en_US.txt
index f5f8754a4..7b62eec82 100644
--- a/src/xercesc/util/MsgLoaders/ICU/resources/en_US.txt
+++ b/src/xercesc/util/MsgLoaders/ICU/resources/en_US.txt
@@ -546,7 +546,7 @@ en_US {
 		"A back reference or an anchor or a lookahead or a lookbehind is expected in a conditional pattern " ,
 		"There are more than three choices in a conditional group " ,
 		"A character in U+0040-U+005f must follow \c " ,
-		"A Category character or '{' is required " ,
+		"A '{' is required before a category character. " ,
 		"A property name is not closed by '}' " ,
 		"Unexpected meta character " ,
 		"Unknown property " ,
diff --git a/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp b/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp
index a50ab0d86..12002aa6d 100644
--- a/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp
+++ b/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp
@@ -1092,8 +1092,8 @@ const XMLCh gXMLExceptArray[][128] =
       0x006E,0x0020,0x0061,0x0020,0x0063,0x006F,0x006E,0x0064,0x0069,0x0074,0x0069,0x006F,0x006E,0x0061,0x006C,0x0020,0x0067,0x0072,0x006F,0x0075,0x0070,0x00 }
   , { 0x0041,0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x0020,0x0069,0x006E,0x0020,0x0055,0x002B,0x0030,0x0030,0x0034,0x0030,0x002D,0x0055,0x002B,0x0030,0x0030,0x0035,0x0066,0x0020,0x006D,0x0075,0x0073,0x0074,0x0020,0x0066,
       0x006F,0x006C,0x006C,0x006F,0x0077,0x0020,0x005C,0x0063,0x00 }
-  , { 0x0041,0x0020,0x0043,0x0061,0x0074,0x0065,0x0067,0x006F,0x0072,0x0079,0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x0020,0x006F,0x0072,0x0020,0x0027,0x007B,0x0027,0x0020,0x0069,0x0073,0x0020,0x0072,0x0065,0x0071,0x0075,
-      0x0069,0x0072,0x0065,0x0064,0x00 }
+  , { 0x0041,0x0020,0x0027,0x007B,0x0027,0x0020,0x0069,0x0073,0x0020,0x0072,0x0065,0x0071,0x0075,0x0069,0x0072,0x0065,0x0064,0x0020,0x0062,0x0065,0x0066,0x006F,0x0072,0x0065,0x0020,0x0061,0x0020,0x0063,0x0061,0x0074,0x0065,0x0067,0x006F,0x0072,0x0079,
+      0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x002E,0x00 }
   , { 0x0041,0x0020,0x0070,0x0072,0x006F,0x0070,0x0065,0x0072,0x0074,0x0079,0x0020,0x006E,0x0061,0x006D,0x0065,0x0020,0x0069,0x0073,0x0020,0x006E,0x006F,0x0074,0x0020,0x0063,0x006C,0x006F,0x0073,0x0065,0x0064,0x0020,0x0062,0x0079,0x0020,0x0027,0x007D,
       0x0027,0x00 }
   , { 0x0055,0x006E,0x0065,0x0078,0x0070,0x0065,0x0063,0x0074,0x0065,0x0064,0x0020,0x006D,0x0065,0x0074,0x0061,0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x00 }
diff --git a/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg b/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg
index 00e212a1a..b8df4b03c 100644
--- a/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg
+++ b/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg
@@ -529,7 +529,7 @@ $set 3
 122  A back reference or an anchor or a lookahead or a lookbehind is expected in a conditional pattern
 123  There are more than three choices in a conditional group
 124  A character in U+0040-U+005f must follow \c
-125  A Category character or '{' is required
+125  A '{' is required before a category character.
 126  A property name is not closed by '}'
 127  Unexpected meta character
 128  Unknown property
diff --git a/src/xercesc/util/Platforms/Win32/Version.rc b/src/xercesc/util/Platforms/Win32/Version.rc
index 7503eaebf..7beb0e912 100644
--- a/src/xercesc/util/Platforms/Win32/Version.rc
+++ b/src/xercesc/util/Platforms/Win32/Version.rc
@@ -632,7 +632,7 @@ BEGIN
     8314              L"\x0041\x0020\x0062\x0061\x0063\x006B\x0020\x0072\x0065\x0066\x0065\x0072\x0065\x006E\x0063\x0065\x0020\x006F\x0072\x0020\x0061\x006E\x0020\x0061\x006E\x0063\x0068\x006F\x0072\x0020\x006F\x0072\x0020\x0061\x0020\x006C\x006F\x006F\x006B\x0061\x0068\x0065\x0061\x0064\x0020\x006F\x0072\x0020\x0061\x0020\x006C\x006F\x006F\x006B\x0062\x0065\x0068\x0069\x006E\x0064\x0020\x0069\x0073\x0020\x0065\x0078\x0070\x0065\x0063\x0074\x0065\x0064\x0020\x0069\x006E\x0020\x0061\x0020\x0063\x006F\x006E\x0064\x0069\x0074\x0069\x006F\x006E\x0061\x006C\x0020\x0070\x0061\x0074\x0074\x0065\x0072\x006E\x00"
     8315              L"\x0054\x0068\x0065\x0072\x0065\x0020\x0061\x0072\x0065\x0020\x006D\x006F\x0072\x0065\x0020\x0074\x0068\x0061\x006E\x0020\x0074\x0068\x0072\x0065\x0065\x0020\x0063\x0068\x006F\x0069\x0063\x0065\x0073\x0020\x0069\x006E\x0020\x0061\x0020\x0063\x006F\x006E\x0064\x0069\x0074\x0069\x006F\x006E\x0061\x006C\x0020\x0067\x0072\x006F\x0075\x0070\x00"
     8316              L"\x0041\x0020\x0063\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x0020\x0069\x006E\x0020\x0055\x002B\x0030\x0030\x0034\x0030\x002D\x0055\x002B\x0030\x0030\x0035\x0066\x0020\x006D\x0075\x0073\x0074\x0020\x0066\x006F\x006C\x006C\x006F\x0077\x0020\x005C\x0063\x00"
-    8317              L"\x0041\x0020\x0043\x0061\x0074\x0065\x0067\x006F\x0072\x0079\x0020\x0063\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x0020\x006F\x0072\x0020\x0027\x007B\x0027\x0020\x0069\x0073\x0020\x0072\x0065\x0071\x0075\x0069\x0072\x0065\x0064\x00"
+    8317              L"\x0041\x0020\x0027\x007B\x0027\x0020\x0069\x0073\x0020\x0072\x0065\x0071\x0075\x0069\x0072\x0065\x0064\x0020\x0062\x0065\x0066\x006F\x0072\x0065\x0020\x0061\x0020\x0063\x0061\x0074\x0065\x0067\x006F\x0072\x0079\x0020\x0063\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x002E\x00"
     8318              L"\x0041\x0020\x0070\x0072\x006F\x0070\x0065\x0072\x0074\x0079\x0020\x006E\x0061\x006D\x0065\x0020\x0069\x0073\x0020\x006E\x006F\x0074\x0020\x0063\x006C\x006F\x0073\x0065\x0064\x0020\x0062\x0079\x0020\x0027\x007D\x0027\x00"
     8319              L"\x0055\x006E\x0065\x0078\x0070\x0065\x0063\x0074\x0065\x0064\x0020\x006D\x0065\x0074\x0061\x0020\x0063\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x00"
     8320              L"\x0055\x006E\x006B\x006E\x006F\x0077\x006E\x0020\x0070\x0072\x006F\x0070\x0065\x0072\x0074\x0079\x00"
@@ -901,6 +901,7 @@ BEGIN
     24600             L"\x0055\x006E\x0072\x0065\x0063\x006F\x0067\x006E\x0069\x007A\x0065\x0064\x0020\x004E\x006F\x0064\x0065\x0020\x0054\x0079\x0070\x0065\x00"
 END
 
+
 #endif    // English (U.S.) resources
 /////////////////////////////////////////////////////////////////////////////
 
diff --git a/src/xercesc/util/regx/RegxParser.cpp b/src/xercesc/util/regx/RegxParser.cpp
index 753f3f89b..e6f027fcc 100644
--- a/src/xercesc/util/regx/RegxParser.cpp
+++ b/src/xercesc/util/regx/RegxParser.cpp
@@ -56,6 +56,9 @@
 
 /*
  * $Log$
+ * Revision 1.5  2003/03/04 16:36:17  knoaman
+ * RegEx: fix for character category escape
+ *
  * Revision 1.4  2003/01/13 19:02:23  knoaman
  * [Bug 14390] C++ Indentifier collision with Python.
  *
@@ -1085,57 +1088,23 @@ Token* RegxParser::parseAtom() {
 
 RangeToken* RegxParser::processBacksolidus_pP(const XMLInt32 ch) {
 
-    bool positive = (ch == chLatin_p);
-
     processNext();
-	if (fState != REGX_T_CHAR)
-		ThrowXML(ParseException,XMLExcepts::Parser_Atom2);
-
-    RangeToken* tok = 0;
-
-	switch(fCharData) {
+    // Only the braced form is accepted from here on: a bare category letter (L, M, N, Z, C, P, S) is now an error.
 
-    case chLatin_L:
-		tok = fTokenFactory->getRange(fgUniLetter, !positive);
-		break;
-    case chLatin_M:
-		tok = fTokenFactory->getRange(fgUniMark, !positive);
-		break;
-    case chLatin_N:
-		tok = fTokenFactory->getRange(fgUniNumber, !positive);
-		break;
-    case chLatin_Z:
-		tok = fTokenFactory->getRange(fgUniSeparator, !positive);
-		break;
-    case chLatin_C:
-		tok = fTokenFactory->getRange(fgUniControl, !positive);
-		break;
-    case chLatin_P:
-		tok = fTokenFactory->getRange(fgUniPunctuation, !positive);
-		break;
-    case chLatin_S:
-		tok = fTokenFactory->getRange(fgUniSymbol, !positive);
-		break;
-    case chOpenCurly:
-		{
-			int nameStart = fOffset;
-			int nameEnd = XMLString::indexOf(fString,chCloseCurly,nameStart);
+    if (fState != REGX_T_CHAR || fCharData != chOpenCurly)  // token just read by processNext() must be a char token holding '{'
+        ThrowXML(ParseException,XMLExcepts::Parser_Atom2);  // Parser_Atom2: '{' is required
 
-			if (nameEnd < 0)
-				ThrowXML(ParseException,XMLExcepts::Parser_Atom3);
+    int nameStart = fOffset;  // NOTE(review): presumably fOffset already points just past the '{' - confirm against processNext()
+    int nameEnd = XMLString::indexOf(fString,chCloseCurly,nameStart);  // look for the closing '}' starting at nameStart
 
-			fOffset = nameEnd + 1;
-			XMLCh* rangeName = new XMLCh[(nameEnd - nameStart) + 1];
-			ArrayJanitor<XMLCh> janRangeName(rangeName);
-			XMLString::subString(rangeName, fString, nameStart, nameEnd);
-			tok = fTokenFactory->getRange(rangeName, !positive);
-		}
-		break;
-	default:
-		ThrowXML(ParseException,XMLExcepts::Parser_Atom2);
-	}
+    if (nameEnd < 0)  // a negative index is treated as "no closing '}' found"
+        ThrowXML(ParseException,XMLExcepts::Parser_Atom3);  // Parser_Atom3: property name is not closed by '}'
+    
+    fOffset = nameEnd + 1;  // move the parse position to the character after the '}'
+    XMLCh* rangeName = new XMLCh[(nameEnd - nameStart) + 1];  // name length plus one; the extra slot is presumably for a terminator
+    ArrayJanitor<XMLCh> janRangeName(rangeName);  // hands the buffer to a janitor object; presumably released when it leaves scope - confirm
+    XMLString::subString(rangeName, fString, nameStart, nameEnd);  // copy the text found between the braces into rangeName
 
-    return tok;
+    return  fTokenFactory->getRange(rangeName, !(ch == chLatin_p));  // flag is false when ch is 'p', true otherwise (presumably selects the complemented range)
 }
 
 
-- 
GitLab