diff --git a/src/util/regx/RegularExpression.hpp b/src/util/regx/RegularExpression.hpp index dfac5b62a2a424c673cc35f91e74f6395cf881ce..a211fe83e5d41eec7fb30e10539785e93d10dbd9 100644 --- a/src/util/regx/RegularExpression.hpp +++ b/src/util/regx/RegularExpression.hpp @@ -88,7 +88,7 @@ public: // ----------------------------------------------------------------------- // Public Constructors and Destructor // ----------------------------------------------------------------------- - RegularExpression(const char* const pattern); + RegularExpression(const char* const pattern); RegularExpression(const char* const pattern, const char* const options); RegularExpression(const XMLCh* const pattern); RegularExpression(const XMLCh* const pattern, const XMLCh* const options); @@ -125,11 +125,11 @@ public: // ----------------------------------------------------------------------- // Matching methods // ----------------------------------------------------------------------- - bool matches(const char* const matchString); - bool matches(const char* const matchString, const int start, + bool matches(const char* const matchString); + bool matches(const char* const matchString, const int start, const int end); - bool matches(const char* const matchString, Match* const pMatch); - bool matches(const char* const matchString, const int start, + bool matches(const char* const matchString, Match* const pMatch); + bool matches(const char* const matchString, const int start, const int end, Match* const pMatch); bool matches(const XMLCh* const matchString); @@ -155,13 +155,13 @@ private: bool nextCh(XMLInt32& ch, int& offset, const short direction); bool fInUse; - bool fAdoptMatch; + bool fAdoptMatch; int fStart; int fLimit; int fLength; int fSize; int* fOffsets; - Match* fMatch; + Match* fMatch; XMLCh* fString; friend class Janitor<Context>; @@ -262,9 +262,9 @@ private: Op* fOperations; Token* fTokenTree; RangeToken* fFirstChar; - static RangeToken* fWordRange; - OpFactory fOpFactory; - XMLMutex fMutex; + static RangeToken* fWordRange; + OpFactory fOpFactory; + XMLMutex fMutex; TokenFactory* fTokenFactory; }; @@ -510,15 +510,18 @@ inline int RegularExpression::matchUnion(Context* const context, const Op* const op, int offset, const short direction) { - for (int i=0; i<op->getSize(); i++) { + unsigned int opSize = op->getSize(); + int ret = -1; - int ret = match(context, op->elementAt(i), offset, direction); + for (unsigned int i=0; i < opSize; i++) { - if (ret >= 0) - return ret; + ret = match(context, op->elementAt(i), offset, direction); + + if (ret > 0) + break; } - return -1; + return ret; } inline int RegularExpression::matchModifier(Context* const context, diff --git a/src/util/regx/UnionToken.cpp b/src/util/regx/UnionToken.cpp index c3f7beeb35eef263416366353a14c7ed66e87103..3a222818cfa55e171f021bb60d7f9abca9843029 100644 --- a/src/util/regx/UnionToken.cpp +++ b/src/util/regx/UnionToken.cpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.4 2001/06/05 14:50:32 knoaman + * Fixes to regular expression. + * * Revision 1.3 2001/05/11 13:26:52 tng * Copyright update. * @@ -120,84 +123,87 @@ void UnionToken::addChild(Token* const child, TokenFactory* const tokFactory) { if (fChildren == 0) fChildren = new RefVectorOf<Token>(INITIALSIZE, false); - unsigned short childType = child->getTokenType(); - - if (childType == UNION) { + if (getTokenType() == UNION) { fChildren->addElement(child); return; } + unsigned short childType = child->getTokenType(); + unsigned int childSize = child->size(); + if (childType == CONCAT) { - for (int i = 0; i < child->size(); i++) { - fChildren->addElement(child->getChild(i)); + for (unsigned int i = 0; i < childSize; i++) { + + addChild(child->getChild(i), tokFactory); } + return; } - unsigned int childSize = fChildren->size(); - if (childSize == 0) { + unsigned int childrenSize = fChildren->size(); + if (childrenSize == 0) { fChildren->addElement(child); return; } - Token* previousTok = fChildren->elementAt(childSize - 1); - unsigned short previousType = previousTok->getTokenType(); + Token* previousTok = fChildren->elementAt(childrenSize - 1); + unsigned short previousType = previousTok->getTokenType(); - if (!((previousType == CHAR || previousType == STRING) + if (!((previousType == CHAR || previousType == STRING) && (childType == CHAR || childType == STRING))) { fChildren->addElement(child); return; } - // Continue - XMLBuffer stringBuf; + // Continue + XMLBuffer stringBuf; - if (previousType == CHAR) { + if (previousType == CHAR) { - XMLInt32 ch = previousTok->getChar(); + XMLInt32 ch = previousTok->getChar(); - if (ch >= 0x10000) { + if (ch >= 0x10000) { - XMLCh* chSurrogate = RegxUtil::decomposeToSurrogates(ch); - stringBuf.append(chSurrogate); - delete [] chSurrogate; - } - else { - stringBuf.append((XMLCh) ch); - } + XMLCh* chSurrogate = RegxUtil::decomposeToSurrogates(ch); + stringBuf.append(chSurrogate); + delete [] chSurrogate; + } + else { + stringBuf.append((XMLCh) ch); + } - previousTok = tokFactory->createString(0); - fChildren->setElementAt(previousTok, childSize - 1); - } - else { - stringBuf.append(previousTok->getString()); - } + previousTok = tokFactory->createString(0); + fChildren->setElementAt(previousTok, childrenSize - 1); + } + else { + stringBuf.append(previousTok->getString()); + } - if (childType == CHAR) { + if (childType == CHAR) { - XMLInt32 ch = child->getChar(); + XMLInt32 ch = child->getChar(); - if (ch >= 0x10000) { + if (ch >= 0x10000) { - XMLCh* chSurrogate = RegxUtil::decomposeToSurrogates(ch); - stringBuf.append(chSurrogate); - delete [] chSurrogate; - } - else { - stringBuf.append((XMLCh) ch); - } - } - else { - stringBuf.append(child->getString()); - } + XMLCh* chSurrogate = RegxUtil::decomposeToSurrogates(ch); + stringBuf.append(chSurrogate); + delete [] chSurrogate; + } + else { + stringBuf.append((XMLCh) ch); + } + } + else { + stringBuf.append(child->getString()); + } - ((StringToken*) previousTok)->setString(stringBuf.getRawBuffer()); + ((StringToken*) previousTok)->setString(stringBuf.getRawBuffer()); } /** - * End of file UnionToken.cpp + * End of file UnionToken.cpp */