diff --git a/src/xercesc/internal/DGXMLScanner.cpp b/src/xercesc/internal/DGXMLScanner.cpp index 3a16978e178becef02e58861e6114c87cd551156..773f09dbfd843830d28f22ca6160f66793399360 100644 --- a/src/xercesc/internal/DGXMLScanner.cpp +++ b/src/xercesc/internal/DGXMLScanner.cpp @@ -1527,7 +1527,7 @@ bool DGXMLScanner::scanStartTag(bool& gotData) , (fDoNamespaces) ? elemDecl->getElementName()->getPrefix() : 0 , *fAttrList , attCount - , false + , isEmpty , isRoot ); } @@ -1552,19 +1552,6 @@ bool DGXMLScanner::scanStartTag(bool& gotData) } } - // If we have a doc handler, tell it about the end tag - if (fDocHandler) - { - fDocHandler->endElement - ( - *elemDecl - , uriId - , isRoot - , (fDoNamespaces) ? elemDecl->getElementName()->getPrefix() - : XMLUni::fgZeroLenString - ); - } - // Pop the element stack back off since it'll never be used now fElemStack.popTop(); diff --git a/src/xercesc/internal/WFXMLScanner.cpp b/src/xercesc/internal/WFXMLScanner.cpp index 07bda7e923d0dda1979a0148b7e0907ffd19a874..ac8d8220f22a0814acf96cd58f70e10cbefdc309 100644 --- a/src/xercesc/internal/WFXMLScanner.cpp +++ b/src/xercesc/internal/WFXMLScanner.cpp @@ -1515,7 +1515,7 @@ bool WFXMLScanner::scanStartTagNS(bool& gotData) , elemDecl->getElementName()->getPrefix() , *fAttrList , attCount - , false + , isEmpty , isRoot ); } @@ -1528,18 +1528,6 @@ bool WFXMLScanner::scanStartTagNS(bool& gotData) // Pop the element stack back off since it'll never be used now fElemStack.popTop(); - // If we have a doc handler, tell it about the end tag - if (fDocHandler) - { - fDocHandler->endElement - ( - *elemDecl - , uriId - , isRoot - , elemDecl->getElementName()->getPrefix() - ); - } - // If the elem stack is empty, then it was an empty root if (isRoot) gotData = false; diff --git a/src/xercesc/util/regx/RangeToken.cpp b/src/xercesc/util/regx/RangeToken.cpp index ad80222bb923151c969f60ec6641c0b7c128138e..72ac963e8a2293e3f90bfa6e5dc49fe37d10b154 100644 --- a/src/xercesc/util/regx/RangeToken.cpp +++ 
b/src/xercesc/util/regx/RangeToken.cpp @@ -77,6 +77,72 @@ RangeToken::~RangeToken() { } +// This is a struct that defines a mapping for +// case-insensitive matching. The first character +// is the character we try to match in the range. +// The second is the character we add to the range, +// because it maps to the first when we're folding +// case. +struct ExceptionCharsStruct +{ + XMLInt32 baseChar; + + XMLInt32 matchingChar; +}; + + +// This is an array of character mappings that we will +// add to ranges for case-insensitive matching. +static const ExceptionCharsStruct s_exceptions[] = +{ + { 0x49, 0x130 }, + { 0x49, 0x131 }, + { 0x4b, 0x212a }, + { 0x53, 0x17f }, + { 0x69, 0x130 }, + { 0x69, 0x131 }, + { 0x6b, 0x212a }, + { 0x73, 0x17f }, + { 0xc5, 0x212b }, + { 0xe5, 0x212b }, + { 0x1c4, 0x1c5 }, + { 0x1c6, 0x1c5 }, + { 0x1c7, 0x1c8 }, + { 0x1c9, 0x1c8 }, + { 0x1ca, 0x1cb }, + { 0x1cc, 0x1cb }, + { 0x1f1, 0x1f2 }, + { 0x1f3, 0x1f2 }, + { 0x392, 0x3d0 }, + { 0x395, 0x3f5 }, + { 0x398, 0x3d1 }, + { 0x398, 0x3f4 }, + { 0x399, 0x345 }, + { 0x399, 0x1fbe }, + { 0x39a, 0x3f0 }, + { 0x39c, 0xb5 }, + { 0x3a0, 0x3d6 }, + { 0x3a1, 0x3f1 }, + { 0x3a3, 0x3c2 }, + { 0x3a6, 0x3d5 }, + { 0x3a9, 0x2126 }, + { 0x3b2, 0x3d0 }, + { 0x3b5, 0x3f5 }, + { 0x3b8, 0x3d1 }, + { 0x3b8, 0x3f4 }, + { 0x3b9, 0x345 }, + { 0x3b9, 0x1fbe }, + { 0x3ba, 0x3f0 }, + { 0x3bc, 0xb5 }, + { 0x3c0, 0x3d6 }, + { 0x3c1, 0x3f1 }, + { 0x3c3, 0x3c2 }, + { 0x3c6, 0x3d5 }, + { 0x3c9, 0x2126 }, + { 0x1e60, 0x1e9b }, + { 0x1e61, 0x1e9b } +}; + // --------------------------------------------------------------------------- // RangeToken: Getter methods // --------------------------------------------------------------------------- @@ -86,12 +152,13 @@ RangeToken* RangeToken::getCaseInsensitiveToken(TokenFactory* const tokFactory) bool isNRange = (getTokenType() == T_NRANGE) ? 
true : false; RangeToken* lwrToken = tokFactory->createRange(isNRange); + unsigned int exceptIndex = 0; #if XERCES_USE_TRANSCODER_ICU && ((U_ICU_VERSION_MAJOR_NUM > 2) || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >=4)) UChar* rangeStr=(UChar*)fMemoryManager->allocate(40*fElemCount*sizeof(UChar)); ArrayJanitor<UChar> janRange(rangeStr, fMemoryManager); int c=0; - rangeStr[c++] = chOpenSquare; + rangeStr[c++] = chOpenSquare; for (unsigned int i = 0; i < fElemCount - 1; i += 2) { XMLCh buffer[10]; unsigned int len, j; @@ -182,6 +249,34 @@ RangeToken* RangeToken::getCaseInsensitiveToken(TokenFactory* const tokFactory) lwrToken->addRange(ch, ch); } #endif + + const unsigned int exceptionsSize = + sizeof(s_exceptions) / sizeof(s_exceptions[0]); + + // Add any exception chars. These are characters where the + // case mapping is not symmetric. (Unicode case mappings are not isomorphic...) + while (exceptIndex < exceptionsSize) + { + if (s_exceptions[exceptIndex].baseChar < ch) + { + ++exceptIndex; + } + else if (s_exceptions[exceptIndex].baseChar == ch) + { + const XMLInt32 matchingChar = + s_exceptions[exceptIndex].matchingChar; + + lwrToken->addRange( + matchingChar, + matchingChar); + + ++exceptIndex; + } + else + { + break; + } + } } } @@ -262,7 +357,7 @@ void RangeToken::addRange(const XMLInt32 start, const XMLInt32 end) { if(fSorted && fRanges[fElemCount-1] >= val1) { - for (int i = 0; i < (int)fElemCount; i +=2) + for (int i = 0; i < (int)fElemCount; i +=2) { // check if this range is already part of this one if (fRanges[i] <= val1 && fRanges[i+1] >= val2) @@ -273,8 +368,8 @@ void RangeToken::addRange(const XMLInt32 start, const XMLInt32 end) { fRanges[i+1]=val2; break; } - else if (fRanges[i] > val1 || - (fRanges[i]==val1 && fRanges[i+1] > val2)) + else if (fRanges[i] > val1 || + (fRanges[i]==val1 && fRanges[i+1] > val2)) { for(int j=fElemCount-1;j>=i;j--) fRanges[j+2]=fRanges[j]; @@ -283,7 +378,7 @@ void RangeToken::addRange(const XMLInt32 
start, const XMLInt32 end) { fElemCount += 2; break; } - } + } } else { diff --git a/src/xercesc/util/regx/RangeToken.hpp b/src/xercesc/util/regx/RangeToken.hpp index 18cb344a5f8d32c2123921d78e01444a3650701e..69e32bf75d15dc1b7d7944b1c9f79aca3e3661da 100644 --- a/src/xercesc/util/regx/RangeToken.hpp +++ b/src/xercesc/util/regx/RangeToken.hpp @@ -56,6 +56,8 @@ public: // ----------------------------------------------------------------------- RangeToken* getCaseInsensitiveToken(TokenFactory* const tokFactory); + void setCaseInsensitiveToken(RangeToken* tok); + // ----------------------------------------------------------------------- // Setter methods // ----------------------------------------------------------------------- @@ -115,6 +117,11 @@ private: }; +inline void RangeToken::setCaseInsensitiveToken(RangeToken* tok) +{ + fCaseIToken = tok; +} + inline void RangeToken::createMap() { if (!fMap) diff --git a/src/xercesc/util/regx/RegularExpression.cpp b/src/xercesc/util/regx/RegularExpression.cpp index 2ea202e5867913922775071680d7e921c7832f9a..02fbb38c263fd73877b2f418f6d3160b7cfee695 100644 --- a/src/xercesc/util/regx/RegularExpression.cpp +++ b/src/xercesc/util/regx/RegularExpression.cpp @@ -1619,7 +1619,8 @@ void RegularExpression::prepare() { if (fOperations != 0 && fOperations->getNextOp() == 0 && (fOperations->getOpType() == Op::O_STRING || - fOperations->getOpType() == Op::O_CHAR) ) { + fOperations->getOpType() == Op::O_CHAR) && + !isSet(fOptions, IGNORE_CASE) ) { fFixedStringOnly = true; @@ -1648,8 +1649,9 @@ void RegularExpression::prepare() { fBMPattern = new (fMemoryManager) BMPattern(fFixedString, 256, isSet(fOptions, IGNORE_CASE), fMemoryManager); } - else if (!isSet(fOptions, XMLSCHEMA_MODE) && - !isSet(fOptions, PROHIBIT_FIXED_STRING_OPTIMIZATION)) { + else if (!isSet(fOptions, XMLSCHEMA_MODE) && + !isSet(fOptions, PROHIBIT_FIXED_STRING_OPTIMIZATION) && + !isSet(fOptions, IGNORE_CASE)) { int fixedOpts = 0; Token* tok = 
fTokenTree->findFixedString(fOptions, fixedOpts); diff --git a/src/xercesc/util/regx/UnicodeRangeFactory.cpp b/src/xercesc/util/regx/UnicodeRangeFactory.cpp index b305dc09b92016f9b9f1b7b777b4ae43c94bdb5d..c5511072621f990d3fd3157174f1ca8f3e5c19fe 100644 --- a/src/xercesc/util/regx/UnicodeRangeFactory.cpp +++ b/src/xercesc/util/regx/UnicodeRangeFactory.cpp @@ -194,9 +194,16 @@ void UnicodeRangeFactory::buildRanges(RangeTokenMap *rangeTokMap) { tok->createMap(); rangeTokMap->setRangeToken(fgUniIsSpace, tok , true); + RangeToken* const dummyToken = + tokFactory->createRange(); + + dummyToken->addRange(-1, -2); + dummyToken->createMap(); + // build the internal maps. for (int l=0; l < UNICATEGSIZE; l++) { ranges[l]->createMap(); + ranges[l]->setCaseInsensitiveToken(dummyToken); } fRangesCreated = true; diff --git a/src/xercesc/validators/schema/SchemaValidator.cpp b/src/xercesc/validators/schema/SchemaValidator.cpp index fe14b4b22d3ab979d7195e02e9109b7925c1fd54..f2d22a7e6b99699fbcf63f47bb29a90f1a868085 100644 --- a/src/xercesc/validators/schema/SchemaValidator.cpp +++ b/src/xercesc/validators/schema/SchemaValidator.cpp @@ -1427,11 +1427,13 @@ SchemaValidator::checkNameAndTypeOK(SchemaGrammar* const currentGrammar, // case of mixed complex types with attributes only if (derivedURI == XMLElementDecl::fgPCDataElemId) { - if (!XMLString::equals(derivedName, baseName) || derivedURI != baseURI) - ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::PD_NameTypeOK1, fMemoryManager); return; } + if (!XMLString::equals(derivedName, baseName) || derivedURI != baseURI) { + ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::PD_NameTypeOK1, fMemoryManager); + } + SchemaGrammar* aGrammar = currentGrammar; const XMLCh* schemaURI = fGrammarResolver->getStringPool()->getValueForId(derivedURI); @@ -1634,28 +1636,85 @@ SchemaValidator::checkTypesOK(const SchemaElementDecl* const derivedElemDecl, void SchemaValidator::checkRecurseAsIfGroup(SchemaGrammar* const currentGrammar, - ContentSpecNode* 
const derivedSpecNode, + ContentSpecNode* const derivedSpecNodeIn, const int derivedScope, const ContentSpecNode* const baseSpecNode, const int baseScope, ValueVectorOf<ContentSpecNode*>* const baseNodes, const ComplexTypeInfo* const baseInfo) { - ContentSpecNode::NodeTypes baseType = baseSpecNode->getType(); - ValueVectorOf<ContentSpecNode*> derivedNodes(1, fMemoryManager); + ContentSpecNode::NodeTypes baseType = baseSpecNode->getType(); bool toLax = false; //Treat the element as if it were in a group of the same variety as base - ContentSpecNode derivedGroupNode(baseType, derivedSpecNode, 0, false, true, fMemoryManager); - - derivedNodes.addElement(derivedSpecNode); + ContentSpecNode derivedGroupNode(baseType, derivedSpecNodeIn, 0, false, true, fMemoryManager); + const ContentSpecNode* const derivedSpecNode = &derivedGroupNode; if ((baseSpecNode->getType() & 0x0f) == ContentSpecNode::Choice) { toLax = true; } + // Instead of calling this routine, inline it + // checkRecurse(currentGrammar, &derivedGroupNode, derivedScope, &derivedNodes, + // baseSpecNode, baseScope, baseNodes, baseInfo, toLax); + + if (!isOccurrenceRangeOK(derivedSpecNode->getMinOccurs(), derivedSpecNode->getMaxOccurs(), + baseSpecNode->getMinOccurs(), baseSpecNode->getMaxOccurs())) { + ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::PD_Recurse1, fMemoryManager); + } + + // check for mapping of children + XMLExcepts::Codes codeToThrow = XMLExcepts::NoError; + unsigned int count2= baseNodes->size(); + unsigned int current = 0; + + { + bool matched = false; + + for (unsigned int j = current; j < count2; j++) { + + ContentSpecNode* baseNode = baseNodes->elementAt(j); + current++; + + bool bDoBreak=false; // workaround for Borland bug with 'break' in 'catch' + try { + + checkParticleDerivationOk(currentGrammar, derivedSpecNodeIn, + derivedScope, baseNode, baseScope, baseInfo); + matched = true; + break; + } + catch(const XMLException&) { + if (!toLax && baseNode->getMinTotalRange()) { + 
bDoBreak=true; + } + } + if(bDoBreak) + break; + } + + // did not find a match + if (!matched) { + codeToThrow = XMLExcepts::PD_Recurse2; + } + } + + // Now, see if there are some elements in the base we didn't match up + // in case of Sequence or All + if (!toLax && codeToThrow == XMLExcepts::NoError && + (true || (baseType & 0x0f) == ContentSpecNode::All || + derivedSpecNodeIn->getElement()->getURI() != XMLElementDecl::fgPCDataElemId)) { + for (unsigned int j = current; j < count2; j++) { + if (baseNodes->elementAt(j)->getMinTotalRange() * baseSpecNode->getMinOccurs()) { //!emptiable + codeToThrow = XMLExcepts::PD_Recurse2; + break; + } + } + } + + if (codeToThrow != XMLExcepts::NoError) { + ThrowXMLwithMemMgr(RuntimeException, codeToThrow, fMemoryManager); + } - checkRecurse(currentGrammar, &derivedGroupNode, derivedScope, &derivedNodes, - baseSpecNode, baseScope, baseNodes, baseInfo, toLax); } void diff --git a/src/xercesc/validators/schema/TraverseSchema.cpp b/src/xercesc/validators/schema/TraverseSchema.cpp index eaf2c0f67019cb32027aa0f29616b578b556532a..a7f94eac671070a0b91c5ccb631938cf0ac67276 100644 --- a/src/xercesc/validators/schema/TraverseSchema.cpp +++ b/src/xercesc/validators/schema/TraverseSchema.cpp @@ -6175,7 +6175,12 @@ void TraverseSchema::processComplexContent(const DOMElement* const ctElem, // Compose the final content model by concatenating the base and // the current in sequence if (!specNode) { - + if (isMixed) { + if (baseSpecNode && baseSpecNode->hasAllContent()) { + reportSchemaError(ctElem, XMLUni::fgXMLErrDomain, XMLErrs::NotAllContent); + throw TraverseSchema::InvalidComplexTypeInfo; // REVISIT - should we continue + } + } if (baseSpecNode) { specNodeJan.reset(new (fGrammarPoolMemoryManager) ContentSpecNode(*baseSpecNode)); specNode = specNodeJan.get();