Optimize duplicate-attribute checking for start tags with many attributes.

When a start tag carries more than 20 attributes, the scanners stop using the
pairwise comparison and instead probe a RefHash2KeysTableOf<XMLAttr> keyed on
(attribute name, URI id).  The table is held by XMLScanner in
fAttrDupChkRegistry, created on first use by setAttrDupChkRegistry(), emptied
on each later use, and deleted in the XMLScanner destructor.

Review notes (changes made to the submitted patch):
  * WFXMLScanner.cpp: the outer loop stopped at attCount-1, so the hash-table
    path never probed the last attribute and missed a duplicate in that
    position.  The loop now visits every attribute; the pairwise path is
    unaffected because its inner loop is empty on the final iteration.
  * IGXMLScanner2.cpp: the hash-table path reported curAttr->getName(), but
    curAttr is not assigned on that path.  It now reports suffPtr, which is
    the name that was looked up.
  * DGXMLScanner.cpp: (unsigned int&)attCount replaced by a value cast.
  * NOTE(review): indentation and blank context lines were reconstructed from
    a whitespace-collapsed copy; verify against the tree (or apply with
    whitespace ignored).  The post-image blob ids on the index lines of the
    three files above predate these review changes.

diff --git a/src/xercesc/internal/DGXMLScanner.cpp b/src/xercesc/internal/DGXMLScanner.cpp
index 514d9ddfa9de4799e9ee176a4c38c9bc707c6bc4..cf8bf4a144424d27758d8a504069b0d7cb10b21e 100644
--- a/src/xercesc/internal/DGXMLScanner.cpp
+++ b/src/xercesc/internal/DGXMLScanner.cpp
@@ -2378,6 +2378,13 @@ void DGXMLScanner::updateNSMap(const XMLCh* const attrPrefix
 void DGXMLScanner::scanAttrListforNameSpaces(RefVectorOf<XMLAttr>* theAttrList, int attCount,
                                                 XMLElementDecl* elemDecl)
 {
+
+    //
+    // Decide whether to use a hash table for duplicate checking
+    //
+    bool toUseHashTable = false;
+    setAttrDupChkRegistry((unsigned int)attCount, toUseHashTable);
+
     // Make an initial pass through the list and find any xmlns attributes or
     // schema attributes.
     // When we find one, send it off to be used to update the element stack's
@@ -2412,17 +2419,35 @@ void DGXMLScanner::scanAttrListforNameSpaces(RefVectorOf<XMLAttr>* theAttrList,
             // by checking for qualified names with the same local part and with prefixes
             // which have been bound to namespace names that are identical.
             XMLAttr* loopAttr;
-            for (int attrIndex=0; attrIndex < index; attrIndex++) {
-                loopAttr = theAttrList->elementAt(attrIndex);
-                if (loopAttr->getURIId() == curAttr->getURIId() &&
-                    XMLString::equals(loopAttr->getName(), curAttr->getName())) {
+
+            if (!toUseHashTable)
+            {
+                for (int attrIndex=0; attrIndex < index; attrIndex++) {
+                    loopAttr = theAttrList->elementAt(attrIndex);
+                    if (loopAttr->getURIId() == curAttr->getURIId() &&
+                        XMLString::equals(loopAttr->getName(), curAttr->getName())) {
+                        emitError
+                        (
+                            XMLErrs::AttrAlreadyUsedInSTag
+                            , curAttr->getName()
+                            , elemDecl->getFullName()
+                        );
+                    }
+                }
+            }
+            else
+            {
+                if (fAttrDupChkRegistry->containsKey((void*)curAttr->getName(), curAttr->getURIId()))
+                {
                     emitError
-                    (
+                        (
                         XMLErrs::AttrAlreadyUsedInSTag
                         , curAttr->getName()
                         , elemDecl->getFullName()
-                    );
+                        );
                 }
+
+                fAttrDupChkRegistry->put((void*)curAttr->getName(), curAttr->getURIId(), curAttr);
             }
         }
     }
diff --git a/src/xercesc/internal/IGXMLScanner2.cpp b/src/xercesc/internal/IGXMLScanner2.cpp
index 265b4fae7b5e2af7bd7788672dfb66090184b79e..54d449286ed01e2c2dab0351dc87c8caf0683b38 100644
--- a/src/xercesc/internal/IGXMLScanner2.cpp
+++ b/src/xercesc/internal/IGXMLScanner2.cpp
@@ -110,6 +110,15 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs
     XMLBufBid bbNormal(&fBufMgr);
     XMLBuffer& normBuf = bbNormal.getBuffer();
 
+    //
+    // Decide whether to use a hash table for duplicate checking
+    //
+    bool toUseHashTable = false;
+    if (fGrammarType == Grammar::DTDGrammarType)
+    {
+        setAttrDupChkRegistry(attCount, toUseHashTable);
+    }
+
     // Loop through our explicitly provided attributes, which are in the raw
     // scanned form, and build up XMLAttr objects.
     unsigned int index;
@@ -617,16 +626,32 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs
                 // by checking for qualified names with the same local part and with prefixes
                 // which have been bound to namespace names that are identical.
                 if (fGrammarType == Grammar::DTDGrammarType) {
-                    for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) {
-                        curAttr = toFill.elementAt(attrIndex);
-                        if (uriId == curAttr->getURIId() &&
-                            XMLString::equals(suffPtr, curAttr->getName())) {
+                    if (!toUseHashTable)
+                    {
+                        for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) {
+                            curAttr = toFill.elementAt(attrIndex);
+                            if (uriId == curAttr->getURIId() &&
+                                XMLString::equals(suffPtr, curAttr->getName())) {
+                                emitError
+                                (
+
+                                    XMLErrs::AttrAlreadyUsedInSTag
+                                    , curAttr->getName()
+                                    , elemDecl->getFullName()
+                                );
+                            }
+                        }
+                    }
+                    else
+                    {
+                        if (fAttrDupChkRegistry->containsKey((void*)suffPtr, uriId))
+                        {
                             emitError
-                            (
+                                (
                                 XMLErrs::AttrAlreadyUsedInSTag
-                                , curAttr->getName()
+                                , suffPtr
                                 , elemDecl->getFullName()
-                            );
+                                );
                         }
                     }
                 }
@@ -658,6 +683,12 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs
                 );
                 curAttr->setSpecified(true);
             }
+
+            if (toUseHashTable)
+            {
+                fAttrDupChkRegistry->put((void*)suffPtr, uriId, curAttr);
+            }
+
             if(psviAttr)
                 psviAttr->setValue(curAttr->getValue());
 
diff --git a/src/xercesc/internal/WFXMLScanner.cpp b/src/xercesc/internal/WFXMLScanner.cpp
index 93c1afc270de34fc9be582b7d42e39c96a2b2243..fc627c9f4bca14d618a50daae67f32af3beaa476 100644
--- a/src/xercesc/internal/WFXMLScanner.cpp
+++ b/src/xercesc/internal/WFXMLScanner.cpp
@@ -1457,6 +1457,13 @@ bool WFXMLScanner::scanStartTagNS(bool& gotData)
     }
 
     if(attCount) {
+
+        //
+        // Decide whether to use a hash table for duplicate checking
+        //
+        bool toUseHashTable = false;
+        setAttrDupChkRegistry(attCount, toUseHashTable);
+
         // check for duplicate namespace attributes:
         // by checking for qualified names with the same local part and with prefixes
         // which have been bound to namespace names that are identical.
@@ -1464,17 +1471,36 @@ bool WFXMLScanner::scanStartTagNS(bool& gotData)
         XMLAttr* curAtt;
-        for (unsigned int attrIndex=0; attrIndex < attCount-1; attrIndex++) {
+        // Visit every attribute: the hash-table path must also probe the last one.
+        for (unsigned int attrIndex=0; attrIndex < attCount; attrIndex++) {
             loopAttr = fAttrList->elementAt(attrIndex);
-            for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
-                curAtt = fAttrList->elementAt(curAttrIndex);
-                if (curAtt->getURIId() == loopAttr->getURIId() &&
-                    XMLString::equals(curAtt->getName(), loopAttr->getName())) {
-                    emitError
-                    (
-                        XMLErrs::AttrAlreadyUsedInSTag
+
+            if (!toUseHashTable)
+            {
+                for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
+                    curAtt = fAttrList->elementAt(curAttrIndex);
+                    if (curAtt->getURIId() == loopAttr->getURIId() &&
+                        XMLString::equals(curAtt->getName(), loopAttr->getName())) {
+                        emitError
+                        (
+                            XMLErrs::AttrAlreadyUsedInSTag
                         , curAtt->getName()
                         , elemDecl->getFullName()
+                        );
+                    }
+                }
+            }
+            else
+            {
+                if (fAttrDupChkRegistry->containsKey((void*)loopAttr->getName(), loopAttr->getURIId()))
+                {
+                    emitError
+                    (
+                        XMLErrs::AttrAlreadyUsedInSTag
+                        , loopAttr->getName()
+                        , elemDecl->getFullName()
                     );
                 }
+
+                fAttrDupChkRegistry->put((void*)loopAttr->getName(), loopAttr->getURIId(), loopAttr);
             }
         }
     }
diff --git a/src/xercesc/internal/XMLScanner.cpp b/src/xercesc/internal/XMLScanner.cpp
index a650e38458fcfc2d8114c4a51dcc517fa8b9208f..e54b6d6b488d178270b1b479e5155eafb93e3601 100644
--- a/src/xercesc/internal/XMLScanner.cpp
+++ b/src/xercesc/internal/XMLScanner.cpp
@@ -157,6 +157,7 @@ XMLScanner::XMLScanner(XMLValidator* const valToAdopt,
     , fScannerId(0)
     , fSequenceId(0)
     , fAttrList(0)
+    , fAttrDupChkRegistry(0)
     , fDocHandler(0)
     , fDocTypeHandler(0)
     , fEntityHandler(0)
@@ -237,6 +238,7 @@ XMLScanner::XMLScanner( XMLDocumentHandler* const docHandler
     , fScannerId(0)
     , fSequenceId(0)
     , fAttrList(0)
+    , fAttrDupChkRegistry(0)
     , fDocHandler(docHandler)
     , fDocTypeHandler(docTypeHandler)
     , fEntityHandler(entityHandler)
@@ -279,6 +281,7 @@ XMLScanner::XMLScanner( XMLDocumentHandler* const docHandler
 XMLScanner::~XMLScanner()
 {
     delete fAttrList;
+    delete fAttrDupChkRegistry;
     delete fValidationContext;
     fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
     fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation;
diff --git a/src/xercesc/internal/XMLScanner.hpp b/src/xercesc/internal/XMLScanner.hpp
index 59d2eb012b6595110528d3fcd5ecf75e1728e02d..0714714b2daee6834d8c0d39a7b9d3a5344ffca1 100644
--- a/src/xercesc/internal/XMLScanner.hpp
+++ b/src/xercesc/internal/XMLScanner.hpp
@@ -16,6 +16,9 @@
 
 /*
  * $Log$
+ * Revision 1.40 2004/09/28 21:27:38 peiyongz
+ * Optimized duplicated attributes checking for large number of attributes
+ *
  * Revision 1.39 2004/09/28 02:14:13 cargilld
  * Add support for validating annotations.
  *
@@ -731,6 +734,13 @@ protected:
     void resetUIntPool();
     void recreateUIntPool();
 
+    inline
+    void setAttrDupChkRegistry
+         (
+            const unsigned int &attrNumber
+          , bool &toUseHashTable
+         );
+
     // -----------------------------------------------------------------------
     // Data members
     //
@@ -986,6 +996,7 @@ protected:
     XMLUInt32 fScannerId;
     XMLUInt32 fSequenceId;
     RefVectorOf<XMLAttr>* fAttrList;
+    RefHash2KeysTableOf<XMLAttr>* fAttrDupChkRegistry;
     XMLDocumentHandler* fDocHandler;
     DocTypeHandler* fDocTypeHandler;
     XMLEntityHandler* fEntityHandler;
@@ -1521,6 +1532,29 @@ inline void XMLScanner::resetValidationContext()
     fEntityDeclPoolRetrieved = false;
 }
 
+inline void XMLScanner::setAttrDupChkRegistry(const unsigned int &attrNumber
+                                            , bool &toUseHashTable)
+{
+    // once the number of attributes exceeds 20, we use a hash table to check duplication
+    if (attrNumber > 20)
+    {
+        toUseHashTable = true;
+
+        if (!fAttrDupChkRegistry)
+        {
+            fAttrDupChkRegistry = new (fMemoryManager) RefHash2KeysTableOf<XMLAttr>
+            (
+              2*attrNumber+1, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
+            );
+        }
+        else
+        {
+            fAttrDupChkRegistry->removeAll();
+        }
+    }
+
+}
+
 XERCES_CPP_NAMESPACE_END
 
 #endif