diff --git a/src/xercesc/internal/DGXMLScanner.cpp b/src/xercesc/internal/DGXMLScanner.cpp index 5b3dc2bef7df6e5d6e7d3b4ca4cdc1bebcaa3012..494d4249eb8a6d80d786feb1b1e1b3a617daeda6 100644 --- a/src/xercesc/internal/DGXMLScanner.cpp +++ b/src/xercesc/internal/DGXMLScanner.cpp @@ -80,6 +80,7 @@ #include <xercesc/validators/DTD/DTDValidator.hpp> #include <xercesc/util/OutOfMemoryException.hpp> #include <xercesc/util/XMLResourceIdentifier.hpp> +#include <xercesc/util/HashPtr.hpp> XERCES_CPP_NAMESPACE_BEGIN @@ -95,6 +96,9 @@ DGXMLScanner::DGXMLScanner(XMLValidator* const valToAdopt , fDTDValidator(0) , fDTDGrammar(0) , fDTDElemNonDeclPool(0) + , fElemCount(0) + , fAttDefRegistry(0) + , fUndeclaredAttrRegistry(0) { try { @@ -134,6 +138,9 @@ DGXMLScanner::DGXMLScanner( XMLDocumentHandler* const docHandler , fDTDValidator(0) , fDTDGrammar(0) , fDTDElemNonDeclPool(0) + , fElemCount(0) + , fAttDefRegistry(0) + , fUndeclaredAttrRegistry(0) { try { @@ -1169,6 +1176,9 @@ bool DGXMLScanner::scanStartTag(bool& gotData) // pairs until we get there. unsigned int attCount = 0; unsigned int curAttListSize = fAttrList->size(); + wasAdded = false; + fElemCount++; + while (true) { // And get the next non-space character @@ -1253,37 +1263,57 @@ bool DGXMLScanner::scanStartTag(bool& gotData) // See if this attribute is declared for this element. If we are // not validating of course it will not be at first, but we will // fault it into the pool (to avoid lots of redundant errors.) 
- wasAdded = false; - XMLAttDef* attDef = elemDecl->findAttr - ( - fAttNameBuf.getRawBuffer() - , 0 - , 0 - , 0 - , XMLElementDecl::AddIfNotFound - , wasAdded - ); + XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( fAttNameBuf.getRawBuffer()); - if (fValidate) + // now need to prepare for duplicate detection + if(attDef) { - if (wasAdded) + unsigned int *curCountPtr = fAttDefRegistry->get(attDef); + if(!curCountPtr) { - // This is to tell the Validator that this attribute was - // faulted-in, was not an attribute in the attdef originally - attDef->setCreateReason(XMLAttDef::JustFaultIn); - - fValidator->emitError - ( - XMLValid::AttNotDefinedForElement - , fAttNameBuf.getRawBuffer() - , qnameRawBuf + curCountPtr = getNewUIntPtr(); + *curCountPtr = fElemCount; + fAttDefRegistry->put(attDef, curCountPtr); + } + else if(*curCountPtr < fElemCount) + *curCountPtr = fElemCount; + else + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , attDef->getFullName() + , elemDecl->getFullName() + ); + } + } + else + { + XMLCh * namePtr = fAttNameBuf.getRawBuffer(); + unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr); + if(!curCountPtr) + { + curCountPtr = getNewUIntPtr(); + *curCountPtr = fElemCount; + fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr); + } + else if(*curCountPtr < fElemCount) + *curCountPtr = fElemCount; + else + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , namePtr + , elemDecl->getFullName() ); } - // If this attribute was faulted-in and first occurence, - // then emit an error - else if (attDef->getCreateReason() == XMLAttDef::JustFaultIn - && !attDef->getProvided()) + } + if (fValidate) + { + if (!attDef) { + fValidator->emitError ( XMLValid::AttNotDefinedForElement @@ -1293,28 +1323,11 @@ bool DGXMLScanner::scanStartTag(bool& gotData) } } - // If its already provided, then there are more than one of - // this attribute in this start tag, so emit an error. 
- if (attDef->getProvided()) - { - emitError - ( - XMLErrs::AttrAlreadyUsedInSTag - , attDef->getFullName() - , qnameRawBuf - ); - } - else - { - // Mark this one as already seen - attDef->setProvided(true); - } - // Skip any whitespace before the value and then scan the att // value. This will come back normalized with entity refs and // char refs expanded. fReaderMgr.skipPastSpaces(); - if (!scanAttValue(attDef, fAttValueBuf)) + if (!scanAttValue(attDef, fAttNameBuf.getRawBuffer(), fAttValueBuf)) { static const XMLCh tmpList[] = { @@ -1352,7 +1365,7 @@ bool DGXMLScanner::scanStartTag(bool& gotData) // determine if it has a valid value. It will output any needed // errors, but we just keep going. We only need to do this if // we are validating. - if (!wasAdded && attDef->getCreateReason() != XMLAttDef::JustFaultIn) + if (attDef) { // Let the validator pass judgement on the attribute value if (fValidate) @@ -1403,7 +1416,7 @@ bool DGXMLScanner::scanStartTag(bool& gotData) fEmptyNamespaceId , fAttNameBuf.getRawBuffer() , fAttValueBuf.getRawBuffer() - , attDef->getType() + , (attDef)?attDef->getType():XMLAttDef::CData , true , fMemoryManager ); @@ -1416,7 +1429,7 @@ bool DGXMLScanner::scanStartTag(bool& gotData) , fAttNameBuf.getRawBuffer() , XMLUni::fgZeroLenString , fAttValueBuf.getRawBuffer() - , attDef->getType() + , (attDef)?attDef->getType():XMLAttDef::CData , true , fMemoryManager ); @@ -1434,7 +1447,7 @@ bool DGXMLScanner::scanStartTag(bool& gotData) fEmptyNamespaceId , fAttNameBuf.getRawBuffer() , fAttValueBuf.getRawBuffer() - , attDef->getType() + , (attDef)?attDef->getType():XMLAttDef::CData ); } else @@ -1445,7 +1458,7 @@ bool DGXMLScanner::scanStartTag(bool& gotData) , fAttNameBuf.getRawBuffer() , XMLUni::fgZeroLenString , fAttValueBuf.getRawBuffer() - , attDef->getType() + , (attDef)?attDef->getType():XMLAttDef::CData ); } curAtt->setSpecified(true); @@ -1870,6 +1883,14 @@ void DGXMLScanner::commonInit() fDTDValidator = new (fMemoryManager) 
DTDValidator(); initValidator(fDTDValidator); fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager); + fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int> + ( + 509, false, new (fMemoryManager)HashPtr(), fMemoryManager + ); + fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int> + ( + 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager + ); } void DGXMLScanner::cleanUp() @@ -1877,6 +1898,8 @@ void DGXMLScanner::cleanUp() delete fAttrNSList; delete fDTDValidator; delete fDTDElemNonDeclPool; + delete fAttDefRegistry; + delete fUndeclaredAttrRegistry; } @@ -1921,8 +1944,9 @@ DGXMLScanner::buildAttList(const unsigned int attCount // Get the current att def, for convenience and its def type XMLAttDef& curDef = attDefList.getAttDef(i); - if (!curDef.getProvided() && curDef.getCreateReason() != XMLAttDef::JustFaultIn) - { + unsigned int *attCountPtr = fAttDefRegistry->get(&curDef); + if (!attCountPtr || *attCountPtr < fElemCount) + { // did not occur const XMLAttDef::DefAttTypes defType = curDef.getDefaultType(); if (fValidate) @@ -2039,9 +2063,6 @@ DGXMLScanner::buildAttList(const unsigned int attCount retCount++; } } - else { - curDef.setProvided(false); - } } } @@ -2182,6 +2203,18 @@ void DGXMLScanner::scanReset(const InputSource& src) fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit(); fEntityExpansionCount = 0; } + if(fUIntPoolRowTotal >= 32) + { // 8 KB tied up with validating attributes... 
+ fAttDefRegistry->removeAll(); + fUndeclaredAttrRegistry->removeAll(); + recreateUIntPool(); + } + else + { + // note that this will implicitly reset the values of the hashtables, + // though their buckets will still be tied up + resetUIntPool(); + } } @@ -2440,6 +2473,7 @@ InputSource* DGXMLScanner::resolveSystemId(const XMLCh* const sysId) // DGXMLScanner: Private parsing methods // --------------------------------------------------------------------------- bool DGXMLScanner::scanAttValue( const XMLAttDef* const attDef + , const XMLCh *const attrName , XMLBuffer& toFill) { enum States @@ -2449,8 +2483,9 @@ bool DGXMLScanner::scanAttValue( const XMLAttDef* const attDef }; // Get the type and name - const XMLAttDef::AttTypes type = attDef->getType(); - const XMLCh* const attrName = attDef->getFullName(); + const XMLAttDef::AttTypes type = (attDef) + ?attDef->getType() + :XMLAttDef::CData; // Reset the target buffer toFill.reset(); @@ -2465,7 +2500,9 @@ bool DGXMLScanner::scanAttValue( const XMLAttDef* const attDef const unsigned int curReader = fReaderMgr.getCurrentReaderNum(); // Get attribute def - to check to see if it's declared externally or not - bool isAttExternal = attDef->isExternal(); + bool isAttExternal = (attDef) + ?attDef->isExternal() + :false; // Loop until we get the attribute value. Note that we use a double // loop here to avoid the setup/teardown overhead of the exception diff --git a/src/xercesc/internal/DGXMLScanner.hpp b/src/xercesc/internal/DGXMLScanner.hpp index 21328ccc21e896dd4b84744558f1ecc3ea6f880f..13f94d1a74703db8352e05d0caef1282678693de 100644 --- a/src/xercesc/internal/DGXMLScanner.hpp +++ b/src/xercesc/internal/DGXMLScanner.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.11 2003/11/24 05:09:39 neilg + * implement new, stateless, method for detecting duplicate attributes + * * Revision 1.10 2003/10/22 20:22:30 knoaman * Prepare for annotation support. 
* @@ -210,6 +213,7 @@ private : bool scanAttValue ( const XMLAttDef* const attDef + , const XMLCh *const attrName , XMLBuffer& toFill ); bool scanContent(const bool extEntity); @@ -240,12 +244,23 @@ private : // // fDTDElemNonDeclPool // registry of "faulted-in" DTD element decls + // fElemCount + // count of the number of start tags seen so far (starts at 1). + // Used for duplicate attribute detection/processing of required/defaulted attributes + // fAttDefRegistry + // mapping from XMLAttDef instances to the count of the last + // start tag where they were utilized. + // fUndeclaredAttrRegistry + // mapping of attr QNames to the count of the last start tag in which they occurred // // ----------------------------------------------------------------------- ValueVectorOf<XMLAttr*>* fAttrNSList; DTDValidator* fDTDValidator; DTDGrammar* fDTDGrammar; NameIdPool<DTDElementDecl>* fDTDElemNonDeclPool; + unsigned int fElemCount; + RefHashTableOf<unsigned int>* fAttDefRegistry; + RefHashTableOf<unsigned int>* fUndeclaredAttrRegistry; }; inline const XMLCh* DGXMLScanner::getName() const diff --git a/src/xercesc/internal/IGXMLScanner.cpp b/src/xercesc/internal/IGXMLScanner.cpp index 1ceee95206c75889c19622004f54f89ce1fde60d..db393bcb28e243750dc8d16306c6f27e3f1fff80 100644 --- a/src/xercesc/internal/IGXMLScanner.cpp +++ b/src/xercesc/internal/IGXMLScanner.cpp @@ -64,6 +64,7 @@ // --------------------------------------------------------------------------- #include <xercesc/internal/IGXMLScanner.hpp> #include <xercesc/util/RuntimeException.hpp> +#include <xercesc/util/HashPtr.hpp> #include <xercesc/util/UnexpectedEOFException.hpp> #include <xercesc/sax/InputSource.hpp> #include <xercesc/framework/XMLDocumentHandler.hpp> @@ -108,6 +109,10 @@ IGXMLScanner::IGXMLScanner( XMLValidator* const valToAdopt , fFieldActivator(0) , fDTDElemNonDeclPool(0) , fSchemaElemNonDeclPool(0) + , fElemCount(0) + , fAttDefRegistry(0) + , fUndeclaredAttrRegistry(0) + , fUndeclaredAttrRegistryNS(0) { 
try { @@ -150,6 +155,10 @@ IGXMLScanner::IGXMLScanner( XMLDocumentHandler* const docHandler , fFieldActivator(0) , fDTDElemNonDeclPool(0) , fSchemaElemNonDeclPool(0) + , fElemCount(0) + , fAttDefRegistry(0) + , fUndeclaredAttrRegistry(0) + , fUndeclaredAttrRegistryNS(0) { try { @@ -554,7 +563,19 @@ void IGXMLScanner::commonInit() fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager); // create pools for undeclared elements fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager); - fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager); + fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager); + fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int> + ( + 509, false, new (fMemoryManager)HashPtr(), fMemoryManager + ); + fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int> + ( + 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager + ); + fUndeclaredAttrRegistryNS = new (fMemoryManager) RefHash2KeysTableOf<unsigned int> + ( + 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager + ); } void IGXMLScanner::cleanUp() @@ -569,6 +590,9 @@ void IGXMLScanner::cleanUp() delete fLocationPairs; delete fDTDElemNonDeclPool; delete fSchemaElemNonDeclPool; + delete fAttDefRegistry; + delete fUndeclaredAttrRegistry; + delete fUndeclaredAttrRegistryNS; } // --------------------------------------------------------------------------- diff --git a/src/xercesc/internal/IGXMLScanner.hpp b/src/xercesc/internal/IGXMLScanner.hpp index ed90df6de0a936ed8949d90ee7d2e8f5361c5ee2..be74cb15ed67e57d921ad102f67385e57283c892 100644 --- a/src/xercesc/internal/IGXMLScanner.hpp +++ b/src/xercesc/internal/IGXMLScanner.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.12 2003/11/24 05:09:38 neilg + * implement new, stateless, method for detecting duplicate attributes + * * 
Revision 1.11 2003/10/22 20:22:30 knoaman * Prepare for annotation support. * @@ -213,6 +216,7 @@ private : bool normalizeAttValue ( const XMLAttDef* const attDef + , const XMLCh* const name , const XMLCh* const value , XMLBuffer& toFill ); @@ -324,6 +328,17 @@ private : // registry of "faulted-in" DTD element decls // fSchemaElemNonDeclPool // registry for elements without decls in the grammar + // fElemCount + // count of the number of start tags seen so far (starts at 1). + // Used for duplicate attribute detection/processing of required/defaulted attributes + // fAttDefRegistry + // mapping from XMLAttDef instances to the count of the last + // start tag where they were utilized. + // fUndeclaredAttrRegistry + // mapping of attr QNames to the count of the last start tag in which they occurred + // fUndeclaredAttrRegistryNS + // mapping of namespaceId/localName pairs to the count of the last + // start tag in which they occurred. // // ----------------------------------------------------------------------- bool fSeeXsi; @@ -341,6 +356,10 @@ private : ValueVectorOf<XMLCh*>* fLocationPairs; NameIdPool<DTDElementDecl>* fDTDElemNonDeclPool; RefHash3KeysIdPool<SchemaElementDecl>* fSchemaElemNonDeclPool; + unsigned int fElemCount; + RefHashTableOf<unsigned int>* fAttDefRegistry; + RefHashTableOf<unsigned int>* fUndeclaredAttrRegistry; + RefHash2KeysTableOf<unsigned int>* fUndeclaredAttrRegistryNS; }; inline const XMLCh* IGXMLScanner::getName() const diff --git a/src/xercesc/internal/IGXMLScanner2.cpp b/src/xercesc/internal/IGXMLScanner2.cpp index c2d54dbc545ae44544a2def7b3598d57cdfecebe..2e98ee4033ee791c7fe918a1579b5e55fe0caba4 100644 --- a/src/xercesc/internal/IGXMLScanner2.cpp +++ b/src/xercesc/internal/IGXMLScanner2.cpp @@ -119,7 +119,8 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // that it owns, and to return us a boolean indicating whether it has // any defs. 
If schemas are being validated, the complexType // at the top of the SchemaValidator's stack will - // know what's best. REVISIT: don't modify grammar at all... + // know what's best. REVISIT: don't modify grammar at all; eliminate + // this step... ComplexTypeInfo *currType = 0; if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType) currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); @@ -127,6 +128,9 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs ? currType->resetDefs() : elemDecl->resetDefs(); + // another set of attributes; increment element counter + fElemCount++; + // If there are no expliclitily provided attributes and there are no // defined attributes for the element, the we don't have anything to do. // So just return zero in this case. @@ -204,6 +208,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // If its not a special case namespace attr of some sort, then we // do normal checking and processing. XMLAttDef::AttTypes attType; + DatatypeValidator *attrValidator = 0; if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType) { // Some checking for attribute wild card first (for schema) @@ -295,20 +300,82 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // Find this attribute within the parent element. We pass both // the uriID/name and the raw QName buffer, since we don't know // how the derived validator and its elements store attributes. 
- bool wasAdded = false; if (!attDef) { - attDef = elemDecl->findAttr - ( - curPair->getKey() - , uriId - , suffPtr - , prefPtr - , XMLElementDecl::AddIfNotFound - , wasAdded - ); + if(fGrammarType == Grammar::SchemaGrammarType) + attDef = ((SchemaElementDecl *)elemDecl)->getAttDef( suffPtr , uriId); + else + attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( namePtr); + } + + // now need to prepare for duplicate detection + if(attDef) + { + unsigned int *curCountPtr = fAttDefRegistry->get(attDef); + if(!curCountPtr) + { + curCountPtr = getNewUIntPtr(); + *curCountPtr = fElemCount; + fAttDefRegistry->put(attDef, curCountPtr); + } + else if(*curCountPtr < fElemCount) + *curCountPtr = fElemCount; + else + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , attDef->getFullName() + , elemDecl->getFullName() + ); + } + } + else + { + if(fGrammarType == Grammar::DTDGrammarType) + { + unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr); + if(!curCountPtr) + { + curCountPtr = getNewUIntPtr(); + *curCountPtr = fElemCount; + fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr); + } + else if(*curCountPtr < fElemCount) + *curCountPtr = fElemCount; + else + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , namePtr + , elemDecl->getFullName() + ); + } + } + else // schema grammar + { + unsigned int *curCountPtr = fUndeclaredAttrRegistryNS->get(suffPtr, uriId); + if(!curCountPtr) + { + curCountPtr = getNewUIntPtr(); + *curCountPtr = fElemCount; + fUndeclaredAttrRegistryNS->put((void *)suffPtr, uriId, curCountPtr); + } + else if(*curCountPtr < fElemCount) + *curCountPtr = fElemCount; + else + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , namePtr + , elemDecl->getFullName() + ); + } + } } - if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType) { + if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) { //we may have set it to invalid already, but this is the first time we are guarenteed to have the attDef 
if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID) ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::VALID); @@ -326,15 +393,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } } - if (wasAdded) - { - // This is to tell the Validator that this attribute was - // faulted-in, was not an attribute in the attdef originally - attDef->setCreateReason(XMLAttDef::JustFaultIn); - } - - bool errorCondition = fValidate && !attDefForWildCard && - attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided(); + bool errorCondition = fValidate && !attDefForWildCard && !attDef; if (errorCondition && !skipThisOne && !laxThisOne) { // @@ -360,7 +419,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , bufMsg.getRawBuffer() , elemDecl->getFullName() ); - if(fGrammarType == Grammar::SchemaGrammarType) { + if(fGrammarType == Grammar::SchemaGrammarType && attDef) { ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID); if (getPSVIHandler()) { @@ -369,7 +428,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } } } - else if(errorCondition && laxThisOne && fGrammarType == Grammar::SchemaGrammarType) { + else if(errorCondition && laxThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) { ((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::NONE); ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::UNKNOWN); if (getPSVIHandler()) @@ -381,6 +440,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } + /**** REVISIT: excise this dead code // If its already provided, then there are more than one of // this attribute in this start tag, so emit an error. if (attDef->getProvided()) @@ -404,15 +464,18 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs { attDef->setProvided(true); } + ********/ // Now normalize the raw value since we have the attribute type. We // don't care about the return status here. 
If it failed, an error // was issued, which is all we care about. if (attDefForWildCard) { - ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator()); + if(attDef) + ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator()); normalizeAttValue ( attDefForWildCard + , namePtr , curPair->getValue() , normBuf ); @@ -440,28 +503,33 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , false , elemDecl ); + attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator(); } + else // no decl; default DOMTypeInfo to anySimpleType + attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); // Save the type for later use attType = attDefForWildCard->getType(); - if(fGrammarType == Grammar::SchemaGrammarType) { + if(fGrammarType == Grammar::SchemaGrammarType && attDef) + { ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef); DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator(); - if(tempDV && tempDV->getType() == DatatypeValidator::Union) - ((SchemaAttDef*)attDef)->setMembertypeValidator(((UnionDatatypeValidator *)tempDV)->getMemberTypeValidator()); + if(tempDV && tempDV->getType() == DatatypeValidator::Union ) + ((SchemaAttDef*)attDef)->setMembertypeValidator(attrValidator); } } else { normalizeAttValue ( attDef + , namePtr , curPair->getValue() , normBuf ); // If we found an attdef for this one, then lets validate it. 
- if (attDef->getCreateReason() != XMLAttDef::JustFaultIn) + if (attDef) { if (fNormalizeData && (fGrammarType == Grammar::SchemaGrammarType)) { @@ -486,15 +554,25 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , false , elemDecl ); + attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator(); } + else if(fGrammarType == Grammar::SchemaGrammarType) + attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); + } + else // no attDef at all; default to anySimpleType + { + if(fGrammarType == Grammar::SchemaGrammarType) + attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); } // Save the type for later use - attType = attDef->getType(); + attType = (attDef)?attDef->getType():XMLAttDef::CData; - if(fGrammarType == Grammar::SchemaGrammarType) + if(fGrammarType == Grammar::SchemaGrammarType && attDef) + { ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef); + } } @@ -505,10 +583,13 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs attType = XMLAttDef::CData; normalizeAttRawValue ( - curPair->getKey() + namePtr , curPair->getValue() , normBuf ); + if((uriId == fXMLNSNamespaceId) + || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)) + attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); } // Add this attribute to the attribute list that we use to pass them @@ -545,6 +626,8 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , attType , true , fMemoryManager + , attrValidator + , (fGrammarType == Grammar::SchemaGrammarType ) ); toFill.addElement(curAttr); } @@ -558,10 +641,12 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , prefPtr , normBuf.getRawBuffer() , attType + , attrValidator + , (fGrammarType == Grammar::SchemaGrammarType ) ); 
curAttr->setSpecified(true); } - + // Bump the count of attrs in the list retCount++; } @@ -584,9 +669,9 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // Get the current att def, for convenience and its def type const XMLAttDef *curDef = &attDefList.getAttDef(i); const XMLAttDef::DefAttTypes defType = curDef->getDefaultType(); - - if (!curDef->getProvided()) - { + unsigned int *attCountPtr = fAttDefRegistry->get((void *)curDef); + if (!attCountPtr || *attCountPtr < fElemCount) + { // did not occur if(fGrammarType == Grammar::SchemaGrammarType) { ((SchemaAttDef *)curDef)->setValidationAttempted(PSVIDefs::FULL); ((SchemaAttDef *)curDef)->setValidity(PSVIDefs::VALID); @@ -598,7 +683,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } } - //the attributes is not provided + //the attribute is not provided if (fValidate) { // If we are validating and its required, then an error @@ -622,7 +707,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } } else if ((defType == XMLAttDef::Default) || - (defType == XMLAttDef::Fixed) ) + (defType == XMLAttDef::Fixed) ) { if (fStandalone && curDef->isExternal()) { @@ -643,9 +728,8 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } // Fault in the value if needed, and bump the att count. - // We have to if ((defType == XMLAttDef::Default) - || (defType == XMLAttDef::Fixed)) + || (defType == XMLAttDef::Fixed)) { // Let the validator pass judgement on the attribute value if (fValidate) @@ -691,7 +775,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs ((SchemaElementDecl *)elemDecl)->updateValidityFromAttribute((SchemaAttDef *)curDef); } - else + else if(attCountPtr) { //attribute is provided // (schema) report error for PROHIBITED attrs that are present (V_TAGc) @@ -728,6 +812,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // are legal if escaped only. 
And some escape chars are not subject to // normalization rules. bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef + , const XMLCh* const attName , const XMLCh* const value , XMLBuffer& toFill) { @@ -739,14 +824,18 @@ bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef }; // Get the type and name - const XMLAttDef::AttTypes type = attDef->getType(); + const XMLAttDef::AttTypes type = (attDef) + ?attDef->getType() + :XMLAttDef::CData; // Assume its going to go fine, and empty the target buffer in preperation bool retVal = true; toFill.reset(); // Get attribute def - to check to see if it's declared externally or not - bool isAttExternal = attDef->isExternal(); + bool isAttExternal = (attDef) + ?attDef->isExternal() + :false; // Loop through the chars of the source value and normalize it according // to the type. @@ -769,7 +858,7 @@ bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef // not allowed in attribute values. if (!escaped && (*srcPtr == chOpenAngle)) { - emitError(XMLErrs::BracketInAttrValue, attDef->getFullName()); + emitError(XMLErrs::BracketInAttrValue, attName); retVal = false; } @@ -783,17 +872,18 @@ bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef // XML 1.0, Section 2.9 if (fStandalone && fValidate && isAttExternal) { - // Can't have a standalone document declaration of "yes" if attribute - // values are subject to normalisation - fValidator->emitError(XMLValid::NoAttNormForStandalone, attDef->getFullName()); - if(fGrammarType == Grammar::SchemaGrammarType) { - ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID); + // Can't have a standalone document declaration of "yes" if attribute + // values are subject to normalisation + fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); + if(fGrammarType == Grammar::SchemaGrammarType && attDef) { + ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID); if (getPSVIHandler()) { // REVISIT: // 
PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID); } - } + } + } nextCh = chSpace; } @@ -831,8 +921,8 @@ bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef { // Can't have a standalone document declaration of "yes" if attribute // values are subject to normalisation - fValidator->emitError(XMLValid::NoAttNormForStandalone, attDef->getFullName()); - if(fGrammarType == Grammar::SchemaGrammarType) { + fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); + if(fGrammarType == Grammar::SchemaGrammarType && attDef) { ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID); if (getPSVIHandler()) { @@ -855,7 +945,7 @@ bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef srcPtr++; } - if(fGrammarType == Grammar::SchemaGrammarType) + if(fGrammarType == Grammar::SchemaGrammarType && attDef) ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef); return retVal; @@ -1094,6 +1184,20 @@ void IGXMLScanner::scanReset(const InputSource& src) fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit(); fEntityExpansionCount = 0; } + fElemCount = 0; + if(fUIntPoolRowTotal >= 32) + { // 8 KB tied up with validating attributes... 
+ fAttDefRegistry->removeAll(); + fUndeclaredAttrRegistry->removeAll(); + fUndeclaredAttrRegistryNS->removeAll(); + recreateUIntPool(); + } + else + { + // note that this will implicitly reset the values of the hashtables, + // though their buckets will still be tied up + resetUIntPool(); + } } diff --git a/src/xercesc/internal/SGXMLScanner.cpp b/src/xercesc/internal/SGXMLScanner.cpp index e811a6ade3384413a848b42c58596d062db1d775..2cb0dbf8c17cc0b7975c4cc953e378ea64a7aaa6 100644 --- a/src/xercesc/internal/SGXMLScanner.cpp +++ b/src/xercesc/internal/SGXMLScanner.cpp @@ -86,6 +86,7 @@ #include <xercesc/validators/schema/identity/ValueStore.hpp> #include <xercesc/util/OutOfMemoryException.hpp> #include <xercesc/util/XMLResourceIdentifier.hpp> +#include <xercesc/util/HashPtr.hpp> XERCES_CPP_NAMESPACE_BEGIN @@ -110,6 +111,9 @@ SGXMLScanner::SGXMLScanner( XMLValidator* const valToAdopt , fValueStoreCache(0) , fFieldActivator(0) , fElemNonDeclPool(0) + , fElemCount(0) + , fAttDefRegistry(0) + , fUndeclaredAttrRegistryNS(0) { try { @@ -156,6 +160,9 @@ SGXMLScanner::SGXMLScanner( XMLDocumentHandler* const docHandler , fValueStoreCache(0) , fFieldActivator(0) , fElemNonDeclPool(0) + , fElemCount(0) + , fAttDefRegistry(0) + , fUndeclaredAttrRegistryNS(0) { try { @@ -2015,6 +2022,14 @@ void SGXMLScanner::commonInit() fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote); fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote); fElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager); + fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int> + ( + 509, false, new (fMemoryManager)HashPtr(), fMemoryManager + ); + fUndeclaredAttrRegistryNS = new (fMemoryManager) RefHash2KeysTableOf<unsigned int> + ( + 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager + ); } void SGXMLScanner::cleanUp() @@ -2028,6 +2043,8 @@ void SGXMLScanner::cleanUp() delete fMatcherStack; delete fValueStoreCache; delete 
fElemNonDeclPool; + delete fAttDefRegistry; + delete fUndeclaredAttrRegistryNS; } void SGXMLScanner::resizeElemState() { @@ -2072,6 +2089,8 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs ? currType->resetDefs() : elemDecl->resetDefs(); + fElemCount++; + // If there are no expliclitily provided attributes and there are no // defined attributes for the element, the we don't have anything to do. // So just return zero in this case. @@ -2149,6 +2168,7 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // If its not a special case namespace attr of some sort, then we // do normal checking and processing. XMLAttDef::AttTypes attType; + DatatypeValidator *attrValidator = 0; if (!isNSAttr) { // Some checking for attribute wild card first (for schema) @@ -2233,20 +2253,54 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // Find this attribute within the parent element. We pass both // the uriID/name and the raw QName buffer, since we don't know // how the derived validator and its elements store attributes. 
- bool wasAdded = false; if (!attDef) { - attDef = elemDecl->findAttr - ( - curPair->getKey() - , uriId - , suffPtr - , prefPtr - , XMLElementDecl::AddIfNotFound - , wasAdded - ); + attDef = ((SchemaElementDecl *)elemDecl)->getAttDef(suffPtr, uriId); } - if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType) { + // now need to prepare for duplicate detection + if(attDef) + { + unsigned int *curCountPtr = fAttDefRegistry->get(attDef); + if(!curCountPtr) + { + curCountPtr = getNewUIntPtr(); + *curCountPtr = fElemCount; + fAttDefRegistry->put(attDef, curCountPtr); + } + else if(*curCountPtr < fElemCount) + *curCountPtr = fElemCount; + else + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , attDef->getFullName() + , elemDecl->getFullName() + ); + } + } + else + { + unsigned int *curCountPtr = fUndeclaredAttrRegistryNS->get(suffPtr, uriId); + if(!curCountPtr) + { + curCountPtr = getNewUIntPtr(); + *curCountPtr = fElemCount; + fUndeclaredAttrRegistryNS->put((void *)suffPtr, uriId, curCountPtr); + } + else if(*curCountPtr < fElemCount) + *curCountPtr = fElemCount; + else + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , namePtr + , elemDecl->getFullName() + ); + } + } + if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) { //we may have set it to invalid already, but this is the first time we are guarenteed to have the attDef if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID) { @@ -2266,15 +2320,7 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } } - if (wasAdded) - { - // This is to tell the Validator that this attribute was - // faulted-in, was not an attribute in the attdef originally - attDef->setCreateReason(XMLAttDef::JustFaultIn); - } - - bool errorCondition = fValidate && !attDefForWildCard && - attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided(); + bool errorCondition = fValidate && !attDefForWildCard && !attDef; if (errorCondition && !skipThisOne && 
!laxThisOne) { // @@ -2300,14 +2346,15 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , bufMsg.getRawBuffer() , elemDecl->getFullName() ); - ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID); + if(attDef) + ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID); if (getPSVIHandler()) { // REVISIT: // PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID); } } - else if(errorCondition && laxThisOne) { + else if(errorCondition && laxThisOne && attDef) { ((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::NONE); ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::UNKNOWN); if (getPSVIHandler()) @@ -2319,6 +2366,7 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } + /**** REVISIT: excise this dead code // If its already provided, then there are more than one of // this attribute in this start tag, so emit an error. if (attDef->getProvided()) @@ -2341,15 +2389,18 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs { attDef->setProvided(true); } + *******/ // Now normalize the raw value since we have the attribute type. We // don't care about the return status here. If it failed, an error // was issued, which is all we care about. 
if (attDefForWildCard) { - ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator()); + if(attDef) + ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator()); normalizeAttValue ( attDefForWildCard + , namePtr , curPair->getValue() , normBuf ); @@ -2377,26 +2428,32 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , false , elemDecl ); + attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator(); } + else // no decl; default DOMTypeInfo to anySimpleType + attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); + // Save the type for later use attType = attDefForWildCard->getType(); - ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef); + if(attDef) + ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef); DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator(); - if(tempDV && tempDV->getType() == DatatypeValidator::Union) - ((SchemaAttDef*)attDef)->setMembertypeValidator(((UnionDatatypeValidator *)tempDV)->getMemberTypeValidator()); + if(tempDV && tempDV->getType() == DatatypeValidator::Union && attDef) + ((SchemaAttDef*)attDef)->setMembertypeValidator(attrValidator); } else { normalizeAttValue ( attDef + , namePtr , curPair->getValue() , normBuf ); // If we found an attdef for this one, then lets validate it. 
- if (attDef->getCreateReason() != XMLAttDef::JustFaultIn) + if (attDef) { if (fNormalizeData && (fGrammarType == Grammar::SchemaGrammarType)) { @@ -2421,12 +2478,20 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , false , elemDecl ); + attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator(); } + else + attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); } + else + attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); // Save the type for later use - attType = attDef->getType(); - ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef); + attType = (attDef)?attDef->getType():XMLAttDef::CData; + if(attDef) + { + ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef); + } } } else @@ -2435,10 +2500,13 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs attType = XMLAttDef::CData; normalizeAttRawValue ( - curPair->getKey() + namePtr , curPair->getValue() , normBuf ); + if((uriId == fXMLNSNamespaceId) + || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)) + attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); } @@ -2457,6 +2525,8 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , attType , true , fMemoryManager + , attrValidator + , true ); toFill.addElement(curAttr); } @@ -2470,6 +2540,8 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs , prefPtr , normBuf.getRawBuffer() , attType + , attrValidator + , true ); curAttr->setSpecified(true); } @@ -2496,8 +2568,9 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs XMLAttDef *curDef = &attDefList.getAttDef(i); const XMLAttDef::DefAttTypes defType = curDef->getDefaultType(); - if (!curDef->getProvided()) - { + unsigned int *attCountPtr = 
fAttDefRegistry->get(curDef); + if (!attCountPtr || *attCountPtr < fElemCount) + { // did not occur ((SchemaAttDef *)curDef)->setValidationAttempted(PSVIDefs::FULL); ((SchemaAttDef *)curDef)->setValidity(PSVIDefs::VALID); if (getPSVIHandler()) @@ -2507,7 +2580,7 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // PSVIAttribute->setValidity(PSVIItem::VALIDITY_VALID); } - //the attributes is not provided + //the attribute is not provided if (fValidate) { // If we are validating and its required, then an error @@ -2546,9 +2619,8 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs } // Fault in the value if needed, and bump the att count. - // We have to if ((defType == XMLAttDef::Default) - || (defType == XMLAttDef::Fixed)) + || (defType == XMLAttDef::Fixed)) { // Let the validator pass judgement on the attribute value if (fValidate) @@ -2582,7 +2654,7 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs ((SchemaElementDecl *)elemDecl)->updateValidityFromAttribute((SchemaAttDef *)curDef); } - else + else if (attCountPtr) { //attribute is provided // (schema) report error for PROHIBITED attrs that are present (V_TAGc) @@ -2617,6 +2689,7 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // are legal if escaped only. And some escape chars are not subject to // normalization rules. 
bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef + , const XMLCh* const attrName , const XMLCh* const value , XMLBuffer& toFill) { @@ -2628,15 +2701,18 @@ bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef }; // Get the type and name - const XMLAttDef::AttTypes type = attDef->getType(); - const XMLCh* const attrName = attDef->getFullName(); + const XMLAttDef::AttTypes type = (attDef) + ?attDef->getType() + :XMLAttDef::CData; // Assume its going to go fine, and empty the target buffer in preperation bool retVal = true; toFill.reset(); // Get attribute def - to check to see if it's declared externally or not - bool isAttExternal = attDef->isExternal(); + bool isAttExternal = (attDef) + ?attDef->isExternal() + :false; // Loop through the chars of the source value and normalize it according // to the type. @@ -2676,7 +2752,8 @@ bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef // Can't have a standalone document declaration of "yes" if attribute // values are subject to normalisation fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); - ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID); + if(attDef) + ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID); if (getPSVIHandler()) { // REVISIT: @@ -2720,7 +2797,8 @@ bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef // Can't have a standalone document declaration of "yes" if attribute // values are subject to normalisation fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); - ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID); + if(attDef) + ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID); if (getPSVIHandler()) { // REVISIT: @@ -2741,7 +2819,8 @@ bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef srcPtr++; } - ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef); + if(attDef) + ((SchemaElementDecl 
*)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef); return retVal; } @@ -2957,6 +3036,20 @@ void SGXMLScanner::scanReset(const InputSource& src) fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit(); fEntityExpansionCount = 0; } + fElemCount = 0; + if(fUIntPoolRowTotal >= 32) + { // 8 KB tied up with validating attributes... + fAttDefRegistry->removeAll(); + fUndeclaredAttrRegistryNS->removeAll(); + recreateUIntPool(); + } + else + { + // note that this will implicitly reset the values of the hashtables, + // though their buckets will still be tied up + resetUIntPool(); + } + } diff --git a/src/xercesc/internal/SGXMLScanner.hpp b/src/xercesc/internal/SGXMLScanner.hpp index 911f8f9a8bd0e3de944d025b3aae99d64a0e337e..61ffccc4d9ca318f0a75ee1d66264e95f68c37d5 100644 --- a/src/xercesc/internal/SGXMLScanner.hpp +++ b/src/xercesc/internal/SGXMLScanner.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.11 2003/11/24 05:09:38 neilg + * implement new, stateless, method for detecting duplicate attributes + * * Revision 1.10 2003/10/22 20:22:30 knoaman * Prepare for annotation support. * @@ -207,6 +210,7 @@ private : bool normalizeAttValue ( const XMLAttDef* const attDef + , const XMLCh* const attrName , const XMLCh* const value , XMLBuffer& toFill ); @@ -310,6 +314,16 @@ private : // its xpath. // fElemNonDeclPool // registry for elements without decls in the grammar + // fElemCount + // count of the number of start tags seen so far (starts at 1). + // Used for duplicate attribute detection/processing of required/defaulted attributes + // fAttDefRegistry + // mapping from XMLAttDef instances to the count of the last + // start tag where they were utilized. + // fUndeclaredAttrRegistryNS + // mapping of namespaceId/localName pairs to the count of the last + // start tag in which they occurred.
+ // // // ----------------------------------------------------------------------- bool fSeeXsi; @@ -325,6 +339,9 @@ private : ValueStoreCache* fValueStoreCache; FieldActivator* fFieldActivator; RefHash3KeysIdPool<SchemaElementDecl>* fElemNonDeclPool; + unsigned int fElemCount; + RefHashTableOf<unsigned int>* fAttDefRegistry; + RefHash2KeysTableOf<unsigned int>* fUndeclaredAttrRegistryNS; }; inline const XMLCh* SGXMLScanner::getName() const diff --git a/src/xercesc/internal/XMLScanner.cpp b/src/xercesc/internal/XMLScanner.cpp index 147f4bbd694b996d9a2fb0cc969f1b6b0ba34090..8148d65a6f26ca25132a80a736b9412761cddad6 100644 --- a/src/xercesc/internal/XMLScanner.cpp +++ b/src/xercesc/internal/XMLScanner.cpp @@ -224,6 +224,10 @@ XMLScanner::XMLScanner(XMLValidator* const valToAdopt, , fPrefixBuf(1023, manager) , fURIBuf(1023, manager) , fElemStack(manager) + , fUIntPool(0) + , fUIntPoolRow(0) + , fUIntPoolCol(0) + , fUIntPoolRowTotal(2) { commonInit(); @@ -295,6 +299,10 @@ XMLScanner::XMLScanner( XMLDocumentHandler* const docHandler , fPrefixBuf(1023, manager) , fURIBuf(1023, manager) , fElemStack(manager) + , fUIntPool(0) + , fUIntPoolRow(0) + , fUIntPoolCol(0) + , fUIntPoolRowTotal(2) { commonInit(); @@ -311,6 +319,12 @@ XMLScanner::~XMLScanner() fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName; fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation; fMemoryManager->deallocate(fExternalNoNamespaceSchemaLocation);//delete [] fExternalNoNamespaceSchemaLocation; + // delete fUIntPool + for (unsigned int i=0; i<=fUIntPoolRow; i++) + { + fMemoryManager->deallocate(fUIntPool[i]); + } + fMemoryManager->deallocate(fUIntPool); } @@ -710,6 +724,8 @@ void XMLScanner::setParseSettings(XMLScanner* const refScanner) setExternalSchemaLocation(refScanner->getExternalSchemaLocation()); setExternalNoNamespaceSchemaLocation(refScanner->getExternalNoNamespaceSchemaLocation()); setValidationScheme(refScanner->getValidationScheme()); + 
setSecurityManager(refScanner->getSecurityManager()); + setPSVIHandler(refScanner->getPSVIHandler()); } // --------------------------------------------------------------------------- @@ -740,6 +756,12 @@ void XMLScanner::commonInit() // Create the GrammarResolver //fGrammarResolver = new GrammarResolver(); + + // create initial, 64-element, fUIntPool + fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) *fUIntPoolRowTotal); + fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6); + memset(fUIntPool[0], 0, sizeof(unsigned int) << 6); + fUIntPool[1] = 0; } @@ -2128,4 +2150,63 @@ XMLScanner::scanUpToWSOr(XMLBuffer& toFill, const XMLCh chEndChar) return toFill.getLen(); } +unsigned int *XMLScanner::getNewUIntPtr() +{ + // this method hands back a new pointer initialized to 0 + unsigned int *retVal; + if(fUIntPoolCol < 64) + { + retVal = fUIntPool[fUIntPoolRow]+fUIntPoolCol; + fUIntPoolCol++; + return retVal; + } + // time to grow the pool... 
+ if(fUIntPoolRow+1 == fUIntPoolRowTotal) + { + // and time to add some space for new rows: + fUIntPoolRowTotal <<= 1; + unsigned int **newArray = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal ); + memcpy(newArray, fUIntPool, (fUIntPoolRow+1) * sizeof(unsigned int *)); + fMemoryManager->deallocate(fUIntPool); + fUIntPool = newArray; + // need to 0 out new elements we won't need: + for (unsigned int i=fUIntPoolRow+2; i<fUIntPoolRowTotal; i++) + fUIntPool[i] = 0; + } + // now to add a new row; we just made sure we have space + fUIntPoolRow++; + fUIntPool[fUIntPoolRow] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6); + memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6); + // point to next element + fUIntPoolCol = 1; + return fUIntPool[fUIntPoolRow]; +} + +void XMLScanner::resetUIntPool() +{ + // to reuse the unsigned int pool--and the hashtables that use it-- + // simply reinitialize everything to 0's + for(unsigned int i = 0; i<= fUIntPoolRow; i++) + memset(fUIntPool[i], 0, sizeof(unsigned int) << 6); +} + +void XMLScanner::recreateUIntPool() +{ + // this allows a bloated unsigned int pool to be dispensed with + + // first, delete old fUIntPool + for (unsigned int i=0; i<=fUIntPoolRow; i++) + { + fMemoryManager->deallocate(fUIntPool[i]); + } + fMemoryManager->deallocate(fUIntPool); + + fUIntPoolRow = fUIntPoolCol = 0; + fUIntPoolRowTotal = 2; + fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal); + fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6); + memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6); + fUIntPool[1] = 0; +} + XERCES_CPP_NAMESPACE_END diff --git a/src/xercesc/internal/XMLScanner.hpp b/src/xercesc/internal/XMLScanner.hpp index b684255166d553c0d94ef34e9a6c63ede9667402..aea1c67d4fe03b92dde152dc2a35afc82685dbd1 100644 --- a/src/xercesc/internal/XMLScanner.hpp +++ 
b/src/xercesc/internal/XMLScanner.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.28 2003/11/24 05:09:38 neilg + * implement new, stateless, method for detecting duplicate attributes + * * Revision 1.27 2003/11/13 15:00:44 peiyongz * Solve Compilation/Linkage error on AIX/Solaris/HP/Linux * @@ -705,6 +708,9 @@ protected: XMLTokens senseNextToken(unsigned int& orgReader); void initValidator(XMLValidator* theValidator); inline void resetValidationContext(); + unsigned int *getNewUIntPtr(); + void resetUIntPool(); + void recreateUIntPool(); // ----------------------------------------------------------------------- // Data members @@ -714,7 +720,7 @@ protected: // the document handler the attributes found. To make it more // efficient we keep this ref vector of XMLAttr objects around. We // just reuse it over and over, allowing it to grow to meet the - // peek need. + // peak need. // // fBufMgr // This is a manager for temporary buffers used during scanning. @@ -909,6 +915,15 @@ protected: // // fXMLVersion // Enum to indicate if the main doc is XML 1.1 or XML 1.0 conformant + // fUIntPool + // pool of unsigned integers to help with duplicate attribute + // detection and filling in default/fixed attributes + // fUIntPoolRow + // current row in fUIntPool + // fUIntPoolCol + // current column in row + // fUIntPoolRowTotal + // total number of rows in table // // fMemoryManager // Pluggable memory manager for dynamic allocation/deallocation. @@ -937,6 +952,10 @@ protected: unsigned int fXMLNamespaceId; unsigned int fXMLNSNamespaceId; unsigned int fSchemaNamespaceId; + unsigned int ** fUIntPool; + unsigned int fUIntPoolRow; + unsigned int fUIntPoolCol; + unsigned int fUIntPoolRowTotal; XMLUInt32 fScannerId; XMLUInt32 fSequenceId; RefVectorOf<XMLAttr>* fAttrList; @@ -1448,3 +1467,4 @@ inline void XMLScanner::resetValidationContext() XERCES_CPP_NAMESPACE_END #endif +