diff --git a/src/xercesc/internal/DGXMLScanner.cpp b/src/xercesc/internal/DGXMLScanner.cpp index 213154c5586b83c70c74e3b43484fcef89495582..97e4bb00a077cef0d7a3ba3df61e02899516789b 100644 --- a/src/xercesc/internal/DGXMLScanner.cpp +++ b/src/xercesc/internal/DGXMLScanner.cpp @@ -760,7 +760,9 @@ void DGXMLScanner::scanDocTypeDecl() fDocTypeHandler->resetDocType(); // There must be some space after DOCTYPE - if (!fReaderMgr.skipPastSpaces()) + bool skippedSomething; + fReaderMgr.skipPastSpaces(skippedSomething); + if (!skippedSomething) { emitError(XMLErrs::ExpectedWhitespace); @@ -1968,62 +1970,6 @@ bool DGXMLScanner::scanStartTagNS(bool& gotData) return true; } -unsigned int -DGXMLScanner::resolveQName(const XMLCh* const qName - , XMLBuffer& prefixBuf - , const short mode - , int& prefixColonPos) -{ - // Lets split out the qName into a URI and name buffer first. The URI - // can be empty. - prefixColonPos = XMLString::indexOf(qName, chColon); - if (prefixColonPos == -1) - { - // Its all name with no prefix, so put the whole thing into the name - // buffer. Then map the empty string to a URI, since the empty string - // represents the default namespace. This will either return some - // explicit URI which the default namespace is mapped to, or the - // the default global namespace. - bool unknown = false; - - prefixBuf.reset(); - return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown); - } - else - { - // Copy the chars up to but not including the colon into the prefix - // buffer. - prefixBuf.set(qName, prefixColonPos); - - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer(); - if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) { - - // if this is an element, it is an error to have xmlns as prefix - if (mode == ElemStack::Mode_Element) - emitError(XMLErrs::NoXMLNSAsElementPrefix, qName); - - return fXMLNSNamespaceId; - } - else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) { - return fXMLNamespaceId; - } - else - { - bool unknown = false; - unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown); - - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefixRawBuf); - - return uriId; - } - } -} - // --------------------------------------------------------------------------- // DGXMLScanner: Grammar preparsing // --------------------------------------------------------------------------- @@ -2442,39 +2388,6 @@ DGXMLScanner::buildAttList(const XMLSize_t attCount } -unsigned int -DGXMLScanner::resolvePrefix( const XMLCh* const prefix - , const ElemStack::MapModes mode) -{ - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) - return fXMLNSNamespaceId; - else if (XMLString::equals(prefix, XMLUni::fgXMLString)) - return fXMLNamespaceId; - - // Ask the element stack to search up itself for a mapping for the - // passed prefix. - bool unknown; - unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); - - // If it was unknown, then the URI was faked in but we have to issue an error - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefix); - - // check to see if uriId is empty; in XML 1.1 an emptynamespace is okay unless - // we are trying to use it. - if (*prefix && - mode == ElemStack::Mode_Element && - fXMLVersion != XMLReader::XMLV1_0 && - uriId == fElemStack.getEmptyNamespaceId()) - emitError(XMLErrs::UnknownPrefix, prefix); - - return uriId; -} - // This method will reset the scanner data structures, and related plugged // in stuff, for a new scan session. We get the input source for the primary // XML entity, create the reader for it, and push it on the stack so that diff --git a/src/xercesc/internal/DGXMLScanner.hpp b/src/xercesc/internal/DGXMLScanner.hpp index dcab0e542bc83fb03f1072527454a05cd3daa2f0..68e1463216c1ca60bfa8f45932549ff3e2abc373 100644 --- a/src/xercesc/internal/DGXMLScanner.hpp +++ b/src/xercesc/internal/DGXMLScanner.hpp @@ -66,13 +66,6 @@ public : virtual const XMLCh* getName() const; virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool(); virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const; - virtual unsigned int resolveQName - ( - const XMLCh* const qName - , XMLBuffer& prefixBufToFill - , const short mode - , int& prefixColonPos - ); virtual void scanDocument ( const InputSource& src @@ -125,11 +118,6 @@ private : , XMLElementDecl* elemDecl , RefVectorOf<XMLAttr>& toFill ); - unsigned int resolvePrefix - ( - const XMLCh* const prefix - , const ElemStack::MapModes mode - ); void updateNSMap ( const XMLCh* const attrPrefix diff --git a/src/xercesc/internal/ElemStack.cpp b/src/xercesc/internal/ElemStack.cpp index b2f248f41657ed9b8a86d60366346be22348e221..f80049ae24bf9de4bf890375bf509bb9206df9c8 100644 --- a/src/xercesc/internal/ElemStack.cpp +++ b/src/xercesc/internal/ElemStack.cpp @@ -292,20 +292,11 @@ void ElemStack::addPrefix( const XMLCh* const prefixToAdd unsigned int ElemStack::mapPrefixToURI( const XMLCh* const prefixToMap - , const MapModes mode , bool& unknown) const { // Assume we find it unknown = false; - // - // If the prefix is empty, and we are in attribute mode, then we assign - // it to the empty namespace because the default namespace does not - // apply to attributes. - // - if (!*prefixToMap && (mode == Mode_Attribute)) - return fEmptyNamespaceId; - // // Map the prefix to its unique id, from the prefix string pool. If its // not a valid prefix, then its a failure. @@ -689,7 +680,6 @@ void WFElemStack::addPrefix( const XMLCh* const prefixToAdd unsigned int WFElemStack::mapPrefixToURI( const XMLCh* const prefixToMap - , const MapModes mode , bool& unknown) const { // Assume we find it @@ -706,14 +696,6 @@ unsigned int WFElemStack::mapPrefixToURI( const XMLCh* const prefixToMap return fUnknownNamespaceId; } - // - // If the prefix is empty, and we are in attribute mode, then we assign - // it to the empty namespace because the default namespace does not - // apply to attributes. - // - if (!*prefixToMap && (mode == Mode_Attribute)) - return fEmptyNamespaceId; - // // Check for the special prefixes 'xml' and 'xmlns' since they cannot // be overridden. diff --git a/src/xercesc/internal/ElemStack.hpp b/src/xercesc/internal/ElemStack.hpp index 043d1383f833ad557a9ba29243450a7dfdaceef8..95667ba370a10c67b25812b254cc15ef1cb5e6bd 100644 --- a/src/xercesc/internal/ElemStack.hpp +++ b/src/xercesc/internal/ElemStack.hpp @@ -185,7 +185,6 @@ public : unsigned int mapPrefixToURI ( const XMLCh* const prefixToMap - , const MapModes mode , bool& unknown ) const; ValueVectorOf<PrefMapElem*>* getNamespaceMap() const; @@ -361,7 +360,6 @@ public : unsigned int mapPrefixToURI ( const XMLCh* const prefixToMap - , const MapModes mode , bool& unknown ) const; diff --git a/src/xercesc/internal/IGXMLScanner.cpp b/src/xercesc/internal/IGXMLScanner.cpp index a46afbce187eedbeb3a7d122af48cd34019df89a..3b717a799c73b2e288c0571c318627ed3265db39 100644 --- a/src/xercesc/internal/IGXMLScanner.cpp +++ b/src/xercesc/internal/IGXMLScanner.cpp @@ -605,18 +605,15 @@ IGXMLScanner::rawAttrScan(const XMLCh* const elemName { if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) { - if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) - { - // Ok, skip by them and get another char - fReaderMgr.getNextChar(); - fReaderMgr.skipPastSpaces(); - nextCh = fReaderMgr.peekNextChar(); - } - else + bool bFoundSpace; + fReaderMgr.skipPastSpaces(bFoundSpace); + if (!bFoundSpace) { // Emit the error but keep on going emitError(XMLErrs::ExpectedWhitespace); } + // Ok, peek another char + nextCh = fReaderMgr.peekNextChar(); } } @@ -626,7 +623,7 @@ IGXMLScanner::rawAttrScan(const XMLCh* const elemName // the special case checks. if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh)) { - // Assume its going to be an attribute, so get a name from + // Assume it's going to be an attribute, so get a name from // the input. int colonPosition; if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition)) @@ -748,10 +745,8 @@ IGXMLScanner::rawAttrScan(const XMLCh* const elemName if (attCount >= fRawAttrColonListSize) { resizeRawAttrColonList(); } - fRawAttrColonList[attCount] = colonPosition; - - // And bump the count of attributes we've gotten - attCount++; + // Set the position of the colon and bump the count of attributes we've gotten + fRawAttrColonList[attCount++] = colonPosition; // And go to the top again for another attribute continue; @@ -1237,7 +1232,9 @@ void IGXMLScanner::scanDocTypeDecl() fDocTypeHandler->resetDocType(); // There must be some space after DOCTYPE - if (!fReaderMgr.skipPastSpaces()) + bool skippedSomething; + fReaderMgr.skipPastSpaces(skippedSomething); + if (!skippedSomething) { emitError(XMLErrs::ExpectedWhitespace); @@ -1681,17 +1678,15 @@ bool IGXMLScanner::scanStartTag(bool& gotData) { if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) { - if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) - { - // Ok, skip by them and peek another char - fReaderMgr.skipPastSpaces(); - nextCh = fReaderMgr.peekNextChar(); - } - else + bool bFoundSpace; + fReaderMgr.skipPastSpaces(bFoundSpace); + if (!bFoundSpace) { // Emit the error but keep on going emitError(XMLErrs::ExpectedWhitespace); } + // Ok, peek another char + nextCh = fReaderMgr.peekNextChar(); } } @@ -2844,76 +2839,6 @@ bool IGXMLScanner::scanStartTagNS(bool& gotData) } -unsigned int -IGXMLScanner::resolveQName(const XMLCh* const qName - , XMLBuffer& prefixBuf - , const short mode - , int& prefixColonPos) -{ - prefixColonPos = XMLString::indexOf(qName, chColon); - return resolveQNameWithColon(qName, prefixBuf, mode, prefixColonPos); -} - -unsigned int -IGXMLScanner::resolveQNameWithColon(const XMLCh* const qName - , XMLBuffer& prefixBuf - , const short mode - , const int prefixColonPos) -{ - // Lets split out the qName into a URI and name buffer first. The URI - // can be empty. - if (prefixColonPos == -1) - { - // Its all name with no prefix, so put the whole thing into the name - // buffer. Then map the empty string to a URI, since the empty string - // represents the default namespace. This will either return some - // explicit URI which the default namespace is mapped to, or the - // the default global namespace. - bool unknown = false; - - prefixBuf.reset(); - return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown); - } - else - { - // Copy the chars up to but not including the colon into the prefix - // buffer. - prefixBuf.set(qName, prefixColonPos); - - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer(); - if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) { - - // if this is an element, it is an error to have xmlns as prefix - if (mode == ElemStack::Mode_Element) - emitError(XMLErrs::NoXMLNSAsElementPrefix, qName); - - return fXMLNSNamespaceId; - } - else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) { - return fXMLNamespaceId; - } - else - { - bool unknown = false; - unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown); - - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefixRawBuf); - - // check to see if uriId is empty - if (fXMLVersion != XMLReader::XMLV1_0 && - uriId == fElemStack.getEmptyNamespaceId()) - emitError(XMLErrs::UnknownPrefix, prefixRawBuf); - - return uriId; - } - } -} - // --------------------------------------------------------------------------- // IGXMLScanner: Helper methos // --------------------------------------------------------------------------- diff --git a/src/xercesc/internal/IGXMLScanner.hpp b/src/xercesc/internal/IGXMLScanner.hpp index eca71ff476d711caa5a1fd2b671becadb5358cae..343d54ac387b94f53b38b19f8d81e3038f7039b1 100644 --- a/src/xercesc/internal/IGXMLScanner.hpp +++ b/src/xercesc/internal/IGXMLScanner.hpp @@ -77,13 +77,6 @@ public : virtual const XMLCh* getName() const; virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool(); virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const; - virtual unsigned int resolveQName - ( - const XMLCh* const qName - , XMLBuffer& prefixBufToFill - , const short mode - , int& prefixColonPos - ); virtual void scanDocument ( const InputSource& src @@ -150,17 +143,6 @@ private : , const XMLCh* const value , XMLBuffer& toFill ); - unsigned int resolvePrefix - ( - const XMLCh* const prefix - , const ElemStack::MapModes mode - ); - unsigned int resolvePrefix - ( - const XMLCh* const prefix - , XMLBuffer& uriBufToFill - , const ElemStack::MapModes mode - ); void updateNSMap ( const XMLCh* const attrName @@ -188,13 +170,6 @@ private : void resizeRawAttrColonList(); - unsigned int resolveQNameWithColon - ( - const XMLCh* const qName - , XMLBuffer& prefixBufToFill - , const short mode - , const int prefixColonPos - ); // ----------------------------------------------------------------------- // Private scanning methods // ----------------------------------------------------------------------- diff --git a/src/xercesc/internal/IGXMLScanner2.cpp b/src/xercesc/internal/IGXMLScanner2.cpp index 586b5e461b78a76af31059c36b9bd86c759522d6..f801a8f14a2307ad43e79dded2c773088bd6f478 100644 --- a/src/xercesc/internal/IGXMLScanner2.cpp +++ b/src/xercesc/internal/IGXMLScanner2.cpp @@ -130,9 +130,13 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs setAttrDupChkRegistry(attCount, toUseHashTable); } + XMLBufBid bbPrefix(&fBufMgr); + XMLBuffer& prefixBuf = bbPrefix.getBuffer(); + // Loop through our explicitly provided attributes, which are in the raw // scanned form, and build up XMLAttr objects. XMLSize_t index; + const XMLCh* prefPtr, *suffPtr; for (index = 0; index < attCount; index++) { PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID; @@ -142,50 +146,33 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // We have to split the name into its prefix and name parts. Then // we map the prefix to its URI. const XMLCh* const namePtr = curPair->getKey(); - ArrayJanitor<XMLCh> janName(0); - - // use a stack-based buffer when possible. - XMLCh tempBuffer[100]; const int colonInd = fRawAttrColonList[index]; - const XMLCh* prefPtr = XMLUni::fgZeroLenString; - const XMLCh* suffPtr = XMLUni::fgZeroLenString; + unsigned int uriId; if (colonInd != -1) { - // We have to split the string, so make a copy. - if (XMLString::stringLen(namePtr) < sizeof(tempBuffer) / sizeof(tempBuffer[0])) - { - XMLString::copyString(tempBuffer, namePtr); - tempBuffer[colonInd] = chNull; - prefPtr = tempBuffer; - } - else - { - janName.reset(XMLString::replicate(namePtr, fMemoryManager), fMemoryManager); - janName[colonInd] = chNull; - prefPtr = janName.get(); - } - + prefixBuf.set(namePtr, colonInd); + prefPtr = prefixBuf.getRawBuffer(); suffPtr = namePtr + colonInd + 1; + // Map the prefix to a URI id + uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute); } else { // No colon, so we just have a name with no prefix + prefPtr = XMLUni::fgZeroLenString; suffPtr = namePtr; + // an empty prefix is always the empty namespace, when dealing with attributes + uriId = fEmptyNamespaceId; } - // Map the prefix to a URI id. We tell him that we are mapping an - // attr prefix, so any xmlns attrs at this level will not affect it. - const unsigned int uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute); - // If the uri comes back as the xmlns or xml URI or its just a name // and that name is 'xmlns', then we handle it specially. So set a // boolean flag that lets us quickly below know which we are dealing // with. - const bool isNSAttr = (uriId == fXMLNSNamespaceId) - - || XMLString::equals(suffPtr, XMLUni::fgXMLNSString) - || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI); + const bool isNSAttr = (uriId == fEmptyNamespaceId)? + XMLString::equals(suffPtr, XMLUni::fgXMLNSString) : + (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)); // If its not a special case namespace attr of some sort, then we @@ -712,7 +699,7 @@ IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs if (fGrammarType == Grammar::DTDGrammarType) { if (!toUseHashTable) { - for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) { + for (XMLSize_t attrIndex=0; attrIndex < retCount; attrIndex++) { curAttr = toFill.elementAt(attrIndex); if (uriId == curAttr->getURIId() && XMLString::equals(suffPtr, curAttr->getName())) { @@ -988,71 +975,62 @@ bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef }; // Get the type and name - const XMLAttDef::AttTypes type = (attDef) - ?attDef->getType() - :XMLAttDef::CData; + const XMLAttDef::AttTypes type = (attDef)?attDef->getType():XMLAttDef::CData; // Assume its going to go fine, and empty the target buffer in preperation bool retVal = true; toFill.reset(); - // Get attribute def - to check to see if it's declared externally or not - bool isAttExternal = (attDef) - ?attDef->isExternal() - :false; - // Loop through the chars of the source value and normalize it according // to the type. - States curState = InContent; - bool firstNonWS = false; XMLCh nextCh; const XMLCh* srcPtr = value; if (type == XMLAttDef::CData || type > XMLAttDef::Notation) { - while (*srcPtr) { - // Get the next character from the source. We have to watch for - // escaped characters (which are indicated by a 0xFFFF value followed - // by the char that was escaped.) - nextCh = *srcPtr; - - // Do we have an escaped character ? - if (nextCh == 0xFFFF) + // Get the next character from the source. We have to watch for + // escaped characters (which are indicated by a 0xFFFF value followed + // by the char that was escaped.) + while ((nextCh = *srcPtr++)!=0) + { + switch(nextCh) { - nextCh = *++srcPtr; - } - else if ( (nextCh <= 0x0D) && (nextCh == 0x09 || nextCh == 0x0A || nextCh == 0x0D) ) { + // Do we have an escaped character ? + case 0xFFFF: + nextCh = *srcPtr++; + break; + case 0x09: + case 0x0A: + case 0x0D: // Check Validity Constraint for Standalone document declaration // XML 1.0, Section 2.9 - if (fStandalone && fValidate && isAttExternal) + if (fStandalone && fValidate && attDef && attDef->isExternal()) { // Can't have a standalone document declaration of "yes" if attribute // values are subject to normalisation fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); } nextCh = chSpace; - } - else if (nextCh == chOpenAngle) { + break; + case chOpenAngle: // If its not escaped, then make sure its not a < character, which is // not allowed in attribute values. emitError(XMLErrs::BracketInAttrValue, attName); retVal = false; + break; } // Add this char to the target buffer toFill.append(nextCh); - - // And move up to the next character in the source - srcPtr++; } } else { - while (*srcPtr) + States curState = InContent; + bool firstNonWS = false; + // Get the next character from the source. We have to watch for + // escaped characters (which are indicated by a 0xFFFF value followed + // by the char that was escaped.) + while ((nextCh = *srcPtr)!=0) { - // Get the next character from the source. We have to watch for - // escaped characters (which are indicated by a 0xFFFF value followed - // by the char that was escaped.) - nextCh = *srcPtr; - // Do we have an escaped character ? if (nextCh == 0xFFFF) { @@ -1089,7 +1067,7 @@ bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef // Check Validity Constraint for Standalone document declaration // XML 1.0, Section 2.9 - if (fStandalone && fValidate && isAttExternal) + if (fStandalone && fValidate && attDef && attDef->isExternal()) { if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr)) { @@ -1165,60 +1143,6 @@ bool IGXMLScanner::normalizeAttRawValue( const XMLCh* const attrName return retVal; } -unsigned int -IGXMLScanner::resolvePrefix( const XMLCh* const prefix - , const ElemStack::MapModes mode) -{ - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) - return fXMLNSNamespaceId; - else if (XMLString::equals(prefix, XMLUni::fgXMLString)) - return fXMLNamespaceId; - - // Ask the element stack to search up itself for a mapping for the - // passed prefix. - bool unknown; - unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); - - // If it was unknown, then the URI was faked in but we have to issue an error - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefix); - - return uriId; -} - -unsigned int -IGXMLScanner::resolvePrefix( const XMLCh* const prefix - , XMLBuffer& bufToFill - , const ElemStack::MapModes mode) -{ - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) - return fXMLNSNamespaceId; - else if (XMLString::equals(prefix, XMLUni::fgXMLString)) - return fXMLNamespaceId; - - // Ask the element stack to search up itself for a mapping for the - // passed prefix. - bool unknown; - unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); - - // If it was unknown, then the URI was faked in but we have to issue an error - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefix); - - getURIText(uriId,bufToFill); - - return uriId; -} - - // This method will reset the scanner data structures, and related plugged // in stuff, for a new scan session. We get the input source for the primary // XML entity, create the reader for it, and push it on the stack so that @@ -1662,8 +1586,7 @@ void IGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount) // schema attributes. // When we find one, send it off to be used to update the element stack's // namespace mappings. - XMLSize_t index = 0; - for (index = 0; index < attCount; index++) + for (XMLSize_t index = 0; index < attCount; index++) { // each attribute has the prefix:suffix="value" const KVStringPair* curPair = fRawAttrList->elementAt(index); @@ -1692,7 +1615,7 @@ void IGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount) XMLBufBid bbXsi(&fBufMgr); XMLBuffer& fXsiType = bbXsi.getBuffer(); - for (index = 0; index < attCount; index++) + for (XMLSize_t index = 0; index < attCount; index++) { // each attribute has the prefix:suffix="value" const KVStringPair* curPair = fRawAttrList->elementAt(index); diff --git a/src/xercesc/internal/ReaderMgr.cpp b/src/xercesc/internal/ReaderMgr.cpp index 123b1476e2788bf42956083273433b6aa837a4a3..63fda50e838a89a994bb4c981930a1a452cd57c8 100644 --- a/src/xercesc/internal/ReaderMgr.cpp +++ b/src/xercesc/internal/ReaderMgr.cpp @@ -240,26 +240,39 @@ bool ReaderMgr::skipIfQuote(XMLCh& chGotten) return false; } +void ReaderMgr::skipPastSpaces(bool& skippedSomething, bool inDecl /* = false */) +{ + // we rely on the fact that fCurReader->skipSpaces will NOT reset the flag to false, but only + // set it to true if a space is found + skippedSomething = false; + // + // Skip all the spaces in the current reader. If it returned because + // it hit a non-space, break out. Else we have to pop another entity + // and keep going. + // + while (!fCurReader->skipSpaces(skippedSomething, inDecl)) + { + // Try to pop another entity. If we can't then we are done + if (!popReader()) + break; + } +} -bool ReaderMgr::skipPastSpaces(bool inDecl) +void ReaderMgr::skipPastSpaces() { - bool skippedSomething = false; + // we are not using it, so we don't care to initialize it bool tmpFlag; // // Skip all the spaces in the current reader. If it returned because // it hit a non-space, break out. Else we have to pop another entity // and keep going. // - while (!fCurReader->skipSpaces(tmpFlag, inDecl)) + while (!fCurReader->skipSpaces(tmpFlag, false)) { - if (tmpFlag) - skippedSomething = true; - - // Try to pop another enitity. If we can't then we are done + // Try to pop another entity. If we can't then we are done if (!popReader()) break; } - return (tmpFlag || skippedSomething); } void ReaderMgr::skipQuotedString(const XMLCh quoteCh) diff --git a/src/xercesc/internal/ReaderMgr.hpp b/src/xercesc/internal/ReaderMgr.hpp index d7565574b54fb8baccec1fb1aff96c89a31d3d70..1f3eee5c0dcdeb852c5ba32a141368fa8167b81d 100644 --- a/src/xercesc/internal/ReaderMgr.hpp +++ b/src/xercesc/internal/ReaderMgr.hpp @@ -89,7 +89,8 @@ public : XMLCh peekNextChar(); bool skipIfQuote(XMLCh& chGotten); void skipPastChar(const XMLCh toSkip); - bool skipPastSpaces(bool inDecl = false); + void skipPastSpaces(bool& skippedSomething, bool inDecl = false); + void skipPastSpaces(); void skipToChar(const XMLCh toSkipTo); bool skippedChar(const XMLCh toSkip); bool skippedSpace(); diff --git a/src/xercesc/internal/SGXMLScanner.cpp b/src/xercesc/internal/SGXMLScanner.cpp index e56e33d57bb6a35466eed4d5d3dd597fff8bd88b..f5eff4ea4bf57ac4f9ba83c694f0199292a6dd94 100644 --- a/src/xercesc/internal/SGXMLScanner.cpp +++ b/src/xercesc/internal/SGXMLScanner.cpp @@ -1824,76 +1824,6 @@ bool SGXMLScanner::scanStartTag(bool& gotData) } -unsigned int -SGXMLScanner::resolveQName(const XMLCh* const qName - , XMLBuffer& prefixBuf - , const short mode - , int& prefixColonPos) -{ - prefixColonPos = XMLString::indexOf(qName, chColon); - return resolveQNameWithColon(qName, prefixBuf, mode, prefixColonPos); -} - -unsigned int -SGXMLScanner::resolveQNameWithColon(const XMLCh* const qName - , XMLBuffer& prefixBuf - , const short mode - , const int prefixColonPos) -{ - // Lets split out the qName into a URI and name buffer first. The URI - // can be empty. - if (prefixColonPos == -1) - { - // Its all name with no prefix, so put the whole thing into the name - // buffer. Then map the empty string to a URI, since the empty string - // represents the default namespace. This will either return some - // explicit URI which the default namespace is mapped to, or the - // the default global namespace. - bool unknown = false; - - prefixBuf.reset(); - return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown); - } - else - { - // Copy the chars up to but not including the colon into the prefix - // buffer. - prefixBuf.set(qName, prefixColonPos); - - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer(); - if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) { - - // if this is an element, it is an error to have xmlns as prefix - if (mode == ElemStack::Mode_Element) - emitError(XMLErrs::NoXMLNSAsElementPrefix, qName); - - return fXMLNSNamespaceId; - } - else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) { - return fXMLNamespaceId; - } - else - { - bool unknown = false; - unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown); - - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefixRawBuf); - - // check to see if uriId is empty - if (fXMLVersion != XMLReader::XMLV1_0 && - uriId == fElemStack.getEmptyNamespaceId()) - emitError(XMLErrs::UnknownPrefix, prefixRawBuf); - - return uriId; - } - } -} - // --------------------------------------------------------------------------- // SGXMLScanner: Grammar preparsing // --------------------------------------------------------------------------- @@ -2174,9 +2104,13 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs XMLBufBid bbNormal(&fBufMgr); XMLBuffer& normBuf = bbNormal.getBuffer(); + XMLBufBid bbPrefix(&fBufMgr); + XMLBuffer& prefixBuf = bbPrefix.getBuffer(); + // Loop through our explicitly provided attributes, which are in the raw // scanned form, and build up XMLAttr objects. XMLSize_t index; + const XMLCh* prefPtr, *suffPtr; for (index = 0; index < attCount; index++) { PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID; @@ -2186,51 +2120,33 @@ SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // We have to split the name into its prefix and name parts. Then // we map the prefix to its URI. const XMLCh* const namePtr = curPair->getKey(); - ArrayJanitor <XMLCh> janName(0); - - // use a stack-based buffer when possible. - XMLCh tempBuffer[100]; const int colonInd = fRawAttrColonList[index]; - const XMLCh* prefPtr = XMLUni::fgZeroLenString; - const XMLCh* suffPtr = XMLUni::fgZeroLenString; + unsigned int uriId; if (colonInd != -1) { - // We have to split the string, so make a copy. - if (XMLString::stringLen(namePtr) < sizeof(tempBuffer) / sizeof(tempBuffer[0])) - { - XMLString::copyString(tempBuffer, namePtr); - tempBuffer[colonInd] = chNull; - prefPtr = tempBuffer; - } - else - { - janName.reset(XMLString::replicate(namePtr, fMemoryManager), fMemoryManager); - janName[colonInd] = chNull; - prefPtr = janName.get(); - } - + prefixBuf.set(namePtr, colonInd); + prefPtr = prefixBuf.getRawBuffer(); suffPtr = namePtr + colonInd + 1; + // Map the prefix to a URI id + uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute); } else { // No colon, so we just have a name with no prefix + prefPtr = XMLUni::fgZeroLenString; suffPtr = namePtr; + // an empty prefix is always the empty namespace, when dealing with attributes + uriId = fEmptyNamespaceId; } - // Map the prefix to a URI id. We tell him that we are mapping an - // attr prefix, so any xmlns attrs at this level will not affect it. - const unsigned int uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute); - // If the uri comes back as the xmlns or xml URI or its just a name // and that name is 'xmlns', then we handle it specially. So set a // boolean flag that lets us quickly below know which we are dealing // with. - const bool isNSAttr = (uriId == fXMLNSNamespaceId) - - || XMLString::equals(suffPtr, XMLUni::fgXMLNSString) - || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI); - + const bool isNSAttr = (uriId == fEmptyNamespaceId)? + XMLString::equals(suffPtr, XMLUni::fgXMLNSString) : + (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)); // If its not a special case namespace attr of some sort, then we // do normal checking and processing. @@ -3108,60 +3024,6 @@ bool SGXMLScanner::normalizeAttRawValue( const XMLCh* const attrName return retVal; } -unsigned int -SGXMLScanner::resolvePrefix( const XMLCh* const prefix - , const ElemStack::MapModes mode) -{ - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) - return fXMLNSNamespaceId; - else if (XMLString::equals(prefix, XMLUni::fgXMLString)) - return fXMLNamespaceId; - - // Ask the element stack to search up itself for a mapping for the - // passed prefix. - bool unknown; - unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); - - // If it was unknown, then the URI was faked in but we have to issue an error - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefix); - - return uriId; -} - -unsigned int -SGXMLScanner::resolvePrefix( const XMLCh* const prefix - , XMLBuffer& bufToFill - , const ElemStack::MapModes mode) -{ - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) - return fXMLNSNamespaceId; - else if (XMLString::equals(prefix, XMLUni::fgXMLString)) - return fXMLNamespaceId; - - // Ask the element stack to search up itself for a mapping for the - // passed prefix. - bool unknown; - unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); - - // If it was unknown, then the URI was faked in but we have to issue an error - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefix); - - getURIText(uriId,bufToFill); - - return uriId; -} - - // This method will reset the scanner data structures, and related plugged // in stuff, for a new scan session. We get the input source for the primary // XML entity, create the reader for it, and push it on the stack so that @@ -3552,8 +3414,7 @@ void SGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount) // schema attributes. // When we find one, send it off to be used to update the element stack's // namespace mappings. - XMLSize_t index; - for (index = 0; index < attCount; index++) + for (XMLSize_t index = 0; index < attCount; index++) { // each attribute has the prefix:suffix="value" const KVStringPair* curPair = fRawAttrList->elementAt(index); @@ -3582,7 +3443,7 @@ void SGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount) XMLBufBid bbXsi(&fBufMgr); XMLBuffer& fXsiType = bbXsi.getBuffer(); - for (index = 0; index < attCount; index++) + for (XMLSize_t index = 0; index < attCount; index++) { // each attribute has the prefix:suffix="value" const KVStringPair* curPair = fRawAttrList->elementAt(index); diff --git a/src/xercesc/internal/SGXMLScanner.hpp b/src/xercesc/internal/SGXMLScanner.hpp index 0ecdb3540c44fbbe42935042c4d8c06d5f456dfe..2bfe290188a8bfd2901537f6b56463bbae131c5a 100644 --- a/src/xercesc/internal/SGXMLScanner.hpp +++ b/src/xercesc/internal/SGXMLScanner.hpp @@ -75,13 +75,6 @@ public : virtual const XMLCh* getName() const; virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool(); virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const; - virtual unsigned int resolveQName - ( - const XMLCh* const qName - , XMLBuffer& prefixBufToFill - , const short mode - , int& prefixColonPos - ); virtual void scanDocument ( const InputSource& src @@ -131,11 +124,6 @@ protected: const XMLCh* const attrName , const XMLCh* const attrValue ); - unsigned int resolvePrefix - ( - const XMLCh* const prefix - , const ElemStack::MapModes mode - ); void resizeElemState(); void updateNSMap @@ -145,13 +133,6 @@ protected: , const int colonPosition ); void resizeRawAttrColonList(); - unsigned int resolveQNameWithColon - ( - const XMLCh* const qName - , XMLBuffer& prefixBufToFill - , const short mode - , const int prefixColonPos - ); // ----------------------------------------------------------------------- // Data members // @@ -254,12 +235,6 @@ private : , const XMLCh* const value , XMLBuffer& toFill ); - unsigned int resolvePrefix - ( - const XMLCh* const prefix - , XMLBuffer& uriBufToFill - , const ElemStack::MapModes mode - ); void scanRawAttrListforNameSpaces(XMLSize_t attCount); void parseSchemaLocation(const XMLCh* const schemaLocationStr); void resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri); diff --git a/src/xercesc/internal/ValidationContextImpl.cpp b/src/xercesc/internal/ValidationContextImpl.cpp index 11ce5cb65283a2a36746730eb25b84a4b7ba3419..50aed58c9a5c09470102fa5a5bfbe741ab8309dc 100644 --- a/src/xercesc/internal/ValidationContextImpl.cpp +++ b/src/xercesc/internal/ValidationContextImpl.cpp @@ -191,7 +191,7 @@ bool ValidationContextImpl::isPrefixUnknown(XMLCh* prefix) { } else if (!XMLString::equals(prefix, XMLUni::fgXMLString)) { if(fElemStack && !fElemStack->isEmpty()) - fElemStack->mapPrefixToURI(prefix, (ElemStack::MapModes) ElemStack::Mode_Element, unknown); + fElemStack->mapPrefixToURI(prefix, unknown); else if(fNamespaceScope) unknown = (fNamespaceScope->getNamespaceForPrefix(prefix)==fNamespaceScope->getEmptyNamespaceId()); } @@ -202,7 +202,7 @@ const XMLCh* ValidationContextImpl::getURIForPrefix(XMLCh* prefix) { bool unknown = false; unsigned int uriId; if(fElemStack) - uriId = fElemStack->mapPrefixToURI(prefix, (ElemStack::MapModes) ElemStack::Mode_Element, unknown); + uriId = fElemStack->mapPrefixToURI(prefix, unknown); else if(fNamespaceScope) unknown = ((uriId = fNamespaceScope->getNamespaceForPrefix(prefix))==fNamespaceScope->getEmptyNamespaceId()); if (!unknown) diff --git a/src/xercesc/internal/WFXMLScanner.cpp b/src/xercesc/internal/WFXMLScanner.cpp index d42e8796534fd1c51bbf6838d126428f52fc3f8f..b337f512d431cc800d32864c9dbf0eb8566f7b82 100644 --- a/src/xercesc/internal/WFXMLScanner.cpp +++ b/src/xercesc/internal/WFXMLScanner.cpp @@ -445,39 +445,6 @@ void WFXMLScanner::cleanUp() delete fElements; } -unsigned int -WFXMLScanner::resolvePrefix(const XMLCh* const prefix - , const ElemStack::MapModes mode) -{ - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) - return fXMLNSNamespaceId; - else if (XMLString::equals(prefix, XMLUni::fgXMLString)) - return fXMLNamespaceId; - - // Ask the element stack to search up itself for a mapping for the - // passed prefix. - bool unknown; - unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); - - // If it was unknown, then the URI was faked in but we have to issue an error - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefix); - - // check to see if uriId is empty; in XML 1.1 an emptynamespace is okay unless - // we are trying to use it. - if (*prefix && - mode == ElemStack::Mode_Element && - fXMLVersion != XMLReader::XMLV1_0 && - uriId == fElemStack.getEmptyNamespaceId()) - emitError(XMLErrs::UnknownPrefix, prefix); - - return uriId; -} - // This method will reset the scanner data structures, and related plugged // in stuff, for a new scan session. We get the input source for the primary // XML entity, create the reader for it, and push it on the stack so that @@ -839,17 +806,15 @@ bool WFXMLScanner::scanStartTag(bool& gotData) { if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) { - if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) - { - // Ok, skip by them and peek another char - fReaderMgr.skipPastSpaces(); - nextCh = fReaderMgr.peekNextChar(); - } - else + bool bFoundSpace; + fReaderMgr.skipPastSpaces(bFoundSpace); + if (!bFoundSpace) { // Emit the error but keep on going emitError(XMLErrs::ExpectedWhitespace); } + // Ok, peek another char + nextCh = fReaderMgr.peekNextChar(); } } @@ -1170,17 +1135,15 @@ bool WFXMLScanner::scanStartTagNS(bool& gotData) { if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) { - if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) - { - // Ok, skip by them and peek another char - fReaderMgr.skipPastSpaces(); - nextCh = fReaderMgr.peekNextChar(); - } - else + bool bFoundSpace; + fReaderMgr.skipPastSpaces(bFoundSpace); + if (!bFoundSpace) { // Emit the error but keep on going emitError(XMLErrs::ExpectedWhitespace); } + // Ok, peek another char + nextCh = fReaderMgr.peekNextChar(); } } @@ -1538,62 +1501,6 @@ bool WFXMLScanner::scanStartTagNS(bool& gotData) return true; } -unsigned int -WFXMLScanner::resolveQName(const XMLCh* const qName - , XMLBuffer& prefixBuf - , const short mode - , int& prefixColonPos) -{ - // Lets split out the qName into a URI and name buffer first. The URI - // can be empty. - prefixColonPos = XMLString::indexOf(qName, chColon); - if (prefixColonPos == -1) - { - // Its all name with no prefix, so put the whole thing into the name - // buffer. Then map the empty string to a URI, since the empty string - // represents the default namespace. This will either return some - // explicit URI which the default namespace is mapped to, or the - // the default global namespace. - bool unknown = false; - - prefixBuf.reset(); - return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown); - } - else - { - // Copy the chars up to but not including the colon into the prefix - // buffer. - prefixBuf.set(qName, prefixColonPos); - - // Watch for the special namespace prefixes. We always map these to - // special URIs. 'xml' gets mapped to the official URI that its defined - // to map to by the NS spec. xmlns gets mapped to a special place holder - // URI that we define (so that it maps to something checkable.) - const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer(); - if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) { - - // if this is an element, it is an error to have xmlns as prefix - if (mode == ElemStack::Mode_Element) - emitError(XMLErrs::NoXMLNSAsElementPrefix, qName); - - return fXMLNSNamespaceId; - } - else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) { - return fXMLNamespaceId; - } - else - { - bool unknown = false; - unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown); - - if (unknown) - emitError(XMLErrs::UnknownPrefix, prefixRawBuf); - - return uriId; - } - } -} - // --------------------------------------------------------------------------- // XMLScanner: Private parsing methods // --------------------------------------------------------------------------- diff --git a/src/xercesc/internal/WFXMLScanner.hpp b/src/xercesc/internal/WFXMLScanner.hpp index 5f50bea287ff3df981a71631ed80bcb6655845b2..b88b0346e54ce0fb31a76fb44f8470d2f3290a85 100644 --- a/src/xercesc/internal/WFXMLScanner.hpp +++ b/src/xercesc/internal/WFXMLScanner.hpp @@ -62,13 +62,6 @@ public : virtual const XMLCh* getName() const; virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool(); virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const; - virtual unsigned int resolveQName - ( - const XMLCh* const qName - , XMLBuffer& prefixBufToFill - , const short mode - , int& prefixColonPos - ); virtual void scanDocument ( const InputSource& src @@ -112,11 +105,6 @@ private : // ----------------------------------------------------------------------- void commonInit(); void cleanUp(); - unsigned int resolvePrefix - ( - const XMLCh* const prefix - , const ElemStack::MapModes mode - ); // ----------------------------------------------------------------------- // Private scanning methods diff --git a/src/xercesc/internal/XMLReader.cpp b/src/xercesc/internal/XMLReader.cpp index 63805b2e973ee9075c9dfe9790de73168fc51db1..9f6c5fd703ad0d4e3ad0927fd501e6bbcfa0d14e 100644 --- a/src/xercesc/internal/XMLReader.cpp +++ b/src/xercesc/internal/XMLReader.cpp @@ -703,108 +703,100 @@ bool XMLReader::getName(XMLBuffer& toFill, const bool token) return !toFill.isEmpty(); } -bool XMLReader::getQName(XMLBuffer& toFill, int* colonPosition) +bool XMLReader::getNCName(XMLBuffer& toFill) { - XMLSize_t charIndex_start; - bool checkNextCharacterForFirstNCName = true; + if (fCharIndex == fCharsAvail && !refreshCharBuffer()) + return false; - // We are only looking for two iterations (i.e. 'NCANAME':'NCNAME'). - // We will stop when we finished scanning for a QName (i.e. either a second - // colon or an invalid char). - *colonPosition = -1; - for (;;) { + XMLSize_t charIndex_start = fCharIndex, count; + // Lets check the first char for being a first name char. If not, then + // what's the point in living mannnn? Just give up now. We only do this + // if its a name and not a name token that they want. + if (fXMLVersion == XMLV1_1 + && ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F))) { + // make sure one more char is in the buffer, the transcoder + // should put only a complete surrogate pair into the buffer + assert(fCharIndex+1 < fCharsAvail); + if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF)) + return false; - // Ok, first lets see if we have chars in the buffer. If not, then lets - // reload. - if (fCharIndex == fCharsAvail) { - if (!refreshCharBuffer()) { - break; - } + // Looks ok, so lets eat it + fCharIndex += 2; + } + else { + if (!isFirstNCNameChar(fCharBuf[fCharIndex])) { + return false; } - charIndex_start = fCharIndex; - if (checkNextCharacterForFirstNCName) { - - checkNextCharacterForFirstNCName = false; - // Lets check the first char for being a first name char. If not, then - // what's the point in living mannnn? Just give up now. We only do this - // if its a name and not a name token that they want. - if (fXMLVersion == XMLV1_1 - && ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F))) { - // make sure one more char is in the buffer, the transcoder - // should put only a complete surrogate pair into the buffer - assert(fCharIndex+1 < fCharsAvail); - if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF)) - return false; - - // Looks ok, so lets eat it - fCharIndex += 2; - } - else { - if (!isFirstNCNameChar(fCharBuf[fCharIndex])) { - return false; - } - - // Looks ok, so lets eat it - fCharIndex++; - } - } + // Looks ok, so lets eat it + fCharIndex++; + } - while(true) + do + { + if (fCharIndex == fCharsAvail) { - // Check the current char and take it if it's a name char. Else - // break out. - for (;(fCharIndex < fCharsAvail) && ((fgCharCharsTable[fCharBuf[fCharIndex]] & gNCNameCharMask) != 0);fCharIndex++); - - // if it isn't a NameChar, it could be a surrogate - if ( (fCharIndex < fCharsAvail) && (fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F) ) + // we have to copy the accepted character(s), and update the column number, + // before getting new data and losing the value of fCharIndex + if((count = fCharIndex - charIndex_start)!=0) { - // make sure one more char is in the buffer, the transcoder - // should put only a complete surrogate pair into the buffer - assert(fCharIndex+1 < fCharsAvail); - if ( (fXMLVersion == XMLV1_0) || - (fCharBuf[fCharIndex+1] < 0xDC00) || - (fCharBuf[fCharIndex+1] > 0xDFFF) ) { - break; - } - - fCharIndex += 2; - continue; + fCurCol += (XMLFileLoc)count; + toFill.append(&fCharBuf[charIndex_start], count); } - break; + if(!refreshCharBuffer()) + return true; + charIndex_start = fCharIndex; } - // we have to copy the accepted character(s), and update column - if (fCharIndex != charIndex_start) + // Check the current char and take it if it's a name char + if (fXMLVersion == XMLV1_1) { - fCurCol += (XMLFileLoc)(fCharIndex - charIndex_start); - toFill.append(&fCharBuf[charIndex_start], fCharIndex - charIndex_start); - } - - // something is wrong if there is still something in the buffer - // or if we don't get no more, then break out. - if (fCharIndex < fCharsAvail) { - if (fCharBuf[fCharIndex] != chColon) { - break; - } - - if (*colonPosition != -1) { - return false; + while(fCharIndex < fCharsAvail) + { + if(isNCNameChar(fCharBuf[fCharIndex])) fCharIndex++; + else if((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F) && ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))) fCharIndex+=2; } - - *colonPosition = (int)toFill.getLen(); - toFill.append(chColon); - fCharIndex++; - fCurCol++; - checkNextCharacterForFirstNCName = true; } + else + while(fCharIndex < fCharsAvail && isNCNameChar(fCharBuf[fCharIndex])) fCharIndex++; + // if we didn't consume the entire buffer, we are done + } while(fCharIndex == fCharsAvail); + + // we have to copy the accepted character(s), and update column + if((count = fCharIndex - charIndex_start)!=0) + { + fCurCol += (XMLFileLoc)count; + toFill.append(&fCharBuf[charIndex_start], count); } + return true; +} - if (checkNextCharacterForFirstNCName) { +bool XMLReader::getQName(XMLBuffer& toFill, int* colonPosition) +{ + // We are only looking for two iterations (i.e. 'NCNAME':'NCNAME'). + // We will stop when we finished scanning for a QName (i.e. either a second + // colon or an invalid char). + if(!getNCName(toFill)) + { + *colonPosition = -1; return false; } + if (fCharIndex == fCharsAvail && !refreshCharBuffer()) + { + *colonPosition = -1; + return true; + } + if (fCharBuf[fCharIndex] != chColon) + { + *colonPosition = -1; + return true; + } - return !toFill.isEmpty(); + *colonPosition = (int)toFill.getLen(); + toFill.append(chColon); + fCharIndex++; + fCurCol++; + return getNCName(toFill); } bool XMLReader::getSpaces(XMLBuffer& toFill) @@ -945,16 +937,12 @@ bool XMLReader::getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck) bool XMLReader::skipIfQuote(XMLCh& chGotten) { - if (fCharIndex == fCharsAvail) - { - if (!refreshCharBuffer()) - return false; - } + if (fCharIndex == fCharsAvail && !refreshCharBuffer()) + return false; - const XMLCh curCh = fCharBuf[fCharIndex]; - if ((curCh == chDoubleQuote) || (curCh == chSingleQuote)) + chGotten = fCharBuf[fCharIndex]; + if ((chGotten == chDoubleQuote) || (chGotten == chSingleQuote)) { - chGotten = curCh; fCharIndex++; fCurCol++; return true; @@ -965,14 +953,12 @@ bool XMLReader::skipIfQuote(XMLCh& chGotten) bool XMLReader::skipSpaces(bool& skippedSomething, bool inDecl) { - // Remember the current line and column - XMLFileLoc orgLine = fCurLine; - XMLFileLoc orgCol = fCurCol; + // DO NOT set the skippedSomething to 'false', but change it to be 'true' only // We enter a loop where we skip over spaces until we hit the end of // this reader or a non-space value. The return indicates whether we // hit the non-space (true) or the end (false). - while (true) + do { // Loop through the current chars in the buffer while (fCharIndex < fCharsAvail) @@ -983,7 +969,7 @@ bool XMLReader::skipSpaces(bool& skippedSomething, bool inDecl) { // Get the current char out of the buffer and eat it XMLCh curCh = fCharBuf[fCharIndex++]; - + skippedSomething = true; // // 'curCh' is a whitespace(x20|x9|xD|xA), so we only can have // end-of-line combinations with a leading chCR(xD) or chLF(xA) @@ -1006,24 +992,17 @@ bool XMLReader::skipSpaces(bool& skippedSomething, bool inDecl) { handleEOL(curCh, inDecl); } - } else - { - skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol); return true; - } } // We've eaten up the current buffer, so lets try to reload it. If // we don't get anything new, then break out. If we do, then we go // back to the top to keep getting spaces. - if (!refreshCharBuffer()) - break; - } + } while(refreshCharBuffer()); // We never hit any non-space and ate up the whole reader - skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol); return false; } diff --git a/src/xercesc/internal/XMLReader.hpp b/src/xercesc/internal/XMLReader.hpp index 3a17ab2a2f09c89c5ab926b2d436038fe6b6209b..315a4e3129f7d1931586cd0e60846417fdea3e56 100644 --- a/src/xercesc/internal/XMLReader.hpp +++ b/src/xercesc/internal/XMLReader.hpp @@ -172,6 +172,7 @@ public: // ----------------------------------------------------------------------- bool getName(XMLBuffer& toFill, const bool token); bool getQName(XMLBuffer& toFill, int* colonPosition); + bool getNCName(XMLBuffer& toFill); bool getNextChar(XMLCh& chGotten); bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten); void movePlainContentChars(XMLBuffer &dest); diff --git a/src/xercesc/internal/XMLScanner.cpp b/src/xercesc/internal/XMLScanner.cpp index 507671b5d9c9793f7ec63494b3aff2b4ca456c52..f770a313744af364e01e588a9e8db02f7f4f5f5d 100644 --- a/src/xercesc/internal/XMLScanner.cpp +++ b/src/xercesc/internal/XMLScanner.cpp @@ -1379,14 +1379,15 @@ void XMLScanner::scanXMLDecl(const DeclTypes type) while (true) { // Skip any spaces - const bool spaceCount = fReaderMgr.skipPastSpaces(true); + bool skippedSomething; + fReaderMgr.skipPastSpaces(skippedSomething, true); // If we are looking at a question mark, then break out if (fReaderMgr.lookingAtChar(chQuestion)) break; // If this is not the first string, then we require the spaces - if (!spaceCount && curCount) + if (!skippedSomething && curCount) emitError(XMLErrs::ExpectedWhitespace); // Get characters up to the next whitespace or equal's sign. @@ -2218,11 +2219,24 @@ void XMLScanner::scanComment() // just makes the calling code cleaner by eating whitespace. bool XMLScanner::scanEq(bool inDecl) { - fReaderMgr.skipPastSpaces(inDecl); - if (fReaderMgr.skippedChar(chEqual)) + if(inDecl) { - fReaderMgr.skipPastSpaces(inDecl); - return true; + bool skippedSomething; + fReaderMgr.skipPastSpaces(skippedSomething, inDecl); + if (fReaderMgr.skippedChar(chEqual)) + { + fReaderMgr.skipPastSpaces(skippedSomething, inDecl); + return true; + } + } + else + { + fReaderMgr.skipPastSpaces(); + if (fReaderMgr.skippedChar(chEqual)) + { + fReaderMgr.skipPastSpaces(); + return true; + } } return false; } @@ -2294,4 +2308,86 @@ void XMLScanner::recreateUIntPool() fUIntPool[1] = 0; } +unsigned int XMLScanner::resolvePrefix( const XMLCh* const prefix + , const ElemStack::MapModes mode) +{ + // + // If the prefix is empty, and we are in attribute mode, then we assign + // it to the empty namespace because the default namespace does not + // apply to attributes. + // + if (!*prefix) + { + if(mode == ElemStack::Mode_Attribute) + return fEmptyNamespaceId; + } + // Watch for the special namespace prefixes. We always map these to + // special URIs. 'xml' gets mapped to the official URI that its defined + // to map to by the NS spec. xmlns gets mapped to a special place holder + // URI that we define (so that it maps to something checkable.) + else + { + if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) + return fXMLNSNamespaceId; + else if (XMLString::equals(prefix, XMLUni::fgXMLString)) + return fXMLNamespaceId; + } + + // Ask the element stack to search up itself for a mapping for the + // passed prefix. + bool unknown; + unsigned int uriId = fElemStack.mapPrefixToURI(prefix, unknown); + + // If it was unknown, then the URI was faked in but we have to issue an error + if (unknown) + emitError(XMLErrs::UnknownPrefix, prefix); + + // check to see if uriId is empty; in XML 1.1 an emptynamespace is okay unless + // we are trying to use it. + if (*prefix && + mode == ElemStack::Mode_Element && + fXMLVersion != XMLReader::XMLV1_0 && + uriId == fElemStack.getEmptyNamespaceId()) + emitError(XMLErrs::UnknownPrefix, prefix); + + return uriId; +} + +unsigned int +XMLScanner::resolveQName( const XMLCh* const qName + , XMLBuffer& prefixBuf + , const ElemStack::MapModes mode + , int& prefixColonPos) +{ + prefixColonPos = XMLString::indexOf(qName, chColon); + return resolveQNameWithColon(qName, prefixBuf, mode, prefixColonPos); +} + +unsigned int +XMLScanner::resolveQNameWithColon( const XMLCh* const qName + , XMLBuffer& prefixBuf + , const ElemStack::MapModes mode + , const int prefixColonPos) +{ + // Lets split out the qName into a URI and name buffer first. The URI + // can be empty. + if (prefixColonPos == -1) + { + // Its all name with no prefix, so put the whole thing into the name + // buffer. Then map the empty string to a URI, since the empty string + // represents the default namespace. This will either return some + // explicit URI which the default namespace is mapped to, or the + // the default global namespace. + prefixBuf.reset(); + return resolvePrefix(XMLUni::fgZeroLenString, mode); + } + else + { + // Copy the chars up to but not including the colon into the prefix + // buffer. + prefixBuf.set(qName, prefixColonPos); + return resolvePrefix(prefixBuf.getRawBuffer(), mode); + } +} + XERCES_CPP_NAMESPACE_END diff --git a/src/xercesc/internal/XMLScanner.hpp b/src/xercesc/internal/XMLScanner.hpp index b8313a5c587804b56021e60745093fad24d4178d..0d61e85a6f318deaabd9840295ce669bfa6fe293 100644 --- a/src/xercesc/internal/XMLScanner.hpp +++ b/src/xercesc/internal/XMLScanner.hpp @@ -197,13 +197,6 @@ public : virtual const XMLCh* getName() const = 0; virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool() = 0; virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const = 0; - virtual unsigned int resolveQName - ( - const XMLCh* const qName - , XMLBuffer& prefixBufToFill - , const short mode - , int& prefixColonPos - ) = 0; virtual void scanDocument ( const InputSource& src @@ -446,6 +439,17 @@ public : , const bool toCache = false ); + // ----------------------------------------------------------------------- + // Helper methods + // ----------------------------------------------------------------------- + unsigned int resolveQName + ( + const XMLCh* const qName + , XMLBuffer& prefixBufToFill + , const ElemStack::MapModes mode + , int& prefixColonPos + ); + protected: // ----------------------------------------------------------------------- // Protected pure virtual methods @@ -490,6 +494,18 @@ protected: unsigned int *getNewUIntPtr(); void resetUIntPool(); void recreateUIntPool(); + unsigned int resolvePrefix + ( + const XMLCh* const prefix + , const ElemStack::MapModes mode + ); + unsigned int resolveQNameWithColon + ( + const XMLCh* const qName + , XMLBuffer& prefixBufToFill + , const ElemStack::MapModes mode + , const int prefixColonPos + ); inline void setAttrDupChkRegistry diff --git a/src/xercesc/parsers/AbstractDOMParser.cpp b/src/xercesc/parsers/AbstractDOMParser.cpp index ae4c1d7f2ec42473da03c24e715d11faf00dbe63..b339756f9ae3276b3461d5c51f3f8ef8f50cee1f 100644 --- a/src/xercesc/parsers/AbstractDOMParser.cpp +++ b/src/xercesc/parsers/AbstractDOMParser.cpp @@ -1010,9 +1010,10 @@ void AbstractDOMParser::startElement(const XMLElementDecl& elemDecl // unsigned int attrURIId = oneAttrib->getURIId(); const XMLCh* localName = oneAttrib->getName(); + const XMLCh* prefix = oneAttrib->getPrefix(); namespaceURI = 0; - if (XMLString::equals(localName, XMLUni::fgXMLNSString)) + if ((prefix==0 || *prefix==0) && XMLString::equals(localName, XMLUni::fgXMLNSString)) { // xmlns=... attrURIId = xmlnsNSId; @@ -1024,7 +1025,7 @@ void AbstractDOMParser::startElement(const XMLElementDecl& elemDecl } attr = (DOMAttrImpl*) createAttrNS (namespaceURI, - oneAttrib->getPrefix (), + prefix, localName, oneAttrib->getQName()); diff --git a/src/xercesc/parsers/SAX2XMLReaderImpl.cpp b/src/xercesc/parsers/SAX2XMLReaderImpl.cpp index d37b704cb12f5667e25859a9bb1d9ec7abfee736..696d802e3236f27fdc34608007c559b671e3c589 100644 --- a/src/xercesc/parsers/SAX2XMLReaderImpl.cpp +++ b/src/xercesc/parsers/SAX2XMLReaderImpl.cpp @@ -724,11 +724,18 @@ startElement( const XMLElementDecl& elemDecl const XMLCh* nsURI = 0; const XMLAttr* tempAttr = attrList.elementAt(i); - if (XMLString::equals(tempAttr->getQName(), XMLUni::fgXMLNSString)) - nsURI = tempAttr->getValue(); - if (XMLString::equals(tempAttr->getPrefix(), XMLUni::fgXMLNSString)) + const XMLCh* prefix = tempAttr->getPrefix(); + if(prefix && *prefix) + { + if(XMLString::equals(prefix, XMLUni::fgXMLNSString)) + { + nsPrefix = tempAttr->getName(); + nsURI = tempAttr->getValue(); + } + } + else if (XMLString::equals(tempAttr->getName(), XMLUni::fgXMLNSString)) { - nsPrefix = tempAttr->getName(); + nsPrefix = XMLUni::fgZeroLenString; nsURI = tempAttr->getValue(); } if (!fNamespacePrefix) @@ -738,8 +745,6 @@ startElement( const XMLElementDecl& elemDecl } if (nsURI != 0) { - if (nsPrefix == 0) - nsPrefix = XMLUni::fgZeroLenString; if(fDocHandler) fDocHandler->startPrefixMapping(nsPrefix, nsURI); unsigned int nPrefixId=fPrefixesStorage->addOrFind(nsPrefix); diff --git a/src/xercesc/util/QName.cpp b/src/xercesc/util/QName.cpp index 934d222347ea5a25567bdb37c3c9ea1ed6ad54db..7feff90672e795d87ff23d879fbd8559debf098a 100644 --- a/src/xercesc/util/QName.cpp +++ b/src/xercesc/util/QName.cpp @@ -308,7 +308,7 @@ void QName::setName(const XMLCh* const rawName else { // No colon, so we just have a name with no prefix - setPrefix(XMLUni::fgZeroLenString); + setNPrefix(XMLUni::fgZeroLenString, 0); // And clean up any QName and leave it undone until/if asked for again if (fRawName) @@ -321,23 +321,6 @@ void QName::setName(const XMLCh* const rawName fURIId = uriId; } -void QName::setPrefix(const XMLCh* prefix) -{ - if (!fPrefixBufSz || !XMLString::copyNString(fPrefix, prefix, fPrefixBufSz)) - { - XMLSize_t newLen = XMLString::stringLen(prefix); - fMemoryManager->deallocate(fPrefix); //delete [] fPrefix; - fPrefix = 0; - fPrefixBufSz = newLen + 8; - fPrefix = (XMLCh*) fMemoryManager->allocate - ( - (fPrefixBufSz + 1) * sizeof(XMLCh) - ); //new XMLCh[fPrefixBufSz + 1]; - XMLString::moveChars(fPrefix, prefix, newLen); - fPrefix[newLen] = chNull; - } -} - void QName::setNPrefix(const XMLCh* prefix, const XMLSize_t newLen) { if (!fPrefixBufSz || (newLen > fPrefixBufSz)) @@ -354,23 +337,6 @@ void QName::setNPrefix(const XMLCh* prefix, const XMLSize_t newLen) fPrefix[newLen] = chNull; } -void QName::setLocalPart(const XMLCh* localPart) -{ - if (!fLocalPartBufSz || !XMLString::copyNString(fLocalPart, localPart, fLocalPartBufSz)) - { - XMLSize_t newLen = XMLString::stringLen(localPart); - fMemoryManager->deallocate(fLocalPart); //delete [] fLocalPart; - fLocalPart = 0; - fLocalPartBufSz = newLen + 8; - fLocalPart = (XMLCh*) fMemoryManager->allocate - ( - (fLocalPartBufSz + 1) * sizeof(XMLCh) - ); //new XMLCh[fLocalPartBufSz + 1]; - XMLString::moveChars(fLocalPart, localPart, newLen); - fLocalPart[newLen] = chNull; - } -} - void QName::setNLocalPart(const XMLCh* localPart, const XMLSize_t newLen) { if (!fLocalPartBufSz || (newLen > fLocalPartBufSz)) diff --git a/src/xercesc/util/QName.hpp b/src/xercesc/util/QName.hpp index 303fbf288cb850cb18935a7212c1c528678a9434..2b3468fe5c50897eb3f9a6e3afb5b912a9619eac 100644 --- a/src/xercesc/util/QName.hpp +++ b/src/xercesc/util/QName.hpp @@ -202,6 +202,16 @@ inline void QName::setURI(const unsigned int uriId) fURIId = uriId; } +inline void QName::setPrefix(const XMLCh* prefix) +{ + setNPrefix(prefix, XMLString::stringLen(prefix)); +} + +inline void QName::setLocalPart(const XMLCh* localPart) +{ + setNLocalPart(localPart, XMLString::stringLen(localPart)); +} + XERCES_CPP_NAMESPACE_END #endif diff --git a/src/xercesc/util/XMLString.cpp b/src/xercesc/util/XMLString.cpp index 0f589a5192f8f6c1d12158718aa6d230f063dffc..d6425a2bdff6f41ed4ae4b01e3c90d0913ed17cd 100644 --- a/src/xercesc/util/XMLString.cpp +++ b/src/xercesc/util/XMLString.cpp @@ -1338,7 +1338,7 @@ void XMLString::copyString(XMLCh* const target, const XMLCh* const src) while (*pszIn) *pszOut++ = *pszIn++; - // Capp off the target where we ended + // Cap off the target where we ended *pszOut = 0; } @@ -1347,18 +1347,16 @@ bool XMLString::copyNString( XMLCh* const target , const XMLCh* const src , const XMLSize_t maxChars) { - XMLCh* outPtr = target; - const XMLCh* srcPtr = src; - const XMLCh* endPtr = target + maxChars - 1; - - while (*srcPtr && (outPtr <= endPtr)) - *outPtr++ = *srcPtr++; - - // Cap it off here - *outPtr = 0; - // Return whether we copied it all or hit the max - return (*srcPtr == 0); + XMLSize_t len = stringLen(src); + if(len > maxChars) + { + XMLString::moveChars(target, src, maxChars); + target[maxChars] = 0; + return false; + } + XMLString::moveChars(target, src, len+1); + return true; } const XMLCh* XMLString::findAny(const XMLCh* const toSearch @@ -1435,17 +1433,12 @@ int XMLString::patternMatch( const XMLCh* const toSearch int XMLString::indexOf(const XMLCh* const toSearch, const XMLCh ch) { - if (toSearch) - { - const XMLCh* srcPtr = toSearch; - while (*srcPtr) - { - if (ch == *srcPtr) - return (int)(srcPtr - toSearch); + if (!toSearch || !*toSearch) return -1; - srcPtr++; - } - } + const XMLCh* srcPtr = toSearch; + while (*srcPtr) + if (ch == *srcPtr++) + return (int)(srcPtr - toSearch - 1); return -1; } @@ -1458,14 +1451,13 @@ int XMLString::indexOf( const XMLCh* const toSearch const XMLSize_t len = stringLen(toSearch); // Make sure the start index is within the XMLString bounds - if ((int)fromIndex > ((int)len)-1) + if (fromIndex >= len) ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Str_StartIndexPastEnd, manager); - for (XMLSize_t i = fromIndex; i < len; i++) - { - if (toSearch[i] == ch) - return (int)i; - } + const XMLCh* srcPtr = toSearch+fromIndex; + while (*srcPtr) + if (ch == *srcPtr++) + return (int)(srcPtr - toSearch - 1); return -1; } @@ -1473,11 +1465,10 @@ int XMLString::lastIndexOf(const XMLCh ch, const XMLCh* const toSearch, const XMLSize_t toSearchLen) { - for (int i = (int)toSearchLen-1; i >= 0; i--) - { - if (toSearch[i] == ch) - return i; - } + const XMLCh* srcPtr = toSearch+toSearchLen; + while (srcPtr >= toSearch) + if (ch == *srcPtr--) + return (int)(srcPtr + 1 - toSearch); return -1; } @@ -1486,15 +1477,14 @@ int XMLString::lastIndexOf( const XMLCh* const toSearch , const XMLSize_t fromIndex , MemoryManager* const manager) { - const int len = (int)stringLen(toSearch); - if ((int)fromIndex > len-1) + const XMLSize_t len = stringLen(toSearch); + if (fromIndex >= len) ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Str_StartIndexPastEnd, manager); - for (int i = (int)fromIndex; i >= 0; i--) - { - if (toSearch[i] == ch) - return i; - } + const XMLCh* srcPtr = toSearch+fromIndex; + while (srcPtr >= toSearch) + if (ch == *srcPtr--) + return (int)(srcPtr + 1 - toSearch); return -1; } diff --git a/src/xercesc/util/XMLString.hpp b/src/xercesc/util/XMLString.hpp index 852ee8b16f0980fbcfc2ac0e94b5880783940d70..912f096024e19be20dd1ac466f6bf890a4f508d3 100644 --- a/src/xercesc/util/XMLString.hpp +++ b/src/xercesc/util/XMLString.hpp @@ -432,9 +432,9 @@ public: */ static bool copyNString ( - XMLCh* const target - , const XMLCh* const src - , const XMLSize_t maxChars + XMLCh* const target + , const XMLCh* const src + , const XMLSize_t maxChars ); //@} diff --git a/src/xercesc/validators/DTD/DTDScanner.cpp b/src/xercesc/validators/DTD/DTDScanner.cpp index 5ab9519e438a007ce5f3bf10c62036af8abb91ea..edb81e5b03bf4bf7c342c8f009a7c82eacf64199 100644 --- a/src/xercesc/validators/DTD/DTDScanner.cpp +++ b/src/xercesc/validators/DTD/DTDScanner.cpp @@ -2705,7 +2705,9 @@ bool DTDScanner::scanId( XMLBuffer& pubIdToFill } // We must skip spaces - if (!fReaderMgr->skipPastSpaces()) + bool skippedSomething; + fReaderMgr->skipPastSpaces(skippedSomething); + if (!skippedSomething) { fScanner->emitError(XMLErrs::ExpectedWhitespace); return false; @@ -2730,7 +2732,9 @@ bool DTDScanner::scanId( XMLBuffer& pubIdToFill // So following this we must have whitespace, a public literal, whitespace, // and a system literal. // - if (!fReaderMgr->skipPastSpaces()) + bool skippedSomething; + fReaderMgr->skipPastSpaces(skippedSomething); + if (!skippedSomething) { fScanner->emitError(XMLErrs::ExpectedWhitespace); @@ -2751,7 +2755,8 @@ bool DTDScanner::scanId( XMLBuffer& pubIdToFill return true; // check if there is any space follows - bool hasSpace = fReaderMgr->skipPastSpaces(); + bool hasSpace; + fReaderMgr->skipPastSpaces(hasSpace); // // In order to recover best here we need to see if diff --git a/src/xercesc/validators/common/CMStateSet.hpp b/src/xercesc/validators/common/CMStateSet.hpp index e55da3032cba2ffeb0a0152844cac05e5bfc4b3e..ff053cef33a8753a54fe6324a4643287e808f5d1 100644 --- a/src/xercesc/validators/common/CMStateSet.hpp +++ b/src/xercesc/validators/common/CMStateSet.hpp @@ -45,6 +45,7 @@ XERCES_CPP_NAMESPACE_BEGIN class CMStateSetEnumerator; +// This value must be 4 in order to use the SSE2 instruction set #define CMSTATE_CACHED_INT32_SIZE 4 // This value must be a multiple of 128 in order to use the SSE2 instruction set @@ -161,12 +162,24 @@ public : { if(fDynamicBuffer==0) { - for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE; index++) - if(setToOr.fBits[index]) - if(fBits[index]) - fBits[index] |= setToOr.fBits[index]; - else - fBits[index] = setToOr.fBits[index]; +#ifdef XERCES_HAVE_SSE2_INTRINSIC + if(XMLPlatformUtils::fgSSE2ok) + { + __m128i xmm1 = _mm_loadu_si128((__m128i*)fBits); + __m128i xmm2 = _mm_loadu_si128((__m128i*)setToOr.fBits); + __m128i xmm3 = _mm_or_si128(xmm1, xmm2); // OR 4 32-bit words + _mm_storeu_si128((__m128i*)fBits, xmm3); + } + else +#endif + { + for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE; index++) + if(setToOr.fBits[index]) + if(fBits[index]) + fBits[index] |= setToOr.fBits[index]; + else + fBits[index] = setToOr.fBits[index]; + } } else { diff --git a/tests/src/DOM/DOMTest/DTest.cpp b/tests/src/DOM/DOMTest/DTest.cpp index d3b42f427e385d5be438c0ce024c69e9e3e5f6a6..f09b0b7d2155545af81b504cffb757e23ff53d3a 100644 --- a/tests/src/DOM/DOMTest/DTest.cpp +++ b/tests/src/DOM/DOMTest/DTest.cpp @@ -5321,6 +5321,7 @@ bool DOMTest::testRegex() { TEST_VALID_SCHEMA_REGEX("abbbbx", "ab{2,4}x", __LINE__); TEST_INVALID_SCHEMA_REGEX("abx", "ab{2,4}x", __LINE__); TEST_INVALID_SCHEMA_REGEX("abbbbbx", "ab{2,4}x", __LINE__); + TEST_VALID_SCHEMA_REGEX("PAG_1", "PAG_[0-9]{1,}", __LINE__); TEST_VALID_SCHEMA_REGEX("5 Bedford Street Boston , MA 15604-1536", "\\d{1,5}\\s([A-Z][a-z]{1,20}\\s){1}Street\\s([A-Z][a-z]{1,20}\\s){1},\\s[A-Z]{2}\\s15604-1536", __LINE__); @@ -5586,6 +5587,7 @@ bool DOMTest::testUtilFunctions() XMLString::collapseWS(tempStr); TEST_STRING(tempStr, tempStr2); + // test removeWS XMLString::transcode("xyz", tempStr2, 3999); XMLString::transcode(" x\tyz ", tempStr, 3999); XMLString::removeWS(tempStr); @@ -5632,6 +5634,76 @@ bool DOMTest::testUtilFunctions() OK = false; } + // test copyNString + XMLCh buffer[100]; + XMLString::transcode("xyz", tempStr, 3999); + if(!XMLString::copyNString(buffer, tempStr, 100)) + { + fprintf(stderr, "copyNString test failed at line %i\n", __LINE__); + OK = false; + } + if(!XMLString::copyNString(buffer, tempStr, 3)) + { + fprintf(stderr, "copyNString test failed at line %i\n", __LINE__); + OK = false; + } + if(XMLString::copyNString(buffer, tempStr, 2)) + { + fprintf(stderr, "copyNString test failed at line %i\n", __LINE__); + OK = false; + } + if(!XMLString::copyNString(buffer, tempStr, 4)) + { + fprintf(stderr, "copyNString test failed at line %i\n", __LINE__); + OK = false; + } + + // test indexOf + XMLString::transcode("1234567890", tempStr, 3999); + if(XMLString::indexOf(tempStr, '1')!=0) + { + fprintf(stderr, "indexOf test failed at line %i\n", __LINE__); + OK = false; + } + if(XMLString::indexOf(tempStr, '5')!=4) + { + fprintf(stderr, "indexOf test failed at line %i\n", __LINE__); + OK = false; + } + if(XMLString::indexOf(tempStr, '0')!=9) + { + fprintf(stderr, "indexOf test failed at line %i\n", __LINE__); + OK = false; + } + if(XMLString::indexOf(tempStr, 'A')!=-1) + { + fprintf(stderr, "indexOf test failed at line %i\n", __LINE__); + OK = false; + } + + // test lastIndexOf + XMLString::transcode("1234567890", tempStr, 3999); + if(XMLString::lastIndexOf(tempStr, '1')!=0) + { + fprintf(stderr, "lastIndexOf test failed at line %i\n", __LINE__); + OK = false; + } + if(XMLString::lastIndexOf(tempStr, '5')!=4) + { + fprintf(stderr, "lastIndexOf test failed at line %i\n", __LINE__); + OK = false; + } + if(XMLString::lastIndexOf(tempStr, '0')!=9) + { + fprintf(stderr, "lastIndexOf test failed at line %i\n", __LINE__); + OK = false; + } + if(XMLString::lastIndexOf(tempStr, 'A')!=-1) + { + fprintf(stderr, "lastIndexOf test failed at line %i\n", __LINE__); + OK = false; + } + // this tests the cached bit storage CMStateSet setT(60); setT.setBit(8);