diff --git a/src/internal/ReaderMgr.hpp b/src/internal/ReaderMgr.hpp index 617a03ec3c22c21ec81d177d1ee7b4eba2e9a883..18ca8227923c60be717c4662056b701797af4f4a 100644 --- a/src/internal/ReaderMgr.hpp +++ b/src/internal/ReaderMgr.hpp @@ -1,37 +1,37 @@ /* * The Apache Software License, Version 1.1 - * + * * Copyright (c) 1999-2000 The Apache Software Foundation. All rights * reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * + * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * + * notice, this list of conditions and the following disclaimer. + * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * + * * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: + * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. - * + * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this - * software without prior written permission. For written + * software without prior written permission. For written * permission, please contact apache\@apache.org. - * + * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE @@ -45,7 +45,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== - * + * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation, and was * originally based on software copyright (c) 1999, International @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.13 2001/07/12 18:50:08 tng + * Some performance modification regarding standalone check and xml decl check. + * * Revision 1.12 2000/09/09 00:18:18 andyh * Reordered member variables in ThrowEOEJanitor. Patch submitted * by Kirk Wylie. @@ -187,6 +190,7 @@ public : void skipQuotedString(const XMLCh quoteCh); XMLCh skipUntilIn(const XMLCh* const listToSkip); XMLCh skipUntilInOrWS(const XMLCh* const listToSkip); + bool peekString(const XMLCh* const toPeek); // ----------------------------------------------------------------------- @@ -405,6 +409,11 @@ inline void ReaderMgr::skipPastChar(const XMLCh toSkipPast) } } +inline bool ReaderMgr::peekString(const XMLCh* const toPeek) +{ + return fCurReader->peekString(toPeek); +} + inline void ReaderMgr::setEntityHandler(XMLEntityHandler* const newHandler) { fEntityHandler = newHandler; diff --git a/src/internal/XMLReader.cpp b/src/internal/XMLReader.cpp index 1ea28b2b6404661df7be5ab6dc8e21ab866f94f3..7862f3a440297ae081d285b2190de5f94cc63436 100644 --- a/src/internal/XMLReader.cpp +++ b/src/internal/XMLReader.cpp @@ -1136,6 +1136,50 @@ bool XMLReader::skippedString(const XMLCh* const toSkip) return true; } +// +// This is just to peek if the next coming buffer +// matches the string toPeek. +// Similar to skippedString, but just the fCharIndex and fCurCol are not updated +// +bool XMLReader::peekString(const XMLCh* const toPeek) +{ + // Get the length of the string to skip + const unsigned int srcLen = XMLString::stringLen(toPeek); + + // + // See if the current reader has enough chars to test against this + // string. If not, then ask it to reload its buffer. If that does not + // get us enough, then it cannot match. + // + // NOTE: This works because strings never have to cross a reader! And + // a string to skip will never have a new line in it, so we will never + // miss adjusting the current line. + // + unsigned int charsLeft = charsLeftInBuffer(); + while (charsLeft < srcLen) + { + refreshCharBuffer(); + unsigned int t = charsLeftInBuffer(); + if (t == charsLeft) // if the refreshCharBuf() did not add anything new + return false; // give up and return. + charsLeft = t; + } + + + + + // + // Ok, now we now that the current reader has enough chars in its + // buffer and that its index is back at zero. So we can do a quick and + // dirty comparison straight to its buffer with no requirement to unget + // if it fails. + // + if (XMLString::compareNString(&fCharBuf[fCharIndex], toPeek, srcLen)) + return false; + + return true; +} + // --------------------------------------------------------------------------- // XMLReader: Setter methods (most are inlined) diff --git a/src/internal/XMLReader.hpp b/src/internal/XMLReader.hpp index eb93d74fb03f313e3385c0214e810ea89e39b02e..c967435ce7dab6d7ff924de63886f49b30871d34 100644 --- a/src/internal/XMLReader.hpp +++ b/src/internal/XMLReader.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.17 2001/07/12 18:50:13 tng + * Some performance modification regarding standalone check and xml decl check. + * * Revision 1.16 2001/05/11 13:26:17 tng * Copyright update. * @@ -273,6 +276,7 @@ public: bool skippedChar(const XMLCh toSkip); bool skippedSpace(); bool skippedString(const XMLCh* const toSkip); + bool peekString(const XMLCh* const toPeek); // ----------------------------------------------------------------------- diff --git a/src/internal/XMLScanner.cpp b/src/internal/XMLScanner.cpp index a9e47827f64d8a149671792fc030a6dbce532e4f..4f57c94d11f69cf31d1863fdf9c963055ded5c80 100644 --- a/src/internal/XMLScanner.cpp +++ b/src/internal/XMLScanner.cpp @@ -2377,7 +2377,7 @@ bool XMLScanner::scanStartTag(bool& gotData) // char refs expanded. // fReaderMgr.skipPastSpaces(); - if (!scanAttValue(attDef->getFullName(), fAttValueBuf, attDef->getType())) + if (!scanAttValue(attDef, fAttValueBuf)) { static const XMLCh tmpList[] = { @@ -2512,7 +2512,8 @@ bool XMLScanner::scanStartTag(bool& gotData) // top again. // emitError(XMLErrs::ExpectedAttrName); - scanAttValue(XMLUni::fgZeroLenString, fAttValueBuf, XMLAttDef::CData); + fReaderMgr.getNextChar(); + fReaderMgr.skipQuotedString(nextCh); fReaderMgr.skipPastSpaces(); continue; } @@ -3420,58 +3421,3 @@ XMLScanner::resolveQName( const XMLCh* const qName return uriId; } -bool XMLScanner::checkXMLDecl(bool startWithAngle) { - // - // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' - // [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') - // - // [3] S ::= (#x20 | #x9 | #xD | #xA)+ - // - - if (startWithAngle) { - if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpace) - || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTab) - || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLF) - || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCR)) - { - return true; - } - else if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpaceU) - || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTabU) - || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLFU) - || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCRU)) - { - // - // Just in case, check for upper case. If found, issue - // an error, but keep going. - // - emitError(XMLErrs::XMLDeclMustBeLowerCase); - return true; - } - } - else { - if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpace) - || fReaderMgr.skippedString(XMLUni::fgXMLStringHTab) - || fReaderMgr.skippedString(XMLUni::fgXMLStringLF) - || fReaderMgr.skippedString(XMLUni::fgXMLStringCR)) - { - return true; - } - else if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpaceU) - || fReaderMgr.skippedString(XMLUni::fgXMLStringHTabU) - || fReaderMgr.skippedString(XMLUni::fgXMLStringLFU) - || fReaderMgr.skippedString(XMLUni::fgXMLStringCRU)) - { - // - // Just in case, check for upper case. If found, issue - // an error, but keep going. - // - emitError(XMLErrs::XMLDeclMustBeLowerCase); - return true; - } - } - - return false; -} - - diff --git a/src/internal/XMLScanner.hpp b/src/internal/XMLScanner.hpp index 06befa032ce58b422c1e5cead054291c4b969b4c..bef1130e9e676e381f55e7f7fd865e2bfddabcad 100644 --- a/src/internal/XMLScanner.hpp +++ b/src/internal/XMLScanner.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.26 2001/07/12 18:50:17 tng + * Some performance modification regarding standalone check and xml decl check. + * * Revision 1.25 2001/07/10 21:09:31 tng * Give proper error messsage when scanning external id. * @@ -519,10 +522,15 @@ private : void checkIDRefs(); bool isLegalToken(const XMLPScanToken& toCheck); bool normalizeAttValue + ( + const XMLAttDef* const attDef + , const XMLCh* const value + , XMLBuffer& toFill + ); + bool normalizeAttRawValue ( const XMLCh* const attrName , const XMLCh* const value - , const XMLAttDef::AttTypes type , XMLBuffer& toFill ); @@ -569,9 +577,8 @@ private : ); bool scanAttValue ( - const XMLCh* const attrName + const XMLAttDef* const attDef , XMLBuffer& toFill - , const XMLAttDef::AttTypes type ); void scanCDSection(); void scanCharData(XMLBuffer& toToUse); @@ -1116,4 +1123,62 @@ inline void XMLScanner::setDoValidation(const bool validate) else fValScheme = Val_Never; } + +inline bool XMLScanner::checkXMLDecl(bool startWithAngle) { + // + // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' + // [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') + // + // [3] S ::= (#x20 | #x9 | #xD | #xA)+ + // + + if (startWithAngle) { + if (fReaderMgr.peekString(XMLUni::fgXMLDeclString)) { + if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpace) + || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTab) + || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLF) + || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCR)) + { + return true; + } + else if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpaceU) + || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTabU) + || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLFU) + || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCRU)) + { + // + // Just in case, check for upper case. If found, issue + // an error, but keep going. + // + emitError(XMLErrs::XMLDeclMustBeLowerCase); + return true; + } + } + } + else { + if (fReaderMgr.peekString(XMLUni::fgXMLString)) { + if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpace) + || fReaderMgr.skippedString(XMLUni::fgXMLStringHTab) + || fReaderMgr.skippedString(XMLUni::fgXMLStringLF) + || fReaderMgr.skippedString(XMLUni::fgXMLStringCR)) + { + return true; + } + else if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpaceU) + || fReaderMgr.skippedString(XMLUni::fgXMLStringHTabU) + || fReaderMgr.skippedString(XMLUni::fgXMLStringLFU) + || fReaderMgr.skippedString(XMLUni::fgXMLStringCRU)) + { + // + // Just in case, check for upper case. If found, issue + // an error, but keep going. + // + emitError(XMLErrs::XMLDeclMustBeLowerCase); + return true; + } + } + } + + return false; +} #endif diff --git a/src/internal/XMLScanner2.cpp b/src/internal/XMLScanner2.cpp index 285df72a24ab927934716c52b8ca8491eeec8014..157726b480ed8684fd8c1dcdafbd6faeed6dbc5f 100644 --- a/src/internal/XMLScanner2.cpp +++ b/src/internal/XMLScanner2.cpp @@ -313,9 +313,8 @@ XMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs // normalizeAttValue ( - curPair->getKey() + attDef , curPair->getValue() - , attDef->getType() , normBuf ); @@ -350,11 +349,10 @@ XMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs { // Just normalize as CDATA attType = XMLAttDef::CData; - normalizeAttValue + normalizeAttRawValue ( curPair->getKey() , curPair->getValue() - , XMLAttDef::CData , normBuf ); } @@ -546,9 +544,8 @@ bool XMLScanner::isLegalToken(const XMLPScanToken& toCheck) // are legal if escaped only. And some escape chars are not subject to // normalization rules. // -bool XMLScanner::normalizeAttValue( const XMLCh* const attrName +bool XMLScanner::normalizeAttValue( const XMLAttDef* const attDef , const XMLCh* const value - , const XMLAttDef::AttTypes type , XMLBuffer& toFill) { // A simple state value for a whitespace processing state machine @@ -558,6 +555,10 @@ bool XMLScanner::normalizeAttValue( const XMLCh* const attrName , InContent }; + // Get the type and name + const XMLAttDef::AttTypes type = attDef->getType(); + const XMLCh* const attrName = attDef->getFullName(); + // Assume its going to go fine, and empty the target buffer in preperation bool retVal = true; toFill.reset(); @@ -565,13 +566,7 @@ bool XMLScanner::normalizeAttValue( const XMLCh* const attrName // // Get attribute def - to check to see if it's declared externally or not // - bool added = false; - bool isAttExternal = false; - const ElemStack::StackElem* topElem = fElemStack.topElement(); - if (topElem && topElem->fThisElement) { - const XMLAttDef* attDef = topElem->fThisElement->findAttr(attrName, 0, 0, 0, XMLElementDecl::FailIfNotFound, added); - isAttExternal = (attDef) ? attDef->isExternal() : false; - } + bool isAttExternal = attDef->isExternal(); // // Loop through the chars of the source value and normalize it according @@ -639,18 +634,6 @@ bool XMLScanner::normalizeAttValue( const XMLCh* const attrName } else { - // - // Check Validity Constraint for Standalone document declaration - // XML 1.0, Section 2.9 - // - if (fStandalone && fValidate && isAttExternal) - { - // - // Can't have a standalone document declaration of "yes" if attribute - // values are subject to normalisation - // - fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); - } srcPtr++; continue; } @@ -661,13 +644,13 @@ bool XMLScanner::normalizeAttValue( const XMLCh* const attrName { curState = InWhitespace; srcPtr++; - if (!firstNonWS || (nextCh != chSpace)) + // + // Check Validity Constraint for Standalone document declaration + // XML 1.0, Section 2.9 + // + if (fStandalone && fValidate && isAttExternal) { - // - // Check Validity Constraint for Standalone document declaration - // XML 1.0, Section 2.9 - // - if (fStandalone && fValidate && isAttExternal) + if (!firstNonWS || (nextCh != chSpace) || (fReaderMgr.lookingAtSpace())) { // // Can't have a standalone document declaration of "yes" if attribute @@ -691,6 +674,76 @@ bool XMLScanner::normalizeAttValue( const XMLCh* const attrName return retVal; } +// +// This method will just normalize the input value as CDATA without +// any standalone checking. +// +bool XMLScanner::normalizeAttRawValue( const XMLCh* const attrName + , const XMLCh* const value + , XMLBuffer& toFill) +{ + // A simple state value for a whitespace processing state machine + enum States + { + InWhitespace + , InContent + }; + + // Assume its going to go fine, and empty the target buffer in preperation + bool retVal = true; + toFill.reset(); + + // + // Loop through the chars of the source value and normalize it according + // to the type. + // + States curState = InContent; + bool escaped; + bool firstNonWS = false; + XMLCh nextCh; + const XMLCh* srcPtr = value; + while (*srcPtr) + { + // + // Get the next character from the source. We have to watch for + // escaped characters (which are indicated by a 0xFFFF value followed + // by the char that was escaped.) + // + nextCh = *srcPtr; + escaped = (nextCh == 0xFFFF); + if (escaped) + nextCh = *++srcPtr; + + // + // If its not escaped, then make sure its not a < character, which is + // not allowed in attribute values. + // + if (!escaped && (*srcPtr == chOpenAngle)) + { + emitError(XMLErrs::BracketInAttrValue, attrName); + retVal = false; + } + + if (!escaped) + { + // + // NOTE: Yes this is a little redundant in that a 0x20 is + // replaced with an 0x20. But its faster to do this (I think) + // than checking for 9, A, and D separately. + // + if (XMLReader::isWhitespace(nextCh)) + nextCh = chSpace; + } + + // Add this char to the target buffer + toFill.append(nextCh); + + // And move up to the next character in the source + srcPtr++; + } + return retVal; +} + unsigned int XMLScanner::resolvePrefix( const XMLCh* const prefix , const ElemStack::MapModes mode) @@ -1096,7 +1149,7 @@ void XMLScanner::updateNSMap(const XMLCh* const attrName // care about the return value. An error was issued for the error, which // is all we care about here. // - normalizeAttValue(attrName, attrValue, XMLAttDef::CData, normalBuf); + normalizeAttRawValue(attrName, attrValue, normalBuf); // // Ok, we have to get the unique id for the attribute value, which is the @@ -1566,9 +1619,8 @@ bool XMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFi } -bool XMLScanner::scanAttValue( const XMLCh* const attrName - , XMLBuffer& toFill - , const XMLAttDef::AttTypes type) +bool XMLScanner::scanAttValue( const XMLAttDef* const attDef + , XMLBuffer& toFill) { enum States { @@ -1576,6 +1628,9 @@ bool XMLScanner::scanAttValue( const XMLCh* const attrName , InContent }; + // Get the type and name + const XMLAttDef::AttTypes type = attDef->getType(); + const XMLCh* const attrName = attDef->getFullName(); // Reset the target buffer toFill.reset(); @@ -1594,13 +1649,7 @@ bool XMLScanner::scanAttValue( const XMLCh* const attrName // // Get attribute def - to check to see if it's declared externally or not // - bool added = false; - bool isAttExternal = false; - const ElemStack::StackElem* topElem = fElemStack.topElement(); - if (topElem && topElem->fThisElement) { - const XMLAttDef* attDef = topElem->fThisElement->findAttr(attrName, 0, 0, 0, XMLElementDecl::FailIfNotFound, added); - isAttExternal = (attDef) ? attDef->isExternal() : false; - } + bool isAttExternal = attDef->isExternal(); // // Loop until we get the attribute value. Note that we use a double @@ -1765,18 +1814,6 @@ bool XMLScanner::scanAttValue( const XMLCh* const attrName } else { - // - // Check Validity Constraint for Standalone document declaration - // XML 1.0, Section 2.9 - // - if (fStandalone && fValidate && isAttExternal) - { - // - // Can't have a standalone document declaration of "yes" if attribute - // values are subject to normalisation - // - fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); - } continue; } } @@ -1785,13 +1822,13 @@ bool XMLScanner::scanAttValue( const XMLCh* const attrName if (XMLReader::isWhitespace(nextCh)) { curState = InWhitespace; - if (!firstNonWS || (nextCh != chSpace)) + // + // Check Validity Constraint for Standalone document declaration + // XML 1.0, Section 2.9 + // + if (fStandalone && fValidate && isAttExternal) { - // - // Check Validity Constraint for Standalone document declaration - // XML 1.0, Section 2.9 - // - if (fStandalone && fValidate && isAttExternal) + if (!firstNonWS || (nextCh != chSpace) || (fReaderMgr.lookingAtSpace())) { // // Can't have a standalone document declaration of "yes" if attribute diff --git a/src/util/XMLUni.cpp b/src/util/XMLUni.cpp index a9190c2a0a68cd74b40a1ddd4df8e448853df3d1..724c8c2d07be0ad9eaa0b6cf0f45bd644f02e35c 100644 --- a/src/util/XMLUni.cpp +++ b/src/util/XMLUni.cpp @@ -509,6 +509,11 @@ const XMLCh XMLUni::fgXMLStringLFU[] = }; //<?xml{S} +const XMLCh XMLUni::fgXMLDeclString[] = +{ + chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l, chNull +}; + const XMLCh XMLUni::fgXMLDeclStringSpace[] = { chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l, chSpace, chNull diff --git a/src/util/XMLUni.hpp b/src/util/XMLUni.hpp index 02f8b7edc904719a7e12bf216410fa990f1ad00f..88c4a91f6354f504a867a2fc0ae47456f0b56358 100644 --- a/src/util/XMLUni.hpp +++ b/src/util/XMLUni.hpp @@ -164,6 +164,7 @@ public : static const XMLCh fgXMLStringHTabU[]; static const XMLCh fgXMLStringCRU[]; static const XMLCh fgXMLStringLFU[]; + static const XMLCh fgXMLDeclString[]; static const XMLCh fgXMLDeclStringSpace[]; static const XMLCh fgXMLDeclStringHTab[]; static const XMLCh fgXMLDeclStringLF[];