From d9a5ff7c0acbd93837f9872f978bba34fa4494c8 Mon Sep 17 00:00:00 2001 From: Khaled Noaman <knoaman@apache.org> Date: Thu, 5 Dec 2002 16:19:27 +0000 Subject: [PATCH] Initial check-in. git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@174464 13f79535-47bb-0310-9956-ffa450edef68 --- src/xercesc/internal/SGXMLScanner.cpp | 3949 +++++++++++++++++++++++++ src/xercesc/internal/SGXMLScanner.hpp | 306 ++ 2 files changed, 4255 insertions(+) create mode 100644 src/xercesc/internal/SGXMLScanner.cpp create mode 100644 src/xercesc/internal/SGXMLScanner.hpp diff --git a/src/xercesc/internal/SGXMLScanner.cpp b/src/xercesc/internal/SGXMLScanner.cpp new file mode 100644 index 000000000..102e916cf --- /dev/null +++ b/src/xercesc/internal/SGXMLScanner.cpp @@ -0,0 +1,3949 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. 
For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + +/* + * $Id$ + */ + + +// --------------------------------------------------------------------------- +// Includes +// --------------------------------------------------------------------------- +#include <xercesc/internal/SGXMLScanner.hpp> +#include <xercesc/util/RuntimeException.hpp> +#include <xercesc/util/UnexpectedEOFException.hpp> +#include <xercesc/framework/LocalFileInputSource.hpp> +#include <xercesc/framework/URLInputSource.hpp> +#include <xercesc/framework/XMLDocumentHandler.hpp> +#include <xercesc/framework/XMLEntityHandler.hpp> +#include <xercesc/framework/XMLPScanToken.hpp> +#include <xercesc/internal/EndOfEntityException.hpp> +#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp> +#include <xercesc/validators/schema/SchemaValidator.hpp> +#include <xercesc/validators/schema/TraverseSchema.hpp> +#include <xercesc/validators/schema/XSDDOMParser.hpp> +#include <xercesc/validators/schema/SubstitutionGroupComparator.hpp> +#include <xercesc/validators/schema/identity/FieldActivator.hpp> +#include <xercesc/validators/schema/identity/XPathMatcherStack.hpp> +#include <xercesc/validators/schema/identity/ValueStoreCache.hpp> +#include <xercesc/validators/schema/identity/IC_Selector.hpp> +#include <xercesc/validators/schema/identity/ValueStore.hpp> + +XERCES_CPP_NAMESPACE_BEGIN + +// --------------------------------------------------------------------------- +// SGXMLScanner: Constructors and Destructor +// --------------------------------------------------------------------------- +SGXMLScanner::SGXMLScanner(XMLValidator* const valToAdopt) : + + XMLScanner(valToAdopt) + , fSeeXsi(false) + , fElemStateSize(16) + , fElemState(0) + , fEntityTable(0) + , fRawAttrList(0) + , fSchemaValidator(0) + , fMatcherStack(0) + , fValueStoreCache(0) + , fFieldActivator(0) +{ + try + { + commonInit(); + + if (valToAdopt) + { + if (!valToAdopt->handlesSchema()) + ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator); + } + else + { + fValidator = 
fSchemaValidator; + } + } + catch(...) + { + cleanUp(); + throw; + } +} + +SGXMLScanner::SGXMLScanner( XMLDocumentHandler* const docHandler + , DocTypeHandler* const docTypeHandler + , XMLEntityHandler* const entityHandler + , XMLErrorReporter* const errHandler + , XMLValidator* const valToAdopt) : + + XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt) + , fSeeXsi(false) + , fElemStateSize(16) + , fElemState(0) + , fEntityTable(0) + , fRawAttrList(0) + , fSchemaValidator(0) + , fMatcherStack(0) + , fValueStoreCache(0) + , fFieldActivator(0) +{ + try + { + commonInit(); + + if (valToAdopt) + { + if (!valToAdopt->handlesSchema()) + ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator); + } + else + { + fValidator = fSchemaValidator; + } + } + catch(...) + { + cleanUp(); + throw; + } +} + +SGXMLScanner::~SGXMLScanner() +{ + cleanUp(); +} + +// --------------------------------------------------------------------------- +// XMLScanner: Getter methods +// --------------------------------------------------------------------------- +NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() +{ + return 0; +} + +const NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() const +{ + return 0; +} + +// --------------------------------------------------------------------------- +// SGXMLScanner: Main entry point to scan a document +// --------------------------------------------------------------------------- +void SGXMLScanner::scanDocument(const InputSource& src) +{ + // Bump up the sequence id for this parser instance. This will invalidate + // any previous progressive scan tokens. + fSequenceId++; + + try + { + // Reset the scanner and its plugged in stuff for a new run. This + // resets all the data structures, creates the initial reader and + // pushes it on the stack, and sets up the base document path. 
+ scanReset(src); + + // If we have a document handler, then call the start document + if (fDocHandler) + fDocHandler->startDocument(); + + // Scan the prolog part, which is everything before the root element + // including the DTD subsets. + scanProlog(); + + // If we got to the end of input, then its not a valid XML file. + // Else, go on to scan the content. + if (fReaderMgr.atEOF()) + { + emitError(XMLErrs::EmptyMainEntity); + } + else + { + // Scan content, and tell it its not an external entity + if (scanContent(false)) + { + // Do post-parse validation if required + if (fValidate) + { + // We handle ID reference semantics at this level since + // its required by XML 1.0. + checkIDRefs(); + + // Then allow the validator to do any extra stuff it wants +// fValidator->postParseValidation(); + } + + // That went ok, so scan for any miscellaneous stuff + if (!fReaderMgr.atEOF()) + scanMiscellaneous(); + } + } + + // If we have a document handler, then call the end document + if (fDocHandler) + fDocHandler->endDocument(); + + // Reset the reader manager to close all files, sockets, etc... + fReaderMgr.reset(); + } + // NOTE: + // + // In all of the error processing below, the emitError() call MUST come + // before the flush of the reader mgr, or it will fail because it tries + // to find out the position in the XML source of the error. + catch(const XMLErrs::Codes) + { + // This is a 'first fatal error' type exit, so reset and fall through + fReaderMgr.reset(); + } + catch(const XMLValid::Codes) + { + // This is a 'first fatal error' type exit, so reset and fall through + fReaderMgr.reset(); + } + catch(const XMLException& excToCatch) + { + // Emit the error and catch any user exception thrown from here. Make + // sure in all cases we flush the reader manager. 
+ fInException = true; + try + { + if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) + emitError + ( + XMLErrs::XMLException_Warning + , excToCatch.getType() + , excToCatch.getMessage() + ); + else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal) + emitError + ( + XMLErrs::XMLException_Fatal + , excToCatch.getType() + , excToCatch.getMessage() + ); + else + emitError + ( + XMLErrs::XMLException_Error + , excToCatch.getType() + , excToCatch.getMessage() + ); + } + catch(...) + { + // Flush the reader manager and rethrow user's error + fReaderMgr.reset(); + throw; + } + + // If it returned, then reset the reader manager and fall through + fReaderMgr.reset(); + } + catch(...) + { + // Reset and rethrow + fReaderMgr.reset(); + throw; + } +} + + +bool SGXMLScanner::scanNext(XMLPScanToken& token) +{ + // Make sure this token is still legal + if (!isLegalToken(token)) + ThrowXML(RuntimeException, XMLExcepts::Scan_BadPScanToken); + + // Find the next token and remember the reader id + unsigned int orgReader; + XMLTokens curToken; + + bool retVal = true; + + try + { + while (true) + { + // We have to handle any end of entity exceptions that happen here. + // We could be at the end of X nested entities, each of which will + // generate an end of entity exception as we try to move forward. 
+ try + { + curToken = senseNextToken(orgReader); + break; + } + catch(const EndOfEntityException& toCatch) + { + // Send an end of entity reference event + if (fDocHandler) + fDocHandler->endEntityReference(toCatch.getEntity()); + } + } + + if (curToken == Token_CharData) + { + scanCharData(fCDataBuf); + } + else if (curToken == Token_EOF) + { + if (!fElemStack.isEmpty()) + { + const ElemStack::StackElem* topElem = fElemStack.popTop(); + emitError + ( + XMLErrs::EndedWithTagsOnStack + , topElem->fThisElement->getFullName() + ); + } + + retVal = false; + } + else + { + // Its some sort of markup + bool gotData = true; + switch(curToken) + { + case Token_CData : + // Make sure we are within content + if (fElemStack.isEmpty()) + emitError(XMLErrs::CDATAOutsideOfContent); + scanCDSection(); + break; + + case Token_Comment : + scanComment(); + break; + + case Token_EndTag : + scanEndTag(gotData); + break; + + case Token_PI : + scanPI(); + break; + + case Token_StartTag : + scanStartTag(gotData); + break; + + default : + fReaderMgr.skipToChar(chOpenAngle); + break; + } + + if (orgReader != fReaderMgr.getCurrentReaderNum()) + emitError(XMLErrs::PartialMarkupInEntity); + + // If we hit the end, then do the miscellaneous part + if (!gotData) + { + // Do post-parse validation if required + if (fValidate) + { + // We handle ID reference semantics at this level since + // its required by XML 1.0. + checkIDRefs(); + + // Then allow the validator to do any extra stuff it wants +// fValidator->postParseValidation(); + } + + // That went ok, so scan for any miscellaneous stuff + scanMiscellaneous(); + + if (fValidate) + fValueStoreCache->endDocument(); + + if (fDocHandler) + fDocHandler->endDocument(); + } + } + } + // NOTE: + // + // In all of the error processing below, the emitError() call MUST come + // before the flush of the reader mgr, or it will fail because it tries + // to find out the position in the XML source of the error. 
+ catch(const XMLErrs::Codes) + { + // This is a 'first failure' exception, so reset and return failure + fReaderMgr.reset(); + return false; + } + catch(const XMLValid::Codes) + { + // This is a 'first fatal error' type exit, so reset and return failure + fReaderMgr.reset(); + return false; + } + catch(const XMLException& excToCatch) + { + // Emit the error and catch any user exception thrown from here. Make + // sure in all cases we flush the reader manager. + fInException = true; + try + { + if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) + emitError + ( + XMLErrs::XMLException_Warning + , excToCatch.getType() + , excToCatch.getMessage() + ); + else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal) + emitError + ( + XMLErrs::XMLException_Fatal + , excToCatch.getType() + , excToCatch.getMessage() + ); + else + emitError + ( + XMLErrs::XMLException_Error + , excToCatch.getType() + , excToCatch.getMessage() + ); + } + catch(...) + { + // Reset and rethrow user error + fReaderMgr.reset(); + throw; + } + + // Reset and return failure + fReaderMgr.reset(); + return false; + } + catch(...) + { + // Reset and rethrow original error + fReaderMgr.reset(); + throw; + } + + // If we hit the end, then flush the reader manager + if (!retVal) + fReaderMgr.reset(); + + return retVal; +} + +// --------------------------------------------------------------------------- +// SGXMLScanner: Private scanning methods +// --------------------------------------------------------------------------- + +// This method is called from scanStartTag() to handle the very raw initial +// scan of the attributes. It just fills in the passed collection with +// key/value pairs for each attribute. No processing is done on them at all. +// +// Returns the number of attributes placed in the collection. isEmpty is set +// to true when the tag was closed with a forward slash (an empty element). 
+unsigned int +SGXMLScanner::rawAttrScan(const XMLCh* const elemName + , RefVectorOf<KVStringPair>& toFill + , bool& isEmpty) +{ + // Keep up with how many attributes we've seen so far, and how many + // elements are available in the vector. This way we can reuse old + // elements until we run out and then expand it. + unsigned int attCount = 0; + unsigned int curVecSize = toFill.size(); + + // Assume it is not empty + isEmpty = false; + + // We loop until we either see a /> or >, handling key/value pairs until + // we get there. We place them in the passed vector, which we will expand + // as required to hold them. + while (true) + { + // Get the next character, which should be non-space + XMLCh nextCh = fReaderMgr.peekNextChar(); + + // If the next character is not a slash or closed angle bracket, + // then it must be whitespace, since whitespace is required + // between the end of the last attribute and the name of the next + // one. + // + if (attCount) + { + if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) + { + if (XMLReader::isWhitespace(nextCh)) + { + // Ok, skip by them and get another char + fReaderMgr.getNextChar(); + fReaderMgr.skipPastSpaces(); + nextCh = fReaderMgr.peekNextChar(); + } + else + { + // Emit the error but keep on going + emitError(XMLErrs::ExpectedWhitespace); + } + } + } + + // Ok, here we first check for any of the special case characters. + // If it's not one, then we do the normal case processing, which + // assumes that we've hit an attribute. Otherwise, we do all + // the special case checks. + if (!XMLReader::isSpecialStartTagChar(nextCh)) + { + // Assume it's going to be an attribute, so get a name from + // the input. 
+ if (!fReaderMgr.getName(fAttNameBuf)) + { + emitError(XMLErrs::ExpectedAttrName); + fReaderMgr.skipPastChar(chCloseAngle); + return attCount; + } + + // And next must be an equal sign + if (!scanEq()) + { + static const XMLCh tmpList[] = + { + chSingleQuote, chDoubleQuote, chCloseAngle + , chOpenAngle, chForwardSlash, chNull + }; + + emitError(XMLErrs::ExpectedEqSign); + + // Try to sync back up by skipping forward until we either + // hit something meaningful. + const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); + + if ((chFound == chCloseAngle) || (chFound == chForwardSlash)) + { + // Jump back to top for normal processing of these + continue; + } + else if ((chFound == chSingleQuote) + || (chFound == chDoubleQuote) + || XMLReader::isWhitespace(chFound)) + { + // Just fall through assuming that the value is to follow + } + else if (chFound == chOpenAngle) + { + // Assume a malformed tag and that new one is starting + emitError(XMLErrs::UnterminatedStartTag, elemName); + return attCount; + } + else + { + // Something went really wrong + return attCount; + } + } + + // Next should be the quoted attribute value. We just do a simple + // and stupid scan of this value. The only thing we do here + // is to expand entity references. + if (!basicAttrValueScan(fAttNameBuf.getRawBuffer(), fAttValueBuf)) + { + static const XMLCh tmpList[] = + { + chCloseAngle, chOpenAngle, chForwardSlash, chNull + }; + + emitError(XMLErrs::ExpectedAttrValue); + + // It failed, so lets try to get synced back up. We skip + // forward until we find some whitespace or one of the + // chars in our list. + const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); + + if ((chFound == chCloseAngle) + || (chFound == chForwardSlash) + || XMLReader::isWhitespace(chFound)) + { + // Just fall through and process this attribute, though + // the value will be "". 
+ } + else if (chFound == chOpenAngle) + { + // Assume a malformed tag and that new one is starting + emitError(XMLErrs::UnterminatedStartTag, elemName); + return attCount; + } + else + { + // Something went really wrong + return attCount; + } + } + + // Make sure that the name is basically well formed for namespace + // enabled rules. It either has no colons, or it has one which + // is neither the first or last char. + const int colonFirst = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon); + if (colonFirst != -1) + { + const int colonLast = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon); + + if (colonFirst != colonLast) + { + emitError(XMLErrs::TooManyColonsInName); + continue; + } + else if ((colonFirst == 0) + || (colonLast == (int)fAttNameBuf.getLen() - 1)) + { + emitError(XMLErrs::InvalidColonPos); + continue; + } + } + + // And now lets add it to the passed collection. If we have not + // filled it up yet, then we use the next element. Else we add + // a new one. + KVStringPair* curPair = 0; + if (attCount >= curVecSize) + { + curPair = new KVStringPair + ( + fAttNameBuf.getRawBuffer() + , fAttValueBuf.getRawBuffer() + ); + toFill.addElement(curPair); + } + else + { + curPair = toFill.elementAt(attCount); + curPair->set(fAttNameBuf.getRawBuffer(), fAttValueBuf.getRawBuffer()); + } + + // And bump the count of attributes we've gotten + attCount++; + + // And go to the top again for another attribute + continue; + } + + // It was some special case character so do all of the checks and + // deal with it. 
+ if (!nextCh) + ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF); + + if (nextCh == chForwardSlash) + { + fReaderMgr.getNextChar(); + isEmpty = true; + if (!fReaderMgr.skippedChar(chCloseAngle)) + emitError(XMLErrs::UnterminatedStartTag, elemName); + break; + } + else if (nextCh == chCloseAngle) + { + fReaderMgr.getNextChar(); + break; + } + else if (nextCh == chOpenAngle) + { + // Check for this one specially, since its going to be common + // and it is kind of auto-recovering since we've already hit the + // next open bracket, which is what we would have seeked to (and + // skipped this whole tag.) + emitError(XMLErrs::UnterminatedStartTag, elemName); + break; + } + else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote)) + { + // Check for this one specially, which is probably a missing + // attribute name, e.g. ="value". Just issue expected name + // error and eat the quoted string, then jump back to the + // top again. + emitError(XMLErrs::ExpectedAttrName); + fReaderMgr.getNextChar(); + fReaderMgr.skipQuotedString(nextCh); + fReaderMgr.skipPastSpaces(); + continue; + } + } + + return attCount; +} + + +// This method will kick off the scanning of the primary content of the +// document, i.e. the elements. +bool SGXMLScanner::scanContent(const bool extEntity) +{ + // Go into a loop until we hit the end of the root element, or we fall + // out because there is no root element. + // + // We have to do kind of a deeply nested double loop here in order to + // avoid doing the setup/teardown of the exception handler on each + // round. Doing it this way we only do it when an exception actually + // occurs. + bool gotData = true; + bool inMarkup = false; + while (gotData) + { + try + { + while (gotData) + { + // Sense what the next top level token is. According to what + // this tells us, we will call something to handle that kind + // of thing. 
+ unsigned int orgReader; + const XMLTokens curToken = senseNextToken(orgReader); + + // Handle character data and end of file specially. Char data + // is not markup so we don't want to handle it in the loop + // below. + if (curToken == Token_CharData) + { + // Scan the character data and call appropriate events. Let + // him use our local character data buffer for efficiency. + scanCharData(fCDataBuf); + continue; + } + else if (curToken == Token_EOF) + { + // The element stack better be empty at this point or we + // ended prematurely before all elements were closed. + if (!fElemStack.isEmpty()) + { + const ElemStack::StackElem* topElem = fElemStack.popTop(); + emitError + ( + XMLErrs::EndedWithTagsOnStack + , topElem->fThisElement->getFullName() + ); + } + + // Its the end of file, so clear the got data flag + gotData = false; + continue; + } + + // We are in some sort of markup now + inMarkup = true; + + // According to the token we got, call the appropriate + // scanning method. + switch(curToken) + { + case Token_CData : + // Make sure we are within content + if (fElemStack.isEmpty()) + emitError(XMLErrs::CDATAOutsideOfContent); + scanCDSection(); + break; + + case Token_Comment : + scanComment(); + break; + + case Token_EndTag : + scanEndTag(gotData); + break; + + case Token_PI : + scanPI(); + break; + + case Token_StartTag : + scanStartTag(gotData); + break; + + default : + fReaderMgr.skipToChar(chOpenAngle); + break; + } + + if (orgReader != fReaderMgr.getCurrentReaderNum()) + emitError(XMLErrs::PartialMarkupInEntity); + + // And we are back out of markup again + inMarkup = false; + } + } + catch(const EndOfEntityException& toCatch) + { + // If we were in some markup when this happened, then its a + // partial markup error. 
+ if (inMarkup) + emitError(XMLErrs::PartialMarkupInEntity); + + // Send an end of entity reference event + if (fDocHandler) + fDocHandler->endEntityReference(toCatch.getEntity()); + + inMarkup = false; + } + } + + // It went ok, so return success + return true; +} + + +void SGXMLScanner::scanEndTag(bool& gotData) +{ + // Assume we will still have data until proven otherwise. It will only + // ever be false if this is the end of the root element. + gotData = true; + + // Check if the element stack is empty. If so, then this is an unbalanced + // element (i.e. more ends than starts, perhaps because of bad text + // causing one to be skipped.) + if (fElemStack.isEmpty()) + { + emitError(XMLErrs::MoreEndThanStartTags); + fReaderMgr.skipPastChar(chCloseAngle); + ThrowXML(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd); + } + + // After the </ is the element QName, so get a name from the input + if (!fReaderMgr.getName(fQNameBuf)) + { + // It failed so we can't really do anything with it + emitError(XMLErrs::ExpectedElementName); + fReaderMgr.skipPastChar(chCloseAngle); + return; + } + + int prefixColonPos = -1; + unsigned int uriId = resolveQName + ( + fQNameBuf.getRawBuffer() + , fPrefixBuf + , ElemStack::Mode_Element + , prefixColonPos + ); + + // Pop the stack of the element we are supposed to be ending. Remember + // that we don't own this. The stack just keeps them and reuses them. + // + // NOTE: We CANNOT do this until we've resolved the element name because + // the element stack top contains the prefix to URI mappings for this + // element. 
+ unsigned int topUri = fElemStack.getCurrentURI(); + const ElemStack::StackElem* topElem = fElemStack.popTop(); + + // See if it was the root element, to avoid multiple calls below + const bool isRoot = fElemStack.isEmpty(); + + // Make sure that its the end of the element that we expect + XMLElementDecl* tempElement = topElem->fThisElement; + const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer(); + + if ((topUri != uriId) || + (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1]))) + { + emitError + ( + XMLErrs::ExpectedEndOfTagX + , topElem->fThisElement->getFullName() + ); + } + + // Make sure we are back on the same reader as where we started + if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) + emitError(XMLErrs::PartialTagMarkupError); + + // Skip optional whitespace + fReaderMgr.skipPastSpaces(); + + // Make sure we find the closing bracket + if (!fReaderMgr.skippedChar(chCloseAngle)) + { + emitError + ( + XMLErrs::UnterminatedEndTag + , topElem->fThisElement->getFullName() + ); + } + + // If validation is enabled, then lets pass him the list of children and + // this element and let him validate it. + if (fValidate) + { + int res = fValidator->checkContent + ( + topElem->fThisElement + , topElem->fChildren + , topElem->fChildCount + ); + + if (res >= 0) + { + // One of the elements is not valid for the content. NOTE that + // if no children were provided but the content model requires + // them, it comes back with a zero value. But we cannot use that + // to index the child array in this case, and have to put out a + // special message. 
+ if (!topElem->fChildCount) + { + fValidator->emitError + ( + XMLValid::EmptyNotValidForContent + , topElem->fThisElement->getFormattedContentModel() + ); + } + else if ((unsigned int)res >= topElem->fChildCount) + { + fValidator->emitError + ( + XMLValid::NotEnoughElemsForCM + , topElem->fThisElement->getFormattedContentModel() + ); + } + else + { + fValidator->emitError + ( + XMLValid::ElementNotValidForContent + , topElem->fChildren[res]->getRawName() + , topElem->fThisElement->getFormattedContentModel() + ); + } + } + + // reset xsi:type ComplexTypeInfo + ((SchemaElementDecl*)topElem->fThisElement)->setXsiComplexTypeInfo(0); + + // call matchers and de-activate context + int oldCount = fMatcherStack->getMatcherCount(); + + if (oldCount || + ((SchemaElementDecl*)topElem->fThisElement)->getIdentityConstraintCount()) { + + for (int i = oldCount - 1; i >= 0; i--) { + + XPathMatcher* matcher = fMatcherStack->getMatcherAt(i); + matcher->endElement(*(topElem->fThisElement)); + } + + if (fMatcherStack->size() > 0) { + fMatcherStack->popContext(); + } + + // handle everything *but* keyref's. + int newCount = fMatcherStack->getMatcherCount(); + + for (int j = oldCount - 1; j >= newCount; j--) { + + XPathMatcher* matcher = fMatcherStack->getMatcherAt(j); + IdentityConstraint* ic = matcher->getIdentityConstraint(); + + if (ic && (ic->getType() != IdentityConstraint::KEYREF)) { + + matcher->endDocumentFragment(); + fValueStoreCache->transplant(ic, matcher->getInitialDepth()); + } + else if (!ic) { + matcher->endDocumentFragment(); + } + } + + // now handle keyref's... + for (int k = oldCount - 1; k >= newCount; k--) { + + XPathMatcher* matcher = fMatcherStack->getMatcherAt(k); + IdentityConstraint* ic = matcher->getIdentityConstraint(); + + if (ic && (ic->getType() == IdentityConstraint::KEYREF)) { + + ValueStore* values = fValueStoreCache->getValueStoreFor(ic, matcher->getInitialDepth()); + + if (values) { // nothing to do if nothing matched! 
+ values->endDcocumentFragment(fValueStoreCache); + } + + matcher->endDocumentFragment(); + } + } + + fValueStoreCache->endElement(); + } + } + + // If we have a doc handler, tell it about the end tag + if (fDocHandler) + { + fDocHandler->endElement + ( + *topElem->fThisElement + , uriId + , isRoot + , fPrefixBuf.getRawBuffer() + ); + } + + // If this was the root, then done with content + gotData = !isRoot; + + if (gotData) { + + // Restore the grammar + fGrammar = fElemStack.getCurrentGrammar(); + fGrammarType = fGrammar->getGrammarType(); + fValidator->setGrammar(fGrammar); + + // Restore the validation flag + fValidate = fElemStack.getValidationFlag(); + } +} + + +// This method deals with the DOCType declaration. This scanner does not +// hand the declaration to a DTD scanner; it just skips over it (and over +// a bracketed internal subset, if the declaration has one). +// +// When we get here the '<!DOCTYPE' part has already been scanned, which is +// what told us that we had a doc type decl to parse. +void SGXMLScanner::scanDocTypeDecl() +{ + // Just skips over it + // REVISIT: Should we issue a warning + static const XMLCh doctypeIE[] = + { + chOpenSquare, chCloseAngle, chNull + }; + XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE); + + if (nextCh == chOpenSquare) + fReaderMgr.skipPastChar(chCloseSquare); + + fReaderMgr.skipPastChar(chCloseAngle); +} + +// This method is called to scan a start tag when we are processing +// namespaces. There are two different versions of this method, one for +// namespace aware processing and one for non-namespace aware processing. +// +// This method is called after we've scanned the < of a start tag. So we +// have to get the element name, then scan the attributes, after which +// we are either going to see >, />, or attributes followed by one of those +// sequences. +bool SGXMLScanner::scanStartTag(bool& gotData) +{ + // Assume we will still have data until proven otherwise. 
It will only + // ever be false if this is the root and its empty. + gotData = true; + + // The current position is after the open bracket, so we need to read in + // in the element name. + if (!fReaderMgr.getName(fQNameBuf)) + { + emitError(XMLErrs::ExpectedElementName); + fReaderMgr.skipToChar(chOpenAngle); + return false; + } + + // See if its the root element + const bool isRoot = fElemStack.isEmpty(); + + // Skip any whitespace after the name + fReaderMgr.skipPastSpaces(); + + // First we have to do the rawest attribute scan. We don't do any + // normalization of them at all, since we don't know yet what type they + // might be (since we need the element decl in order to do that.) + bool isEmpty; + unsigned int attCount = rawAttrScan + ( + fQNameBuf.getRawBuffer() + , *fRawAttrList + , isEmpty + ); + const bool gotAttrs = (attCount != 0); + + // save the contentleafname and currentscope before addlevel, for later use + ContentLeafNameTypeVector* cv = 0; + XMLContentModel* cm = 0; + int currentScope = Grammar::TOP_LEVEL_SCOPE; + if (!isRoot) { + + SchemaElementDecl* tempElement = (SchemaElementDecl*) fElemStack.topElement()->fThisElement; + SchemaElementDecl::ModelTypes modelType = tempElement->getModelType(); + + if ((modelType == SchemaElementDecl::Mixed_Simple) + || (modelType == SchemaElementDecl::Mixed_Complex) + || (modelType == SchemaElementDecl::Children)) + { + cm = tempElement->getContentModel(); + cv = cm->getContentLeafNameTypeVector(); + currentScope = fElemStack.getCurrentScope(); + } + } + + // Now, since we might have to update the namespace map for this element, + // but we don't have the element decl yet, we just tell the element stack + // to expand up to get ready. 
+ unsigned int elemDepth = fElemStack.addLevel(); + fElemStack.setValidationFlag(fValidate); + + // Check if there is any external schema location specified, and if we are at root, + // go through them first before scanning those specified in the instance document + if (isRoot + && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) { + + if (fExternalSchemaLocation) + parseSchemaLocation(fExternalSchemaLocation); + if (fExternalNoNamespaceSchemaLocation) + resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString); + } + + // Make an initial pass through the list and find any xmlns attributes or + // schema attributes. + if (attCount) + scanRawAttrListforNameSpaces(fRawAttrList, attCount); + + // Resolve the qualified name to a URI and name so that we can look up + // the element decl for this element. We have now update the prefix to + // namespace map so we should get the correct element now. + int prefixColonPos = -1; + const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); + unsigned int uriId = resolveQName + ( + qnameRawBuf + , fPrefixBuf + , ElemStack::Mode_Element + , prefixColonPos + ); + + //if schema, check if we should lax or skip the validation of this element + bool parentValidation = fValidate; + bool laxThisOne = false; + if (cv) { + QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId); + // elementDepth will be > 0, as cv is only constructed if element is not + // root. + laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1); + } + + // Look up the element now in the grammar. This will get us back a + // generic element decl object. We tell him to fault one in if he does + // not find it. 
+ XMLElementDecl* elemDecl = 0; + bool wasAdded = false; + const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1]; + + if (uriId != fEmptyNamespaceId) { + + // Check in current grammar before switching if necessary + elemDecl = fGrammar->getElemDecl + ( + uriId + , nameRawBuf + , qnameRawBuf + , currentScope + ); + + if (!elemDecl && (fURIStringPool->getId(fGrammar->getTargetNamespace()) != uriId)) { + // not found, switch to the specified grammar + const XMLCh* uriStr = getURIText(uriId); + if (!switchGrammar(uriStr) && fValidate && !laxThisOne) + { + fValidator->emitError + ( + XMLValid::GrammarNotFound + ,uriStr + ); + } + + elemDecl = fGrammar->getElemDecl + ( + uriId + , nameRawBuf + , qnameRawBuf + , currentScope + ); + } + + if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) { + // if not found, then it may be a reference, try TOP_LEVEL_SCOPE + elemDecl = fGrammar->getElemDecl + ( + uriId + , nameRawBuf + , qnameRawBuf + , Grammar::TOP_LEVEL_SCOPE + ); + + if(!elemDecl) { + // still not found in specified uri + // try emptyNamesapce see if element should be un-qualified. 
+ elemDecl = fGrammar->getElemDecl + ( + fEmptyNamespaceId + , nameRawBuf + , qnameRawBuf + , currentScope + ); + + if (elemDecl && elemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) { + fValidator->emitError + ( + XMLValid::ElementNotUnQualified + , elemDecl->getFullName() + ); + } + } + } + + if (!elemDecl) { + // still not found, fault this in and issue error later + elemDecl = fGrammar->putElemDecl(uriId + , nameRawBuf + , fPrefixBuf.getRawBuffer() + , qnameRawBuf + , currentScope + , true); + wasAdded = true; + } + } + else if (!elemDecl) + { + //the element has no prefix, + //thus it is either a non-qualified element defined in current targetNS + //or an element that is defined in the globalNS + + //try unqualifed first + elemDecl = fGrammar->getElemDecl + ( + uriId + , nameRawBuf + , qnameRawBuf + , currentScope + ); + + unsigned orgGrammarUri = fURIStringPool->getId(fGrammar->getTargetNamespace()); + + if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) { + //not found, switch grammar and try globalNS + if (!switchGrammar(XMLUni::fgZeroLenString) && fValidate && !laxThisOne) + { + fValidator->emitError + ( + XMLValid::GrammarNotFound + , XMLUni::fgZeroLenString + ); + } + + elemDecl = fGrammar->getElemDecl + ( + uriId + , nameRawBuf + , qnameRawBuf + , currentScope + ); + } + + if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) { + // if not found, then it may be a reference, try TOP_LEVEL_SCOPE + elemDecl = fGrammar->getElemDecl + ( + uriId + , nameRawBuf + , qnameRawBuf + , Grammar::TOP_LEVEL_SCOPE + ); + + if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) { + // still Not found in specified uri + // go to original Grammar again to see if element needs to be fully qualified. 
+ const XMLCh* uriStr = getURIText(orgGrammarUri); + if (!switchGrammar(uriStr) && fValidate && !laxThisOne) + { + fValidator->emitError + ( + XMLValid::GrammarNotFound + ,uriStr + ); + } + + elemDecl = fGrammar->getElemDecl + ( + orgGrammarUri + , nameRawBuf + , qnameRawBuf + , currentScope + ); + + if (elemDecl && elemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) { + fValidator->emitError + ( + XMLValid::ElementNotQualified + , elemDecl->getFullName() + ); + } + } + } + + if (!elemDecl) { + // still not found, fault this in and issue error later + elemDecl = fGrammar->putElemDecl(uriId + , nameRawBuf + , fPrefixBuf.getRawBuffer() + , qnameRawBuf + , currentScope + , true); + wasAdded = true; + } + } + + // We do something different here according to whether we found the + // element or not. + if (wasAdded) + { + if (laxThisOne) { + fValidate = false; + fElemStack.setValidationFlag(fValidate); + } + + // If validating then emit an error + if (fValidate) + { + // This is to tell the reuse Validator that this element was + // faulted-in, was not an element in the grammar pool originally + elemDecl->setCreateReason(XMLElementDecl::JustFaultIn); + + fValidator->emitError + ( + XMLValid::ElementNotDefined + , elemDecl->getFullName() + ); + } + } + else + { + // If its not marked declared and validating, then emit an error + if (!elemDecl->isDeclared()) { + if (laxThisOne) { + fValidate = false; + fElemStack.setValidationFlag(fValidate); + } + + if (fValidate) + { + fValidator->emitError + ( + XMLValid::ElementNotDefined + , elemDecl->getFullName() + ); + } + } + + ((SchemaElementDecl*)elemDecl)->setXsiComplexTypeInfo(0); + } + + // Now we can update the element stack to set the current element + // decl. We expanded the stack above, but couldn't store the element + // decl because we didn't know it yet. 
+ fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum()); + fElemStack.setCurrentURI(uriId); + + if (isRoot) + fRootGrammar = fGrammar; + + // Validate the element + if (fValidate) + fValidator->validateElement(elemDecl); + + + ComplexTypeInfo* typeinfo = ((SchemaElementDecl*)elemDecl)->getComplexTypeInfo(); + if (typeinfo) { + currentScope = typeinfo->getScopeDefined(); + + // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type) + XMLCh* typeName = typeinfo->getTypeName(); + const XMLCh poundStr[] = {chPound, chNull}; + if (!XMLString::startsWith(typeName, poundStr)) { + const int comma = XMLString::indexOf(typeName, chComma); + if (comma > 0) { + XMLBuffer prefixBuf(comma+1); + prefixBuf.append(typeName, comma); + const XMLCh* uriStr = prefixBuf.getRawBuffer(); + if (!switchGrammar(uriStr) && fValidate && !laxThisOne) + { + fValidator->emitError + ( + XMLValid::GrammarNotFound + , prefixBuf.getRawBuffer() + ); + } + } + } + } + fElemStack.setCurrentScope(currentScope); + + // Set element next state + if (elemDepth >= fElemStateSize) { + resizeElemState(); + } + + fElemState[elemDepth] = 0; + fElemStack.setCurrentGrammar(fGrammar); + + // If this is the first element and we are validating, check the root + // element. + if (isRoot) + { + if (fValidate) + { + // Some validators may also want to check the root, call the + // XMLValidator::checkRootElement + if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId())) + fValidator->emitError(XMLValid::RootElemNotLikeDocType); + } + } + else if (parentValidation) + { + // If the element stack is not empty, then add this element as a + // child of the previous top element. If its empty, this is the root + // elem and is not the child of anything. + fElemStack.addChild(elemDecl->getElementName(), true); + } + + // Now lets get the fAttrList filled in. 
This involves faulting in any + // defaulted and fixed attributes and normalizing the values of any that + // we got explicitly. + // + // We update the attCount value with the total number of attributes, but + // it goes in with the number of values we got during the raw scan of + // explictly provided attrs above. + attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList); + + // activate identity constraints + if (fValidate) { + + unsigned int count = ((SchemaElementDecl*) elemDecl)->getIdentityConstraintCount(); + + if (count || fMatcherStack->getMatcherCount()) { + + fValueStoreCache->startElement(); + fMatcherStack->pushContext(); + fValueStoreCache->initValueStoresFor((SchemaElementDecl*) elemDecl, (int) elemDepth); + + for (unsigned int i = 0; i < count; i++) { + activateSelectorFor(((SchemaElementDecl*) elemDecl)->getIdentityConstraintAt(i), (int) elemDepth); + } + + // call all active identity constraints + count = fMatcherStack->getMatcherCount(); + + for (unsigned int j = 0; j < count; j++) { + + XPathMatcher* matcher = fMatcherStack->getMatcherAt(j); + matcher->startElement(*elemDecl, uriId, fPrefixBuf.getRawBuffer(), *fAttrList, attCount); + } + } + } + + // Since the element may have default values, call start tag now regardless if it is empty or not + // If we have a document handler, then tell it about this start tag + if (fDocHandler) + { + fDocHandler->startElement + ( + *elemDecl + , uriId + , fPrefixBuf.getRawBuffer() + , *fAttrList + , attCount + , false + , isRoot + ); + } + + // If empty, validate content right now if we are validating and then + // pop the element stack top. Else, we have to update the current stack + // top's namespace mapping elements. 
+ if (isEmpty) + { + // Pop the element stack back off since it'll never be used now + fElemStack.popTop(); + + // If validating, then insure that its legal to have no content + if (fValidate) + { + const int res = fValidator->checkContent(elemDecl, 0, 0); + if (res >= 0) + { + fValidator->emitError + ( + XMLValid::ElementNotValidForContent + , elemDecl->getFullName() + , elemDecl->getFormattedContentModel() + ); + } + + // reset xsi:type ComplexTypeInfo + ((SchemaElementDecl*)elemDecl)->setXsiComplexTypeInfo(0); + + // call matchers and de-activate context + int oldCount = fMatcherStack->getMatcherCount(); + if (oldCount || ((SchemaElementDecl*) elemDecl)->getIdentityConstraintCount()) { + + for (int i = oldCount - 1; i >= 0; i--) { + + XPathMatcher* matcher = fMatcherStack->getMatcherAt(i); + matcher->endElement(*elemDecl); + } + + if (fMatcherStack->size() > 0) { + fMatcherStack->popContext(); + } + + // handle everything *but* keyref's. + int newCount = fMatcherStack->getMatcherCount(); + + for (int j = oldCount - 1; j >= newCount; j--) { + + XPathMatcher* matcher = fMatcherStack->getMatcherAt(j); + IdentityConstraint* ic = matcher->getIdentityConstraint(); + + if (ic && (ic->getType() != IdentityConstraint::KEYREF)) { + + matcher->endDocumentFragment(); + fValueStoreCache->transplant(ic, matcher->getInitialDepth()); + } + else if (!ic) { + matcher->endDocumentFragment(); + } + } + + // now handle keyref's... + for (int k = oldCount - 1; k >= newCount; k--) { + + XPathMatcher* matcher = fMatcherStack->getMatcherAt(k); + IdentityConstraint* ic = matcher->getIdentityConstraint(); + + if (ic && (ic->getType() == IdentityConstraint::KEYREF)) { + + ValueStore* values = fValueStoreCache->getValueStoreFor(ic, matcher->getInitialDepth()); + + if (values) { // nothing to do if nothing matched! 
+ values->endDcocumentFragment(fValueStoreCache); + } + + matcher->endDocumentFragment(); + } + } + + fValueStoreCache->endElement(); + } + } + + // If we have a doc handler, tell it about the end tag + if (fDocHandler) + { + fDocHandler->endElement + ( + *elemDecl + , uriId + , isRoot + , fPrefixBuf.getRawBuffer() + ); + } + + // If the elem stack is empty, then it was an empty root + if (isRoot) + gotData = false; + else + { + // Restore the grammar + fGrammar = fElemStack.getCurrentGrammar(); + fGrammarType = fGrammar->getGrammarType(); + fValidator->setGrammar(fGrammar); + + // Restore the validation flag + fValidate = fElemStack.getValidationFlag(); + } + } + + return true; +}


// Splits a qualified name at its first colon and resolves the prefix part
// to a URI id.
//
//  qName           The raw qualified name, either "prefix:local" or "local".
//  prefixBuf       Receives the prefix text. It is emptied when the name
//                  has no colon.
//  mode            An ElemStack::MapModes value (passed as a short) that is
//                  handed to the element stack's prefix lookup.
//  prefixColonPos  Receives the index of the first colon in qName, or -1
//                  when there is none.
//
// Returns the URI id for the prefix. The reserved prefixes 'xmlns' and 'xml'
// are not looked up on the element stack; they map straight to
// fXMLNSNamespaceId and fXMLNamespaceId.
unsigned int
SGXMLScanner::resolveQName(const   XMLCh* const qName
                           ,       XMLBuffer&   prefixBuf
                           , const short        mode
                           ,       int&         prefixColonPos)
{
    // Lets split out the qName into a URI and name buffer first. The URI
    // can be empty.
    prefixColonPos = XMLString::indexOf(qName, chColon);
    if (prefixColonPos == -1)
    {
        // Its all name with no prefix. Map the empty string to a URI, since
        // the empty string represents the default namespace. This will
        // either return some explicit URI which the default namespace is
        // mapped to, or the default global namespace.
        bool unknown = false;

        prefixBuf.reset();
        return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);
    }

    // Copy the chars up to but not including the colon into the prefix
    // buffer.
    prefixBuf.set(qName, prefixColonPos);

    // Watch for the special namespace prefixes. We always map these to
    // special URIs. 'xml' gets mapped to the official URI that its defined
    // to map to by the NS spec. xmlns gets mapped to a special place holder
    // URI that we define (so that it maps to something checkable.)
    const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();
    if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString))
    {
        // if this is an element, it is an error to have xmlns as prefix
        if (mode == ElemStack::Mode_Element)
            emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);

        return fXMLNSNamespaceId;
    }

    if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString))
        return fXMLNamespaceId;

    // An ordinary prefix: ask the element stack, and report it if the stack
    // says it has no mapping for it.
    bool unknown = false;
    unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);

    if (unknown)
        emitError(XMLErrs::UnknownPrefix, prefixRawBuf);

    return uriId;
}

// ---------------------------------------------------------------------------
//  SGXMLScanner: IC activation methods
// ---------------------------------------------------------------------------
// Creates a matcher for the selector of the given identity constraint, adds
// it to fMatcherStack and starts its document fragment. Does nothing when
// the constraint has no selector.
//
//  ic            The identity constraint whose selector is activated.
//  initialDepth  Passed through to the selector's createMatcher().
void SGXMLScanner::activateSelectorFor(IdentityConstraint* const ic, const int initialDepth) {

    IC_Selector* selector = ic->getSelector();

    if (!selector)
        return;

    XPathMatcher* matcher = selector->createMatcher(fFieldActivator, initialDepth);

    fMatcherStack->addMatcher(matcher);
    matcher->startDocumentFragment();
}

// ---------------------------------------------------------------------------
//  SGXMLScanner: Grammar preparsing
// ---------------------------------------------------------------------------
// Preparses a grammar from the given input source.
//
//  src          The input source holding the grammar.
//  grammarType  Only Grammar::SchemaGrammarType is acted upon; any other
//               value loads nothing.
//  toCache      Passed through to loadXMLSchemaGrammar().
//
// Returns the result of loadXMLSchemaGrammar() for a schema grammar. Returns
// 0 for any other grammar type, or when an XMLErrs/XMLValid code or an
// XMLException was caught here. Any other exception, and any exception
// raised while reporting an XMLException, is rethrown to the caller.
//
// The reader manager is reset on every way out of this method, including
// the successful schema load.
Grammar* SGXMLScanner::loadGrammar(const   InputSource& src
                                   , const short        grammarType
                                   , const bool         toCache)
{
    // Stays 0 unless the schema load below completes without throwing.
    Grammar* loadedGrammar = 0;

    try
    {
        fGrammarResolver->cacheGrammarFromParse(false);
        fGrammarResolver->useCachedGrammarInParse(false);
        fRootGrammar = 0;

        if (fValScheme == Val_Auto) {
            fValidate = true;
        }

        // Reset some status flags
        fInException = false;
        fStandalone = false;
        fErrorCount = 0;
        fHasNoDTD = true;
        fSeeXsi = false;

        if (grammarType == Grammar::SchemaGrammarType) {
            // Keep the result and fall through, rather than returning from
            // here, so that the reset below is not skipped.
            loadedGrammar = loadXMLSchemaGrammar(src, toCache);
        }

        // Reset the reader manager to close all files, sockets, etc...
        fReaderMgr.reset();
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and fall through
        fReaderMgr.reset();
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and fall through
        fReaderMgr.reset();
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
            {
                emitError
                (
                    XMLErrs::DisplayErrorMessage
                    , excToCatch.getMessage()
                );
            }
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
            {
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            }
            else
            {
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            }
        }
        catch(...)
        {
            // Flush the reader manager and rethrow user's error
            fReaderMgr.reset();
            throw;
        }

        // If it returned, then reset the reader manager and fall through
        fReaderMgr.reset();
    }
    catch(...)
    {
        // Reset and rethrow
        fReaderMgr.reset();
        throw;
    }

    return loadedGrammar;
}

// ---------------------------------------------------------------------------
//  SGXMLScanner: Private helper methods
// ---------------------------------------------------------------------------
//  This method handles the common initialization, to avoid having to do
//  it redundantly in multiple constructors.
//
//  It reads fElemStateSize to size the element state array, so that member
//  has to hold its intended value before this is called. Everything
//  allocated here is released by cleanUp().
void SGXMLScanner::commonInit()
{
    //  Create the element state array
    fElemState = new unsigned int[fElemStateSize];

    //  And we need one for the raw attribute scan. This just stores key/
    //  value string pairs (prior to any processing.)
    fRawAttrList = new RefVectorOf<KVStringPair>(32);

    //  Create dummy schema grammar
    fSchemaGrammar = new SchemaGrammar();

    //  Create the Validator and init them
    fSchemaValidator = new SchemaValidator();
    initValidator(fSchemaValidator);

    // Create IdentityConstraint info
    fMatcherStack = new XPathMatcherStack();
    fValueStoreCache = new ValueStoreCache();
    fFieldActivator = new FieldActivator(fValueStoreCache, fMatcherStack);
    fValueStoreCache->setScanner(this);

    //  Add the default entity entries for the character refs that must always
    //  be present.
    fEntityTable = new ValueHashTableOf<XMLCh>(11);
    fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);
    fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);
    fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);
    fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
    fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
}

// Releases the objects allocated by commonInit().
void SGXMLScanner::cleanUp()
{
    delete [] fElemState;
    delete fSchemaGrammar;
    delete fEntityTable;
    delete fRawAttrList;
    delete fSchemaValidator;
    delete fFieldActivator;
    delete fMatcherStack;
    delete fValueStoreCache;
}

// Doubles the capacity of the element state array. Existing entries are
// carried over and the newly added entries are set to zero.
void SGXMLScanner::resizeElemState() {

    unsigned int newSize = fElemStateSize * 2;
    unsigned int* newElemState = new unsigned int[newSize];

    // Copy the existing values
    unsigned int index = 0;
    for (; index < fElemStateSize; index++)
        newElemState[index] = fElemState[index];

    // Zero the part that was just added
    for (; index < newSize; index++)
        newElemState[index] = 0;

    // Delete the old array and update our members
    delete [] fElemState;
    fElemState = newElemState;
    fElemStateSize = newSize;
}

//  This method is called from scanStartTagNS() to build up the list of
//  XMLAttr objects that will be passed out in the start tag callout. We
//  get the key/value pairs from the raw scan of explicitly provided attrs,
//  which have not been normalized.
And we get the element declaration from +// which we will get any defaulted or fixed attribute defs and add those +// in as well. +unsigned int +SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs + , const unsigned int attCount + , XMLElementDecl* elemDecl + , RefVectorOf<XMLAttr>& toFill) +{ + // Ask the element to clear the 'provided' flag on all of the att defs + // that it owns, and to return us a boolean indicating whether it has + // any defs. + const bool hasDefs = elemDecl->resetDefs(); + + // If there are no expliclitily provided attributes and there are no + // defined attributes for the element, the we don't have anything to do. + // So just return zero in this case. + if (!hasDefs && !attCount) + return 0; + + // Keep up with how many attrs we end up with total + unsigned int retCount = 0; + + // And get the current size of the output vector. This lets us use + // existing elements until we fill it, then start adding new ones. + const unsigned int curAttListSize = toFill.size(); + + // We need a buffer into which raw scanned attribute values will be + // normalized. + XMLBufBid bbNormal(&fBufMgr); + XMLBuffer& normBuf = bbNormal.getBuffer(); + + // Loop through our explicitly provided attributes, which are in the raw + // scanned form, and build up XMLAttr objects. + unsigned int index; + for (index = 0; index < attCount; index++) + { + const KVStringPair* curPair = providedAttrs.elementAt(index); + + // We have to split the name into its prefix and name parts. Then + // we map the prefix to its URI. + const XMLCh* const namePtr = curPair->getKey(); + ArrayJanitor<XMLCh> janName(0); + + // use a stack-based buffer when possible. + XMLCh tempBuffer[100]; + + const int colonInd = XMLString::indexOf(namePtr, chColon); + const XMLCh* prefPtr = XMLUni::fgZeroLenString; + const XMLCh* suffPtr = XMLUni::fgZeroLenString; + if (colonInd != -1) + { + // We have to split the string, so make a copy. 
+ if (XMLString::stringLen(namePtr) < sizeof(tempBuffer) / sizeof(tempBuffer[0])) + { + XMLString::copyString(tempBuffer, namePtr); + tempBuffer[colonInd] = chNull; + prefPtr = tempBuffer; + } + else + { + janName.reset(XMLString::replicate(namePtr)); + janName[colonInd] = chNull; + prefPtr = janName.get(); + } + + suffPtr = prefPtr + colonInd + 1; + } + else + { + // No colon, so we just have a name with no prefix + suffPtr = namePtr; + } + + // Map the prefix to a URI id. We tell him that we are mapping an + // attr prefix, so any xmlns attrs at this level will not affect it. + const unsigned int uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute); + + // If the uri comes back as the xmlns or xml URI or its just a name + // and that name is 'xmlns', then we handle it specially. So set a + // boolean flag that lets us quickly below know which we are dealing + // with. + const bool isNSAttr = (uriId == fXMLNSNamespaceId) + || (uriId == fXMLNamespaceId) + || XMLString::equals(suffPtr, XMLUni::fgXMLNSString) + || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI); + + + // If its not a special case namespace attr of some sort, then we + // do normal checking and processing. 
+ XMLAttDef::AttTypes attType; + if (!isNSAttr) + { + // Some checking for attribute wild card first (for schema) + bool laxThisOne = false; + bool skipThisOne = false; + + XMLAttDef* attDefForWildCard = 0; + XMLAttDef* attDef = 0; + + if (fGrammarType == Grammar::SchemaGrammarType) { + + //retrieve the att def + attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, uriId); + + // if not found or faulted in - check for a matching wildcard attribute + // if no matching wildcard attribute, check (un)qualifed cases and flag + // appropriate errors + if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) { + + SchemaAttDef* attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard(); + + if (attWildCard) { + //if schema, see if we should lax or skip the validation of this attribute + if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) { + + SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId)); + if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) { + RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry(); + if (attRegistry) { + attDefForWildCard = attRegistry->get(suffPtr); + } + } + } + } + else { + // not found, see if the attDef should be qualified or not + if (uriId == fEmptyNamespaceId) { + attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace())); + if (fValidate + && attDef + && attDef->getCreateReason() != XMLAttDef::JustFaultIn) { + // the attribute should be qualified + fValidator->emitError + ( + XMLValid::AttributeNotQualified + , attDef->getFullName() + ); + } + } + else { + attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, fEmptyNamespaceId); + if (fValidate + && attDef + && attDef->getCreateReason() != XMLAttDef::JustFaultIn) { + // the attribute should be qualified + fValidator->emitError + ( + XMLValid::AttributeNotUnQualified + , attDef->getFullName() + ); + } 
+ } + } + } + } + + // Find this attribute within the parent element. We pass both + // the uriID/name and the raw QName buffer, since we don't know + // how the derived validator and its elements store attributes. + bool wasAdded = false; + if (!attDef) { + attDef = elemDecl->findAttr + ( + curPair->getKey() + , uriId + , suffPtr + , prefPtr + , XMLElementDecl::AddIfNotFound + , wasAdded + ); + } + + if (wasAdded) + { + // This is to tell the Validator that this attribute was + // faulted-in, was not an attribute in the attdef originally + attDef->setCreateReason(XMLAttDef::JustFaultIn); + } + + if (fValidate && !attDefForWildCard && !skipThisOne && !laxThisOne && + attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided()) + { + // + // Its not valid for this element, so issue an error if we are + // validating. + // + XMLBufBid bbURI(&fBufMgr); + XMLBuffer& bufURI = bbURI.getBuffer(); + + getURIText(uriId, bufURI); + + XMLBufBid bbMsg(&fBufMgr); + XMLBuffer& bufMsg = bbMsg.getBuffer(); + bufMsg.append(chOpenCurly); + bufMsg.append(bufURI.getRawBuffer()); + bufMsg.append(chCloseCurly); + bufMsg.append(suffPtr); + fValidator->emitError + ( + XMLValid::AttNotDefinedForElement + , bufMsg.getRawBuffer() + , elemDecl->getFullName() + ); + } + + // If its already provided, then there are more than one of + // this attribute in this start tag, so emit an error. + if (attDef->getProvided()) + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , attDef->getFullName() + , elemDecl->getFullName() + ); + } + else + { + attDef->setProvided(true); + } + + // Now normalize the raw value since we have the attribute type. We + // don't care about the return status here. If it failed, an error + // was issued, which is all we care about. + if (attDefForWildCard) { + normalizeAttValue + ( + attDefForWildCard + , curPair->getValue() + , normBuf + ); + + // If we found an attdef for this one, then lets validate it. 
+ if (fNormalizeData) + { + // normalize the attribute according to schema whitespace facet + XMLBufBid bbtemp(&fBufMgr); + XMLBuffer& tempBuf = bbtemp.getBuffer(); + + DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator(); + ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, normBuf.getRawBuffer(), tempBuf); + normBuf.set(tempBuf.getRawBuffer()); + } + + if (fValidate && !skipThisOne) { + fValidator->validateAttrValue + ( + attDefForWildCard + , normBuf.getRawBuffer() + , false + , elemDecl + ); + } + + // Save the type for later use + attType = attDefForWildCard->getType(); + } + else { + normalizeAttValue + ( + attDef + , curPair->getValue() + , normBuf + ); + + // If we found an attdef for this one, then lets validate it. + if (attDef->getCreateReason() != XMLAttDef::JustFaultIn) + { + if (fNormalizeData && (fGrammarType == Grammar::SchemaGrammarType)) + { + // normalize the attribute according to schema whitespace facet + XMLBufBid bbtemp(&fBufMgr); + XMLBuffer& tempBuf = bbtemp.getBuffer(); + + DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator(); + ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, normBuf.getRawBuffer(), tempBuf); + normBuf.set(tempBuf.getRawBuffer()); + } + + if (fValidate && !skipThisOne) + { + fValidator->validateAttrValue + ( + attDef + , normBuf.getRawBuffer() + , false + , elemDecl + ); + } + } + + // Save the type for later use + attType = attDef->getType(); + } + } + else + { + // Just normalize as CDATA + attType = XMLAttDef::CData; + normalizeAttRawValue + ( + curPair->getKey() + , curPair->getValue() + , normBuf + ); + } + + // Add this attribute to the attribute list that we use to pass them + // to the handler. We reuse its existing elements but expand it as + // required. 
+ XMLAttr* curAttr; + if (retCount >= curAttListSize) + { + curAttr = new XMLAttr + ( + uriId + , suffPtr + , prefPtr + , normBuf.getRawBuffer() + , attType + , true + ); + toFill.addElement(curAttr); + } + else + { + curAttr = toFill.elementAt(retCount); + curAttr->set + ( + uriId + , suffPtr + , prefPtr + , normBuf.getRawBuffer() + , attType + ); + curAttr->setSpecified(true); + } + + // Bump the count of attrs in the list + retCount++; + } + + // Now, if there are any attributes declared by this element, let's + // go through them and make sure that any required ones are provided, + // and fault in any fixed ones and defaulted ones that are not provided + // literally. + if (hasDefs) + { + // Check after all specified attrs are scanned + // (1) report error for REQUIRED attrs that are missing (V_TAGc) + // (2) add default attrs if missing (FIXED and NOT_FIXED) + XMLAttDefList& attDefList = elemDecl->getAttDefList(); + while (attDefList.hasMoreElements()) + { + // Get the current att def, for convenience and its def type + const XMLAttDef& curDef = attDefList.nextElement(); + const XMLAttDef::DefAttTypes defType = curDef.getDefaultType(); + + if (!curDef.getProvided()) + { + //the attributes is not provided + if (fValidate) + { + // If we are validating and its required, then an error + if ((defType == XMLAttDef::Required) || + (defType == XMLAttDef::Required_And_Fixed) ) + + { + fValidator->emitError + ( + XMLValid::RequiredAttrNotProvided + , curDef.getFullName() + ); + } + else if ((defType == XMLAttDef::Default) || + (defType == XMLAttDef::Fixed) ) + { + if (fStandalone && curDef.isExternal()) + { + // XML 1.0 Section 2.9 + // Document is standalone, so attributes must not be defaulted. + fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName()); + } + } + } + + // Fault in the value if needed, and bump the att count. 
+ // We have to + if ((defType == XMLAttDef::Default) + || (defType == XMLAttDef::Fixed)) + { + // Let the validator pass judgement on the attribute value + if (fValidate) + { + fValidator->validateAttrValue + ( + &curDef + , curDef.getValue() + , false + , elemDecl + ); + } + + XMLAttr* curAtt; + if (retCount >= curAttListSize) + { + curAtt = new XMLAttr; + fValidator->faultInAttr(*curAtt, curDef); + fAttrList->addElement(curAtt); + } + else + { + curAtt = fAttrList->elementAt(retCount); + fValidator->faultInAttr(*curAtt, curDef); + } + + // Indicate it was not explicitly specified and bump count + curAtt->setSpecified(false); + retCount++; + } + } + else + { + //attribute is provided + // (schema) report error for PROHIBITED attrs that are present (V_TAGc) + if (defType == XMLAttDef::Prohibited && fValidate) + fValidator->emitError + ( + XMLValid::ProhibitedAttributePresent + , curDef.getFullName() + ); + } + } + } + return retCount; +} + + +// This method will take a raw attribute value and normalize it according to +// the rules of the attribute type. It will put the resulting value into the +// passed buffer. +// +// This code assumes that escaped characters in the original value (via char +// refs) are prefixed by a 0xFFFF character. This is because some characters +// are legal if escaped only. And some escape chars are not subject to +// normalization rules. 
+bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef + , const XMLCh* const value + , XMLBuffer& toFill) +{ + // A simple state value for a whitespace processing state machine + enum States + { + InWhitespace + , InContent + }; + + // Get the type and name + const XMLAttDef::AttTypes type = attDef->getType(); + const XMLCh* const attrName = attDef->getFullName(); + + // Assume its going to go fine, and empty the target buffer in preperation + bool retVal = true; + toFill.reset(); + + // Get attribute def - to check to see if it's declared externally or not + bool isAttExternal = attDef->isExternal(); + + // Loop through the chars of the source value and normalize it according + // to the type. + States curState = InContent; + bool escaped; + bool firstNonWS = false; + XMLCh nextCh; + const XMLCh* srcPtr = value; + while (*srcPtr) + { + // Get the next character from the source. We have to watch for + // escaped characters (which are indicated by a 0xFFFF value followed + // by the char that was escaped.) + nextCh = *srcPtr; + escaped = (nextCh == 0xFFFF); + if (escaped) + nextCh = *++srcPtr; + + // If its not escaped, then make sure its not a < character, which is + // not allowed in attribute values. 
+ if (!escaped && (*srcPtr == chOpenAngle)) + { + emitError(XMLErrs::BracketInAttrValue, attrName); + retVal = false; + } + + if (type == XMLAttDef::CData || type > XMLAttDef::Notation) + { + if (!escaped) + { + if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D)) + { + // Check Validity Constraint for Standalone document declaration + // XML 1.0, Section 2.9 + if (fStandalone && fValidate && isAttExternal) + { + // Can't have a standalone document declaration of "yes" if attribute + // values are subject to normalisation + fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); + } + nextCh = chSpace; + } + } + } + else + { + if (curState == InWhitespace) + { + if (!XMLReader::isWhitespace(nextCh)) + { + if (firstNonWS) + toFill.append(chSpace); + curState = InContent; + firstNonWS = true; + } + else + { + srcPtr++; + continue; + } + } + else if (curState == InContent) + { + if (XMLReader::isWhitespace(nextCh)) + { + curState = InWhitespace; + srcPtr++; + + // Check Validity Constraint for Standalone document declaration + // XML 1.0, Section 2.9 + if (fStandalone && fValidate && isAttExternal) + { + if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || XMLReader::isWhitespace(*srcPtr)) + { + // Can't have a standalone document declaration of "yes" if attribute + // values are subject to normalisation + fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); + } + } + continue; + } + firstNonWS = true; + } + } + + // Add this char to the target buffer + toFill.append(nextCh); + + // And move up to the next character in the source + srcPtr++; + } + return retVal; +} + +// This method will just normalize the input value as CDATA without +// any standalone checking. 
+bool SGXMLScanner::normalizeAttRawValue( const XMLCh* const attrName + , const XMLCh* const value + , XMLBuffer& toFill) +{ + // Assume its going to go fine, and empty the target buffer in preperation + bool retVal = true; + toFill.reset(); + + // Loop through the chars of the source value and normalize it according + // to the type. + bool escaped; + XMLCh nextCh; + const XMLCh* srcPtr = value; + while (*srcPtr) + { + // Get the next character from the source. We have to watch for + // escaped characters (which are indicated by a 0xFFFF value followed + // by the char that was escaped.) + nextCh = *srcPtr; + escaped = (nextCh == 0xFFFF); + if (escaped) + nextCh = *++srcPtr; + + // If its not escaped, then make sure its not a < character, which is + // not allowed in attribute values. + if (!escaped && (*srcPtr == chOpenAngle)) + { + emitError(XMLErrs::BracketInAttrValue, attrName); + retVal = false; + } + + if (!escaped) + { + // NOTE: Yes this is a little redundant in that a 0x20 is + // replaced with an 0x20. But its faster to do this (I think) + // than checking for 9, A, and D separately. + if (XMLReader::isWhitespace(nextCh)) + nextCh = chSpace; + } + + // Add this char to the target buffer + toFill.append(nextCh); + + // And move up to the next character in the source + srcPtr++; + } + return retVal; +} + +unsigned int +SGXMLScanner::resolvePrefix( const XMLCh* const prefix + , const ElemStack::MapModes mode) +{ + // Watch for the special namespace prefixes. We always map these to + // special URIs. 'xml' gets mapped to the official URI that its defined + // to map to by the NS spec. xmlns gets mapped to a special place holder + // URI that we define (so that it maps to something checkable.) + if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) + return fXMLNSNamespaceId; + else if (XMLString::equals(prefix, XMLUni::fgXMLString)) + return fXMLNamespaceId; + + // Ask the element stack to search up itself for a mapping for the + // passed prefix. 
+ bool unknown; + unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); + + // If it was unknown, then the URI was faked in but we have to issue an error + if (unknown) + emitError(XMLErrs::UnknownPrefix, prefix); + + return uriId; +} + +unsigned int +SGXMLScanner::resolvePrefix( const XMLCh* const prefix + , XMLBuffer& bufToFill + , const ElemStack::MapModes mode) +{ + // Watch for the special namespace prefixes. We always map these to + // special URIs. 'xml' gets mapped to the official URI that its defined + // to map to by the NS spec. xmlns gets mapped to a special place holder + // URI that we define (so that it maps to something checkable.) + if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) + return fXMLNSNamespaceId; + else if (XMLString::equals(prefix, XMLUni::fgXMLString)) + return fXMLNamespaceId; + + // Ask the element stack to search up itself for a mapping for the + // passed prefix. + bool unknown; + unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); + + // If it was unknown, then the URI was faked in but we have to issue an error + if (unknown) + emitError(XMLErrs::UnknownPrefix, prefix); + + getURIText(uriId,bufToFill); + + return uriId; +} + + +// This method will reset the scanner data structures, and related plugged +// in stuff, for a new scan session. We get the input source for the primary +// XML entity, create the reader for it, and push it on the stack so that +// upon successful return from here we are ready to go. +void SGXMLScanner::scanReset(const InputSource& src) +{ + + // This call implicitly tells us that we are going to reuse the scanner + // if it was previously used. So tell the validator to reset itself. + // + // But, if the fUseCacheGrammar flag is set, then don't reset it. + // + // NOTE: The ReaderMgr is flushed on the way out, because that is + // required to insure that files are closed. 
+ fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar); + fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar); + + fGrammar = fSchemaGrammar; + fGrammarType = Grammar::DTDGrammarType; + fRootGrammar = 0; + + fValidator->setGrammar(fGrammar); + if (fValidatorFromUser) { + + ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter); + ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver); + ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal); + } + + if (fValScheme == Val_Auto) { + fValidate = false; + } + + // And for all installed handlers, send reset events. This gives them + // a chance to flush any cached data. + if (fDocHandler) + fDocHandler->resetDocument(); + if (fEntityHandler) + fEntityHandler->resetEntities(); + if (fErrorReporter) + fErrorReporter->resetErrors(); + + // Clear out the id reference list + fIDRefList->removeAll(); + + // Reset IdentityConstraints + fValueStoreCache->startDocument(); + fMatcherStack->clear(); + + // Reset the element stack, and give it the latest ids for the special + // URIs it has to know about. + fElemStack.reset + ( + fEmptyNamespaceId + , fUnknownNamespaceId + , fXMLNamespaceId + , fXMLNSNamespaceId + ); + + if (!fSchemaNamespaceId) + fSchemaNamespaceId = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI); + + // Reset some status flags + fInException = false; + fStandalone = false; + fErrorCount = 0; + fHasNoDTD = true; + fSeeXsi = false; + fDoNamespaces = true; + fDoSchema = true; + + // Reset the validators + fSchemaValidator->reset(); + fSchemaValidator->setErrorReporter(fErrorReporter); + fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal); + fSchemaValidator->setGrammarResolver(fGrammarResolver); + if (fValidatorFromUser) + fValidator->reset(); + + // Handle the creation of the XML reader object for this input source. + // This will provide us with transcoding and basic lexing services. 
+ XMLReader* newReader = fReaderMgr.createReader + ( + src + , true + , XMLReader::RefFrom_NonLiteral + , XMLReader::Type_General + , XMLReader::Source_External + , fCalculateSrcOfs + ); + + if (!newReader) { + if (src.getIssueFatalErrorIfNotFound()) + ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId()); + else + ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId()); + } + + // Push this read onto the reader manager + fReaderMgr.pushReader(newReader, 0); +} + + +// This method is called between markup in content. It scans for character +// data that is sent to the document handler. It watches for any markup +// characters that would indicate that the character data has ended. It also +// handles expansion of general and character entities. +// +// sendData() is a local static helper for this method which handles some +// code that must be done in three different places here. +void SGXMLScanner::sendCharData(XMLBuffer& toSend) +{ + // If no data in the buffer, then nothing to do + if (toSend.isEmpty()) + return; + + // We do different things according to whether we are validating or + // not. If not, its always just characters; else, it depends on the + // current element's content model. + if (fValidate) + { + // Get the raw data we need for the callback + const XMLCh* const rawBuf = toSend.getRawBuffer(); + const unsigned int len = toSend.getLen(); + + // And see if the current element is a 'Children' style content model + const ElemStack::StackElem* topElem = fElemStack.topElement(); + + // Get the character data opts for the current element + XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts(); + + if (charOpts == XMLElementDecl::NoCharData) + { + // They definitely cannot handle any type of char data + fValidator->emitError(XMLValid::NoCharDataInCM); + } + else if (XMLReader::isAllSpaces(rawBuf, len)) + { + // Its all spaces. 
So, if they can take spaces, then send it + // as ignorable whitespace. If they can handle any char data + // send it as characters. + if (charOpts == XMLElementDecl::SpacesOk) { + if (fDocHandler) + fDocHandler->ignorableWhitespace(rawBuf, len, false); + } + else if (charOpts == XMLElementDecl::AllCharData) + { + // The normalized data can only be as large as the + // original size, so this will avoid allocating way + // too much or too little memory. + XMLBuffer toFill(len+1); + toFill.set(rawBuf); + + if (fNormalizeData) { + // normalize the character according to schema whitespace facet + XMLBufBid bbtemp(&fBufMgr); + XMLBuffer& tempBuf = bbtemp.getBuffer(); + + DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator(); + ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(), tempBuf); + toFill.set(tempBuf.getRawBuffer()); + } + + // tell the schema validation about the character data for checkContent later + ((SchemaValidator*) fValidator)->setDatatypeBuffer(toFill.getRawBuffer()); + + // call all active identity constraints + unsigned int count = fMatcherStack->getMatcherCount(); + + for (unsigned int i = 0; i < count; i++) { + fMatcherStack->getMatcherAt(i)->docCharacters(toFill.getRawBuffer(), toFill.getLen()); + } + + if (fDocHandler) + fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false); + } + } + else + { + // If they can take any char data, then send it. Otherwise, they + // can only handle whitespace and can't handle this stuff so + // issue an error. + if (charOpts == XMLElementDecl::AllCharData) + { + // The normalized data can only be as large as the + // original size, so this will avoid allocating way + // too much or too little memory. 
+ XMLBuffer toFill(len+1); + toFill.set(rawBuf); + + if (fNormalizeData) { + // normalize the character according to schema whitespace facet + XMLBufBid bbtemp(&fBufMgr); + XMLBuffer& tempBuf = bbtemp.getBuffer(); + + DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator(); + ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(), tempBuf); + toFill.set(tempBuf.getRawBuffer()); + } + + // tell the schema validation about the character data for checkContent later + ((SchemaValidator*) fValidator)->setDatatypeBuffer(toFill.getRawBuffer()); + + // call all active identity constraints + unsigned int count = fMatcherStack->getMatcherCount(); + + for (unsigned int i = 0; i < count; i++) { + fMatcherStack->getMatcherAt(i)->docCharacters(toFill.getRawBuffer(), toFill.getLen()); + } + + if (fDocHandler) + fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false); + } + else + { + fValidator->emitError(XMLValid::NoCharDataInCM); + } + } + } + else + { + // call all active identity constraints + unsigned int count = fMatcherStack->getMatcherCount(); + + for (unsigned int i = 0; i < count; i++) { + fMatcherStack->getMatcherAt(i)->docCharacters(toSend.getRawBuffer(), toSend.getLen()); + } + + // Always assume its just char data if not validating + if (fDocHandler) + fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false); + } + + // Reset buffer + toSend.reset(); +} + + + +// This method is called with a key/value string pair that represents an +// xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the +// current top of the element stack based on this data. We know that when +// we get here, that it is one of these forms, so we don't bother confirming +// it. +// +// But we have to ensure +// 1. xxx is not xmlns +// 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa +// 3. yyy is not XMLUni::fgXMLNSURIName +// 4. 
if xxx is not null, then yyy cannot be an empty string. +void SGXMLScanner::updateNSMap(const XMLCh* const attrName + , const XMLCh* const attrValue) +{ + // We need a buffer to normalize the attribute value into + XMLBufBid bbNormal(&fBufMgr); + XMLBuffer& normalBuf = bbNormal.getBuffer(); + + // Normalize the value into the passed buffer. In this case, we don't + // care about the return value. An error was issued for the error, which + // is all we care about here. + normalizeAttRawValue(attrName, attrValue, normalBuf); + XMLCh* namespaceURI = normalBuf.getRawBuffer(); + + // We either have the default prefix (""), or we point it into the attr + // name parameter. Note that the xmlns is not the prefix we care about + // here. To us, the 'prefix' is really the local part of the attrName + // parameter. + // + // Check 1. xxx is not xmlns + // 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa + // 3. yyy is not XMLUni::fgXMLNSURIName + // 4. if xxx is not null, then yyy cannot be an empty string. + const XMLCh* prefPtr = XMLUni::fgZeroLenString; + const unsigned int colonOfs = XMLString::indexOf(attrName, chColon); + if (colonOfs != -1) { + prefPtr = &attrName[colonOfs + 1]; + + if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString)) + emitError(XMLErrs::NoUseOfxmlnsAsPrefix); + else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) { + if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) + emitError(XMLErrs::PrefixXMLNotMatchXMLURI); + } + + if (!namespaceURI || !*namespaceURI) + emitError(XMLErrs::NoEmptyStrNamespace, attrName); + } + + if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName)) + emitError(XMLErrs::NoUseOfxmlnsURI); + else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) { + if (!XMLString::equals(prefPtr, XMLUni::fgXMLString)) + emitError(XMLErrs::XMLURINotMatchXMLPrefix); + } + + // Ok, we have to get the unique id for the attribute value, which is the + // URI that this value should be mapped to. 
The validator has the + // namespace string pool, so we ask him to find or add this new one. Then + // we ask the element stack to add this prefix to URI Id mapping. + fElemStack.addPrefix + ( + prefPtr + , fURIStringPool->addOrFind(namespaceURI) + ); +} + +void SGXMLScanner::scanRawAttrListforNameSpaces(const RefVectorOf<KVStringPair>* theRawAttrList, int attCount) +{ + // Make an initial pass through the list and find any xmlns attributes or + // schema attributes. + // When we find one, send it off to be used to update the element stack's + // namespace mappings. + int index = 0; + for (index = 0; index < attCount; index++) + { + // each attribute has the prefix:suffix="value" + const KVStringPair* curPair = fRawAttrList->elementAt(index); + const XMLCh* rawPtr = curPair->getKey(); + + // If either the key begins with "xmlns:" or its just plain + // "xmlns", then use it to update the map. + if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6) + || XMLString::equals(rawPtr, XMLUni::fgXMLNSString)) + { + const XMLCh* valuePtr = curPair->getValue(); + + updateNSMap(rawPtr, valuePtr); + + // if the schema URI is seen in the the valuePtr, set the boolean seeXsi + if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) { + fSeeXsi = true; + } + } + } + + // walk through the list again to deal with "xsi:...." + if (fSeeXsi) + { + // Schema Xsi Type yyyy (e.g. 
xsi:type="yyyyy") + XMLBufBid bbXsi(&fBufMgr); + XMLBuffer& fXsiType = bbXsi.getBuffer(); + + QName attName; + + for (index = 0; index < attCount; index++) + { + // each attribute has the prefix:suffix="value" + const KVStringPair* curPair = fRawAttrList->elementAt(index); + const XMLCh* rawPtr = curPair->getKey(); + + attName.setName(rawPtr, fEmptyNamespaceId); + const XMLCh* prefPtr = attName.getPrefix(); + + // if schema URI has been seen, scan for the schema location and uri + // and resolve the schema grammar; or scan for schema type + if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) { + + const XMLCh* valuePtr = curPair->getValue(); + const XMLCh* suffPtr = attName.getLocalPart(); + + if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCACTION)) + parseSchemaLocation(valuePtr); + else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCACTION)) + resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString); + + if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE)) { + fXsiType.set(valuePtr); + } + else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL) + && fValidator && fValidator->handlesSchema() + && XMLString::equals(valuePtr, SchemaSymbols::fgATTVAL_TRUE)) { + ((SchemaValidator*)fValidator)->setNillable(true); + } + } + } + + if (fValidator && fValidator->handlesSchema()) { + if (!fXsiType.isEmpty()) { + int colonPos = -1; + unsigned int uriId = resolveQName ( + fXsiType.getRawBuffer() + , fPrefixBuf + , ElemStack::Mode_Element + , colonPos + ); + ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId); + } + } + } +} + +void SGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr) +{ + RefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr); + unsigned int size = schemaLocation->size(); + if (size % 2 != 0 ) { + emitError(XMLErrs::BadSchemaLocation); + } else { + for(unsigned int i=0; 
i<size; i=i+2) { + resolveSchemaGrammar(schemaLocation->elementAt(i+1), schemaLocation->elementAt(i)); + } + } + + delete schemaLocation; +} + +void SGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri) { + + Grammar* grammar = fGrammarResolver->getGrammar(uri); + + if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType) { + XSDDOMParser parser; + + parser.setValidationScheme(XercesDOMParser::Val_Never); + parser.setDoNamespaces(true); + parser.setUserEntityHandler(fEntityHandler); + parser.setUserErrorReporter(fErrorReporter); + + // Create a buffer for expanding the system id + XMLBufBid bbSys(&fBufMgr); + XMLBuffer& expSysId = bbSys.getBuffer(); + XMLBuffer& normalizedSysId = bbSys.getBuffer(); + + normalizeURI(loc, normalizedSysId); + + // Allow the entity handler to expand the system id if they choose + // to do so. + InputSource* srcToFill = 0; + const XMLCh* normalizedURI = normalizedSysId.getRawBuffer(); + if (fEntityHandler) + { + if (!fEntityHandler->expandSystemId(normalizedURI, expSysId)) + expSysId.set(normalizedURI); + + srcToFill = fEntityHandler->resolveEntity( XMLUni::fgZeroLenString + , expSysId.getRawBuffer()); + } + else + { + expSysId.set(normalizedURI); + } + + // If they didn't create a source via the entity handler, then we + // have to create one on our own. + if (!srcToFill) + { + ReaderMgr::LastExtEntityInfo lastInfo; + fReaderMgr.getLastExtEntityInfo(lastInfo); + + try + { + XMLURL urlTmp(lastInfo.systemId, expSysId.getRawBuffer()); + if (urlTmp.isRelative()) + { + ThrowXML + ( + MalformedURLException + , XMLExcepts::URL_NoProtocolPresent + ); + } + srcToFill = new URLInputSource(urlTmp); + } + + catch(const MalformedURLException&) + { + // Its not a URL, so lets assume its a local file name. 
+ srcToFill = new LocalFileInputSource + ( + lastInfo.systemId + , expSysId.getRawBuffer() + ); + } + } + + // Put a janitor on the input source + Janitor<InputSource> janSrc(srcToFill); + + // Should just issue warning if the schema is not found + const bool flag = srcToFill->getIssueFatalErrorIfNotFound(); + srcToFill->setIssueFatalErrorIfNotFound(false); + + parser.parse(*srcToFill); + + // Reset the InputSource + srcToFill->setIssueFatalErrorIfNotFound(flag); + + if (parser.getSawFatal() && fExitOnFirstFatal) + emitError(XMLErrs::SchemaScanFatalError); + + DOMDocument* document = parser.getDocument(); //Our Grammar + + if (document != 0) { + + DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema + if (root != 0) + { + const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE); + if (!XMLString::equals(newUri, uri)) { + if (fValidate) + fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri); + grammar = fGrammarResolver->getGrammar(newUri); + } + + if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType) { + + // Since we have seen a grammar, set our validation flag + // at this point if the validation scheme is auto + if (fValScheme == Val_Auto && !fValidate) { + fValidate = true; + fElemStack.setValidationFlag(fValidate); + } + + grammar = new SchemaGrammar(); + TraverseSchema traverseSchema(root, fURIStringPool, (SchemaGrammar*) grammar, fGrammarResolver, this, srcToFill->getSystemId(), fEntityHandler, fErrorReporter); + + if (fGrammarType == Grammar::DTDGrammarType) { + fGrammar = grammar; + fGrammarType = Grammar::SchemaGrammarType; + fValidator->setGrammar(fGrammar); + } + + if (fValidate) { + // validate the Schema scan so far + fValidator->preContentValidation(false); + } + } + } + } + } + else { + + // Since we have seen a grammar, set our validation flag + // at this point if the validation scheme is auto + if (fValScheme == Val_Auto && !fValidate) { + fValidate = true; + 
fElemStack.setValidationFlag(fValidate); + } + + // we have seen a schema, so set up the fValidator as fSchemaValidator + if (fGrammarType == Grammar::DTDGrammarType) { + fGrammar = grammar; + fGrammarType = Grammar::SchemaGrammarType; + fValidator->setGrammar(fGrammar); + } + } +} + +InputSource* SGXMLScanner::resolveSystemId(const XMLCh* const sysId) +{ + // Create a buffer for expanding the system id + XMLBufBid bbSys(&fBufMgr); + XMLBuffer& expSysId = bbSys.getBuffer(); + + // Allow the entity handler to expand the system id if they choose + // to do so. + InputSource* srcToFill = 0; + if (fEntityHandler) + { + if (!fEntityHandler->expandSystemId(sysId, expSysId)) + expSysId.set(sysId); + + srcToFill = fEntityHandler->resolveEntity( XMLUni::fgZeroLenString + , expSysId.getRawBuffer()); + } + else + { + expSysId.set(sysId); + } + + // If they didn't create a source via the entity handler, then we + // have to create one on our own. + if (!srcToFill) + { + ReaderMgr::LastExtEntityInfo lastInfo; + fReaderMgr.getLastExtEntityInfo(lastInfo); + + try + { + XMLURL urlTmp(lastInfo.systemId, expSysId.getRawBuffer()); + if (urlTmp.isRelative()) + { + ThrowXML + ( + MalformedURLException + , XMLExcepts::URL_NoProtocolPresent + ); + } + srcToFill = new URLInputSource(urlTmp); + } + catch(const MalformedURLException&) + { + // Its not a URL, so lets assume its a local file name. 
+ srcToFill = new LocalFileInputSource + ( + lastInfo.systemId + , expSysId.getRawBuffer() + ); + } + } + + return srcToFill; +} + + +// --------------------------------------------------------------------------- +// SGXMLScanner: Private grammar preparsing methods +// --------------------------------------------------------------------------- +Grammar* SGXMLScanner::loadXMLSchemaGrammar(const InputSource& src, + const bool toCache) +{ + // Reset the validators + fSchemaValidator->reset(); + fSchemaValidator->setErrorReporter(fErrorReporter); + fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal); + + if (fValidatorFromUser) + fValidator->reset(); + + XSDDOMParser parser; + + parser.setValidationScheme(XercesDOMParser::Val_Never); + parser.setDoNamespaces(true); + parser.setUserEntityHandler(fEntityHandler); + parser.setUserErrorReporter(fErrorReporter); + + // Should just issue warning if the schema is not found + const bool flag = src.getIssueFatalErrorIfNotFound(); + ((InputSource&) src).setIssueFatalErrorIfNotFound(false); + + parser.parse(src); + + // Reset the InputSource + ((InputSource&) src).setIssueFatalErrorIfNotFound(flag); + + if (parser.getSawFatal() && fExitOnFirstFatal) + emitError(XMLErrs::SchemaScanFatalError); + + DOMDocument* document = parser.getDocument(); //Our Grammar + + if (document != 0) { + + DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema + if (root != 0) + { + SchemaGrammar* grammar = new SchemaGrammar(); + TraverseSchema traverseSchema(root, fURIStringPool, (SchemaGrammar*) grammar, fGrammarResolver, this, src.getSystemId(), fEntityHandler, fErrorReporter); + + if (fValidate) { + // validate the Schema scan so far + fValidator->setGrammar(grammar); + fValidator->preContentValidation(false, true); + } + + if (toCache) { + fGrammarResolver->cacheGrammars(); + } + + return grammar; + } + } + + return 0; +} + + + +// 
// ---------------------------------------------------------------------------
//  SGXMLScanner: Private parsing methods
// ---------------------------------------------------------------------------

//  This method is called to do a raw scan of an attribute value. It does not
//  do normalization (since we don't know their types yet.) It just scans the
//  value and does entity expansion.
//
//  End of entity's must be dealt with here. During DTD scan, they can come
//  from external entities. During content, they can come from any entity.
//  We just eat the end of entity and continue with our scan until we come
//  to the closing quote. If an unterminated value causes us to go through
//  subsequent entities, that will cause errors back in the calling code,
//  but there's little we can do about it here.
//
//  attrName - name of the attribute, used only in the invalid character
//             error message.
//  toFill   - reset on entry; receives the scanned value. A character that
//             scanEntityRef() reports as escaped is preceded by 0xFFFF.
//
//  Returns true once the closing quote is found in the reader the value
//  started in. Returns false if no opening quote is present, or if a
//  matching quote shows up after the scan has fallen back into a lower
//  numbered reader. Throws UnexpectedEOFException when the input ends.
bool SGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
{
    // Reset the target buffer
    toFill.reset();

    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr.skipIfQuote(quoteCh))
        return false;

    //  We have to get the current reader because we have to ignore closing
    //  quotes until we hit the same reader again.
    const unsigned int curReader = fReaderMgr.getCurrentReaderNum();

    //  Loop until we get the attribute value. Note that we use a double
    //  loop here to avoid the setup/teardown overhead of the exception
    //  handler on every round.
    XMLCh   nextCh;
    XMLCh   secondCh = 0;
    bool    gotLeadingSurrogate = false;
    bool    escaped;
    while (true)
    {
        try
        {
            while(true)
            {
                // Get another char. Use second char if one is waiting
                // (scanEntityRef() can hand back two code units at once)
                if (secondCh)
                {
                    nextCh = secondCh;
                    secondCh = 0;
                }
                else
                {
                    nextCh = fReaderMgr.getNextChar();
                }

                // A zero char is treated as the end of the input
                if (!nextCh)
                    ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

                //  Check for our ending quote. It has to be in the same entity
                //  as where we started. Quotes in nested entities are ignored.
                if (nextCh == quoteCh)
                {
                    if (curReader == fReaderMgr.getCurrentReaderNum())
                        return true;

                    // Watch for spillover into a previous entity
                    if (curReader > fReaderMgr.getCurrentReaderNum())
                    {
                        emitError(XMLErrs::PartialMarkupInEntity);
                        return false;
                    }
                }

                //  Check for an entity ref . We ignore the empty flag in
                //  this one.
                escaped = false;
                if (nextCh == chAmpersand)
                {
                    // If it was not returned directly, then jump back up
                    if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
                    {
                        gotLeadingSurrogate = false;
                        continue;
                    }
                }

                // Deal with surrogate pairs
                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                {
                    //  Its a leading surrogate. If we already got one, then
                    //  issue an error, else set leading flag to make sure that
                    //  we look for a trailing next time.
                    if (gotLeadingSurrogate)
                    {
                        emitError(XMLErrs::Expected2ndSurrogateChar);
                    }
                    else
                        gotLeadingSurrogate = true;
                }
                else
                {
                    //  If its a trailing surrogate, make sure that we are
                    //  prepared for that. Else, its just a regular char so make
                    //  sure that we were not expected a trailing surrogate.
                    if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                    {
                        // Its trailing, so make sure we were expecting it
                        if (!gotLeadingSurrogate)
                            emitError(XMLErrs::Unexpected2ndSurrogateChar);
                    }
                    else
                    {
                        //  Its just a char, so make sure we were not expecting a
                        //  trailing surrogate.
                        if (gotLeadingSurrogate) {
                            emitError(XMLErrs::Expected2ndSurrogateChar);
                        }
                        // Its got to at least be a valid XML character
                        else if (!XMLReader::isXMLChar(nextCh))
                        {
                            // Format the offending code unit in base 16
                            // for the error message
                            XMLCh tmpBuf[9];
                            XMLString::binToText
                            (
                                nextCh
                                , tmpBuf
                                , 8
                                , 16
                            );
                            emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                        }
                    }
                    gotLeadingSurrogate = false;
                }

                //  If it was escaped, then put in a 0xFFFF value. This will
                //  be used later during validation and normalization of the
                //  value to know that the following character was via an
                //  escape char.
                if (escaped)
                    toFill.append(0xFFFF);

                // Else add it to the buffer
                // (the character itself is appended in both cases)
                toFill.append(nextCh);
            }
        }
        catch(const EndOfEntityException&)
        {
            // Just eat it and continue.
            gotLeadingSurrogate = false;
            escaped = false;
        }
    }
    // Not reached: the loops above are only left through return or throw
    return true;
}


//  This method scans a CDATA section. It collects the character into one
//  of the temp buffers and calls the document handler, if any, with the
//  characters. It assumes that the <![CDATA string has been scanned before
//  this call.
//
//  The collected text is also passed to every active identity constraint
//  matcher before the document handler is called. Throws
//  UnexpectedEOFException, after emitting UnterminatedCDATASection, when the
//  input ends before the closing sequence.
void SGXMLScanner::scanCDSection()
{
    //  This is the CDATA section opening sequence, minus the '<' character.
    //  We use this to watch for nested CDATA sections, which are illegal.
    //  NOTE(review): nothing in this function references CDataPrefix, so no
    //  such check is actually performed here.
    static const XMLCh CDataPrefix[] =
    {
        chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A
        , chLatin_T, chLatin_A, chOpenSquare, chNull
    };

    // The two characters that must follow a ']' to close the section
    static const XMLCh CDataClose[] =
    {
        chCloseSquare, chCloseAngle, chNull
    };

    //  The next character should be the opening square bracket. If not
    //  issue an error, but then try to recover by skipping any whitespace
    //  and checking again.
    if (!fReaderMgr.skippedChar(chOpenSquare))
    {
        emitError(XMLErrs::ExpectedOpenSquareBracket);
        fReaderMgr.skipPastSpaces();

        // If we still don't find it, then give up, else keep going
        if (!fReaderMgr.skippedChar(chOpenSquare))
            return;
    }

    // Get a buffer for this
    XMLBufBid bbCData(&fBufMgr);

    //  We just scan forward until we hit the end of CDATA section sequence.
    //  CDATA is effectively a big escape mechanism so we don't treat markup
    //  characters specially here.
    bool            emittedError = false;
    while (true)
    {
        const XMLCh nextCh = fReaderMgr.getNextChar();

        // Watch for unexpected end of file
        if (!nextCh)
        {
            emitError(XMLErrs::UnterminatedCDATASection);
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        }

        if (fValidate && fStandalone && (XMLReader::isWhitespace(nextCh)))
        {
            // This document is standalone; this ignorable CDATA whitespace is forbidden.
            // XML 1.0, Section 2.9
            // And see if the current element is a 'Children' style content model
            const ElemStack::StackElem* topElem = fElemStack.topElement();

            if (topElem->fThisElement->isExternal()) {

                // Get the character data opts for the current element
                XMLElementDecl::CharDataOpts charOpts =  topElem->fThisElement->getCharDataOpts();

                if (charOpts == XMLElementDecl::SpacesOk) // Element Content
                {
                    // Error - standalone should have a value of "no" as whitespace detected in an
                    // element type with element content whose element declaration was external
                    fValidator->emitError(XMLValid::NoWSForStandalone);
                }
            }
        }

        //  If this is a close square bracket it could be our closing
        //  sequence.
        if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
        {
            // call all active identity constraints
            unsigned int count = fMatcherStack->getMatcherCount();

            for (unsigned int i = 0; i < count; i++) {
                fMatcherStack->getMatcherAt(i)->docCharacters(bbCData.getRawBuffer(), bbCData.getLen());
            }

            // If we have a doc handler, call it
            if (fDocHandler)
            {
                fDocHandler->docCharacters
                (
                    bbCData.getRawBuffer()
                    , bbCData.getLen()
                    , true
                );
            }

            // And we are done
            break;
        }

        //  Make sure its a valid character. But if we've emitted an error
        //  already, don't bother with the overhead since we've already told
        //  them about it.
        if (!emittedError)
        {
            if (!XMLReader::isXMLChar(nextCh))
            {
                XMLCh tmpBuf[9];
                XMLString::binToText
                (
                    nextCh
                    , tmpBuf
                    , 8
                    , 16
                );
                emitError(XMLErrs::InvalidCharacter, tmpBuf);
                emittedError = true;
            }
        }

        //  NOTE(review): this check runs for every character of the section
        //  and is not guarded by a flag, so the validity error below is
        //  emitted once per character.
        if (fValidate) {
            // And see if the current element is a 'Children' style content model
            const ElemStack::StackElem* topElem = fElemStack.topElement();

            // Get the character data opts for the current element
            XMLElementDecl::CharDataOpts charOpts =  topElem->fThisElement->getCharDataOpts();

            if (charOpts != XMLElementDecl::AllCharData)
            {
                // They definitely cannot handle any type of char data
                fValidator->emitError(XMLValid::NoCharDataInCM);
            }
        }

        // Add it to the buffer
        bbCData.append(nextCh);
    }
}


void SGXMLScanner::scanCharData(XMLBuffer& toUse)
{
    //  We have to watch for the stupid ]]> sequence, which is illegal in
    //  character data. So this is a little state machine that handles that.
    enum States
    {
        State_Waiting
        , State_GotOne
        , State_GotTwo
    };

    // Reset the buffer before we start
    toUse.reset();

    // Turn on the 'throw at end' flag of the reader manager
    ThrowEOEJanitor jan(&fReaderMgr, true);

    //  In order to be more efficient we have to use kind of a deeply nested
    //  set of blocks here. The outer block puts on a try and catches end of
    //  entity exceptions. The inner loop is the per-character loop. If we
    //  put the try inside the inner loop, it would work but would require
    //  the exception handling code setup/teardown code to be invoked for
    //  each character.
    XMLCh   nextCh;
    XMLCh   secondCh = 0;
    States  curState = State_Waiting;
    bool    escaped = false;
    bool    gotLeadingSurrogate = false;
    bool    notDone = true;
    while (notDone)
    {
        try
        {
            while (true)
            {
                if (secondCh)
                {
                    nextCh = secondCh;
                    secondCh = 0;
                }
                else
                {
                    //  Eat through as many plain content characters as possible without
                    //  needing special handling.
Moving most content characters here, + // in this one call, rather than running the overall loop once + // per content character, is a speed optimization. + if (curState == State_Waiting && !gotLeadingSurrogate) + { + fReaderMgr.movePlainContentChars(toUse); + } + + // Try to get another char from the source + // The code from here on down covers all contengencies, + if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) + { + // If we were waiting for a trailing surrogate, its an error + if (gotLeadingSurrogate) + emitError(XMLErrs::Expected2ndSurrogateChar); + + notDone = false; + break; + } + } + + // Watch for a reference. Note that the escapement mechanism + // is ignored in this content. + if (nextCh == chAmpersand) + { + sendCharData(toUse); + + // Turn off the throwing at the end of entity during this + ThrowEOEJanitor jan(&fReaderMgr, false); + + if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned) + { + gotLeadingSurrogate = false; + continue; + } + } + else + { + escaped = false; + } + + // Keep the state machine up to date + if (!escaped) + { + if (nextCh == chCloseSquare) + { + if (curState == State_Waiting) + curState = State_GotOne; + else if (curState == State_GotOne) + curState = State_GotTwo; + } + else if (nextCh == chCloseAngle) + { + if (curState == State_GotTwo) + emitError(XMLErrs::BadSequenceInCharData); + curState = State_Waiting; + } + else + { + curState = State_Waiting; + } + } + else + { + curState = State_Waiting; + } + + // Deal with surrogate pairs + if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + { + // Its a leading surrogate. If we already got one, then + // issue an error, else set leading flag to make sure that + // we look for a trailing next time. + if (gotLeadingSurrogate) + emitError(XMLErrs::Expected2ndSurrogateChar); + else + gotLeadingSurrogate = true; + } + else + { + // If its a trailing surrogate, make sure that we are + // prepared for that. 
Else, its just a regular char so make + // sure that we were not expected a trailing surrogate. + if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) + { + // Its trailing, so make sure we were expecting it + if (!gotLeadingSurrogate) + emitError(XMLErrs::Unexpected2ndSurrogateChar); + } + else + { + // Its just a char, so make sure we were not expecting a + // trailing surrogate. + if (gotLeadingSurrogate) + emitError(XMLErrs::Expected2ndSurrogateChar); + + // Make sure the returned char is a valid XML char + if (!XMLReader::isXMLChar(nextCh)) + { + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacter, tmpBuf); + } + } + gotLeadingSurrogate = false; + } + + // Add this char to the buffer + toUse.append(nextCh); + } + } + catch(const EndOfEntityException& toCatch) + { + // Some entity ended, so we have to send any accumulated + // chars and send an end of entity event. + sendCharData(toUse); + gotLeadingSurrogate = false; + + if (fDocHandler) + fDocHandler->endEntityReference(toCatch.getEntity()); + } + } + + // Check the validity constraints as per XML 1.0 Section 2.9 + if (fValidate && fStandalone) + { + // See if the text contains whitespace + // Get the raw data we need for the callback + const XMLCh* rawBuf = toUse.getRawBuffer(); + const unsigned int len = toUse.getLen(); + const bool isSpaces = XMLReader::containsWhiteSpace(rawBuf, len); + + if (isSpaces) + { + // And see if the current element is a 'Children' style content model + const ElemStack::StackElem* topElem = fElemStack.topElement(); + + if (topElem->fThisElement->isExternal()) { + + // Get the character data opts for the current element + XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts(); + + if (charOpts == XMLElementDecl::SpacesOk) // => Element Content + { + // Error - standalone should have a value of "no" as whitespace detected in an + // element type with element content whose element declaration was 
external + // + fValidator->emitError(XMLValid::NoWSForStandalone); + } + } + } + } + // Send any char data that we accumulated into the buffer + sendCharData(toUse); +} + + +// This method will scan a general/character entity ref. It will either +// expand a char ref and return it directly, or push a reader for a general +// entity. +// +// The return value indicates whether the char parameters hold the value +// or whether the value was pushed as a reader, or that it failed. +// +// The escaped flag tells the caller whether the returned parameter resulted +// from a character reference, which escapes the character in some cases. It +// only makes any difference if the return value indicates the value was +// returned directly. +SGXMLScanner::EntityExpRes +SGXMLScanner::scanEntityRef( const bool inAttVal + , XMLCh& firstCh + , XMLCh& secondCh + , bool& escaped) +{ + // Assume no escape + secondCh = 0; + escaped = false; + + // We have to insure that its all in one entity + const unsigned int curReader = fReaderMgr.getCurrentReaderNum(); + + // If the next char is a pound, then its a character reference and we + // need to expand it always. + if (fReaderMgr.skippedChar(chPound)) + { + // Its a character reference, so scan it and get back the numeric + // value it represents. + if (!scanCharRef(firstCh, secondCh)) + return EntityExp_Failed; + + escaped = true; + + if (curReader != fReaderMgr.getCurrentReaderNum()) + emitError(XMLErrs::PartialMarkupInEntity); + + return EntityExp_Returned; + } + + // Expand it since its a normal entity ref + XMLBufBid bbName(&fBufMgr); + if (!fReaderMgr.getName(bbName.getBuffer())) + { + emitError(XMLErrs::ExpectedEntityRefName); + return EntityExp_Failed; + } + + // Next char must be a semi-colon. But if its not, just emit + // an error and try to continue. 
+ if (!fReaderMgr.skippedChar(chSemiColon)) + emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer()); + + // Make sure we ended up on the same entity reader as the & char + if (curReader != fReaderMgr.getCurrentReaderNum()) + emitError(XMLErrs::PartialMarkupInEntity); + + // Look up the name in the general entity pool + // If it does not exist, then obviously an error + if (!fEntityTable->containsKey(bbName.getRawBuffer())) + { + // XML 1.0 Section 4.1 + // Well-formedness Constraint for entity not found: + // In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references, + // or a document with "standalone='yes'", for an entity reference that does not occur within the external subset + // or a parameter entity + if (fStandalone || fHasNoDTD) + emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer()); + + return EntityExp_Failed; + } + + firstCh = fEntityTable->get(bbName.getRawBuffer()); + escaped = true; + return EntityExp_Returned; +} + + +bool SGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace) +{ + Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace); + + if (!tempGrammar) { + tempGrammar = fSchemaGrammar; + } + + if (!tempGrammar) + return false; + else { + fGrammar = tempGrammar; + fGrammarType = fGrammar->getGrammarType(); + if (fGrammarType == Grammar::DTDGrammarType) { + ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator); + } + + fValidator->setGrammar(fGrammar); + return true; + } +} + +// check if we should skip or lax the validation of the element +// if skip - no validation +// if lax - validate only if the element if found +bool SGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv, + const XMLContentModel* const cm, + const unsigned int parentElemDepth) +{ + bool skipThisOne = false; + bool laxThisOne = false; + unsigned int elementURI = element->getURI(); + unsigned int currState = fElemState[parentElemDepth]; 
+ + if (currState == XMLContentModel::gInvalidTrans) { + return laxThisOne; + } + + SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool); + + if (cv) { + unsigned int i = 0; + unsigned int leafCount = cv->getLeafCount(); + + for (; i < leafCount; i++) { + + QName* fElemMap = cv->getLeafNameAt(i); + unsigned int uri = fElemMap->getURI(); + unsigned int nextState; + bool anyEncountered = false; + ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i); + + if (type == ContentSpecNode::Leaf) { + if (((uri == elementURI) + && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart())) + || comparator.isEquivalentTo(element, fElemMap)) { + + nextState = cm->getNextState(currState, i); + + if (nextState != XMLContentModel::gInvalidTrans) { + fElemState[parentElemDepth] = nextState; + break; + } + } + } else if ((type & 0x0f) == ContentSpecNode::Any) { + anyEncountered = true; + } + else if ((type & 0x0f) == ContentSpecNode::Any_Other) { + if (uri != elementURI) { + anyEncountered = true; + } + } + else if ((type & 0x0f) == ContentSpecNode::Any_NS) { + if (uri == elementURI) { + anyEncountered = true; + } + } + + if (anyEncountered) { + + nextState = cm->getNextState(currState, i); + if (nextState != XMLContentModel::gInvalidTrans) { + fElemState[parentElemDepth] = nextState; + + if (type == ContentSpecNode::Any_Skip || + type == ContentSpecNode::Any_NS_Skip || + type == ContentSpecNode::Any_Other_Skip) { + skipThisOne = true; + } + else if (type == ContentSpecNode::Any_Lax || + type == ContentSpecNode::Any_NS_Lax || + type == ContentSpecNode::Any_Other_Lax) { + laxThisOne = true; + } + + break; + } + } + } // for + + if (i == leafCount) { // no match + fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans; + return laxThisOne; + } + + } // if + + if (skipThisOne) { + fValidate = false; + fElemStack.setValidationFlag(fValidate); + } + + return laxThisOne; +} + + +// check if there is an AnyAttribute, and if so, see if we should lax or 
skip +// if skip - no validation +// if lax - validate only if the attribute if found +bool SGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne) +{ + XMLAttDef::AttTypes wildCardType = attWildCard->getType(); + bool anyEncountered = false; + skipThisOne = false; + laxThisOne = false; + if (wildCardType == XMLAttDef::Any_Any) + anyEncountered = true; + else if (wildCardType == XMLAttDef::Any_Other) { + if (attWildCard->getAttName()->getURI() != uriId) + anyEncountered = true; + } + else if (wildCardType == XMLAttDef::Any_List) { + ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList(); + unsigned int listSize = (nameURIList) ? nameURIList->size() : 0; + + if (listSize) { + for (unsigned int i=0; i < listSize; i++) { + if (nameURIList->elementAt(i) == uriId) + anyEncountered = true; + } + } + } + + if (anyEncountered) { + XMLAttDef::DefAttTypes defType = attWildCard->getDefaultType(); + if (defType == XMLAttDef::ProcessContents_Skip) { + // attribute should just be bypassed, + skipThisOne = true; + } + else if (defType == XMLAttDef::ProcessContents_Lax) { + laxThisOne = true; + } + } + + return anyEncountered; +} + +void SGXMLScanner::normalizeURI(const XMLCh* const systemURI, + XMLBuffer& normalizedURI) +{ + const XMLCh* pszSrc = systemURI; + + normalizedURI.reset(); + + while (*pszSrc) { + + if ((*(pszSrc) == chPercent) + && (*(pszSrc+1) == chDigit_2) + && (*(pszSrc+2) == chDigit_0)) + { + pszSrc += 3; + normalizedURI.append(chSpace); + } + else if (*pszSrc == 0xFFFF) { //escaped character + pszSrc++; + } + else { + normalizedURI.append(*pszSrc); + pszSrc++; + } + } +} + + +XERCES_CPP_NAMESPACE_END diff --git a/src/xercesc/internal/SGXMLScanner.hpp b/src/xercesc/internal/SGXMLScanner.hpp new file mode 100644 index 000000000..665809bec --- /dev/null +++ b/src/xercesc/internal/SGXMLScanner.hpp @@ -0,0 +1,306 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 
2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +/* + * $Log$ + * Revision 1.1 2002/12/05 16:19:27 knoaman + * Initial check-in. + * + */ + + +#if !defined(SGXMLSCANNER_HPP) +#define SGXMLSCANNER_HPP + +#include <xercesc/internal/XMLScanner.hpp> +#include <xercesc/internal/ElemStack.hpp> +#include <xercesc/util/KVStringPair.hpp> +#include <xercesc/util/ValueHashTableOf.hpp> +#include <xercesc/validators/common/Grammar.hpp> + +XERCES_CPP_NAMESPACE_BEGIN + +class SchemaValidator; +class SchemaGrammar; +class ValueStoreCache; +class XPathMatcherStack; +class FieldActivator; +class IdentityConstraint; +class ContentLeafNameTypeVector; +class SchemaAttDef; +class XMLContentModel; + +// This is a scanner class, which process XML Schema grammar. 
+//  SGXMLScanner derives from XMLScanner and declares the scanning virtuals
+//  listed below, plus the private helpers and state it needs for schema
+//  grammars, wildcard validation and identity constraints.
+class XMLPARSER_EXPORT SGXMLScanner : public XMLScanner
+{
+public :
+    // -----------------------------------------------------------------------
+    //  Constructors and Destructor
+    // -----------------------------------------------------------------------
+    SGXMLScanner
+    (
+        XMLValidator* const valToAdopt
+    );
+    SGXMLScanner
+    (
+        XMLDocumentHandler* const  docHandler
+        , DocTypeHandler* const    docTypeHandler
+        , XMLEntityHandler* const  entityHandler
+        , XMLErrorReporter* const  errReporter
+        , XMLValidator* const      valToAdopt
+    );
+    virtual ~SGXMLScanner();
+
+    // -----------------------------------------------------------------------
+    //  XMLScanner public virtual methods
+    // -----------------------------------------------------------------------
+    //  getName() returns XMLUni::fgSGScanner (see the inline definition
+    //  after the class).
+    virtual const XMLCh* getName() const;
+    virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool();
+    virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const;
+    virtual unsigned int resolveQName
+    (
+        const   XMLCh* const        qName
+        ,       XMLBuffer&          prefixBufToFill
+        , const short               mode
+        ,       int&                prefixColonPos
+    );
+    virtual void scanDocument
+    (
+        const   InputSource&    src
+    );
+    virtual bool scanNext(XMLPScanToken& toFill);
+    virtual Grammar* loadGrammar
+    (
+        const   InputSource&    src
+        , const short           grammarType
+        , const bool            toCache = false
+    );
+
+private :
+    // -----------------------------------------------------------------------
+    //  Unimplemented constructors and operators
+    // -----------------------------------------------------------------------
+    SGXMLScanner();
+    SGXMLScanner(const SGXMLScanner&);
+    void operator=(const SGXMLScanner&);
+
+    // -----------------------------------------------------------------------
+    //  XMLScanner virtual methods
+    // -----------------------------------------------------------------------
+    virtual void scanCDSection();
+    //  NOTE(review): the parameter is spelled "toToUse" here but "toUse" in
+    //  the definition in SGXMLScanner.cpp.
+    virtual void scanCharData(XMLBuffer& toToUse);
+    //  Expands a character reference, or looks the entity name up in
+    //  fEntityTable; see the definition for the meaning of the result.
+    virtual EntityExpRes scanEntityRef
+    (
+        const   bool    inAttVal
+        ,       XMLCh&  firstCh
+        ,       XMLCh&  secondCh
+        ,       bool&   escaped
+    );
+    virtual void scanDocTypeDecl();
+    virtual void scanReset(const InputSource& src);
+    virtual void sendCharData(XMLBuffer& toSend);
+
+    // -----------------------------------------------------------------------
+    //  Private helper methods
+    // -----------------------------------------------------------------------
+    void commonInit();
+    void cleanUp();
+    InputSource* resolveSystemId(const XMLCh* const sysId); // return owned by caller
+
+    // Spaces are not allowed in URI, so %20 is used instead.
+    // Convert %20 to spaces before resolving the URI
+    void normalizeURI(const XMLCh* const systemURI, XMLBuffer& normalizedURI);
+
+    unsigned int buildAttList
+    (
+        const   RefVectorOf<KVStringPair>&  providedAttrs
+        , const unsigned int                attCount
+        ,       XMLElementDecl*             elemDecl
+        ,       RefVectorOf<XMLAttr>&       toFill
+    );
+    bool normalizeAttValue
+    (
+        const   XMLAttDef* const    attDef
+        , const XMLCh* const        value
+        ,       XMLBuffer&          toFill
+    );
+    bool normalizeAttRawValue
+    (
+        const   XMLCh* const        attrName
+        , const XMLCh* const        value
+        ,       XMLBuffer&          toFill
+    );
+    unsigned int resolvePrefix
+    (
+        const   XMLCh* const        prefix
+        , const ElemStack::MapModes mode
+    );
+    unsigned int resolvePrefix
+    (
+        const   XMLCh* const        prefix
+        ,       XMLBuffer&          uriBufToFill
+        , const ElemStack::MapModes mode
+    );
+    void updateNSMap
+    (
+        const   XMLCh* const    attrName
+        , const XMLCh* const    attrValue
+    );
+    void scanRawAttrListforNameSpaces(const RefVectorOf<KVStringPair>* theRawAttrList, int attCount);
+    void parseSchemaLocation(const XMLCh* const schemaLocationStr);
+    void resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri);
+    //  Makes the grammar for the namespace current, falling back to
+    //  fSchemaGrammar; returns false when no grammar is available.
+    bool switchGrammar(const XMLCh* const newGrammarNameSpace);
+    //  Advances fElemState[parentElemDepth] for the child element and
+    //  returns true when it matched a lax wildcard.
+    bool laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
+                              const XMLContentModel* const cm,
+                              const unsigned int parentElemDepth);
+    //  Returns true when the attribute wildcard applies to uriId; the two
+    //  out-parameters report skip / lax processing.
+    bool anyAttributeValidation(SchemaAttDef* attWildCard,
+                                unsigned int uriId,
+                                bool& skipThisOne,
+                                bool& laxThisOne);
+    void resizeElemState();
+
+    // -----------------------------------------------------------------------
+    //  Private scanning methods
+    // -----------------------------------------------------------------------
+    bool basicAttrValueScan
+    (
+        const   XMLCh* const    attrName
+        ,       XMLBuffer&      toFill
+    );
+    unsigned int rawAttrScan
+    (
+        const   XMLCh* const                elemName
+        ,       RefVectorOf<KVStringPair>&  toFill
+        ,       bool&                       isEmpty
+    );
+    bool scanAttValue
+    (
+        const   XMLAttDef* const    attDef
+        ,       XMLBuffer&          toFill
+    );
+    bool scanContent(const bool extEntity);
+    void scanEndTag(bool& gotData);
+    bool scanStartTag(bool& gotData);
+
+    // -----------------------------------------------------------------------
+    //  IdentityConstraints Activation methods
+    // -----------------------------------------------------------------------
+    void activateSelectorFor(IdentityConstraint* const ic, const int initialDepth);
+
+    // -----------------------------------------------------------------------
+    //  Grammar preparsing methods
+    // -----------------------------------------------------------------------
+    Grammar* loadXMLSchemaGrammar(const InputSource& src, const bool toCache = false);
+
+    // -----------------------------------------------------------------------
+    //  Data members
+    //
+    //  fSeeXsi
+    //      This flag indicates a schema has been seen.
+    //
+    //  fGrammarType
+    //      The type of the current grammar, as reported by
+    //      Grammar::getGrammarType(). Updated by switchGrammar().
+    //
+    //  fElemState
+    //  fElemStateSize
+    //      Stores an element next state from DFA content model - used for
+    //      wildcard validation. Indexed by element depth.
+    //
+    //  fElemStack
+    //      This is the element stack that is used to track the elements that
+    //      are currently being worked on.
+    //
+    //  fEntityTable
+    //      Maps an entity name to the single character it stands for.
+    //      scanEntityRef() consults it for every reference that is not a
+    //      character reference.
+    //
+    //  fRawAttrList
+    //      During the initial scan of the attributes we can only do a raw
+    //      scan for key/value pairs. So this vector is used to store them
+    //      until they can be processed (and put into fAttrList.)
+    //
+    //  fSchemaGrammar
+    //      The grammar switchGrammar() falls back to when the grammar
+    //      resolver has none for the requested namespace.
+    //
+    //  fSchemaValidator
+    //      The Schema validator instance.
+    //
+    //  fMatcherStack
+    //      Stack of active XPath matchers for identity constraints. All
+    //      active XPath matchers are notified of startElement, characters
+    //      and endElement callbacks in order to perform their matches.
+    //
+    //  fValueStoreCache
+    //      Cache of value stores for identity constraint fields.
+    //
+    //  fFieldActivator
+    //      Activates fields within a certain scope when a selector matches
+    //      its xpath.
+    //
+    // -----------------------------------------------------------------------
+    bool                        fSeeXsi;
+    Grammar::GrammarType        fGrammarType;
+    unsigned int                fElemStateSize;
+    unsigned int*               fElemState;
+    ElemStack                   fElemStack;
+    ValueHashTableOf<XMLCh>*    fEntityTable;
+    RefVectorOf<KVStringPair>*  fRawAttrList;
+    SchemaGrammar*              fSchemaGrammar;
+    SchemaValidator*            fSchemaValidator;
+    XPathMatcherStack*          fMatcherStack;
+    ValueStoreCache*            fValueStoreCache;
+    FieldActivator*             fFieldActivator;
+};
+
+// Identifies this scanner by the name constant XMLUni::fgSGScanner.
+inline const XMLCh* SGXMLScanner::getName() const
+{
+    return XMLUni::fgSGScanner;
+}
+
+
+XERCES_CPP_NAMESPACE_END
+
+#endif
-- 
GitLab