XMLScanner.cpp

/*
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache\@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation, and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.ibm.com .  For more information
 * on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

/*
 * $Id$
 */


// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/Mutexes.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/RefVectorOf.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/util/XMLMsgLoader.hpp>
#include <xercesc/util/XMLRegisterCleanup.hpp>
#include <xercesc/util/XMLUniDefs.hpp>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/util/XMLURL.hpp>
#include <xercesc/sax/InputSource.hpp>
#include <xercesc/sax/SAXException.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLErrorReporter.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/framework/XMLValidator.hpp>
#include <xercesc/framework/XMLValidityCodes.hpp>
#include <xercesc/internal/XMLScanner.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/validators/DTD/DocTypeHandler.hpp>
#include <xercesc/validators/DTD/DTDScanner.hpp>
#include <xercesc/validators/schema/SchemaSymbols.hpp>
#include <xercesc/validators/schema/identity/FieldActivator.hpp>
#include <xercesc/validators/schema/identity/XPathMatcherStack.hpp>
#include <xercesc/validators/schema/identity/ValueStoreCache.hpp>
#include <xercesc/validators/schema/identity/IC_Selector.hpp>
#include <xercesc/validators/schema/identity/ValueStore.hpp>

// ---------------------------------------------------------------------------
//  Local static data
// ---------------------------------------------------------------------------
static XMLUInt32       gScannerId;
static bool            sRegistered = false;

static XMLMutex*       sScannerMutex = 0;
static XMLRegisterCleanup scannerMutexCleanup;

static XMLMsgLoader*   gMsgLoader = 0;
static XMLRegisterCleanup cleanupMsgLoader;

// ---------------------------------------------------------------------------
//  Local const data
//
//  These are the text for the require char refs that must always be present.
//  We init these into the entity pool upon construction.
// ---------------------------------------------------------------------------
static const XMLCh gAmp[] = { chLatin_a, chLatin_m, chLatin_p, chNull };
static const XMLCh gLT[] = { chLatin_l, chLatin_t, chNull };
static const XMLCh gGT[] = { chLatin_g, chLatin_t, chNull };
static const XMLCh gQuot[] = { chLatin_q, chLatin_u, chLatin_o, chLatin_t, chNull };
static const XMLCh gApos[] = { chLatin_a, chLatin_p, chLatin_o, chLatin_s, chNull };


// ---------------------------------------------------------------------------
//  Local, static functions
// ---------------------------------------------------------------------------

// -----------------------------------------------------------------------
//  Cleanup for the message loader
// -----------------------------------------------------------------------
void XMLScanner::reinitMsgLoader()
{
	delete gMsgLoader;
	gMsgLoader = 0;
}

// -----------------------------------------------------------------------
//  Cleanup for the scanner mutex
// -----------------------------------------------------------------------
void XMLScanner::reinitScannerMutex()
{
    delete sScannerMutex;
    sScannerMutex = 0;
    sRegistered = false;
}

//
//  We need to fault in this mutex. But, since its used for synchronization
//  itself, we have to do this the low level way using a compare and swap.
//
static XMLMutex& gScannerMutex()
{

    if (!sScannerMutex)
    {
        XMLMutex* tmpMutex = new XMLMutex;
        if (XMLPlatformUtils::compareAndSwap((void**)&sScannerMutex, tmpMutex, 0))
        {
            // Someone beat us to it, so let's clean up ours
            delete tmpMutex;
        }

        // Now lock it and try to register it
        XMLMutexLock lock(sScannerMutex);

        // If we got here first, then register it and set the registered flag
        if (!sRegistered)
        {
			scannerMutexCleanup.registerCleanup(XMLScanner::reinitScannerMutex);
            sRegistered = true;
        }
    }
    return *sScannerMutex;
}


// ---------------------------------------------------------------------------
//  XMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------
XMLScanner::XMLScanner(XMLValidator* const valToAdopt) :

    fDoNamespaces(false)
    , fExitOnFirstFatal(true)
    , fValidationConstraintFatal(false)
    , fInException(false)
    , fReuseGrammar(false)
    , fStandalone(false)
    , fHasNoDTD(true)
    , fValidate(false)
    , fValidatorFromUser(false)
    , fDoSchema(false)
    , fSchemaFullChecking(false)
    , fSeeXsi(false)
    , fErrorCount(0)
    , fEmptyNamespaceId(0)
    , fUnknownNamespaceId(0)
    , fXMLNamespaceId(0)
    , fXMLNSNamespaceId(0)
    , fSchemaNamespaceId(0)
    , fElemStateSize(16)
    , fScannerId(0)
    , fSequenceId(0)
    , fElemState(0)
    , fAttrList(0)
    , fDocHandler(0)
    , fDocTypeHandler(0)
    , fEntityHandler(0)
    , fErrorReporter(0)
    , fErrorHandler(0)
    , fIDRefList(0)
    , fRawAttrList(0)
    , fValidator(valToAdopt)
    , fDTDValidator(0)
    , fSchemaValidator(0)
    , fValScheme(Val_Never)
    , fGrammarResolver(0)
    , fGrammar(0)
    , fEntityDeclPool(0)
    , fURIStringPool(0)
    , fMatcherStack(0)
    , fValueStoreCache(0)
    , fFieldActivator(0)
    , fRootElemName(0)
    , fExternalSchemaLocation(0)
    , fExternalNoNamespaceSchemaLocation(0)
    , fLoadExternalDTD(true)
    , fNormalizeData(true)
{
   commonInit();

   if (fValidator) {
       fValidatorFromUser = true;
       initValidator(fValidator);
   }
   else {
       //use fDTDValidator as the default validator
       fValidator = fDTDValidator;
   }
}

XMLScanner::XMLScanner( XMLDocumentHandler* const  docHandler
                        , DocTypeHandler* const    docTypeHandler
                        , XMLEntityHandler* const  entityHandler
                        , XMLErrorReporter* const  errHandler
                        , XMLValidator* const      valToAdopt) :

    fDoNamespaces(false)
    , fExitOnFirstFatal(true)
    , fValidationConstraintFatal(false)
    , fInException(false)
    , fReuseGrammar(false)
    , fStandalone(false)
    , fHasNoDTD(true)
    , fValidate(false)
    , fValidatorFromUser(false)
    , fDoSchema(false)
    , fSchemaFullChecking(false)
    , fSeeXsi(false)
    , fErrorCount(0)
    , fEmptyNamespaceId(0)
    , fUnknownNamespaceId(0)
    , fXMLNamespaceId(0)
    , fXMLNSNamespaceId(0)
    , fSchemaNamespaceId(0)
    , fElemStateSize(16)
    , fScannerId(0)
    , fSequenceId(0)
    , fElemState(0)
    , fAttrList(0)
    , fDocHandler(docHandler)
    , fDocTypeHandler(docTypeHandler)
    , fEntityHandler(entityHandler)
    , fErrorReporter(errHandler)
    , fErrorHandler(0)
    , fIDRefList(0)
    , fRawAttrList(0)
    , fValidator(valToAdopt)
    , fDTDValidator(0)
    , fSchemaValidator(0)
    , fValScheme(Val_Never)
    , fGrammarResolver(0)
    , fGrammar(0)
    , fEntityDeclPool(0)
    , fURIStringPool(0)
    , fMatcherStack(0)
    , fValueStoreCache(0)
    , fFieldActivator(0)
    , fRootElemName(0)
    , fExternalSchemaLocation(0)
    , fExternalNoNamespaceSchemaLocation(0)
    , fLoadExternalDTD(true)
    , fNormalizeData(true)
{
   commonInit();

   if (valToAdopt){
       fValidatorFromUser = true;
       initValidator(fValidator);
   }
   else {
       //use fDTDValidator as the default validator
       fValidator = fDTDValidator;
   }
}

XMLScanner::~XMLScanner()
{
    delete [] fElemState;
    delete fAttrList;
    delete fIDRefList;
    delete fRawAttrList;
    if (fValidatorFromUser)
        delete fValidator;

    delete fDTDValidator;
    delete fSchemaValidator;

    delete fEntityDeclPool;

    //fGrammarResolver will delete the fGrammar as well
    delete fGrammarResolver;

    delete fURIStringPool;

    delete fFieldActivator;
    delete fMatcherStack;
    delete fValueStoreCache;

    delete [] fRootElemName;
    delete [] fExternalSchemaLocation;
    delete [] fExternalNoNamespaceSchemaLocation;
}


// ---------------------------------------------------------------------------
//  XMLScanner: Main entry point to scan a document
// ---------------------------------------------------------------------------
void XMLScanner::scanDocument(  const   XMLCh* const    systemId
                                , const bool            reuseGrammar)
{
    //
    //  First we try to parse it as a URL. If that fails, we assume its
    //  a file and try it that way.
    //
    InputSource* srcToUse = 0;
    try
    {
        //
        //  Create a temporary URL. Since this is the primary document,
        //  it has to be fully qualified. If not, then assume we are just
        //  mistaking a file for a URL.
        //

        XMLURL tmpURL(systemId);
        if (tmpURL.isRelative()) {
            srcToUse = new LocalFileInputSource(systemId);
        }
        else
        {
            srcToUse = new URLInputSource(tmpURL);
        }

    }

    catch(const MalformedURLException&)
    {
        srcToUse = new LocalFileInputSource(systemId);
    }

    catch(const XMLException& excToCatch)
    {
        //
        //  For any other XMLException,
        //  emit the error and catch any user exception thrown from here.
        //
        fInException = true;
        if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
            emitError
            (
                XMLErrs::XMLException_Warning
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
            emitError
            (
                XMLErrs::XMLException_Fatal
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        else
            emitError
            (
                XMLErrs::XMLException_Error
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        return;
    }

    catch(...)
    {
        // Just rethrow this, since its not our problem
        throw;
    }

    Janitor<InputSource> janSrc(srcToUse);
    scanDocument(*srcToUse, reuseGrammar);
}

void XMLScanner::scanDocument(  const   char* const systemId
                                , const bool        reuseGrammar)
{
    // We just delegate this to the XMLCh version after transcoding
    XMLCh* tmpBuf = XMLString::transcode(systemId);
    ArrayJanitor<XMLCh> janBuf(tmpBuf);
    scanDocument(tmpBuf, reuseGrammar);
}


void XMLScanner::scanDocument(const InputSource& src, const bool reuseGrammar)
{
    //
    //  Bump up the sequence id for this parser instance. This will invalidate
    //  any previous progressive scan tokens.
    //
    fSequenceId++;

    // Store the reuse validator flag
    fReuseGrammar = reuseGrammar;

    try
    {
        //
        //  Reset the scanner and its plugged in stuff for a new run. This
        //  resets all the data structures, creates the initial reader and
        //  pushes it on the stack, and sets up the base document path.
        //
        scanReset(src);

        // If we have a document handler, then call the start document
        if (fDocHandler)
            fDocHandler->startDocument();

        fValueStoreCache->startDocument();

        //
        //  Scan the prolog part, which is everything before the root element
        //  including the DTD subsets.
        //
        scanProlog();

        //
        //  If we got to the end of input, then its not a valid XML file.
        //  Else, go on to scan the content.
        //
        if (fReaderMgr.atEOF())
        {
            emitError(XMLErrs::EmptyMainEntity);
        }
         else
        {
            // Scan content, and tell it its not an external entity
            if (scanContent(false))
            {
                // Do post-parse validation if required
                if (fValidate)
                {
                    //
                    //  We handle ID reference semantics at this level since
                    //  its required by XML 1.0.
                    //
                    checkIDRefs();

                    // Then allow the validator to do any extra stuff it wants
                    fValidator->postParseValidation();
                }

                // That went ok, so scan for any miscellaneous stuff
                if (!fReaderMgr.atEOF())
                    scanMiscellaneous();
            }
        }

        if (fValidate)
            fValueStoreCache->endDocument();

        // If we have a document handler, then call the end document
        if (fDocHandler)
            fDocHandler->endDocument();

        // Reset the reader manager to close all files, sockets, etc...
        fReaderMgr.reset();
    }

    //
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    //
    catch(const XMLErrs::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and fall through
        fReaderMgr.reset();
    }

    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and fall through
        fReaderMgr.reset();
    }

    catch(const XMLException& excToCatch)
    {
        //
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        //
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
        }

        catch(...)
        {
            // Flush the reader manager and rethrow user's error
            fReaderMgr.reset();
            throw;
        }

        // If it returned, then reset the reader manager and fall through
        fReaderMgr.reset();
    }

    catch(...)
    {
        // Reset and rethrow
        fReaderMgr.reset();
        throw;
    }
}


//
//  This method begins a progressive parse. It scans through the prolog and
//  returns a token to be used on subsequent scanNext() calls. If the return
//  value is true, then the token is legal and ready for further use. If it
//  returns false, then the scan of the prolog failed and the token is not
//  going to work on subsequent scanNext() calls.
//
bool XMLScanner::scanFirst( const   XMLCh* const    systemId
                            ,       XMLPScanToken&  toFill
                            , const bool            reuseGrammar)
{
    //
    //  First we try to parse it as a URL. If that fails, we assume its
    //  a file and try it that way.
    //
    InputSource* srcToUse = 0;
    try
    {
        //
        //  Create a temporary URL. Since this is the primary document,
        //  it has to be fully qualified. If not, then assume we are just
        //  mistaking a file for a URL.
        //

        XMLURL tmpURL(systemId);
        if (tmpURL.isRelative()) {
            srcToUse = new LocalFileInputSource(systemId);
        }
        else
        {
            srcToUse = new URLInputSource(tmpURL);
        }
    }

    catch(const MalformedURLException&)
    {
        srcToUse = new LocalFileInputSource(systemId);
    }

    catch(const XMLException& excToCatch)
    {
        //
        //  For any other XMLException,
        //  emit the error and catch any user exception thrown from here.
        //
        fInException = true;
        if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
            emitError
            (
                XMLErrs::XMLException_Warning
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
            emitError
            (
                XMLErrs::XMLException_Fatal
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        else
            emitError
            (
                XMLErrs::XMLException_Error
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        return false;
    }

    catch(...)
    {
        // Just rethrow this, since its not our problem
        throw;
    }

    Janitor<InputSource> janSrc(srcToUse);
    return scanFirst(*srcToUse, toFill, reuseGrammar);
}

bool XMLScanner::scanFirst( const   char* const     systemId
                            ,       XMLPScanToken&  toFill
                            , const bool            reuseGrammar)
{
    // We just delegate this to the XMLCh version after transcoding
    XMLCh* tmpBuf = XMLString::transcode(systemId);
    ArrayJanitor<XMLCh> janBuf(tmpBuf);
    return scanFirst(tmpBuf, toFill, reuseGrammar);
}

bool XMLScanner::scanFirst( const   InputSource&    src
                            ,       XMLPScanToken&  toFill
                            , const bool            reuseGrammar)
{
    // Store the reuse validator flag
    fReuseGrammar = reuseGrammar;

    //
    //  Bump up the sequence id for this new scan cycle. This will invalidate
    //  any previous tokens we've returned.
    //
    fSequenceId++;

    //
    // Reset the scanner and its plugged in stuff for a new run.  This
    // resets all the data structures, creates the initial reader and
    // pushes it on the stack, and sets up the base document path
    //
    scanReset(src);

    // If we have a document handler, then call the start document
    if (fDocHandler)
        fDocHandler->startDocument();

    fValueStoreCache->startDocument();

    try
    {
        //
        //  Scan the prolog part, which is everything before the root element
        //  including the DTD subsets. This is all that is done on the scan
        //  first.
        //
        scanProlog();

        //
        //  If we got to the end of input, then its not a valid XML file.
        //  Else, go on to scan the content.
        //
        if (fReaderMgr.atEOF())
        {
            emitError(XMLErrs::EmptyMainEntity);
        }
    }

    //
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    //
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception so reset and return a failure
        fReaderMgr.reset();
        return false;
    }

    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and reuturn failure
        fReaderMgr.reset();
        return false;
    }

    catch(const XMLException& excToCatch)
    {
        //
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        //
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
        }

        catch(...)
        {
            // Reset and rethrow the user error
            fReaderMgr.reset();
            throw;
        }

        // Reset and return a failure
        fReaderMgr.reset();
        return false;
    }

    catch(...)
    {
        // Reset and rethrow original error
        fReaderMgr.reset();
        throw;
    }

    // Fill in the caller's token to make it legal and return success
    toFill.set(fScannerId, fSequenceId);
    return true;
}


bool XMLScanner::scanNext(XMLPScanToken& token)
{
    // Make sure this token is still legal
    if (!isLegalToken(token))
        ThrowXML(RuntimeException, XMLExcepts::Scan_BadPScanToken);

    // Find the next token and remember the reader id
    unsigned int orgReader;
    XMLTokens curToken;

    bool retVal = true;

    try
    {
        while (true)
        {
            //
            //  We have to handle any end of entity exceptions that happen here.
            //  We could be at the end of X nested entities, each of which will
            //  generate an end of entity exception as we try to move forward.
            //

            try
            {
                curToken = senseNextToken(orgReader);
                break;
            }

            catch(const EndOfEntityException& toCatch)
            {
                // Send an end of entity reference event
                if (fDocHandler)
                    fDocHandler->endEntityReference(toCatch.getEntity());
            }
        }

        if (curToken == Token_CharData)
        {
            scanCharData(fCDataBuf);
        }
         else if (curToken == Token_EOF)
        {
            if (!fElemStack.isEmpty())
            {
                const ElemStack::StackElem* topElem = fElemStack.popTop();
                emitError
                (
                    XMLErrs::EndedWithTagsOnStack
                    , topElem->fThisElement->getFullName()
                );
            }

            retVal = false;
        }
         else
        {
            // Its some sort of markup
            bool gotData = true;
            switch(curToken)
            {
                case Token_CData :
                    // Make sure we are within content
                    if (fElemStack.isEmpty())
                        emitError(XMLErrs::CDATAOutsideOfContent);
                    scanCDSection();
                    break;

                case Token_Comment :
                    scanComment();
                    break;

                case Token_EndTag :
                    scanEndTag(gotData);
                    break;

                case Token_PI :
                    scanPI();
                    break;

                case Token_StartTag :
                    if (fDoNamespaces)
                        scanStartTagNS(gotData);
                    else
                        scanStartTag(gotData);
                    break;

                default :
                    fReaderMgr.skipToChar(chOpenAngle);
                    break;
            }

            if (orgReader != fReaderMgr.getCurrentReaderNum())
                emitError(XMLErrs::PartialMarkupInEntity);

            // If we hit the end, then do the miscellaneous part
            if (!gotData)
            {
                // Do post-parse validation if required
                if (fValidate)
                {
                    //
                    //  We handle ID reference semantics at this level since
                    //  its required by XML 1.0.
                    //
                    checkIDRefs();

                    // Then allow the validator to do any extra stuff it wants
                    fValidator->postParseValidation();
                }

                // That went ok, so scan for any miscellaneous stuff
                scanMiscellaneous();

                if (fValidate)
                    fValueStoreCache->endDocument();

                if (fDocHandler)
                    fDocHandler->endDocument();
            }
        }
    }

    //
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    //
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception, so reset and return failure
        fReaderMgr.reset();
        return false;
    }

    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and reuturn failure
        fReaderMgr.reset();
        return false;
    }

    // We have to propogate SAX exceptions
    catch(const SAXException&)
    {
        // Just reset our reader manager and rethrow SAX exception
        fReaderMgr.reset();
        throw;
    }

    catch(const XMLException& excToCatch)
    {
        //
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        //
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
        }

        catch(...)
        {
            // Reset and rethrow user error
            fReaderMgr.reset();
            throw;
        }

        // Reset and return failure
        fReaderMgr.reset();
        return false;
    }

    catch(...)
    {
        // Reset and rethrow original error
        fReaderMgr.reset();
        throw;
    }

    // If we hit the end, then flush the reader manager
    if (!retVal)
        fReaderMgr.reset();

    return retVal;
}


void XMLScanner::scanReset(XMLPScanToken& token)
{
    // Make sure this token is still legal
    if (!isLegalToken(token))
        ThrowXML(RuntimeException, XMLExcepts::Scan_BadPScanToken);

    // Reset the reader manager
    fReaderMgr.reset();

    // And invalidate any tokens by bumping our sequence number
    fSequenceId++;

    // Reset our error count