Skip to content
Snippets Groups Projects
XMLScanner.cpp 78.4 KiB
Newer Older
PeiYong Zhang's avatar
PeiYong Zhang committed
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *      http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
PeiYong Zhang's avatar
PeiYong Zhang committed
 */

/*
Khaled Noaman's avatar
Khaled Noaman committed
 * $Id$
PeiYong Zhang's avatar
PeiYong Zhang committed
 */


// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
Khaled Noaman's avatar
Khaled Noaman committed
#include <xercesc/internal/XMLScanner.hpp>
#include <xercesc/internal/ValidationContextImpl.hpp>
PeiYong Zhang's avatar
PeiYong Zhang committed
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/Mutexes.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/util/XMLMsgLoader.hpp>
#include <xercesc/util/XMLInitializer.hpp>
PeiYong Zhang's avatar
PeiYong Zhang committed
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/framework/XMLValidator.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/validators/DTD/DocTypeHandler.hpp>
Khaled Noaman's avatar
Khaled Noaman committed
#include <xercesc/validators/common/GrammarResolver.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/util/XMLResourceIdentifier.hpp>
PeiYong Zhang's avatar
PeiYong Zhang committed

Tinny Ng's avatar
Tinny Ng committed
XERCES_CPP_NAMESPACE_BEGIN

PeiYong Zhang's avatar
PeiYong Zhang committed
// ---------------------------------------------------------------------------
//  Local static data
// ---------------------------------------------------------------------------
PeiYong Zhang's avatar
PeiYong Zhang committed
// Mutex created by initializeXMLScanner() and destroyed by terminateXMLScanner().
static XMLMutex*       sScannerMutex = 0;
// Message loader for the XML error domain; emitError() uses it to load error text.
static XMLMsgLoader*   gMsgLoader = 0;

void XMLInitializer::initializeXMLScanner()
{
    gMsgLoader = XMLPlatformUtils::loadMsgSet(XMLUni::fgXMLErrDomain);
PeiYong Zhang's avatar
PeiYong Zhang committed

    if (!gMsgLoader)
      XMLPlatformUtils::panic(PanicHandler::Panic_CantLoadMsgDomain);
PeiYong Zhang's avatar
PeiYong Zhang committed

    sScannerMutex = new XMLMutex(XMLPlatformUtils::fgMemoryManager);
PeiYong Zhang's avatar
PeiYong Zhang committed
}

void XMLInitializer::terminateXMLScanner()
PeiYong Zhang's avatar
PeiYong Zhang committed
{
PeiYong Zhang's avatar
PeiYong Zhang committed
    delete sScannerMutex;
    sScannerMutex = 0;
}

//  Janitor helper types: each pairs an object with one of its member
//  functions (used below with XMLScanner::cleanUp and ReaderMgr::reset).
typedef JanitorMemFunCall<XMLScanner>   CleanupType;
typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;


PeiYong Zhang's avatar
PeiYong Zhang committed
// ---------------------------------------------------------------------------
//  XMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------
//  Constructs a scanner with no handlers installed: every handler pointer
//  starts out null, flags and counters get their defaults in the initializer
//  list, and the remaining setup is delegated to commonInit().
//
//  grammarResolver is dereferenced in the initializer list, so it must not
//  be null.
//
//  NOTE(review): the parameter list looks truncated in this view -- 'manager'
//  is used by the initializers below but is not declared in the visible
//  signature, and the closing ')' is missing. Confirm against the header.
XMLScanner::XMLScanner(XMLValidator* const valToAdopt,
                       GrammarResolver* const grammarResolver,
    : fBufferSize(1024 * 1024)
    , fStandardUriConformant(false)
Khaled Noaman's avatar
Khaled Noaman committed
    , fDoNamespaces(false)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fExitOnFirstFatal(true)
    , fValidationConstraintFatal(false)
    , fInException(false)
    , fStandalone(false)
    , fHasNoDTD(true)
    , fValidate(false)
    , fValidatorFromUser(false)
    , fDoSchema(false)
    , fSchemaFullChecking(false)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fIdentityConstraintChecking(true)
    , fToCacheGrammar(false)
    , fUseCachedGrammar(false)
Khaled Noaman's avatar
Khaled Noaman committed
    , fLoadExternalDTD(true)
Khaled Noaman's avatar
Khaled Noaman committed
    , fNormalizeData(true)
    , fValidateAnnotations(false)
    , fIgnoreAnnotations(false)
    , fDisableDefaultEntityResolution(false)
    , fSkipDTDValidation(false)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fErrorCount(0)
    , fEntityExpansionLimit(0)
    , fEntityExpansionCount(0)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fEmptyNamespaceId(0)
    , fUnknownNamespaceId(0)
    , fXMLNamespaceId(0)
    , fXMLNSNamespaceId(0)
    , fSchemaNamespaceId(0)
    , fUIntPool(0)
    , fUIntPoolRow(0)
    , fUIntPoolCol(0)
    , fUIntPoolRowTotal(2)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fScannerId(0)
    , fSequenceId(0)
    , fAttrList(0)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fDocHandler(0)
    , fDocTypeHandler(0)
    , fEntityHandler(0)
    , fErrorReporter(0)
    , fErrorHandler(0)
    , fValidationContext(0)
    , fEntityDeclPoolRetrieved(false)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fValidator(valToAdopt)
    , fValScheme(Val_Never)
    , fGrammarResolver(grammarResolver)
    , fGrammarPoolMemoryManager(grammarResolver->getGrammarPoolMemoryManager())
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fGrammar(0)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fURIStringPool(0)
    , fRootElemName(0)
    , fExternalSchemaLocation(0)
    , fExternalNoNamespaceSchemaLocation(0)
    , fSecurityManager(0)
Tinny Ng's avatar
Tinny Ng committed
    , fXMLVersion(XMLReader::XMLV1_0)
    , fMemoryManager(manager)
    , fBufMgr(manager)
    , fAttNameBuf(1023, manager)
    , fAttValueBuf(1023, manager)
    , fCDataBuf(1023, manager)
    , fQNameBuf(1023, manager)
    , fPrefixBuf(1023, manager)
    , fURIBuf(1023, manager)
PeiYong Zhang's avatar
PeiYong Zhang committed
{
    // Janitor bound to cleanUp(); it is released on every path below.
    // NOTE(review): presumably it runs cleanUp() if commonInit() exits with
    // any other exception -- confirm against JanitorMemFunCall.
    CleanupType cleanup(this, &XMLScanner::cleanUp);
PeiYong Zhang's avatar
PeiYong Zhang committed

    try
    {
        commonInit();
    }
    catch(const OutOfMemoryException&)
    {
        // Don't cleanup when out of memory, since executing the
        // code can cause problems.
        cleanup.release();

        throw;
    }

    // Construction succeeded, so the cleanup janitor is no longer needed.
    cleanup.release();
PeiYong Zhang's avatar
PeiYong Zhang committed
}

//  Constructs a scanner with the caller's document, doctype, entity and
//  error-reporter handlers installed. Everything else gets the same
//  defaults as the other constructor, and the remaining setup is delegated
//  to commonInit().
//
//  grammarResolver is dereferenced in the initializer list, so it must not
//  be null.
//
//  NOTE(review): the parameter list looks truncated in this view -- 'manager'
//  is used by the initializers below but is not declared in the visible
//  signature, and the closing ')' is missing. Confirm against the header.
XMLScanner::XMLScanner( XMLDocumentHandler* const  docHandler
Khaled Noaman's avatar
Khaled Noaman committed
                          , DocTypeHandler* const    docTypeHandler
                          , XMLEntityHandler* const  entityHandler
                          , XMLErrorReporter* const  errHandler
                          , XMLValidator* const      valToAdopt
                          , GrammarResolver* const   grammarResolver
PeiYong Zhang's avatar
PeiYong Zhang committed

    : fBufferSize(1024 * 1024)
    , fStandardUriConformant(false)
Khaled Noaman's avatar
Khaled Noaman committed
    , fDoNamespaces(false)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fExitOnFirstFatal(true)
    , fValidationConstraintFatal(false)
    , fInException(false)
    , fStandalone(false)
    , fHasNoDTD(true)
    , fValidate(false)
    , fValidatorFromUser(false)
    , fDoSchema(false)
    , fSchemaFullChecking(false)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fIdentityConstraintChecking(true)
    , fToCacheGrammar(false)
    , fUseCachedGrammar(false)
Khaled Noaman's avatar
Khaled Noaman committed
	, fLoadExternalDTD(true)
Khaled Noaman's avatar
Khaled Noaman committed
    , fNormalizeData(true)
    , fValidateAnnotations(false)
    , fIgnoreAnnotations(false)
    , fDisableDefaultEntityResolution(false)
    , fSkipDTDValidation(false)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fErrorCount(0)
    , fEntityExpansionLimit(0)
    , fEntityExpansionCount(0)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fEmptyNamespaceId(0)
    , fUnknownNamespaceId(0)
    , fXMLNamespaceId(0)
    , fXMLNSNamespaceId(0)
    , fSchemaNamespaceId(0)
    , fUIntPool(0)
    , fUIntPoolRow(0)
    , fUIntPoolCol(0)
    , fUIntPoolRowTotal(2)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fScannerId(0)
    , fSequenceId(0)
    , fAttrList(0)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fDocHandler(docHandler)
    , fDocTypeHandler(docTypeHandler)
    , fEntityHandler(entityHandler)
    , fErrorReporter(errHandler)
    , fErrorHandler(0)
    , fValidationContext(0)
    , fEntityDeclPoolRetrieved(false)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fValidator(valToAdopt)
    , fValScheme(Val_Never)
    , fGrammarResolver(grammarResolver)
    , fGrammarPoolMemoryManager(grammarResolver->getGrammarPoolMemoryManager())
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fGrammar(0)
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fURIStringPool(0)
    , fRootElemName(0)
    , fExternalSchemaLocation(0)
    , fExternalNoNamespaceSchemaLocation(0)
    , fSecurityManager(0)
Tinny Ng's avatar
Tinny Ng committed
    , fXMLVersion(XMLReader::XMLV1_0)
    , fMemoryManager(manager)
    , fBufMgr(manager)
    , fAttNameBuf(1023, manager)
    , fAttValueBuf(1023, manager)
    , fCDataBuf(1023, manager)
    , fQNameBuf(1023, manager)
    , fPrefixBuf(1023, manager)
    , fURIBuf(1023, manager)
    , fElemStack(manager)
PeiYong Zhang's avatar
PeiYong Zhang committed
{
    // Janitor bound to cleanUp(); it is released on every path below.
    // NOTE(review): presumably it runs cleanUp() if commonInit() exits with
    // any other exception -- confirm against JanitorMemFunCall.
    CleanupType cleanup(this, &XMLScanner::cleanUp);
PeiYong Zhang's avatar
PeiYong Zhang committed

    try
    {
        commonInit();
    }
    catch(const OutOfMemoryException&)
    {
        // Don't cleanup when out of memory, since executing the
        // code can cause problems.
        cleanup.release();

        throw;
    }

    // Construction succeeded, so the cleanup janitor is no longer needed.
    cleanup.release();
PeiYong Zhang's avatar
PeiYong Zhang committed
}

//  Destructor.
//  NOTE(review): the body and closing brace are missing from this view;
//  restore them from the original file before building.
XMLScanner::~XMLScanner()
{
Khaled Noaman's avatar
Khaled Noaman committed
//  Installs a caller-supplied validator, which the scanner adopts.
//
//  A validator previously installed by the user is deleted first, unless the
//  caller hands the very same object back; deleting it in that case would
//  leave fValidator dangling and then initialize a destroyed object.
//
//  A null valToAdopt clears the validator. As in commonInit(), the
//  "from user" flag is only set, and initValidator() only called, when there
//  actually is a validator.
void XMLScanner::setValidator(XMLValidator* const valToAdopt)
{
    if (fValidatorFromUser && fValidator != valToAdopt)
        delete fValidator;

    fValidator = valToAdopt;
    fValidatorFromUser = (valToAdopt != 0);

    if (valToAdopt)
        initValidator(valToAdopt);
}



PeiYong Zhang's avatar
PeiYong Zhang committed
// ---------------------------------------------------------------------------
//  XMLScanner: Main entry point to scan a document
// ---------------------------------------------------------------------------
//  Scans the document named by systemId. The id is first parsed as a URL:
//  an absolute URL is read through a URLInputSource, a relative one is
//  treated as a local file unless fStandardUriConformant is set, in which
//  case a fatal error is emitted and the method returns. The resulting
//  input source is handed to scanDocument(const InputSource&).
void XMLScanner::scanDocument(  const   XMLCh* const    systemId)
PeiYong Zhang's avatar
PeiYong Zhang committed
{
    //  First we try to parse it as a URL. If that fails, we assume its
    //  a file and try it that way.
    InputSource* srcToUse = 0;
    try
    {
        //  Create a temporary URL. Since this is the primary document,
        //  it has to be fully qualified. If not, then assume we are just
        //  mistaking a file for a URL.
        XMLURL tmpURL(fMemoryManager);

        if (XMLURL::parse(systemId, tmpURL)) {

            if (tmpURL.isRelative()) {
                if (!fStandardUriConformant)
                    srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
                else {
                    // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                    // emit the error directly
                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);
                    fInException = true;
                    emitError
                    (
                        XMLErrs::XMLException_Fatal
                        , e.getMessage()
                    );
                    return;
                }
            }
            else
            {
                // Strict URI conformance rejects URLs containing invalid characters
                if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                    fInException = true;
                    emitError
                    (
                        XMLErrs::XMLException_Fatal
                        , e.getMessage()
                    );
                    return;
                }
                srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
            }
        }
        else {

                // NOTE(review): lines appear to be missing from this branch in
                // this view -- the emitError call below is never completed and
                // the file-source fallback is not guarded by
                // fStandardUriConformant as it is in the branches above.
                srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
                // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
                MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                fInException = true;
                emitError
                (
                    XMLErrs::XMLException_Fatal
PeiYong Zhang's avatar
PeiYong Zhang committed
        }
    }
    catch(const XMLException& excToCatch)
    {
        //  For any other XMLException,
        //  emit the error and catch any user exception thrown from here.
        //  The exception's error type selects the warning, fatal or plain
        //  error code that is reported.
        fInException = true;
        if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
            emitError
            (
                XMLErrs::XMLException_Warning
                , excToCatch.getMessage()
            );
        else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
            emitError
            (
                XMLErrs::XMLException_Fatal
                , excToCatch.getMessage()
            );
        else
            emitError
            (
                XMLErrs::XMLException_Error
                , excToCatch.getMessage()
            );
        return;
    }
PeiYong Zhang's avatar
PeiYong Zhang committed

    // The janitor owns the input source for the duration of the scan
    Janitor<InputSource> janSrc(srcToUse);
    scanDocument(*srcToUse);
PeiYong Zhang's avatar
PeiYong Zhang committed
}

void XMLScanner::scanDocument(  const   char* const systemId)
PeiYong Zhang's avatar
PeiYong Zhang committed
{
    // We just delegate this to the XMLCh version after transcoding
    XMLCh* tmpBuf = XMLString::transcode(systemId, fMemoryManager);
    ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
    scanDocument(tmpBuf);
PeiYong Zhang's avatar
PeiYong Zhang committed
}


//  This method begins a progressive parse. It scans through the prolog and
//  returns a token to be used on subsequent scanNext() calls. If the return
//  value is true, then the token is legal and ready for further use. If it
//  returns false, then the scan of the prolog failed and the token is not
//  going to work on subsequent scanNext() calls.
//  Progressive-parse entry point taking a system id. Resolves the id to an
//  input source the same way scanDocument() does, then delegates to
//  scanFirst(const InputSource&, XMLPScanToken&). Returns false if the
//  source could not be set up or an XMLException was reported.
bool XMLScanner::scanFirst( const   XMLCh* const    systemId
                            ,       XMLPScanToken&  toFill)
PeiYong Zhang's avatar
PeiYong Zhang committed
{
    //  First we try to parse it as a URL. If that fails, we assume its
    //  a file and try it that way.
    InputSource* srcToUse = 0;
    try
    {
        //  Create a temporary URL. Since this is the primary document,
        //  it has to be fully qualified. If not, then assume we are just
        //  mistaking a file for a URL.
        //  NOTE(review): the declaration of tmpURL and the enclosing
        //  'if (XMLURL::parse(...))' appear to be missing from this view.
            if (tmpURL.isRelative()) {
                if (!fStandardUriConformant)
                    srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
                else {
                    // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                    // emit the error directly
                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);
                    fInException = true;
                    emitError
                    (
                        XMLErrs::XMLException_Fatal
                        , e.getMessage()
                    );
                    return false;
                }
            }
            else
            {
                // Strict URI conformance rejects URLs containing invalid characters
                if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                    fInException = true;
                    emitError
                    (
                        XMLErrs::XMLException_Fatal
                        , e.getMessage()
                    );
                    return false;
                }
                srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
            }
        }
        else {
                // NOTE(review): lines appear to be missing from this branch in
                // this view -- the emitError call below is never completed.
                // Also, unlike the other sites, this exception is built
                // without fMemoryManager.
                srcToUse = new (fMemoryManager) LocalFileInputSource(systemId,  fMemoryManager);
                // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
                MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL);
                fInException = true;
                emitError
                (
                    XMLErrs::XMLException_Fatal
PeiYong Zhang's avatar
PeiYong Zhang committed
    }
    catch(const XMLException& excToCatch)
    {
        //  For any other XMLException,
        //  emit the error and catch any user exception thrown from here.
        //  The exception's error type selects the warning, fatal or plain
        //  error code that is reported.
        fInException = true;
        if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
            emitError
            (
                XMLErrs::XMLException_Warning
                , excToCatch.getMessage()
            );
        else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
            emitError
            (
                XMLErrs::XMLException_Fatal
                , excToCatch.getMessage()
            );
        else
            emitError
            (
                XMLErrs::XMLException_Error
                , excToCatch.getMessage()
            );
        return false;
    }
PeiYong Zhang's avatar
PeiYong Zhang committed

    // The janitor owns the input source while the prolog is scanned
    Janitor<InputSource> janSrc(srcToUse);
    return scanFirst(*srcToUse, toFill);
PeiYong Zhang's avatar
PeiYong Zhang committed
}

bool XMLScanner::scanFirst( const   char* const     systemId
                            ,       XMLPScanToken&  toFill)
PeiYong Zhang's avatar
PeiYong Zhang committed
{
    // We just delegate this to the XMLCh version after transcoding
    XMLCh* tmpBuf = XMLString::transcode(systemId, fMemoryManager);
    ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
    return scanFirst(tmpBuf, toFill);
PeiYong Zhang's avatar
PeiYong Zhang committed
}

//  Starts a progressive parse on the given input source: bumps the sequence
//  id, resets the scanner for the source, notifies the document handler and
//  scans the prolog. On success the caller's token is filled in with the
//  scanner id and sequence id and true is returned; on any reported failure
//  false is returned.
bool XMLScanner::scanFirst( const   InputSource&    src
Khaled Noaman's avatar
Khaled Noaman committed
                           ,       XMLPScanToken&  toFill)
PeiYong Zhang's avatar
PeiYong Zhang committed
{
    //  Bump up the sequence id for this new scan cycle. This will invalidate
    //  any previous tokens we've returned.
    fSequenceId++;

    // Janitor bound to ReaderMgr::reset; it is released on the success path
    // and on the out-of-memory paths below.
    ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);

   // Reset the scanner and its plugged in stuff for a new run.  This
PeiYong Zhang's avatar
PeiYong Zhang committed
    // resets all the data structures, creates the initial reader and
    // pushes it on the stack, and sets up the base document path
    scanReset(src);

    // If we have a document handler, then call the start document
    if (fDocHandler)
        fDocHandler->startDocument();

    try
    {
        //  Scan the prolog part, which is everything before the root element
        //  including the DTD subsets. This is all that is done on the scan
        //  first.
        scanProlog();

        //  If we got to the end of input, then its not a valid XML file.
        //  Else, go on to scan the content.
        if (fReaderMgr.atEOF())
        {
            emitError(XMLErrs::EmptyMainEntity);
        }
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception so return failure
PeiYong Zhang's avatar
PeiYong Zhang committed
        return false;
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, return failure
PeiYong Zhang's avatar
PeiYong Zhang committed
        return false;
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
PeiYong Zhang's avatar
PeiYong Zhang committed
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
PeiYong Zhang's avatar
PeiYong Zhang committed
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
PeiYong Zhang's avatar
PeiYong Zhang committed
                    , excToCatch.getMessage()
                );
        }
            // NOTE(review): the catch header that opens this handler appears
            // to be missing from this view.
            // This is a special case for out-of-memory
            // conditions, because resetting the ReaderMgr
            // can be problematic.
            resetReaderMgr.release();

PeiYong Zhang's avatar
PeiYong Zhang committed
            throw;
        }

        return false;
    }
        // NOTE(review): the catch header that opens this handler appears
        // to be missing from this view.
        // This is a special case for out-of-memory
        // conditions, because resetting the ReaderMgr
        // can be problematic.
        resetReaderMgr.release();

PeiYong Zhang's avatar
PeiYong Zhang committed
        throw;
    }

    // Fill in the caller's token to make it legal and return success
    toFill.set(fScannerId, fSequenceId);

    // Release the object that will reset the ReaderMgr, since there's
    // more to scan.
    resetReaderMgr.release();

PeiYong Zhang's avatar
PeiYong Zhang committed
    return true;
}


Khaled Noaman's avatar
Khaled Noaman committed
void XMLScanner::scanReset(XMLPScanToken& token)
{
    // Only a token that is still legal may be used to reset the scan;
    // anything else is reported as a bad progressive-scan token.
    const bool tokenIsLegal = isLegalToken(token);
    if (!tokenIsLegal)
    {
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
    }

    // Put the reader manager back into its reset state
    fReaderMgr.reset();

    // A new sequence number invalidates every token handed out so far
    ++fSequenceId;

    // Start counting errors from scratch again
    fErrorCount = 0;
}
PeiYong Zhang's avatar
PeiYong Zhang committed

//  Copies the parse configuration of refScanner onto this scanner by reading
//  each setting through refScanner's getter and applying it through the
//  matching setter here. refScanner is dereferenced and must not be null.
//  NOTE(review): the closing brace of this function is missing from this
//  view; further settings may have been dropped along with it.
void XMLScanner::setParseSettings(XMLScanner* const refScanner)
{
    // Handlers and reporters
    setDocHandler(refScanner->getDocHandler());
    setDocTypeHandler(refScanner->getDocTypeHandler());
    setErrorHandler(refScanner->getErrorHandler());
    setErrorReporter(refScanner->getErrorReporter());
    setEntityHandler(refScanner->getEntityHandler());
    // Feature flags and options
    setDoNamespaces(refScanner->getDoNamespaces());
    setDoSchema(refScanner->getDoSchema());
    setCalculateSrcOfs(refScanner->getCalculateSrcOfs());
    setStandardUriConformant(refScanner->getStandardUriConformant());
    setExitOnFirstFatal(refScanner->getExitOnFirstFatal());
    setValidationConstraintFatal(refScanner->getValidationConstraintFatal());
PeiYong Zhang's avatar
PeiYong Zhang committed
    setIdentityConstraintChecking(refScanner->getIdentityConstraintChecking());
    setValidationSchemaFullChecking(refScanner->getValidationSchemaFullChecking());
    cacheGrammarFromParse(refScanner->isCachingGrammarFromParse());
    useCachedGrammarInParse(refScanner->isUsingCachedGrammarInParse());
    setLoadExternalDTD(refScanner->getLoadExternalDTD());
    setLoadSchema(refScanner->getLoadSchema());
    setNormalizeData(refScanner->getNormalizeData());
    setExternalSchemaLocation(refScanner->getExternalSchemaLocation());
    setExternalNoNamespaceSchemaLocation(refScanner->getExternalNoNamespaceSchemaLocation());
    setValidationScheme(refScanner->getValidationScheme());
    setSecurityManager(refScanner->getSecurityManager());
    setPSVIHandler(refScanner->getPSVIHandler());
PeiYong Zhang's avatar
PeiYong Zhang committed

Khaled Noaman's avatar
Khaled Noaman committed
// ---------------------------------------------------------------------------
//  XMLScanner: Private helper methods.
// ---------------------------------------------------------------------------

//  This method handles the common initialization, to avoid having to do
//  it redundantly in multiple constructors.
//  Shared constructor logic: assigns the scanner id, allocates the attribute
//  list, the validation context and the initial unsigned-int pool, registers
//  this scanner as the full-handler of the CDATA buffer, and initializes a
//  validator that was passed to the constructor.
//  NOTE(review): the closing brace of this function is missing from this view.
void XMLScanner::commonInit()
{
    //  We have to do a little init that involves statics, so we have to
    //  use the mutex to protect it.
    //  NOTE(review): no lock is taken in the visible code and gScannerId is
    //  not declared in this view -- lines appear to have been dropped here.
    {
Khaled Noaman's avatar
Khaled Noaman committed

        // And assign ourselves the next available scanner id
        fScannerId = ++gScannerId;
    }
PeiYong Zhang's avatar
PeiYong Zhang committed

Khaled Noaman's avatar
Khaled Noaman committed
    //  Create the attribute list, which is used to store attribute values
    //  during start tag processing. Give it a reasonable initial size that
    //  will serve for most folks, though it will grow as required.
    fAttrList = new (fMemoryManager) RefVectorOf<XMLAttr>(32, true, fMemoryManager);
PeiYong Zhang's avatar
PeiYong Zhang committed

Khaled Noaman's avatar
Khaled Noaman committed
    //  Create the id ref list. This is used to enforce XML 1.0 ID ref
    //  semantics, i.e. all id refs must refer to elements that exist
    fValidationContext = new (fMemoryManager) ValidationContextImpl(fMemoryManager);
    fValidationContext->setElemStack(&fElemStack);
PeiYong Zhang's avatar
PeiYong Zhang committed

Khaled Noaman's avatar
Khaled Noaman committed
    //  Create the GrammarResolver
    //fGrammarResolver = new GrammarResolver();

    // create initial, 64-element, fUIntPool
    // (the row table holds fUIntPoolRowTotal pointers, all zeroed; only
    // row 0 is allocated here, with 1 << 6 == 64 zeroed entries)
    fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) *fUIntPoolRowTotal);
    memset(fUIntPool, 0, sizeof(unsigned int *) * fUIntPoolRowTotal);
    fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
    memset(fUIntPool[0], 0, sizeof(unsigned int) << 6);

    // Register self as handler for XMLBufferFull events on the CDATA buffer
    fCDataBuf.setFullHandler(this, fBufferSize);

   // A validator handed to the constructor counts as user-supplied
   if (fValidator) {
       fValidatorFromUser = true;
       initValidator(fValidator);
   }
PeiYong Zhang's avatar
PeiYong Zhang committed

//  Releases the resources owned by the scanner: the attribute list, the
//  attribute duplicate-check registry, the validation context, the strings
//  allocated through fMemoryManager, and every allocated row of the
//  unsigned-int pool followed by the row table itself.
//  NOTE(review): the indentation of the pool loop and the missing closing
//  brace(s) suggest that an enclosing guard and the end of the function were
//  dropped from this view.
void XMLScanner::cleanUp()
{
    delete fAttrList;
    delete fAttrDupChkRegistry;
    delete fValidationContext;
    fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
    fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation;
    fMemoryManager->deallocate(fExternalNoNamespaceSchemaLocation);//delete [] fExternalNoNamespaceSchemaLocation;
    // delete fUIntPool
        // rows 0..fUIntPoolRow inclusive are released one by one
        for (unsigned int i=0; i<=fUIntPoolRow; i++)
        {
            fMemoryManager->deallocate(fUIntPool[i]);
        }
        fMemoryManager->deallocate(fUIntPool);
PeiYong Zhang's avatar
PeiYong Zhang committed

Khaled Noaman's avatar
Khaled Noaman committed
void XMLScanner::initValidator(XMLValidator* theValidator)
{
    //  Hand the validator the pieces of the scanner it works with (the
    //  scanner itself, its reader manager and its buffer manager), then
    //  point it at the error reporter currently installed on the scanner.
    XMLValidator& validator = *theValidator;

    validator.setScannerInfo(this, &fReaderMgr, &fBufMgr);
    validator.setErrorReporter(fErrorReporter);
}
PeiYong Zhang's avatar
PeiYong Zhang committed

Khaled Noaman's avatar
Khaled Noaman committed
// ---------------------------------------------------------------------------
//  XMLScanner: Error emitting methods
// ---------------------------------------------------------------------------
PeiYong Zhang's avatar
PeiYong Zhang committed

Khaled Noaman's avatar
Khaled Noaman committed
//  These methods are called whenever the scanner wants to emit an error.
//  They handle getting the message loaded, doing token replacement, etc.,
//  and then call the error reporter, if one is installed.
bool XMLScanner::emitErrorWillThrowException(const XMLErrs::Codes toEmit)
{
    //  Emitting throws only for a fatal code, when the scanner is set to
    //  exit on the first fatal error and is not already handling an exception.
    return XMLErrs::isFatal(toEmit) && fExitOnFirstFatal && !fInException;
}

Khaled Noaman's avatar
Khaled Noaman committed
//  Emits an error that needs no replacement text.
//
//  Anything other than a warning bumps the error count. If an error reporter
//  is installed, the message text for toEmit is loaded and passed to it with
//  the position of the last external entity. Finally, toEmit itself is thrown
//  when emitErrorWillThrowException() says so.
void XMLScanner::emitError(const XMLErrs::Codes toEmit)
{
    // Bump the error count if it is not a warning
    if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
        incrementErrorCount();

    if (fErrorReporter)
    {
        // Load the message into a local for display
        const XMLSize_t msgSize = 1023;
        XMLCh errText[msgSize + 1];

        // Start from an empty string so that the reporter never sees an
        // uninitialized buffer if the message cannot be loaded.
        errText[0] = 0;

        if (!gMsgLoader->loadMsg(toEmit, errText, msgSize))
        {
                // <TBD> Probably should load a default msg here
        }

        //  Create a LastExtEntityInfo structure and get the reader manager
        //  to fill it in for us. This will give us the information about
        //  the last reader on the stack that was an external entity of some
        //  sort (i.e. it will ignore internal entities).
        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr.getLastExtEntityInfo(lastInfo);

        fErrorReporter->error
        (
            toEmit
            , XMLUni::fgXMLErrDomain
            , XMLErrs::errorType(toEmit)
            , errText
            , lastInfo.systemId
            , lastInfo.publicId
            , lastInfo.lineNumber
            , lastInfo.colNumber
        );
    }

    // Bail out if it's fatal and we are to give up on the first fatal error
    if (emitErrorWillThrowException(toEmit))
        throw toEmit;
}

//  Emits an error whose message takes up to four XMLCh replacement texts.
//
//  Anything other than a warning bumps the error count. If an error reporter
//  is installed, the message for toEmit is loaded with text1..text4 passed to
//  the loader, then handed to the reporter with the position of the last
//  external entity. Finally, toEmit itself is thrown when
//  emitErrorWillThrowException() says so.
void XMLScanner::emitError( const   XMLErrs::Codes    toEmit
                            , const XMLCh* const        text1
                            , const XMLCh* const        text2
                            , const XMLCh* const        text3
                            , const XMLCh* const        text4)
{
    // Bump the error count if it is not a warning
    if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
        incrementErrorCount();

    if (fErrorReporter)
    {
        //  Load the message into a local and replace any tokens found in
        //  the text.
        const XMLSize_t maxChars = 2047;
        XMLCh errText[maxChars + 1];

        // Start from an empty string so that the reporter never sees an
        // uninitialized buffer if the message cannot be loaded.
        errText[0] = 0;

        if (!gMsgLoader->loadMsg(toEmit, errText, maxChars, text1, text2, text3, text4, fMemoryManager))
        {
                // <TBD> Should probably load a default message here
        }

        //  Create a LastExtEntityInfo structure and get the reader manager
        //  to fill it in for us. This will give us the information about
        //  the last reader on the stack that was an external entity of some
        //  sort (i.e. it will ignore internal entities).
        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr.getLastExtEntityInfo(lastInfo);

        fErrorReporter->error
        (
            toEmit
            , XMLUni::fgXMLErrDomain
            , XMLErrs::errorType(toEmit)
            , errText
            , lastInfo.systemId
            , lastInfo.publicId
            , lastInfo.lineNumber
            , lastInfo.colNumber
        );
    }

    // Bail out if it's fatal and we are to give up on the first fatal error
    if (emitErrorWillThrowException(toEmit))
        throw toEmit;
}

//  Emits the XML error 'toEmit', substituting up to four narrow (char)
//  replacement texts into the loaded message.
//
//  The error count is bumped unless the error is a warning. If an error
//  reporter is installed, it is given the formatted message together with
//  the position reported by the reader manager. Finally, 'toEmit' itself is
//  thrown when emitErrorWillThrowException() says so.
void XMLScanner::emitError( const   XMLErrs::Codes    toEmit
                            , const char* const         text1
                            , const char* const         text2
                            , const char* const         text3
                            , const char* const         text4)
{
    // Bump the error count if it is not a warning
    if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
        incrementErrorCount();

    if (fErrorReporter)
    {
        //  Load the message into a local buffer and replace any tokens
        //  found in the text.
        const XMLSize_t maxChars = 2047;
        XMLCh errText[maxChars + 1];

        if (!gMsgLoader->loadMsg(toEmit, errText, maxChars, text1, text2, text3, text4, fMemoryManager))
        {
            //  Nothing visible here guarantees that a failed load left a
            //  terminated string in the buffer, so hand the reporter an
            //  empty message instead of whatever the stack happened to hold.
            // <TBD> Should probably load a default message here
            errText[0] = 0;
        }

        //  Create a LastExtEntityInfo structure and get the reader manager
        //  to fill it in for us. This will give us the information about
        //  the last reader on the stack that was an external entity of some
        //  sort (i.e. it will ignore internal entities).
        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr.getLastExtEntityInfo(lastInfo);

        fErrorReporter->error
        (
            toEmit
            , XMLUni::fgXMLErrDomain
            , XMLErrs::errorType(toEmit)
            , errText
            , lastInfo.systemId
            , lastInfo.publicId
            , lastInfo.lineNumber
            , lastInfo.colNumber
        );
    }

    // Bail out if it's fatal and we are to give up on the first fatal error
    if (emitErrorWillThrowException(toEmit))
        throw toEmit;
}

//  Emits the XML error 'toEmit' on behalf of an exception identified by
//  'originalExceptCode', substituting up to four XMLCh replacement texts
//  into the loaded message.
//
//  The message text, the severity and the value thrown all come from
//  'toEmit'; the error reporter, however, is handed 'originalExceptCode'
//  in the exception domain (XMLUni::fgExceptDomain) as the error identity.
void XMLScanner::emitError( const   XMLErrs::Codes      toEmit
                            , const XMLExcepts::Codes   originalExceptCode
                            , const XMLCh* const        text1
                            , const XMLCh* const        text2
                            , const XMLCh* const        text3
                            , const XMLCh* const        text4)
{
    // Bump the error count if it is not a warning
    if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
        incrementErrorCount();

    if (fErrorReporter)
    {
        //  Load the message into a local buffer and replace any tokens
        //  found in the text.
        const XMLSize_t maxChars = 2047;
        XMLCh errText[maxChars + 1];

        if (!gMsgLoader->loadMsg(toEmit, errText, maxChars, text1, text2, text3, text4, fMemoryManager))
        {
            //  Nothing visible here guarantees that a failed load left a
            //  terminated string in the buffer, so hand the reporter an
            //  empty message instead of whatever the stack happened to hold.
            // <TBD> Should probably load a default message here
            errText[0] = 0;
        }

        //  Create a LastExtEntityInfo structure and get the reader manager
        //  to fill it in for us. This will give us the information about
        //  the last reader on the stack that was an external entity of some
        //  sort (i.e. it will ignore internal entities).
        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr.getLastExtEntityInfo(lastInfo);

        //  Report under the exception's own code and domain (rather than
        //  fgXMLErrDomain); the severity is still derived from toEmit.
        fErrorReporter->error
        (
            originalExceptCode
            , XMLUni::fgExceptDomain    //fgXMLErrDomain
            , XMLErrs::errorType(toEmit)
            , errText
            , lastInfo.systemId
            , lastInfo.publicId
            , lastInfo.lineNumber
            , lastInfo.colNumber
        );
    }

    // Bail out if it's fatal and we are to give up on the first fatal error
    if (emitErrorWillThrowException(toEmit))
        throw toEmit;
}
PeiYong Zhang's avatar
PeiYong Zhang committed

// ---------------------------------------------------------------------------
//  XMLScanner: Getter methods
// ---------------------------------------------------------------------------

//  This method allows the caller to query the current location of the scanner.
//  It will return the sys/public ids of the current entity, and the line/col
//  position within it.
//
//  NOTE: This API returns the location within the last external file. So if
//  it's currently scanning an entity, the position returned will be the end
//  of the entity reference in the file that had the reference.
//
Khaled Noaman's avatar
Khaled Noaman committed
/*bool
PeiYong Zhang's avatar
PeiYong Zhang committed
XMLScanner::getLastExtLocation(         XMLCh* const    sysIdToFill
                                , const unsigned int    maxSysIdChars
                                ,       XMLCh* const    pubIdToFill
                                , const unsigned int    maxPubIdChars
                                ,       XMLSSize_t&     colToFill) const
PeiYong Zhang's avatar
PeiYong Zhang committed
{
    // Create a local info object and get it filled in by the reader manager
    ReaderMgr::LastExtEntityInfo lastInfo;
    fReaderMgr.getLastExtEntityInfo(lastInfo);

    // Fill in the line and column number
    lineToFill = lastInfo.lineNumber;
    colToFill = lastInfo.colNumber;

    // And copy over as much of the ids as will fit
    sysIdToFill[0] = 0;
    if (lastInfo.systemId)
    {
        if (XMLString::stringLen(lastInfo.systemId) > maxSysIdChars)
            return false;
        XMLString::copyString(sysIdToFill, lastInfo.systemId);
    }

    pubIdToFill[0] = 0;
    if (lastInfo.publicId)
    {
        if (XMLString::stringLen(lastInfo.publicId) > maxPubIdChars)
            return false;
        XMLString::copyString(pubIdToFill, lastInfo.publicId);
    }
    return true;
Khaled Noaman's avatar
Khaled Noaman committed
}*/
PeiYong Zhang's avatar
PeiYong Zhang committed


// ---------------------------------------------------------------------------
//  XMLScanner: Private scanning methods
// ---------------------------------------------------------------------------

Khaled Noaman's avatar
Khaled Noaman committed
//  This method is called after the end of the root element, to handle
//  any miscellaneous stuff hanging around.
void XMLScanner::scanMiscellaneous()
PeiYong Zhang's avatar
PeiYong Zhang committed
{
Khaled Noaman's avatar
Khaled Noaman committed
    // Get a buffer for this work
    XMLBufBid bbCData(&fBufMgr);
PeiYong Zhang's avatar
PeiYong Zhang committed