/*
 * Copyright 1999-2002,2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/ValidationContextImpl.hpp>
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/Mutexes.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/util/XMLMsgLoader.hpp>
#include <xercesc/util/XMLRegisterCleanup.hpp>
#include <xercesc/util/XMLInitializer.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/framework/XMLValidator.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/validators/DTD/DocTypeHandler.hpp>
#include <xercesc/validators/common/GrammarResolver.hpp>
Neil Graham
committed
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/util/XMLResourceIdentifier.hpp>
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// ---------------------------------------------------------------------------
//  Local static data
// ---------------------------------------------------------------------------
static XMLUInt32       gScannerId;
static bool            sRegistered = false;
static XMLMutex*       sScannerMutex = 0;
static XMLRegisterCleanup scannerMutexCleanup;
static XMLMsgLoader*   gMsgLoader = 0;
static XMLRegisterCleanup cleanupMsgLoader;
// ---------------------------------------------------------------------------
//  Local, static functions
// ---------------------------------------------------------------------------
//  Cleanup for the message loader
void XMLScanner::reinitMsgLoader()
{
	delete gMsgLoader;
	gMsgLoader = 0;
}
//  Cleanup for the scanner mutex
void XMLScanner::reinitScannerMutex()
{
    delete sScannerMutex;
    sScannerMutex = 0;
    sRegistered = false;
}
//
//  Returns the mutex that protects the scanner's static data, creating it on
//  first use. Creation is serialized on XMLPlatformUtils::fgAtomicMutex, and
//  the sRegistered flag is tested a second time once that lock is held so
//  that only one thread allocates the mutex and registers its cleanup.
//
static XMLMutex& gScannerMutex()
{
    if (!sRegistered)
    {
        XMLMutexLock lockInit(XMLPlatformUtils::fgAtomicMutex);

        // Another thread may have completed the setup while we were waiting
        // for the lock, so look at the flag again before doing any work.
        if (!sRegistered)
        {
            sScannerMutex = new XMLMutex;
            scannerMutexCleanup.registerCleanup(XMLScanner::reinitScannerMutex);
            sRegistered = true;
        }
    }
    return *sScannerMutex;
}
//  Returns the message loader for the XML error domain, loading it on first
//  use under the scanner mutex. If the message set cannot be loaded the
//  platform panic handler is invoked with Panic_CantLoadMsgDomain.
static XMLMsgLoader& gScannerMsgLoader()
{
    if (!gMsgLoader)
    {
        XMLMutexLock lockInit(&gScannerMutex());

        // If we haven't loaded our message yet, then do that. The pointer is
        // tested again now that the lock is held, so a thread that lost the
        // race does not load (and register) a second loader.
        if (!gMsgLoader)
        {
            gMsgLoader = XMLPlatformUtils::loadMsgSet(XMLUni::fgXMLErrDomain);
            if (!gMsgLoader)
                XMLPlatformUtils::panic(PanicHandler::Panic_CantLoadMsgDomain);

            // Register this object to be cleaned up at termination
            cleanupMsgLoader.registerCleanup(XMLScanner::reinitMsgLoader);
        }
    }
    return *gMsgLoader;
}
void XMLInitializer::initializeScannerMsgLoader()
{
    gMsgLoader = XMLPlatformUtils::loadMsgSet(XMLUni::fgXMLErrDomain);
    // Register this object to be cleaned up at termination
    if (gMsgLoader) {
        cleanupMsgLoader.registerCleanup(XMLScanner::reinitMsgLoader);
    }
    sScannerMutex = new XMLMutex;
    if (sScannerMutex) {
        scannerMutexCleanup.registerCleanup(XMLScanner::reinitScannerMutex);
        sRegistered = true;
    }
}
// ---------------------------------------------------------------------------
//  XMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------
//  Constructs a scanner with no handlers installed.
//
//  valToAdopt      - optional validator.
//  grammarResolver - must not be null; it is dereferenced in the initializer
//                    list to obtain the grammar pool memory manager.
//  manager         - memory manager handed to the reader manager, the buffer
//                    manager, the scratch buffers and the element stack.
XMLScanner::XMLScanner(XMLValidator* const valToAdopt,
                       GrammarResolver* const grammarResolver,
                       MemoryManager* const manager)
    : fBufferSize(1024 * 1024)
    , fStandardUriConformant(false)
    , fCalculateSrcOfs(false)
    , fExitOnFirstFatal(true)
    , fValidationConstraintFatal(false)
    , fInException(false)
    , fStandalone(false)
    , fHasNoDTD(true)
    , fValidate(false)
    , fValidatorFromUser(false)
    , fDoSchema(false)
    , fSchemaFullChecking(false)
    , fToCacheGrammar(false)
    , fUseCachedGrammar(false)
    , fLoadExternalDTD(true)
    , fNormalizeData(true)
    , fGenerateSyntheticAnnotations(false)
    , fValidateAnnotations(false)
    , fEntityExpansionLimit(0)
    , fEntityExpansionCount(0)
    , fEmptyNamespaceId(0)
    , fUnknownNamespaceId(0)
    , fXMLNamespaceId(0)
    , fXMLNSNamespaceId(0)
    , fSchemaNamespaceId(0)
    , fUIntPool(0)
    , fUIntPoolRow(0)
    , fUIntPoolCol(0)
    , fUIntPoolRowTotal(2)
    , fScannerId(0)
    , fSequenceId(0)
    , fAttrList(0)
    , fAttrDupChkRegistry(0)
    , fDocHandler(0)
    , fDocTypeHandler(0)
    , fEntityHandler(0)
    , fErrorReporter(0)
    , fErrorHandler(0)
    , fPSVIHandler(0)
    , fValidationContext(0)
    , fEntityDeclPoolRetrieved(false)
    , fReaderMgr(manager)
    , fGrammarResolver(grammarResolver)
    , fGrammarPoolMemoryManager(grammarResolver->getGrammarPoolMemoryManager())
    , fURIStringPool(0)
    , fRootElemName(0)
    , fExternalSchemaLocation(0)
    , fExternalNoNamespaceSchemaLocation(0)
    , fMemoryManager(manager)
    , fBufMgr(manager)
    , fAttNameBuf(1023, manager)
    , fAttValueBuf(1023, manager)
    , fCDataBuf(1023, manager)
    , fQNameBuf(1023, manager)
    , fPrefixBuf(1023, manager)
    , fURIBuf(1023, manager)
    , fElemStack(manager)
{
   commonInit();

   // NOTE(review): fValidator is read here, yet no initializer for it (and no
   // use of valToAdopt) is visible in the list above, while the handler-taking
   // constructor tests valToAdopt instead. Confirm fValidator is initialized
   // from valToAdopt before this point.
   if (fValidator) {
       fValidatorFromUser = true;
       initValidator(fValidator);
   }
}
//  Constructs a scanner with the given handlers already installed.
//
//  docHandler / docTypeHandler / entityHandler / errHandler
//                  - stored as given; any of them may be null.
//  valToAdopt      - optional validator; when non-null the scanner marks the
//                    validator as user supplied and initializes it.
//  grammarResolver - must not be null; it is dereferenced in the initializer
//                    list to obtain the grammar pool memory manager.
//  manager         - memory manager handed to the reader manager, the buffer
//                    manager, the scratch buffers and the element stack.
XMLScanner::XMLScanner( XMLDocumentHandler* const  docHandler
                          , DocTypeHandler* const    docTypeHandler
                          , XMLEntityHandler* const  entityHandler
                          , XMLErrorReporter* const  errHandler
                          , XMLValidator* const      valToAdopt
                          , GrammarResolver* const   grammarResolver
                          , MemoryManager* const     manager)
    : fBufferSize(1024 * 1024)
    , fStandardUriConformant(false)
    , fCalculateSrcOfs(false)
    , fExitOnFirstFatal(true)
    , fValidationConstraintFatal(false)
    , fInException(false)
    , fStandalone(false)
    , fHasNoDTD(true)
    , fValidate(false)
    , fValidatorFromUser(false)
    , fDoSchema(false)
    , fSchemaFullChecking(false)
    , fToCacheGrammar(false)
    , fUseCachedGrammar(false)
    , fLoadExternalDTD(true)
    , fNormalizeData(true)
    , fGenerateSyntheticAnnotations(false)
    , fValidateAnnotations(false)
    , fEntityExpansionLimit(0)
    , fEntityExpansionCount(0)
    , fEmptyNamespaceId(0)
    , fUnknownNamespaceId(0)
    , fXMLNamespaceId(0)
    , fXMLNSNamespaceId(0)
    , fSchemaNamespaceId(0)
    , fUIntPool(0)
    , fUIntPoolRow(0)
    , fUIntPoolCol(0)
    , fUIntPoolRowTotal(2)
    , fScannerId(0)
    , fSequenceId(0)
    , fAttrList(0)
    , fAttrDupChkRegistry(0)
    , fDocHandler(docHandler)
    , fDocTypeHandler(docTypeHandler)
    , fEntityHandler(entityHandler)
    , fErrorReporter(errHandler)
    , fErrorHandler(0)
    , fPSVIHandler(0)
    , fValidationContext(0)
    , fEntityDeclPoolRetrieved(false)
    , fReaderMgr(manager)
    , fGrammarResolver(grammarResolver)
    , fGrammarPoolMemoryManager(grammarResolver->getGrammarPoolMemoryManager())
    , fURIStringPool(0)
    , fRootElemName(0)
    , fExternalSchemaLocation(0)
    , fExternalNoNamespaceSchemaLocation(0)
    , fMemoryManager(manager)
    , fBufMgr(manager)
    , fAttNameBuf(1023, manager)
    , fAttValueBuf(1023, manager)
    , fCDataBuf(1023, manager)
    , fQNameBuf(1023, manager)
    , fPrefixBuf(1023, manager)
    , fURIBuf(1023, manager)
    // Same as the other constructor: the element stack must allocate from
    // the caller's memory manager rather than from its default.
    , fElemStack(manager)
{
   commonInit();

   if (valToAdopt){
       fValidatorFromUser = true;
       initValidator(fValidator);
   }
}
//  Releases the storage the scanner allocated for itself: the attribute list,
//  the duplicate-attribute registry, the validation context, the copied
//  root-element/schema-location strings and the unsigned-int pool.
XMLScanner::~XMLScanner()
{
    delete fAttrList;
    delete fAttrDupChkRegistry;

    // Allocated in commonInit(); no other release of it is visible in this
    // file. NOTE(review): confirm no derived class also deletes it.
    delete fValidationContext;

    fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
    fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation;
    fMemoryManager->deallocate(fExternalNoNamespaceSchemaLocation);//delete [] fExternalNoNamespaceSchemaLocation;

    // delete fUIntPool: rows 0..fUIntPoolRow (inclusive) first, then the
    // table of row pointers itself.
    for (unsigned int i=0; i<=fUIntPoolRow; i++)
    {
        fMemoryManager->deallocate(fUIntPool[i]);
    }
    fMemoryManager->deallocate(fUIntPool);
}
//  Installs a new validator and takes ownership of it. A validator adopted
//  by an earlier call (or constructor) is deleted first.
void XMLScanner::setValidator(XMLValidator* const valToAdopt)
{
    // Do not delete the validator if the caller is handing back the very
    // object we already own; it would be used after being freed below.
    if (fValidatorFromUser && fValidator != valToAdopt)
        delete fValidator;

    fValidator = valToAdopt;
    fValidatorFromUser = true;
    initValidator(fValidator);
}
// ---------------------------------------------------------------------------
//  XMLScanner: Main entry point to scan a document
// ---------------------------------------------------------------------------
void XMLScanner::scanDocument(  const   XMLCh* const    systemId)
{
    //  First we try to parse it as a URL. If that fails, we assume its
    //  a file and try it that way.
    InputSource* srcToUse = 0;
    try
    {
        //  Create a temporary URL. Since this is the primary document,
        //  it has to be fully qualified. If not, then assume we are just
        //  mistaking a file for a URL.
        XMLURL tmpURL(fMemoryManager);
        if (XMLURL::parse(systemId, tmpURL)) {
            if (tmpURL.isRelative()) {
                if (!fStandardUriConformant)
                    srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
                else {
David Abram Cargill
committed
                    // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                    // emit the error directly
David Abram Cargill
committed
                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);
                    fInException = true;
                    emitError
                    (
                        XMLErrs::XMLException_Fatal
                        , e.getType()
                        , e.getMessage()
                    );
                    return;
                }
            }
            else
            {
                if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
David Abram Cargill
committed
                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                    fInException = true;
                    emitError
                    (
                        XMLErrs::XMLException_Fatal
                        , e.getType()
                        , e.getMessage()
                    );
                    return;
                }
                srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
            }
        }
        else {
            if (!fStandardUriConformant)
Khaled Noaman
committed
                srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
            else {
David Abram Cargill
committed
                // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                // emit the error directly
                // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
David Abram Cargill
committed
                MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                fInException = true;
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , e.getType()
                    , e.getMessage()
                );
                return;
            }
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
    catch(const XMLException& excToCatch)
    {
        //  For any other XMLException,
        //  emit the error and catch any user exception thrown from here.
        fInException = true;
        if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
            emitError
            (
                XMLErrs::XMLException_Warning
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
            emitError
            (
                XMLErrs::XMLException_Fatal
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        else
            emitError
            (
                XMLErrs::XMLException_Error
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        return;
    }
    scanDocument(*srcToUse);
void XMLScanner::scanDocument(  const   char* const systemId)
{
    // We just delegate this to the XMLCh version after transcoding
    XMLCh* tmpBuf = XMLString::transcode(systemId, fMemoryManager);
    ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
    scanDocument(tmpBuf);
}
//  This method begins a progressive parse. It scans through the prolog and
//  returns a token to be used on subsequent scanNext() calls. If the return
//  value is true, then the token is legal and ready for further use. If it
//  returns false, then the scan of the prolog failed and the token is not
//  going to work on subsequent scanNext() calls.
bool XMLScanner::scanFirst( const   XMLCh* const    systemId
                            ,       XMLPScanToken&  toFill)
{
    //  First we try to parse it as a URL. If that fails, we assume its
    //  a file and try it that way.
    InputSource* srcToUse = 0;
    try
    {
        //  Create a temporary URL. Since this is the primary document,
        //  it has to be fully qualified. If not, then assume we are just
        //  mistaking a file for a URL.
David Abram Cargill
committed
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
        XMLURL tmpURL(fMemoryManager);
        if (XMLURL::parse(systemId, tmpURL)) {        
            if (tmpURL.isRelative()) {
                if (!fStandardUriConformant)
                    srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
                else {
                    // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                    // emit the error directly
                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);
                    fInException = true;
                    emitError
                    (
                        XMLErrs::XMLException_Fatal
                        , e.getType()
                        , e.getMessage()
                    );
                    return false;
                }
            }
            else
            {
                if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                    fInException = true;
                    emitError
                    (
                        XMLErrs::XMLException_Fatal
                        , e.getType()
                        , e.getMessage()
                    );
                    return false;
                }
                srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
            }
        }
        else {
            if (!fStandardUriConformant)
David Abram Cargill
committed
                srcToUse = new (fMemoryManager) LocalFileInputSource(systemId,  fMemoryManager);
            else {
David Abram Cargill
committed
                // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                // emit the error directly
David Abram Cargill
committed
                // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
                MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL);
                fInException = true;
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , e.getType()
                    , e.getMessage()
                );
                return false;
            }
        }
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
    catch(const XMLException& excToCatch)
    {
        //  For any other XMLException,
        //  emit the error and catch any user exception thrown from here.
        fInException = true;
        if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
            emitError
            (
                XMLErrs::XMLException_Warning
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
            emitError
            (
                XMLErrs::XMLException_Fatal
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        else
            emitError
            (
                XMLErrs::XMLException_Error
                , excToCatch.getType()
                , excToCatch.getMessage()
            );
        return false;
    }
    return scanFirst(*srcToUse, toFill);
}
bool XMLScanner::scanFirst( const   char* const     systemId
                            ,       XMLPScanToken&  toFill)
{
    // We just delegate this to the XMLCh version after transcoding
    XMLCh* tmpBuf = XMLString::transcode(systemId, fMemoryManager);
    ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
    return scanFirst(tmpBuf, toFill);
}
bool XMLScanner::scanFirst( const   InputSource&    src
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
{
    //  Bump up the sequence id for this new scan cycle. This will invalidate
    //  any previous tokens we've returned.
    fSequenceId++;
    // Reset the scanner and its plugged in stuff for a new run.  This
    // resets all the data structures, creates the initial reader and
    // pushes it on the stack, and sets up the base document path
    scanReset(src);
    // If we have a document handler, then call the start document
    if (fDocHandler)
        fDocHandler->startDocument();
    try
    {
        //  Scan the prolog part, which is everything before the root element
        //  including the DTD subsets. This is all that is done on the scan
        //  first.
        scanProlog();
        //  If we got to the end of input, then its not a valid XML file.
        //  Else, go on to scan the content.
        if (fReaderMgr.atEOF())
        {
            emitError(XMLErrs::EmptyMainEntity);
        }
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception so reset and return a failure
        fReaderMgr.reset();
        return false;
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and reuturn failure
        fReaderMgr.reset();
        return false;
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
        }
Neil Graham
committed
        catch(const OutOfMemoryException&)
        {
            throw;
        }
        catch(...)
        {
            // Reset and rethrow the user error
            fReaderMgr.reset();
            throw;
        }
        // Reset and return a failure
        fReaderMgr.reset();
        return false;
    }
Neil Graham
committed
    catch(const OutOfMemoryException&)
    {
        throw;
    }
    catch(...)
    {
        // Reset and rethrow original error
        fReaderMgr.reset();
        throw;
    }
    // Fill in the caller's token to make it legal and return success
    toFill.set(fScannerId, fSequenceId);
    return true;
}
//  Abandons the progressive parse identified by token: resets the reader
//  manager, invalidates outstanding tokens and clears the error count.
//  Throws RuntimeException (Scan_BadPScanToken) if the token is not legal.
void XMLScanner::scanReset(XMLPScanToken& token)
{
    // Make sure this token is still legal
    if (!isLegalToken(token))
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);

    // Reset the reader manager
    fReaderMgr.reset();

    // And invalidate any tokens by bumping our sequence number
    fSequenceId++;

    // Reset our error count
    fErrorCount = 0;
}
//  Copies the parse configuration of refScanner into this scanner by reading
//  each setting through refScanner's getter and applying it through the
//  matching setter on this object. refScanner must not be null.
void XMLScanner::setParseSettings(XMLScanner* const refScanner)
{
    // Handlers
    setDocHandler(refScanner->getDocHandler());
    setDocTypeHandler(refScanner->getDocTypeHandler());
    setErrorHandler(refScanner->getErrorHandler());
    setErrorReporter(refScanner->getErrorReporter());
    setEntityHandler(refScanner->getEntityHandler());

    // Feature flags
    setDoNamespaces(refScanner->getDoNamespaces());
    setDoSchema(refScanner->getDoSchema());
    setCalculateSrcOfs(refScanner->getCalculateSrcOfs());
    setStandardUriConformant(refScanner->getStandardUriConformant());
    setExitOnFirstFatal(refScanner->getExitOnFirstFatal());
    setValidationConstraintFatal(refScanner->getValidationConstraintFatal());
    setIdentityConstraintChecking(refScanner->getIdentityConstraintChecking());
    setValidationSchemaFullChecking(refScanner->getValidationSchemaFullChecking());
    cacheGrammarFromParse(refScanner->isCachingGrammarFromParse());
    useCachedGrammarInParse(refScanner->isUsingCachedGrammarInParse());
    setLoadExternalDTD(refScanner->getLoadExternalDTD());
    setNormalizeData(refScanner->getNormalizeData());

    // Schema locations, validation scheme and remaining plug-ins
    setExternalSchemaLocation(refScanner->getExternalSchemaLocation());
    setExternalNoNamespaceSchemaLocation(refScanner->getExternalNoNamespaceSchemaLocation());
    setValidationScheme(refScanner->getValidationScheme());
    setSecurityManager(refScanner->getSecurityManager());
    setPSVIHandler(refScanner->getPSVIHandler());
}
// ---------------------------------------------------------------------------
//  XMLScanner: Private helper methods.
// ---------------------------------------------------------------------------
//  This method handles the common initialization, to avoid having to do
//  it redundantly in multiple constructors. It assigns the scanner id and
//  allocates the attribute list, the validation context and the first row
//  of the unsigned-int pool, then hooks up the CDATA buffer-full handler.
void XMLScanner::commonInit()
{
    //  We have to do a little init that involves statics, so we have to
    //  use the mutex to protect it.
    {
        XMLMutexLock lockInit(&gScannerMutex());

        // And assign ourselves the next available scanner id
        fScannerId = ++gScannerId;
    }

    //  Create the attribute list, which is used to store attribute values
    //  during start tag processing. Give it a reasonable initial size that
    //  will serve for most folks, though it will grow as required.
    fAttrList = new (fMemoryManager) RefVectorOf<XMLAttr>(32, true, fMemoryManager);

    //  Create the validation context.
    //  NOTE(review): the earlier comment described this as the id ref list
    //  used to enforce XML 1.0 ID ref semantics; presumably the context now
    //  carries that state - confirm against ValidationContextImpl.
    fValidationContext = new (fMemoryManager) ValidationContextImpl(fMemoryManager);

    //  Create the GrammarResolver
    //fGrammarResolver = new GrammarResolver();

    // create initial, 64-element, fUIntPool: a table of fUIntPoolRowTotal
    // row pointers whose first row holds (1 << 6) zeroed unsigned ints and
    // whose second row is not allocated yet.
    fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) *fUIntPoolRowTotal);
    fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
    memset(fUIntPool[0], 0, sizeof(unsigned int) << 6);
    fUIntPool[1] = 0;

    // Register self as handler for XMLBufferFull events on the CDATA buffer
    fCDataBuf.setFullHandler(this, fBufferSize);
}
//  Wires a validator up to this scanner: hands it the scanner, the reader
//  manager and the buffer manager, then the current error reporter.
//  theValidator must not be null.
void XMLScanner::initValidator(XMLValidator* theValidator) {
    XMLValidator& validator = *theValidator;

    validator.setScannerInfo(this, &fReaderMgr, &fBufMgr);
    validator.setErrorReporter(fErrorReporter);
}
// ---------------------------------------------------------------------------
//  XMLScanner: Error emitting methods
// ---------------------------------------------------------------------------
//  These methods are called whenever the scanner wants to emit an error.
//  It handles getting the message loaded, doing token replacement, etc...
//  and then calling the error handler, if its installed.
//  Tells whether emitting toEmit will end with the code being thrown: true
//  only for a fatal code, when the scanner exits on the first fatal error
//  and is not already reporting an exception.
bool XMLScanner::emitErrorWillThrowException(const XMLErrs::Codes toEmit)
{
    return XMLErrs::isFatal(toEmit) && fExitOnFirstFatal && !fInException;
}
//  Reports an error that needs no replacement text. Non-warnings bump the
//  error count; if an error reporter is installed it receives the loaded
//  message together with the position of the last external entity. Throws
//  toEmit when emitErrorWillThrowException() says so.
void XMLScanner::emitError(const XMLErrs::Codes toEmit)
{
    // Bump the error count if it is not a warning
    if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
        incrementErrorCount();

    if (fErrorReporter)
    {
        // Load the message into a local for display
        const unsigned int msgSize = 1023;
        XMLCh errText[msgSize + 1];

        // Start from an empty string so the reporter never sees an
        // uninitialized buffer if the loader fails without writing to it.
        errText[0] = 0;

        if (!gScannerMsgLoader().loadMsg(toEmit, errText, msgSize))
        {
                // <TBD> Probably should load a default msg here
        }

        //  Create a LastExtEntityInfo structure and get the reader manager
        //  to fill it in for us. This will give us the information about
        //  the last reader on the stack that was an external entity of some
        //  sort (i.e. it will ignore internal entities).
        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr.getLastExtEntityInfo(lastInfo);

        fErrorReporter->error
        (
            toEmit
            , XMLUni::fgXMLErrDomain
            , XMLErrs::errorType(toEmit)
            , errText
            , lastInfo.systemId
            , lastInfo.publicId
            , lastInfo.lineNumber
            , lastInfo.colNumber
        );
    }

    // Bail out if it is fatal and we are to give up on the first fatal error
    if (emitErrorWillThrowException(toEmit))
        throw toEmit;
}
//  Reports an error whose message takes up to four XMLCh replacement
//  strings. Non-warnings bump the error count; if an error reporter is
//  installed it receives the expanded message together with the position of
//  the last external entity. Throws toEmit when
//  emitErrorWillThrowException() says so.
void XMLScanner::emitError( const   XMLErrs::Codes    toEmit
                            , const XMLCh* const        text1
                            , const XMLCh* const        text2
                            , const XMLCh* const        text3
                            , const XMLCh* const        text4)
{
    // Bump the error count if it is not a warning
    if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
        incrementErrorCount();

    if (fErrorReporter)
    {
        //  Load the message into a local and replace any tokens found in
        //  the text.
        const unsigned int maxChars = 2047;
        XMLCh errText[maxChars + 1];

        // Start from an empty string so the reporter never sees an
        // uninitialized buffer if the loader fails without writing to it.
        errText[0] = 0;

        if (!gScannerMsgLoader().loadMsg(toEmit, errText, maxChars, text1, text2, text3, text4, fMemoryManager))
        {
                // <TBD> Should probably load a default message here
        }

        //  Create a LastExtEntityInfo structure and get the reader manager
        //  to fill it in for us. This will give us the information about
        //  the last reader on the stack that was an external entity of some
        //  sort (i.e. it will ignore internal entities).
        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr.getLastExtEntityInfo(lastInfo);

        fErrorReporter->error
        (
            toEmit
            , XMLUni::fgXMLErrDomain
            , XMLErrs::errorType(toEmit)
            , errText
            , lastInfo.systemId
            , lastInfo.publicId
            , lastInfo.lineNumber
            , lastInfo.colNumber
        );
    }

    // Bail out if it is fatal and we are to give up on the first fatal error
    if (emitErrorWillThrowException(toEmit))
        throw toEmit;
}
//  Reports an error whose message takes up to four narrow-character
//  replacement strings. Non-warnings bump the error count; if an error
//  reporter is installed it receives the expanded message together with the
//  position of the last external entity. Throws toEmit when
//  emitErrorWillThrowException() says so.
void XMLScanner::emitError( const   XMLErrs::Codes    toEmit
                            , const char* const         text1
                            , const char* const         text2
                            , const char* const         text3
                            , const char* const         text4)
{
    // Bump the error count if it is not a warning
    if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
        incrementErrorCount();

    if (fErrorReporter)
    {
        //  Load the message into a local and replace any tokens found in
        //  the text.
        const unsigned int maxChars = 2047;
        XMLCh errText[maxChars + 1];

        // Start from an empty string so the reporter never sees an
        // uninitialized buffer if the loader fails without writing to it.
        errText[0] = 0;

        if (!gScannerMsgLoader().loadMsg(toEmit, errText, maxChars, text1, text2, text3, text4, fMemoryManager))
        {
                // <TBD> Should probably load a default message here
        }

        //  Create a LastExtEntityInfo structure and get the reader manager
        //  to fill it in for us. This will give us the information about
        //  the last reader on the stack that was an external entity of some
        //  sort (i.e. it will ignore internal entities).
        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr.getLastExtEntityInfo(lastInfo);

        fErrorReporter->error
        (
            toEmit
            , XMLUni::fgXMLErrDomain
            , XMLErrs::errorType(toEmit)
            , errText
            , lastInfo.systemId
            , lastInfo.publicId
            , lastInfo.lineNumber
            , lastInfo.colNumber
        );
    }

    // Bail out if it is fatal and we are to give up on the first fatal error
    if (emitErrorWillThrowException(toEmit))
        throw toEmit;
}
// ---------------------------------------------------------------------------
//  XMLScanner: Getter methods
// ---------------------------------------------------------------------------
//  This method allows the caller to query the current location of the scanner.
//  It will return the sys/public ids of the current entity, and the line/col
//  position within it.
//
//  NOTE: This API returns the location with the last external file. So if its
//  currently scanning an entity, the position returned will be the end of
//  the entity reference in the file that had the reference.
//
XMLScanner::getLastExtLocation(         XMLCh* const    sysIdToFill
                                , const unsigned int    maxSysIdChars
                                ,       XMLCh* const    pubIdToFill
                                , const unsigned int    maxPubIdChars
Tinny Ng
committed
                                ,       XMLSSize_t&     lineToFill
                                ,       XMLSSize_t&     colToFill) const
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
{
    // Create a local info object and get it filled in by the reader manager
    ReaderMgr::LastExtEntityInfo lastInfo;
    fReaderMgr.getLastExtEntityInfo(lastInfo);
    // Fill in the line and column number
    lineToFill = lastInfo.lineNumber;
    colToFill = lastInfo.colNumber;
    // And copy over as much of the ids as will fit
    sysIdToFill[0] = 0;
    if (lastInfo.systemId)
    {
        if (XMLString::stringLen(lastInfo.systemId) > maxSysIdChars)
            return false;
        XMLString::copyString(sysIdToFill, lastInfo.systemId);
    }
    pubIdToFill[0] = 0;
    if (lastInfo.publicId)
    {
        if (XMLString::stringLen(lastInfo.publicId) > maxPubIdChars)
            return false;
        XMLString::copyString(pubIdToFill, lastInfo.publicId);
    }
    return true;
// ---------------------------------------------------------------------------
//  XMLScanner: Private scanning methods
// ---------------------------------------------------------------------------
//  This method is called after the end of the root element, to handle
//  any miscellaneous stuff hanging around.
void XMLScanner::scanMiscellaneous()
    // Get a buffer for this work
    XMLBufBid bbCData(&fBufMgr);
            const XMLCh nextCh = fReaderMgr.peekNextChar();
            // Watch for end of file and break out
            if (!nextCh)
                break;
                    // Can't have an XML decl here
                    emitError(XMLErrs::NotValidAfterContent);
                    fReaderMgr.skipPastChar(chCloseAngle);
                else if (fReaderMgr.skippedString(XMLUni::fgPIString))
                 else if (fReaderMgr.skippedString(XMLUni::fgCommentString))
                    // This can't be possible, so just give up
                    emitError(XMLErrs::ExpectedCommentOrPI);
                    fReaderMgr.skipPastChar(chCloseAngle);
                //  If we have a doc handler, then gather up the spaces and
                //  call back. Otherwise, just skip over whitespace.
                if (fDocHandler)