Skip to content
Snippets Groups Projects
ReaderMgr.hpp 16.3 KiB
Newer Older
PeiYong Zhang's avatar
PeiYong Zhang committed
/*
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache\@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation, and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.ibm.com .  For more information
 * on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

/*
 * $Log$
 * Revision 1.2  2002/05/27 18:42:14  tng
 * To get ready for 64 bit large file, use XMLSSize_t to represent line and column number.
 *
 * Revision 1.1.1.1  2002/02/01 22:21:58  peiyongz
 * sane_include
PeiYong Zhang's avatar
PeiYong Zhang committed
 *
 * Revision 1.13  2001/07/12 18:50:08  tng
 * Some performance modification regarding standalone check and xml decl check.
 *
 * Revision 1.12  2000/09/09 00:18:18  andyh
 * Reordered member variables in ThrowEOEJanitor.  Patch submitted
 * by Kirk Wylie.
 *
 * Revision 1.11  2000/07/08 00:17:13  andyh
 * Cleanup of yesterday's speedup changes.  Merged new bit into the
 * scanner character properties table.
 *
 * Revision 1.10  2000/07/07 01:08:44  andyh
 * Parser speed up in scan of XML content.
 *
 * Revision 1.9  2000/03/02 19:54:29  roddey
 * This checkin includes many changes done while waiting for the
 * 1.1.0 code to be finished. I can't list them all here, but a list is
 * available elsewhere.
 *
 * Revision 1.8  2000/02/24 20:18:07  abagchi
 * Swat for removing Log from API docs
 *
 * Revision 1.7  2000/02/24 02:12:53  aruna1
 * ReaderMgr:;getReaderDepth() added
 *
 * Revision 1.6  2000/02/06 07:47:53  rahulj
 * Year 2K copyright swat.
 *
 * Revision 1.5  2000/01/25 01:04:21  roddey
 * Fixes a bogus error about ]]> in char data.
 *
 * Revision 1.4  2000/01/24 20:40:43  roddey
 * Exposed the APIs to get to the byte offset in the source XML buffer. This stuff
 * is not tested yet, but I wanted to get the API changes in now so that the API
 * can be stablized.
 *
 * Revision 1.3  2000/01/12 00:15:04  roddey
 * Changes to deal with multiply nested, relative pathed, entities and to deal
 * with the new URL class changes.
 *
 * Revision 1.2  1999/12/15 19:48:03  roddey
 * Changed to use new split of transcoder interfaces into XML transcoders and
 * LCP transcoders, and implementation of intrinsic transcoders as pluggable
 * transcoders, and addition of Latin1 intrinsic support.
 *
 * Revision 1.1.1.1  1999/11/09 01:08:13  twl
 * Initial checkin
 *
 * Revision 1.4  1999/11/08 20:56:54  droddey
 * If the main xml entity does not exist, we need to get the error handling for that
 * inside the main XMLScanner::scanDocument() try block so that it gets reported
 * in the normal way. We have to add a little extra safety code because, when this
 * happens, there is no reader on the reader stack to get position ino from.
 *
 * Revision 1.3  1999/11/08 20:44:43  rahul
 * Swat for adding in Product name and CVS comment log variable.
 *
 */

#if !defined(READERMGR_HPP)
#define READERMGR_HPP

#include <xercesc/util/RefStackOf.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/sax/Locator.hpp>
#include <xercesc/framework/XMLBuffer.hpp>
#include <xercesc/internal/XMLReader.hpp>

class XMLBuffer;
class XMLEntityDecl;
class XMLEntityHandler;
class XMLDocumentHandler;
class XMLScanner;


// ---------------------------------------------------------------------------
//  This class is used by the scanner. The scanner must deal with expansion
//  of entities, some of which are totally different files (external parsed
//  entities.) It does so by pushing readers onto a stack. The top reader is
//  the one it wants to read out of, but that one must be popped when it is
//  empty. To keep that logic from being all over the place, the scanner
//  talks to the reader manager, which handles the stack and popping off
//  used up readers.
// ---------------------------------------------------------------------------
class XMLPARSER_EXPORT ReaderMgr : public Locator
{
public :
    // -----------------------------------------------------------------------
    //  Class specific types
    // -----------------------------------------------------------------------
    struct LastExtEntityInfo
    {
        const   XMLCh*          systemId;
        const   XMLCh*          publicId;
PeiYong Zhang's avatar
PeiYong Zhang committed
    };


    // -----------------------------------------------------------------------
    //  Constructors and Destructor
    // -----------------------------------------------------------------------
    ReaderMgr();
    ~ReaderMgr();


    // -----------------------------------------------------------------------
    //  Convenience scanning methods
    //
    //  This are all convenience methods that work in terms of the core
    //  character spooling methods.
    // -----------------------------------------------------------------------
    bool atEOF() const;
    bool getName(XMLBuffer& toFill);
    bool getNameToken(XMLBuffer& toFill);
    XMLCh getNextChar();
    bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
    void movePlainContentChars(XMLBuffer &dest);
    void getSpaces(XMLBuffer& toFill);
    void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
    bool isEmpty() const;
    bool lookingAtChar(const XMLCh toCheck);
    bool lookingAtSpace();
    XMLCh peekNextChar();
    bool skipIfQuote(XMLCh& chGotten);
    void skipPastChar(const XMLCh toSkip);
    bool skipPastSpaces();
    void skipToChar(const XMLCh toSkipTo);
    bool skippedChar(const XMLCh toSkip);
    bool skippedSpace();
    bool skippedString(const XMLCh* const toSkip);
    void skipQuotedString(const XMLCh quoteCh);
    XMLCh skipUntilIn(const XMLCh* const listToSkip);
    XMLCh skipUntilInOrWS(const XMLCh* const listToSkip);
    bool peekString(const XMLCh* const toPeek);


    // -----------------------------------------------------------------------
    //  Control methods
    // -----------------------------------------------------------------------
    void cleanStackBackTo(const unsigned int readerNum);
    XMLReader* createReader
    (
        const   InputSource&        src
        , const bool                xmlDecl
        , const XMLReader::RefFrom  refFrom
        , const XMLReader::Types    type
        , const XMLReader::Sources  source
    );
    XMLReader* createReader
    (
        const   XMLCh* const        sysId
        , const XMLCh* const        pubId
        , const bool                xmlDecl
        , const XMLReader::RefFrom  refFrom
        , const XMLReader::Types    type
        , const XMLReader::Sources  source
        ,       InputSource*&       srcToFill
    );
    XMLReader* createIntEntReader
    (
        const   XMLCh* const        sysId
        , const XMLReader::RefFrom  refFrom
        , const XMLReader::Types    type
        , const XMLCh* const        dataBuf
        , const unsigned int        dataLen
        , const bool                copyBuf
    );
    bool isScanningPERefOutOfLiteral() const;
    bool pushReader
    (
                XMLReader* const        reader
        ,       XMLEntityDecl* const    entity
    );
    void reset();


    // -----------------------------------------------------------------------
    //  Getter methods
    // -----------------------------------------------------------------------
    const XMLCh* getCurrentEncodingStr() const;
    const XMLEntityDecl* getCurrentEntity() const;
    XMLEntityDecl* getCurrentEntity();
    const XMLReader* getCurrentReader() const;
    XMLReader* getCurrentReader();
    unsigned int getCurrentReaderNum() const;
    unsigned int getReaderDepth() const;
    void getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const;
    unsigned int getSrcOffset() const;
    bool getThrowEOE() const;


    // -----------------------------------------------------------------------
    //  Setter methods
    // -----------------------------------------------------------------------
    void setEntityHandler(XMLEntityHandler* const newHandler);
    void setThrowEOE(const bool newValue);


    // -----------------------------------------------------------------------
    //  Implement the SAX Locator interface
    // -----------------------------------------------------------------------
    virtual const XMLCh* getPublicId() const;
    virtual const XMLCh* getSystemId() const;
    virtual XMLSSize_t getLineNumber() const;
    virtual XMLSSize_t getColumnNumber() const;
PeiYong Zhang's avatar
PeiYong Zhang committed


private :
    // -----------------------------------------------------------------------
    //  Private helper methods
    // -----------------------------------------------------------------------
    const XMLReader* getLastExtEntity(const XMLEntityDecl*& itsEntity) const;
    bool popReader();


    // -----------------------------------------------------------------------
    //  Private data members
    //
    //  fCurEntity
    //      This is the current top of stack entity. We pull it off the stack
    //      and store it here for efficiency.
    //
    //  fCurReader
    //      This is the current top of stack reader. We pull it off the
    //      stack and store it here for efficiency.
    //
    //  fEntityHandler
    //      This is the installed entity handler. Its installed via the
    //      scanner but he passes it on to us since we need it the most, in
    //      process of creating external entity readers.
    //
    //  fEntityStack
    //      We need to keep up with which of the pushed readers are pushed
    //      entity values that are being spooled. This is done to avoid the
    //      problem of recursive definitions. This stack consists of refs to
    //      EntityDecl objects for the pushed entities.
    //
    //  fNextReaderNum
    //      This is the reader serial number value. Each new reader that is
    //      created from this reader is given a successive number. This lets
    //      us catch things like partial markup errors and such.
    //
    //  fReaderStack
    //      This is the stack of reader references. We own all the readers
    //      and destroy them when they are used up.
    //
    //  fThrowEOE
    //      This flag controls whether we throw an exception when we hit an
    //      end of entity. The scanner doesn't really need to know about ends
    //      of entities in the int/ext subsets, so it will turn this flag off
    //      until it gets into the content usually.
    // -----------------------------------------------------------------------
    XMLEntityDecl*              fCurEntity;
    XMLReader*                  fCurReader;
    XMLEntityHandler*           fEntityHandler;
    RefStackOf<XMLEntityDecl>*  fEntityStack;
    unsigned int                fNextReaderNum;
    RefStackOf<XMLReader>*      fReaderStack;
    bool                        fThrowEOE;
};



// ---------------------------------------------------------------------------
//  ReaderMgr: Inlined methods
//
//  NOTE: We cannot put these in alphabetical and type order as we usually
//  do because some of the compilers we have to support are too stupid to
//  understand out of order inlines!
// ---------------------------------------------------------------------------
inline unsigned int ReaderMgr::getCurrentReaderNum() const
{
    return fCurReader->getReaderNum();
}

inline bool ReaderMgr::getName(XMLBuffer& toFill)
{
    toFill.reset();
    return fCurReader->getName(toFill, false);
}

inline bool ReaderMgr::getNameToken(XMLBuffer& toFill)
{
    toFill.reset();
    return fCurReader->getName(toFill, true);
}

inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
{
    return fCurReader->getNextCharIfNot(chNotToGet, chGotten);
}

inline void ReaderMgr::movePlainContentChars(XMLBuffer &dest)
{
    fCurReader->movePlainContentChars(dest);
}

inline bool ReaderMgr::getThrowEOE() const
{
    return fThrowEOE;
}

inline unsigned int ReaderMgr::getSrcOffset() const
{
    return fCurReader->getSrcOffset();
}

inline bool ReaderMgr::lookingAtChar(const XMLCh chToCheck)
{
    return (chToCheck == peekNextChar());
}

inline bool ReaderMgr::lookingAtSpace()
{
    return XMLReader::isWhitespace(peekNextChar());
}

inline void ReaderMgr::setThrowEOE(const bool newValue)
{
    fThrowEOE = newValue;
}

inline bool ReaderMgr::skippedString(const XMLCh* const toSkip)
{
    return fCurReader->skippedString(toSkip);
}

inline void ReaderMgr::skipToChar(const XMLCh toSkipTo)
{
    while (true)
    {
        // Get chars until we find the one to skip
        const XMLCh nextCh = getNextChar();

        // Break out at end of input or the char to skip
        if ((nextCh == toSkipTo) || !nextCh)
            break;
    }
}

inline void ReaderMgr::skipPastChar(const XMLCh toSkipPast)
{
    while (true)
    {
        // Get chars until we find the one to skip
        const XMLCh nextCh = getNextChar();

        if ((nextCh == toSkipPast) || !nextCh)
            break;
    }
}

inline bool ReaderMgr::peekString(const XMLCh* const toPeek)
{
    return fCurReader->peekString(toPeek);
}

inline void ReaderMgr::setEntityHandler(XMLEntityHandler* const newHandler)
{
    fEntityHandler = newHandler;
}


//
//  This is a simple class to temporarily change the 'throw at end of entity'
//  flag of the reader manager. There are some places where we need to
//  turn this on and off on a scoped basis.
//
class XMLPARSER_EXPORT ThrowEOEJanitor
{
public :
    // -----------------------------------------------------------------------
    //  Constructors and destructor
    // -----------------------------------------------------------------------
    ThrowEOEJanitor(ReaderMgr* mgrTarget, const bool newValue) :

        fOld(mgrTarget->getThrowEOE())
        , fMgr(mgrTarget)
    {
        mgrTarget->setThrowEOE(newValue);
    }

    ~ThrowEOEJanitor()
    {
        fMgr->setThrowEOE(fOld);
    };

private :
    // -----------------------------------------------------------------------
    //  Private data members
    //
    //  fOld
    //      The previous value of the flag, which we replaced during ctor,
    //      and will replace during dtor.
    //
    //  fMgr
    //      A pointer to the reader manager we are going to set/reset the
    //      flag on.
    // -----------------------------------------------------------------------
    bool        fOld;
    ReaderMgr*  fMgr;
};

#endif