/*
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache\@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation, and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.ibm.com .  For more information
 * on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

/*
 * $Id$
 */

// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/BitOps.hpp>
#include <xercesc/util/BinInputStream.hpp>
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/TranscodingException.hpp>
#include <xercesc/util/TransService.hpp>
#include <xercesc/util/UTFDataFormatException.hpp>
#include <xercesc/util/XMLEBCDICTranscoder.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/sax/InputSource.hpp>
#include <xercesc/framework/XMLBuffer.hpp>
#include <xercesc/internal/CharTypeTables.hpp>
#include <xercesc/internal/XMLReader.hpp>
#include <xercesc/internal/XMLScanner.hpp>
#include <string.h>

XERCES_CPP_NAMESPACE_BEGIN

// ---------------------------------------------------------------------------
//  XMLReader: static data initialization
// ---------------------------------------------------------------------------
// Static flag, initially off. enableNELWS() turns it on, and the scanning
// methods in this file then treat chNEL as a line end just like chLF.
bool XMLReader::fNEL = false;

// ---------------------------------------------------------------------------
//  XMLReader: Public, static methods
// ---------------------------------------------------------------------------
bool XMLReader::isFirstNameChar(const XMLCh toCheck)
{
    static const XMLByte ourMask = gBaseCharMask | gLetterCharMask;

    if ((fgCharCharsTable[toCheck] & ourMask) != 0)
        return true;

    // Check the two special case name start chars
    if ((toCheck == chUnderscore) || (toCheck == chColon))
        return true;

    return false;
}


//
//  Returns true only if every one of the 'count' chars starting at
//  'toCheck' is flagged as whitespace in the character characteristics
//  table. Scanning stops at the first char that is not; an empty range
//  (count of zero) yields true.
//
bool XMLReader::isAllSpaces(const   XMLCh* const    toCheck
                            , const unsigned int    count)
{
    for (unsigned int index = 0; index < count; index++)
    {
        if ((fgCharCharsTable[toCheck[index]] & gWhitespaceCharMask) == 0)
            return false;
    }
    return true;
}


//
//  Returns true if at least one of the 'count' chars starting at 'toCheck'
//  is flagged as whitespace in the character characteristics table.
//  Scanning stops at the first whitespace char found; an empty range
//  (count of zero) yields false.
//
bool XMLReader::containsWhiteSpace(const   XMLCh* const    toCheck
                            , const unsigned int    count)
{
    for (unsigned int index = 0; index < count; index++)
    {
        if ((fgCharCharsTable[toCheck[index]] & gWhitespaceCharMask) != 0)
            return true;
    }
    return false;
}



//
//  Returns true if 'toCheck' is a legal public id char. This check is not
//  called very often, so rather than spending a bit of the character
//  characteristics table on it (those bits are kept for more frequently
//  used characteristics), it is answered by a checkTable() lookup against
//  gPublicIdChars.
//
bool XMLReader::isPublicIdChar(const XMLCh toCheck)
{
    const bool inPublicIdSet = checkTable(gPublicIdChars, toCheck);
    return inPublicIdSet;
}


//
//  Turns on the treatment of NEL as whitespace/line end. The first call
//  sets the fNEL flag and gives chNEL the same entry in the character
//  characteristics table as chLF; later calls do nothing.
//
void XMLReader::enableNELWS()
{
    // Already enabled, nothing more to do
    if (fNEL)
        return;

    fNEL = true;

    // When option is on, treat NEL same as LF
    fgCharCharsTable[chNEL] = fgCharCharsTable[chLF];
}

// ---------------------------------------------------------------------------
//  XMLReader: Constructors and Destructor
// ---------------------------------------------------------------------------
//
//  Constructs a reader whose encoding is auto-sensed. The first block of
//  raw bytes is loaded and probed with XMLRecognizer::basicEncodingProbe(),
//  and the name of the sensed encoding is stored in fEncodingStr. No
//  transcoder is created here (see the note at the end of the body).
//
//  pubId and sysId are replicated into fPublicId and fSystemId.
//  streamToAdopt is stored in fStream, which the destructor deletes.
//  from, type, source and throwAtEnd are stored as given.
//
XMLReader::XMLReader(const  XMLCh* const                pubId
                    , const XMLCh* const                sysId
                    ,       BinInputStream* const       streamToAdopt
                    , const RefFrom                     from
                    , const Types                       type
                    , const Sources                     source
                    , const bool                        throwAtEnd) :
    fCharIndex(0)
    , fCharsAvail(0)
    , fCurCol(1)
    , fCurLine(1)
    , fEncodingStr(0)
    , fForcedEncoding(false)
    , fNoMore(false)
    , fPublicId(XMLString::replicate(pubId))
    , fRawBufIndex(0)
    , fRawBytesAvail(0)
    , fReaderNum(0xFFFFFFFF)
    , fRefFrom(from)
    , fSentTrailingSpace(false)
    , fSource(source)
    , fSpareCh(0)
    , fSrcOfsBase(0)
    , fSrcOfsSupported(false)
    , fStream(streamToAdopt)
    , fSystemId(XMLString::replicate(sysId))
    , fSwapped(false)
    , fThrowAtEnd(throwAtEnd)
    , fTranscoder(0)
    , fType(type)
{
    // Do an initial load of raw bytes
    refreshRawBuffer();

    // Ask the transcoding service if it supports src offset info
    fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();

    //
    //  Use the recognizer class to get a basic sense of what family of
    //  encodings this file is in. We'll start off with a reader of that
    //  type, and update it later if needed when we read the XMLDecl line.
    //
    fEncoding = XMLRecognizer::basicEncodingProbe(fRawByteBuf, fRawBytesAvail);

    // Debug builds only: reject a probe result outside the known enum range
    #if defined(XERCES_DEBUG)
    if ((fEncoding < XMLRecognizer::Encodings_Min)
    ||  (fEncoding > XMLRecognizer::Encodings_Max))
    {
        ThrowXML(RuntimeException, XMLExcepts::Reader_BadAutoEncoding);
    }
    #endif

    // Keep our own copy of the name of the sensed encoding
    fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding));

    // Check whether the fSwapped flag should be set or not
    checkForSwapped();

    //
    //  This will check to see if the first line is an XMLDecl and, if
    //  so, decode that first line manually one character at a time. This
    //  leaves enough characters in the buffer that the high level code
    //  can get through the Decl and call us back with the real encoding.
    //
    doInitDecode();

    //
    //  NOTE: We won't create a transcoder until we either get a call to
    //  setEncoding() or we get a call to refreshCharBuffer() and no
    //  transcoder has been set yet.
    //
}


//
//  Constructs a reader whose encoding is forced by name. The passed
//  encoding name is copied, upper cased and mapped to one of the intrinsic
//  encoding enums; a transcoder for it is created immediately and is the
//  one used from then on.
//
//  pubId and sysId are replicated into fPublicId and fSystemId.
//  streamToAdopt is stored in fStream, which the destructor deletes.
//  from, type, source and throwAtEnd are stored as given.
//
//  Throws a TranscodingException (Trans_CantCreateCvtrFor) if no transcoder
//  can be created for the encoding.
//
XMLReader::XMLReader(const  XMLCh* const            pubId
                    , const XMLCh* const            sysId
                    ,       BinInputStream* const   streamToAdopt
                    , const XMLCh* const            encodingStr
                    , const RefFrom                 from
                    , const Types                   type
                    , const Sources                 source
                    , const bool                    throwAtEnd) :
    fCharIndex(0)
    , fCharsAvail(0)
    , fCurCol(1)
    , fCurLine(1)
    , fEncoding(XMLRecognizer::UTF_8)
    , fEncodingStr(0)
    , fForcedEncoding(true)
    , fNoMore(false)
    , fPublicId(XMLString::replicate(pubId))
    , fRawBufIndex(0)
    , fRawBytesAvail(0)
    , fReaderNum(0xFFFFFFFF)
    , fRefFrom(from)
    , fSentTrailingSpace(false)
    , fSource(source)
    , fSpareCh(0)
    , fSrcOfsBase(0)
    , fSrcOfsSupported(false)
    , fStream(streamToAdopt)
    , fSystemId(XMLString::replicate(sysId))
    , fSwapped(false)
    , fThrowAtEnd(throwAtEnd)
    , fTranscoder(0)
    , fType(type)
{
    // Do an initial load of raw bytes
    refreshRawBuffer();

    // Copy the encoding string to our member
    fEncodingStr = XMLString::replicate(encodingStr);
    XMLString::upperCase(fEncodingStr);

    // Ask the transcoding service if it supports src offset info
    fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();

    //
    //  Map the passed encoding name to one of our enums. If it does not
    //  match one of the intrinsic encodings, it will come back 'other',
    //  which tells us to create a transcoder based reader.
    //
    fEncoding = XMLRecognizer::encodingForName(fEncodingStr);

    // Check whether the fSwapped flag should be set or not
    checkForSwapped();

    //
    //  Create a transcoder for the encoding. Since the encoding has been
    //  forced, this will be the one we will use, period.
    //
    XMLTransService::Codes failReason;
    if (fEncoding == XMLRecognizer::OtherEncoding)
    {
        //
        //  fEncodingStr not  pre-recognized, use it
        //  directly for transcoder
        //
        fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
        (
            fEncodingStr
            , failReason
            , kCharBufSize
        );
    }
     else
    {
        //
        //  Use the recognized fEncoding to create the transcoder
        //
        fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
        (
            fEncoding
            , failReason
            , kCharBufSize
        );

    }

    if (!fTranscoder)
    {
        //
        //  We are about to throw out of the constructor, which means the
        //  destructor will never run. Release the strings replicated above
        //  so they do not leak. The encoding string is still needed to
        //  build the exception text, so a janitor frees it as the throw
        //  leaves this scope.
        //
        //  NOTE(review): fStream is deliberately left alone here; the
        //  caller presumably still cleans up the stream when construction
        //  fails -- confirm against the code that creates readers.
        //
        delete [] fPublicId;
        delete [] fSystemId;
        ArrayJanitor<XMLCh> janEncoding(fEncodingStr);

        ThrowXML1
        (
            TranscodingException
            , XMLExcepts::Trans_CantCreateCvtrFor
            , fEncodingStr
        );
    }

    //
    //  Note that, unlike above, we do not do an initial decode of the
    //  first line. We take the caller's word that the encoding is correct
    //  and just assume that the first bulk decode (kicked off by the first
    //  get of a character) will work.
    //
    //  So we do here the slipping in of the leading space if required.
    //
    if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
    {
        // This represents no data from the source
        fCharSizeBuf[fCharsAvail] = 0;
        fCharBuf[fCharsAvail++] = chSpace;
    }
}


//
//  Constructs a reader whose encoding is forced by enum value. The name
//  for the encoding is looked up and stored in fEncodingStr, and a
//  transcoder for it is created immediately; it is the one used from then
//  on.
//
//  pubId and sysId are replicated into fPublicId and fSystemId.
//  streamToAdopt is stored in fStream, which the destructor deletes.
//  from, type, source and throwAtEnd are stored as given.
//
//  Throws a TranscodingException (Trans_CantCreateCvtrFor) if no transcoder
//  can be created for the encoding.
//
XMLReader::XMLReader(const  XMLCh* const            pubId
                    , const XMLCh* const            sysId
                    ,       BinInputStream* const   streamToAdopt
                    , XMLRecognizer::Encodings      encodingEnum
                    , const RefFrom                 from
                    , const Types                   type
                    , const Sources                 source
                    , const bool                    throwAtEnd) :
    fCharIndex(0)
    , fCharsAvail(0)
    , fCurCol(1)
    , fCurLine(1)
    , fEncoding(XMLRecognizer::UTF_8)
    , fEncodingStr(0)
    , fForcedEncoding(true)
    , fNoMore(false)
    , fPublicId(XMLString::replicate(pubId))
    , fRawBufIndex(0)
    , fRawBytesAvail(0)
    , fReaderNum(0xFFFFFFFF)
    , fRefFrom(from)
    , fSentTrailingSpace(false)
    , fSource(source)
    , fSpareCh(0)
    , fSrcOfsBase(0)
    , fSrcOfsSupported(false)
    , fStream(streamToAdopt)
    , fSystemId(XMLString::replicate(sysId))
    , fSwapped(false)
    , fThrowAtEnd(throwAtEnd)
    , fTranscoder(0)
    , fType(type)
{
    // Do an initial load of raw bytes
    refreshRawBuffer();

    // Ask the transcoding service if it supports src offset info
    fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();

    //
    //  Use the passed encoding code
    //
    fEncoding = encodingEnum;
    fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding));

    // Check whether the fSwapped flag should be set or not
    checkForSwapped();

    //
    //  Create a transcoder for the encoding. Since the encoding has been
    //  forced, this will be the one we will use, period.
    //
    XMLTransService::Codes failReason;
    fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
    (
        fEncoding
        , failReason
        , kCharBufSize
    );

    if (!fTranscoder)
    {
        //
        //  We are about to throw out of the constructor, which means the
        //  destructor will never run. Release the strings replicated above
        //  so they do not leak. The encoding string is still needed to
        //  build the exception text, so a janitor frees it as the throw
        //  leaves this scope.
        //
        //  NOTE(review): fStream is deliberately left alone here; the
        //  caller presumably still cleans up the stream when construction
        //  fails -- confirm against the code that creates readers.
        //
        delete [] fPublicId;
        delete [] fSystemId;
        ArrayJanitor<XMLCh> janEncoding(fEncodingStr);

        ThrowXML1
        (
            TranscodingException
            , XMLExcepts::Trans_CantCreateCvtrFor
            , fEncodingStr
        );
    }

    //
    //  Note that, unlike above, we do not do an initial decode of the
    //  first line. We take the caller's word that the encoding is correct
    //  and just assume that the first bulk decode (kicked off by the first
    //  get of a character) will work.
    //
    //  So we do here the slipping in of the leading space if required.
    //
    if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
    {
        // This represents no data from the source
        fCharSizeBuf[fCharsAvail] = 0;
        fCharBuf[fCharsAvail++] = chSpace;
    }
}


//
//  Releases what the reader holds: the encoding, public id and system id
//  strings (array delete), the adopted input stream, and the transcoder.
//  fTranscoder may still be null here if none was ever created; deleting
//  a null pointer is harmless.
//
XMLReader::~XMLReader()
{
    delete [] fEncodingStr;
    delete [] fPublicId;
    delete [] fSystemId;
    delete fStream;
    delete fTranscoder;
}


// ---------------------------------------------------------------------------
//  XMLReader: Character buffer management methods
// ---------------------------------------------------------------------------
//
//  Returns the current offset into the source: the base offset recorded so
//  far plus the source sizes of every char already consumed from the
//  current char buffer.
//
//  Throws a RuntimeException (Reader_SrcOfsNotSupported) if the transcoding
//  service reported that it does not support source offsets.
//
unsigned int XMLReader::getSrcOffset() const
{
    if (!fSrcOfsSupported)
        ThrowXML(RuntimeException, XMLExcepts::Reader_SrcOfsNotSupported);

    // Start from the base and add the size of each char eaten so far
    unsigned int total = fSrcOfsBase;
    unsigned int pos = 0;
    while (pos < fCharIndex)
    {
        total += fCharSizeBuf[pos];
        pos++;
    }
    return total;
}


//
//  Refills the character buffer. Any chars not yet consumed are moved down
//  to the bottom of the buffer (together with their source sizes) and the
//  rest of the buffer is filled by transcoding more raw data. If no
//  transcoder exists yet, one is created from fEncodingStr first.
//
//  Returns true if there is at least one char available afterwards. Returns
//  false if the buffer was already completely full of unconsumed chars, or
//  if no more chars could be obtained (in which case fNoMore is set so
//  later calls return immediately).
//
//  Throws a RuntimeException (Reader_EncodingStrRequired) if EBCDIC was
//  auto-sensed and no transcoder was ever set, and a TranscodingException
//  (Trans_CantCreateCvtrFor) if the transcoder cannot be created.
//
bool XMLReader::refreshCharBuffer()
{
    // If the no more flag is set, then don't bother doing anything
    if (fNoMore)
        return false;

    unsigned int startInd;

    // See if we have any existing chars.
    const unsigned int spareChars = fCharsAvail - fCharIndex;

    // If we are full, then don't do anything.
    if (spareChars == kCharBufSize)
        return false;

    //
    //  If no transcoder has been created yet, then we never saw any
    //  encoding="" string and the encoding was not forced, so lets
    //  create one now. We know that it won't change now.
    //
    //  However, note that if we autosensed EBCDIC, then we have to
    //  consider it an error if we never got an encoding since we don't
    //  know what variant of EBCDIC it is.
    //
    if (!fTranscoder)
    {
        if (fEncoding == XMLRecognizer::EBCDIC)
            ThrowXML(RuntimeException, XMLExcepts::Reader_EncodingStrRequired);

        // Ask the transcoding service to make us a transcoder
        XMLTransService::Codes failReason;
        fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
        (
            fEncodingStr
            , failReason
            , kCharBufSize
        );

        if (!fTranscoder)
        {
            ThrowXML1
            (
                TranscodingException
                , XMLExcepts::Trans_CantCreateCvtrFor
                , fEncodingStr
            );
        }
    }

    //
    //  Add the number of source bytes eaten so far to the base src
    //  offset member.
    //
    for (startInd = 0; startInd < fCharIndex; startInd++)
        fSrcOfsBase += fCharSizeBuf[startInd];

    //
    //  If there are spare chars, then move them down to the bottom. We
    //  have to move the char sizes down also.
    //
    startInd = 0;
    if (spareChars)
    {
        for (unsigned int index = fCharIndex; index < fCharsAvail; index++)
        {
            fCharBuf[startInd] = fCharBuf[index];
            fCharSizeBuf[startInd] = fCharSizeBuf[index];
            startInd++;
        }
    }

    //
    //  And then get more chars, starting after any spare chars that were
    //  left over from the last time.
    //
    fCharsAvail = xcodeMoreChars
    (
        &fCharBuf[startInd]
        , &fCharSizeBuf[startInd]
        , kCharBufSize - spareChars
    );

    // Add back in the spare chars
    fCharsAvail += spareChars;

    // Reset the buffer index to zero, so we start from the 0th char again
    fCharIndex = 0;

    //
    //  If no chars available, then we have to check for one last thing. If
    //  this is reader for a PE and its not being expanded inside a literal,
    //  then unget a trailing space. We use a boolean to avoid triggering
    //  this more than once.
    //
    if (!fCharsAvail
    &&  (fType == Type_PE)
    &&  (fRefFrom == RefFrom_NonLiteral)
    &&  !fSentTrailingSpace)
    {
        //
        //  The injected space represents no data from the source, so its
        //  size entry must be zero (as is done for the leading space in
        //  the constructors); otherwise a stale size would be counted into
        //  the source offset.
        //
        fCharSizeBuf[0] = 0;
        fCharBuf[0] = chSpace;
        fCharsAvail = 1;
        fSentTrailingSpace = true;
    }

    //
    //  If we are on our first block of chars and the encoding is one of the
    //  UTF-16 formats, then check the first char for the BOM and skip over
    //  it manually.
    //
    if (fCharsAvail)
    {
        if ((fCurLine == 1) && (fCurCol == 1))
        {
            if (((fEncoding == XMLRecognizer::UTF_16L)
            ||   (fEncoding == XMLRecognizer::UTF_16B))
            &&  !startInd)
            {
                if ((fCharBuf[startInd] == chUnicodeMarker)
                ||  (fCharBuf[startInd] == chSwappedUnicodeMarker))
                {
                    // Step past the BOM so it is never handed out as data
                    fCharIndex++;
                }
            }
        }
    }

    //
    //  If we get here with no more chars, then set the fNoMore flag which
    //  lets us optimize and know without checking that no more chars are
    //  available.
    //
    if (!fCharsAvail)
        fNoMore = true;

    return (fCharsAvail != 0);
}



// ---------------------------------------------------------------------------
//  XMLReader: Scanning methods
// ---------------------------------------------------------------------------
//
//  Appends a name (or, if 'token' is true, a name token) from the current
//  position to 'toFill', consuming the chars and advancing the column.
//
//  For a name, the first char must be a legal first name char or nothing is
//  consumed and false is returned. Scanning then continues, reloading the
//  buffer as needed, until a non-name char or the end of this reader.
//
//  Returns false if no chars were available at all or the first char was
//  not a legal name start; otherwise returns whether 'toFill' is non-empty.
//
bool XMLReader::getName(XMLBuffer& toFill, const bool token)
{
    // Make sure there is at least one char to look at
    if ((fCharIndex == fCharsAvail) && !refreshCharBuffer())
        return false;

    //
    //  A real name (as opposed to a name token) has a stricter rule for its
    //  first char, so deal with that one separately.
    //
    if (!token)
    {
        const XMLCh firstCh = fCharBuf[fCharIndex];
        if (!XMLReader::isFirstNameChar(firstCh))
            return false;

        // Take it, and keep the column in step
        toFill.append(firstCh);
        fCharIndex++;
        fCurCol++;
    }

    //
    //  Take name chars one buffer load at a time. The position and column
    //  are tracked in locals while scanning a buffer and written back when
    //  we stop or need to reload.
    //
    for (;;)
    {
        unsigned int col = fCurCol;
        unsigned int pos = fCharIndex;
        const unsigned int limit = fCharsAvail;

        for (; pos < limit; pos++, col++)
        {
            const XMLCh nameCh = fCharBuf[pos];

            // First non-name char ends the scan; it is not consumed
            if (!XMLReader::isNameChar(nameCh))
            {
                fCharIndex = pos;
                fCurCol = col;
                return !toFill.isEmpty();
            }

            toFill.append(nameCh);
        }

        fCharIndex = pos;
        fCurCol = col;

        // Ran off the end of this buffer, stop if there is nothing more
        if (!refreshCharBuffer())
            break;
    }

    return !toFill.isEmpty();
}


//
//  Consumes whitespace from the current position and appends it to
//  'toFill', keeping the line/column info up to date.
//
//  A CR starts a new line; when the source is external it is stored as an
//  LF and a directly following LF (or NEL, if NEL handling is enabled) is
//  swallowed. An LF (or enabled NEL) is stored as an LF and starts a new
//  line. Any other whitespace is stored as is and advances the column.
//
//  Returns true if we stopped because a non-whitespace char was found (it
//  is not consumed), false if we hit the end of this entity.
//
bool XMLReader::getSpaces(XMLBuffer& toFill)
{
    do
    {
        // Work through whatever is currently in the buffer
        while (fCharIndex < fCharsAvail)
        {
            XMLCh spaceCh = fCharBuf[fCharIndex];

            // A non-space ends the run and is left in place
            if (!XMLReader::isWhitespace(spaceCh))
                return true;

            // Its whitespace, so eat it
            fCharIndex++;

            if (spaceCh == chCR)
            {
                fCurCol = 1;
                fCurLine++;

                //
                //  External content is not normalized yet, so fold this CR
                //  (plus any immediately following line feed) into one LF.
                //
                if (fSource == Source_External)
                {
                    const bool haveNext = (fCharIndex < fCharsAvail)
                                          || refreshCharBuffer();
                    if (haveNext)
                    {
                        const XMLCh followCh = fCharBuf[fCharIndex];
                        if ((followCh == chLF)
                        ||  ((followCh == chNEL) && fNEL))
                        {
                            fCharIndex++;
                        }
                    }
                    spaceCh = chLF;
                }
            }
             else if ((spaceCh == chLF)
                  ||  ((spaceCh == chNEL) && fNEL))
            {
                spaceCh = chLF;
                fCurCol = 1;
                fCurLine++;
            }
             else
            {
                fCurCol++;
            }

            // Store the (possibly normalized) whitespace char
            toFill.append(spaceCh);
        }

        // Buffer used up, go around again only if a reload gives us more
    } while (refreshCharBuffer());

    // Nothing but whitespace up to the end of this entity
    return false;
}


//
//  Consumes chars from the current position and appends them to 'toFill'
//  until either a whitespace char or 'toCheck' is seen. The char that stops
//  the scan is not consumed. Line/column info is kept up to date for the
//  chars taken.
//
//  Returns true if we stopped at whitespace or at 'toCheck', false if the
//  end of this reader was reached first.
//
//  NOTE(review): the CR/LF branches below only run for chars that
//  isWhitespace() rejected; if CR and LF are always classified as
//  whitespace those branches can never be taken -- confirm.
//
bool XMLReader::getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck)
{
    do
    {
        // Work through whatever is currently in the buffer
        while (fCharIndex < fCharsAvail)
        {
            XMLCh nextCh = fCharBuf[fCharIndex];

            // Whitespace or the target char ends the scan, left in place
            if (XMLReader::isWhitespace(nextCh) || (nextCh == toCheck))
                return true;

            // Otherwise we take this char
            fCharIndex++;

            if (nextCh == chCR)
            {
                fCurCol = 1;
                fCurLine++;

                //
                //  External content is not normalized yet, so fold this CR
                //  (plus any immediately following line feed) into one LF.
                //
                if (fSource == Source_External)
                {
                    const bool haveNext = (fCharIndex < fCharsAvail)
                                          || refreshCharBuffer();
                    if (haveNext)
                    {
                        const XMLCh followCh = fCharBuf[fCharIndex];
                        if ((followCh == chLF)
                        ||  ((followCh == chNEL) && fNEL))
                        {
                            fCharIndex++;
                        }
                    }
                    nextCh = chLF;
                }
            }
             else if ((nextCh == chLF)
                  ||  ((nextCh == chNEL) && fNEL))
            {
                nextCh = chLF;
                fCurCol = 1;
                fCurLine++;
            }
             else
            {
                fCurCol++;
            }

            // Add it to the caller's buffer
            toFill.append(nextCh);
        }

        // Buffer used up, go around again only if a reload gives us more
    } while (refreshCharBuffer());

    // Ate up the whole reader without seeing whitespace or the target char
    return false;
}

//
//  If the next char is a single or double quote, consumes it, stores it in
//  'chGotten', advances the column and returns true. Otherwise nothing is
//  consumed, 'chGotten' is left untouched and false is returned (also when
//  no more chars are available).
//
bool XMLReader::skipIfQuote(XMLCh& chGotten)
{
    // Make sure there is a char to look at
    if ((fCharIndex == fCharsAvail) && !refreshCharBuffer())
        return false;

    const XMLCh peekCh = fCharBuf[fCharIndex];
    if ((peekCh != chDoubleQuote) && (peekCh != chSingleQuote))
        return false;

    // Its a quote, so hand it back and eat it
    chGotten = peekCh;
    fCharIndex++;
    fCurCol++;
    return true;
}


bool XMLReader::skipSpaces(bool& skippedSomething)
{
    // Remember the current line and column
    XMLSSize_t    orgLine = fCurLine;
    XMLSSize_t    orgCol  = fCurCol;
PeiYong Zhang's avatar
PeiYong Zhang committed

    //
    //  We enter a loop where we skip over spaces until we hit the end of
    //  this reader or a non-space value. The return indicates whether we
    //  hit the non-space (true) or the end (false).
    //
    while (true)
    {
        // Loop through the current chars in the buffer
        while (fCharIndex < fCharsAvail)
        {
            // Get the current char out of the buffer
            XMLCh curCh = fCharBuf[fCharIndex];

            //
            //  See if its a white space char. If so, then process it. Else
            //  we've hit a non-space and need to return.
            //
            if (XMLReader::isWhitespace(curCh))
            {
                // Eat this char
                fCharIndex++;

                //
                //  Ok, we've got some whitespace here. So we have to store
                //  it. But we have to normalize it and update the line and
                //  column info along the way.
                //
                if (curCh == chCR)
                {
                    fCurCol = 1;
                    fCurLine++;

                    //
                    //  If not already internalized, then convert it to an
                    //  LF and eat any following LF.
                    //
                    if (fSource == Source_External)
                    {
                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
                        {
                            if (fCharBuf[fCharIndex] == chLF
                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
                                fCharIndex++;
                        }
                        curCh = chLF;
                    }
                }
                 else if (curCh == chLF
                          || ((curCh == chNEL) && fNEL))
                {
                    curCh = chLF;
                    fCurCol = 1;
                    fCurLine++;
                }
                 else
                {
                    fCurCol++;
                }
            }
             else
            {
                skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
                return true;
            }
        }

        //
        //  We've eaten up the current buffer, so lets try to reload it. If
        //  we don't get anything new, then break out. If we do, then we go
        //  back to the top to keep getting spaces.
        //
        if (!refreshCharBuffer())
            break;
    }

    // We never hit any non-space and ate up the whole reader
    skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
    return false;
}


//
//  If the next char equals 'toSkip', consumes it, advances the column and
//  returns true. Otherwise nothing is consumed and false is returned (also
//  when no more chars are available).
//
bool XMLReader::skippedChar(const XMLCh toSkip)
{
    // Reload if the buffer is used up; give up if that yields nothing
    if ((fCharIndex == fCharsAvail) && !refreshCharBuffer())
        return false;

    // Not the char we were asked about, so leave it alone
    if (fCharBuf[fCharIndex] != toSkip)
        return false;

    // Its the one, eat it
    fCharIndex++;
    fCurCol++;
    return true;
}


bool XMLReader::skippedSpace()
{
    //
    //  If the buffer is empty, then try to reload it. If we still get
    //  nothing, then return false.
    //
    if (fCharIndex == fCharsAvail)
    {
        if (!refreshCharBuffer())
            return false;
    }

    //
    //  See if the current char is a whitespace. If so, then we need to eat
    //  it and return true.
    //
    const XMLCh curCh = fCharBuf[fCharIndex];
    if (XMLReader::isWhitespace(curCh))
    {
        // Eat the character
        fCharIndex++;

        if (curCh == chCR)
        {
            fCurLine++;
            fCurCol = 1;