Skip to content
Snippets Groups Projects
XMLReader.cpp 47.9 KiB
Newer Older
PeiYong Zhang's avatar
PeiYong Zhang committed
/*
 * Copyright 1999-2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
PeiYong Zhang's avatar
PeiYong Zhang committed
 */

/*
 * $Id$
 */

// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/XMLReader.hpp>
PeiYong Zhang's avatar
PeiYong Zhang committed
#include <xercesc/util/BitOps.hpp>
#include <xercesc/util/BinInputStream.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/TransService.hpp>
#include <xercesc/util/XMLEBCDICTranscoder.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/Janitor.hpp>
Tinny Ng's avatar
Tinny Ng committed
XERCES_CPP_NAMESPACE_BEGIN
PeiYong Zhang's avatar
PeiYong Zhang committed

// ---------------------------------------------------------------------------
Tinny Ng's avatar
Tinny Ng committed
//  XMLReader: Query Methods
PeiYong Zhang's avatar
PeiYong Zhang committed
// ---------------------------------------------------------------------------
//  Checks whether all of the chars in the passed buffer are whitespace or
//  not. Breaks out on the first non-whitespace.
//
bool XMLReader::isAllSpaces(const   XMLCh* const    toCheck
                            , const unsigned int    count)
{
    const XMLCh* curCh = toCheck;
    const XMLCh* endPtr = toCheck + count;
    while (curCh < endPtr)
    {
        if (!(fgCharCharsTable[*curCh++] & gWhitespaceCharMask))
            return false;
    }
    return true;
}


//
//  Checks whether at least one of the chars in the passed buffer are whitespace or
//  not.
//
bool XMLReader::containsWhiteSpace(const   XMLCh* const    toCheck
                            , const unsigned int    count)
{
    const XMLCh* curCh = toCheck;
    const XMLCh* endPtr = toCheck + count;
    while (curCh < endPtr)
    {
        if (fgCharCharsTable[*curCh++] & gWhitespaceCharMask)
            return true;
    }
    return false;
}

//
Tinny Ng's avatar
Tinny Ng committed
//  This one is not called terribly often, so call the XMLChar utility
PeiYong Zhang's avatar
PeiYong Zhang committed
//
bool XMLReader::isPublicIdChar(const XMLCh toCheck)
{
Tinny Ng's avatar
Tinny Ng committed
    if (fXMLVersion == XMLV1_1)
        return XMLChar1_1::isPublicIdChar(toCheck);
    else
        return XMLChar1_0::isPublicIdChar(toCheck);
PeiYong Zhang's avatar
PeiYong Zhang committed
}

// ---------------------------------------------------------------------------
//  XMLReader: Constructors and Destructor
// ---------------------------------------------------------------------------
XMLReader::XMLReader(const  XMLCh* const          pubId
                    , const XMLCh* const          sysId
                    ,       BinInputStream* const streamToAdopt
                    , const RefFrom               from
                    , const Types                 type
                    , const Sources               source
                    , const bool                  throwAtEnd
                    , const bool                  calculateSrcOfs
                    , const XMLVersion            version
                    ,       MemoryManager* const  manager) :
PeiYong Zhang's avatar
PeiYong Zhang committed
    fCharIndex(0)
    , fCharsAvail(0)
    , fCurCol(1)
    , fCurLine(1)
    , fEncodingStr(0)
    , fForcedEncoding(false)
    , fNoMore(false)
    , fPublicId(XMLString::replicate(pubId, manager))
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fRawBufIndex(0)
    , fRawBytesAvail(0)
    , fReaderNum(0xFFFFFFFF)
    , fRefFrom(from)
    , fSentTrailingSpace(false)
    , fSource(source)
    , fSrcOfsBase(0)
    , fSrcOfsSupported(false)
    , fSystemId(XMLString::replicate(sysId, manager))
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fSwapped(false)
    , fThrowAtEnd(throwAtEnd)
    , fTranscoder(0)
    , fType(type)
PeiYong Zhang's avatar
PeiYong Zhang committed
{
Tinny Ng's avatar
Tinny Ng committed
    setXMLVersion(version);

PeiYong Zhang's avatar
PeiYong Zhang committed
    // Do an initial load of raw bytes
    refreshRawBuffer();

    // Ask the transcoding service if it supports src offset info
    fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();

    //
    //  Use the recognizer class to get a basic sense of what family of
    //  encodings this file is in. We'll start off with a reader of that
    //  type, and update it later if needed when we read the XMLDecl line.
    //
    fEncoding = XMLRecognizer::basicEncodingProbe(fRawByteBuf, fRawBytesAvail);

    #if defined(XERCES_DEBUG)
    if ((fEncoding < XMLRecognizer::Encodings_Min)
    ||  (fEncoding > XMLRecognizer::Encodings_Max))
    {
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Reader_BadAutoEncoding, fMemoryManager);
PeiYong Zhang's avatar
PeiYong Zhang committed
    }
    #endif

    fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding, fMemoryManager), fMemoryManager);
PeiYong Zhang's avatar
PeiYong Zhang committed

    // Check whether the fSwapped flag should be set or not
    checkForSwapped();

    //
    //  This will check to see if the first line is an XMLDecl and, if
    //  so, decode that first line manually one character at a time. This
    //  leaves enough characters in the buffer that the high level code
    //  can get through the Decl and call us back with the real encoding.
    //
    doInitDecode();

    //
    //  NOTE: We won't create a transcoder until we either get a call to
    //  setEncoding() or we get a call to refreshCharBuffer() and no
    //  transcoder has been set yet.
    //
}


XMLReader::XMLReader(const  XMLCh* const          pubId
                    , const XMLCh* const          sysId
                    ,       BinInputStream* const streamToAdopt
                    , const XMLCh* const          encodingStr
                    , const RefFrom               from
                    , const Types                 type
                    , const Sources               source
                    , const bool                  throwAtEnd
                    , const bool                  calculateSrcOfs
                    , const XMLVersion            version
                    ,       MemoryManager* const  manager) :
PeiYong Zhang's avatar
PeiYong Zhang committed
    fCharIndex(0)
    , fCharsAvail(0)
    , fCurCol(1)
    , fCurLine(1)
    , fEncoding(XMLRecognizer::UTF_8)
    , fEncodingStr(0)
    , fForcedEncoding(true)
    , fNoMore(false)
    , fPublicId(XMLString::replicate(pubId, manager))
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fRawBufIndex(0)
    , fRawBytesAvail(0)
    , fReaderNum(0xFFFFFFFF)
    , fRefFrom(from)
    , fSentTrailingSpace(false)
    , fSource(source)
    , fSrcOfsBase(0)
    , fSrcOfsSupported(false)
    , fSystemId(XMLString::replicate(sysId, manager))
PeiYong Zhang's avatar
PeiYong Zhang committed
    , fSwapped(false)
    , fThrowAtEnd(throwAtEnd)
    , fTranscoder(0)
    , fType(type)
Loading
Loading full blame...