IGXMLScanner2.cpp

/*
 * Copyright 2002, 2003,2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id$
 */


// ---------------------------------------------------------------------------
//  This file holds some of the grunt work methods of IGXMLScanner.cpp to keep
//  it a little more readable.
// ---------------------------------------------------------------------------


// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/IGXMLScanner.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/util/XMLUri.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/framework/XMLRefInfo.hpp>
#include <xercesc/framework/XMLGrammarPool.hpp>
#include <xercesc/framework/psvi/PSVIAttributeList.hpp>
#include <xercesc/framework/psvi/PSVIElement.hpp>
#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
#include <xercesc/validators/DTD/DTDGrammar.hpp>
#include <xercesc/validators/DTD/DTDValidator.hpp>
#include <xercesc/validators/DTD/XMLDTDDescriptionImpl.hpp>
#include <xercesc/validators/datatype/DatatypeValidator.hpp>
#include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp>
#include <xercesc/validators/schema/SchemaGrammar.hpp>
#include <xercesc/validators/schema/SchemaValidator.hpp>
#include <xercesc/validators/schema/TraverseSchema.hpp>
#include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
#include <xercesc/validators/schema/XSDDOMParser.hpp>
#include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>

XERCES_CPP_NAMESPACE_BEGIN

inline XMLAttDefList& getAttDefList(bool              isSchemaGrammar
                                  , ComplexTypeInfo*  currType
                                  , XMLElementDecl*   elemDecl);

// ---------------------------------------------------------------------------
//  IGXMLScanner: Private helper methods
// ---------------------------------------------------------------------------

//  This method is called from scanStartTagNS() to build up the list of
//  XMLAttr objects that will be passed out in the start tag callout. We
//  get the key/value pairs from the raw scan of explicitly provided attrs,
//  which have not been normalized. And we get the element declaration from
//  which we will get any defaulted or fixed attribute defs and add those
//  in as well.
unsigned int
IGXMLScanner::buildAttList(const  RefVectorOf<KVStringPair>&  providedAttrs
                          , const unsigned int                attCount
                          ,       XMLElementDecl*             elemDecl
                          ,       RefVectorOf<XMLAttr>&       toFill)
{
    //  If doing DTD's, Ask the element to clear the 'provided' flag on all of the att defs
    //  that it owns, and to return us a boolean indicating whether it has
    //  any defs.  If schemas are being validated, the complexType
    // at the top of the SchemaValidator's stack will
    // know what's best.  REVISIT:  don't modify grammar at all; eliminate
    // this step...
    ComplexTypeInfo *currType = 0;
    DatatypeValidator *currDV = 0;
    if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType && fValidate)
    {
        currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
        if (!currType) {
            currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator();
        }
    }

    const bool hasDefs = (currType && fValidate)
            ? currType->hasAttDefs()
            : elemDecl->hasAttDefs();

    // another set of attributes; increment element counter
    fElemCount++;

    //  If there are no expliclitily provided attributes and there are no
    //  defined attributes for the element, the we don't have anything to do.
    //  So just return zero in this case.
    if (!hasDefs && !attCount)
        return 0;

    // Keep up with how many attrs we end up with total
    unsigned int retCount = 0;

    //  And get the current size of the output vector. This lets us use
    //  existing elements until we fill it, then start adding new ones.
    const unsigned int curAttListSize = toFill.size();

    //  We need a buffer into which raw scanned attribute values will be
    //  normalized.
    XMLBufBid bbNormal(&fBufMgr);
    XMLBuffer& normBuf = bbNormal.getBuffer();

    //
    // Decide if to use hash table to do duplicate checking
    //
    bool toUseHashTable = false;
    if (fGrammarType == Grammar::DTDGrammarType)
    {
        setAttrDupChkRegistry(attCount, toUseHashTable);
    }

    //  Loop through our explicitly provided attributes, which are in the raw
    //  scanned form, and build up XMLAttr objects.
    unsigned int index;
    for (index = 0; index < attCount; index++)
    {
        PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID;
        PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL;
        const KVStringPair* curPair = providedAttrs.elementAt(index);

        //  We have to split the name into its prefix and name parts. Then
        //  we map the prefix to its URI.
        const XMLCh* const namePtr = curPair->getKey();
        ArrayJanitor<XMLCh> janName(0);

        // use a stack-based buffer when possible.
        XMLCh tempBuffer[100];

        const int colonInd = XMLString::indexOf(namePtr, chColon);
        const XMLCh* prefPtr = XMLUni::fgZeroLenString;
        const XMLCh* suffPtr = XMLUni::fgZeroLenString;
        if (colonInd != -1)
        {
            // We have to split the string, so make a copy.
            if (XMLString::stringLen(namePtr) < sizeof(tempBuffer) / sizeof(tempBuffer[0]))
            {
                XMLString::copyString(tempBuffer, namePtr);
                tempBuffer[colonInd] = chNull;
                prefPtr = tempBuffer;
            }
            else
            {
                janName.reset(XMLString::replicate(namePtr, fMemoryManager), fMemoryManager);
                janName[colonInd] = chNull;
                prefPtr = janName.get();
            }

            suffPtr = namePtr + colonInd + 1;
        }
        else
        {
            // No colon, so we just have a name with no prefix
            suffPtr = namePtr;
        }

        //  Map the prefix to a URI id. We tell him that we are mapping an
        //  attr prefix, so any xmlns attrs at this level will not affect it.
        const unsigned int uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);

        //  If the uri comes back as the xmlns or xml URI or its just a name
        //  and that name is 'xmlns', then we handle it specially. So set a
        //  boolean flag that lets us quickly below know which we are dealing
        //  with.
        const bool isNSAttr = (uriId == fXMLNSNamespaceId)
                              || (uriId == fXMLNamespaceId)
                              || XMLString::equals(suffPtr, XMLUni::fgXMLNSString)
                              || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI);


        //  If its not a special case namespace attr of some sort, then we
        //  do normal checking and processing.
        XMLAttDef::AttTypes attType;
        DatatypeValidator *attrValidator = 0;
        PSVIAttribute *psviAttr = 0;
        if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType)
        {
            // Some checking for attribute wild card first (for schema)
            bool laxThisOne = false;
            bool skipThisOne = false;

            XMLAttDef* attDefForWildCard = 0;
            XMLAttDef*  attDef = 0;