Skip to content
Snippets Groups Projects
DOMLSSerializerImpl.cpp 61.6 KiB
Newer Older
                //
                //  There were no children. Output the short form close of
                //  the element start tag, making it an empty-element tag.
                //
                if (filterAction == DOMNodeFilter::FILTER_ACCEPT)
                {
                    TRY_CATCH_THROW
                        *fFormatter << XMLFormatter::NoEscapes << chForwardSlash << chCloseAngle;
                    )
                }
            if(namespaceMap!=NULL)
                fNamespaceStack->removeLastElement();
    case DOMNode::ATTRIBUTE_NODE:
        {
            if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
                break;
            const XMLCh* localName = nodeToWrite->getLocalName();

            // check if this is a DOM Level 1 Node
                *fFormatter  << XMLFormatter::NoEscapes
                *fFormatter  << XMLFormatter::NoEscapes
                             << chOpenCurly << nodeToWrite->getNamespaceURI()
                             << chCloseCurly << localName;
            *fFormatter  << chEqual << chDoubleQuote
                         << XMLFormatter::AttrEscapes;
            if (getFeature(ENTITIES_ID))
            {
                DOMNodeSPtr child = nodeToWrite->getFirstChild();
                while( child != 0)
                {
                    if(child->getNodeType()==DOMNode::TEXT_NODE)
                    {
                        ensureValidString(nodeToWrite, child->getNodeValue());
                    else if(child->getNodeType()==DOMNode::ENTITY_REFERENCE_NODE)
                        *fFormatter << XMLFormatter::NoEscapes
                                    << chAmpersand << child->getNodeName() << chSemiColon
                                    << XMLFormatter::AttrEscapes;
                    child = child->getNextSibling();
                }
            *fFormatter  << XMLFormatter::NoEscapes
                         << chDoubleQuote;
    case DOMNode::ENTITY_REFERENCE_NODE:
        {
            //"entities"
            //true
            //[required] (default)
            //Keep EntityReference and Entity nodes in the document.

            //false
            //[optional]
            //Remove all EntityReference and Entity nodes from the document,
            //       putting the entity expansions directly in their place.
            //       Text nodes are into "normal" form.
            //Only EntityReference nodes to non-defined entities are kept in the document.

            if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
                break;

            if (getFeature(ENTITIES_ID))
            {
                TRY_CATCH_THROW
                (
                    *fFormatter << XMLFormatter::NoEscapes << chAmpersand
                                << nodeName << chSemiColon;
                // check if the referenced entity is defined or not
                if (nodeToWrite->getOwnerDocument()->getDoctype()->getEntities()->getNamedItem(nodeName))
                {
                    for (child = nodeToWrite->getFirstChild();
                    child != 0;
                    child = child->getNextSibling())
                    {
                    }
                }
                else
                {
                    TRY_CATCH_THROW
                   (
                        *fFormatter<<XMLFormatter::NoEscapes<<chAmpersand<<nodeName<<chSemiColon;
        //
        //  feature:split_cdata_sections     occurence of ]]>   unrep-char
        //  ===============================================================
        //          true                        split            split
        //          false                       fails            fails
        //
    case DOMNode::CDATA_SECTION_NODE:
        {
            if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
                break;
            if (getFeature(SPLIT_CDATA_SECTIONS_ID))
            {
                // it is fairly complicated and we process this
                // in a separate function.
                ensureValidString(nodeToWrite, nodeValue);
Tinny Ng's avatar
Tinny Ng committed
                // search for "]]>", the node value is not supposed to have this
                if (XMLString::patternMatch(nodeValue, gEndCDATA) != -1)
                    reportError(nodeToWrite, DOMError::DOM_SEVERITY_FATAL_ERROR, XMLDOMMsg::Writer_NestedCDATA);
Tinny Ng's avatar
Tinny Ng committed
                    // transcoder throws exception for unrep chars
                    *fFormatter << XMLFormatter::NoEscapes << gStartCDATA << nodeValue << gEndCDATA;
            if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
                break;

            // Figure out if we want pretty-printing for this comment.
            // If this comment node does not have any element siblings
            // (i.e., it is a text node) then we don't want to add any
            // whitespaces since that might be significant to the
            // application. Otherwise we want pretty-printing.
            //

            bool pretty = (level == 0); // Document-level comments.

            if (!pretty)
            {
                // See if we have any element siblings.
                //
                const DOMNode* s = nodeToWrite->getNextSibling ();

                while (s != 0 && s->getNodeType () != DOMNode::ELEMENT_NODE)
                    pretty = true;
                else
                {
                    s = nodeToWrite->getPreviousSibling ();

                    while (s != 0 && s->getNodeType () != DOMNode::ELEMENT_NODE)
                        s = s->getPreviousSibling ();

                    if (s != 0)
                       pretty = true;
                }
                if(level == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID))
                    printNewLine();
                *fFormatter << XMLFormatter::NoEscapes << gStartComment
    case DOMNode::DOCUMENT_TYPE_NODE:  // Not to be shown to Filter
        {
            const DOMDocumentType *doctype = (const DOMDocumentType *)nodeToWrite;
            fFormatter->setEscapeFlags(XMLFormatter::NoEscapes);

            printNewLine();
            printIndent(level);
            (
                *fFormatter << gStartDoctype << nodeName;
                const XMLCh  *id = doctype->getPublicId();
                    *fFormatter << chSpace << gPublic << id << chDoubleQuote;

                    id = doctype->getSystemId();
                    if (id && *id)
                    {
                        *fFormatter << chSpace << chDoubleQuote << id << chDoubleQuote;
                    }
                    else
                    {
                        //
                        // 4.2.2 External Entities
                        // [Definition: If the entity is not internal,
                        //           it is an external entity, declared as follows:]
                        // External Entity Declaration
                        // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
                        //                   | 'PUBLIC' S PubidLiteral S SystemLiteral
                        //
                        reportError(nodeToWrite, DOMError::DOM_SEVERITY_FATAL_ERROR, XMLDOMMsg::Writer_NotRecognizedType);
                        // systemLiteral not found
                    }
                }
                else
                {
                    id = doctype->getSystemId();
                    if (id && *id)
                    {
                        *fFormatter << chSpace << gSystem << id << chDoubleQuote;
                    }
                }

                id = doctype->getInternalSubset();
                if (id && *id)
                {
                    *fFormatter << chSpace << chOpenSquare << id << chCloseSquare;
                }

                *fFormatter << chCloseAngle;
            ) // end of TRY_CATCH_THROW

            break;
    case DOMNode::ENTITY_NODE:  // Not to be shown to Filter
            //
            // REVISIT: how does the feature "entities" impact
            // entity node?
            //
            printNewLine();
            printIndent(level);
            fFormatter->setEscapeFlags(XMLFormatter::NoEscapes);
            *fFormatter << gStartEntity    << nodeName;
            const XMLCh * id = ((const DOMEntity*)nodeToWrite)->getPublicId();
            if (id)
                *fFormatter << gPublic << id << chDoubleQuote;
            id = ((const DOMEntity*)nodeToWrite)->getSystemId();
            if (id)
                *fFormatter << gSystem << id << chDoubleQuote;
            id = ((const DOMEntity*)nodeToWrite)->getNotationName();
            if (id)
                *fFormatter << gNotation << id << chDoubleQuote;
            *fFormatter << chCloseAngle;

            This is an implementation specific behaviour, we abort if a user derived class has not dealt with
            this node type.
            if(!customNodeSerialize(nodeToWrite, level)) {
                reportError(nodeToWrite, DOMError::DOM_SEVERITY_FATAL_ERROR, XMLDOMMsg::Writer_NotRecognizedType);
                // UnreognizedNodeType;
            }
bool DOMLSSerializerImpl::customNodeSerialize(const DOMNode* const, int) {
PeiYong Zhang's avatar
PeiYong Zhang committed
//
//
DOMNodeFilter::FilterAction DOMLSSerializerImpl::checkFilter(const DOMNode* const node) const
PeiYong Zhang's avatar
PeiYong Zhang committed
        ((fFilter->getWhatToShow() & (1 << (node->getNodeType() - 1))) == 0))
        return DOMNodeFilter::FILTER_ACCEPT;

    //
    // if and only if there is a filter, and it is interested
    // in the node type, then we pass the node to the filter
    // for examination
    //
    return (DOMNodeFilter::FilterAction) fFilter->acceptNode(node);
bool DOMLSSerializerImpl::checkFeature(const XMLCh* const featName
                               , bool               toThrow
                               , int&               featureId) const
    // check for null and/or empty feature name
            throw DOMException(DOMException::NOT_FOUND_ERR, 0, fMemoryManager);
    if (XMLString::equals(featName, XMLUni::fgDOMWRTCanonicalForm))
        featureId = CANONICAL_FORM_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTDiscardDefaultContent))
        featureId = DISCARD_DEFAULT_CONTENT_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTEntities))
        featureId = ENTITIES_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTFormatPrettyPrint))
        featureId = FORMAT_PRETTY_PRINT_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTNormalizeCharacters))
        featureId = NORMALIZE_CHARACTERS_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTSplitCdataSections))
        featureId = SPLIT_CDATA_SECTIONS_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTValidation))
        featureId = VALIDATION_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTWhitespaceInElementContent))
        featureId = WHITESPACE_IN_ELEMENT_CONTENT_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTBOM))
        featureId = BYTE_ORDER_MARK_ID;
    else if (XMLString::equals(featName, XMLUni::fgDOMXMLDeclaration))
Neil Graham's avatar
Neil Graham committed
        featureId = XML_DECLARATION;
Alberto Massari's avatar
Alberto Massari committed
    else if (XMLString::equals(featName, XMLUni::fgDOMWRTXercesPrettyPrint))
        featureId = FORMAT_PRETTY_PRINT_1ST_LEVEL_ID;

    //feature name not resolvable
    if (featureId == INVALID_FEATURE_ID)
    {
        if (toThrow)
            throw DOMException(DOMException::NOT_FOUND_ERR, 0, fMemoryManager);
bool DOMLSSerializerImpl::reportError(const DOMNode* const    errorNode
                              , DOMError::ErrorSeverity errorType
                              , const XMLCh*   const    errorMsg)
{
    bool toContinueProcess = true;   // default value for no error handler

    if (fErrorHandler)
    {
        DOMLocatorImpl  locator(0, 0, (DOMNode*) errorNode, 0);
        DOMErrorImpl    domError(errorType , errorMsg, &locator);
        try
        {
            toContinueProcess = fErrorHandler->handleError(domError);
        }
        catch(...)
        {
        }
Tinny Ng's avatar
Tinny Ng committed
    if (errorType != DOMError::DOM_SEVERITY_WARNING)
        fErrorCount++;
bool DOMLSSerializerImpl::reportError(const DOMNode* const    errorNode
                              , DOMError::ErrorSeverity errorType
                              , XMLDOMMsg::Codes        toEmit)
{
    const XMLSize_t msgSize = 1023;
    XMLCh errText[msgSize + 1];

    DOMImplementationImpl::getMsgLoader4DOM()->loadMsg(toEmit, errText, msgSize);

    bool toContinueProcess = true;   // default value for no error handler

    if (fErrorHandler)
    {
        DOMLocatorImpl  locator(0, 0, (DOMNode*) errorNode, 0);
        DOMErrorImpl    domError(errorType , errText, &locator);
        try
        {
            toContinueProcess = fErrorHandler->handleError(domError);
        }
        catch(...)
        {
        }
Tinny Ng's avatar
Tinny Ng committed
    if (errorType != DOMError::DOM_SEVERITY_WARNING)
        fErrorCount++;
    if (errorType == DOMError::DOM_SEVERITY_FATAL_ERROR || !toContinueProcess)
        throw DOMLSException(DOMLSException::SERIALIZE_ERR, toEmit, fMemoryManager);
Tinny Ng's avatar
Tinny Ng committed
//
void DOMLSSerializerImpl::procCdataSection(const XMLCh*   const nodeValue
    static const XMLSize_t offset = XMLString::stringLen(gEndCDATA);

    /***
     * Append a ']]>' at the end
     */
    XMLSize_t len = XMLString::stringLen(nodeValue);
    XMLCh* repNodeValue = (XMLCh*) fMemoryManager->allocate
    (
        (len + offset + 1) * sizeof(XMLCh)
    );//new XMLCh [len + offset + 1];
    XMLString::copyString(repNodeValue, nodeValue);
    XMLString::catString(repNodeValue, gEndCDATA);
    ArrayJanitor<XMLCh>  jName(repNodeValue, fMemoryManager);

    XMLCh* curPtr  = (XMLCh*) repNodeValue;
    XMLCh* nextPtr = 0;
    int    endTagPos = -1;
    bool   endTagFound = true;

    while (endTagFound)
    {
Tinny Ng's avatar
Tinny Ng committed
        endTagPos = XMLString::patternMatch(curPtr, gEndCDATA);
        if (endTagPos != -1)
        {
            nextPtr = curPtr + endTagPos + offset;  // skip the ']]>'
            *(curPtr + endTagPos) = chNull;         //nullify the first ']'
            if (XMLSize_t(endTagPos) != len)
Tinny Ng's avatar
Tinny Ng committed
                reportError(nodeToWrite, DOMError::DOM_SEVERITY_WARNING, XMLDOMMsg::Writer_NestedCDATA);
            len = len - endTagPos - offset;
PeiYong Zhang's avatar
PeiYong Zhang committed
        /***
            to check ]]>]]>
        ***/
        if (endTagPos == 0)
        {
            TRY_CATCH_THROW
PeiYong Zhang's avatar
PeiYong Zhang committed
                *fFormatter << XMLFormatter::NoEscapes << gStartCDATA << gEndCDATA;
            )
        }
        else
            procUnrepCharInCdataSection(curPtr, nodeToWrite);
PeiYong Zhang's avatar
PeiYong Zhang committed
        }

        if (endTagFound)
        {
            *(nextPtr - offset) = chCloseSquare;   //restore the first ']'
            curPtr = nextPtr;
        }
    }
}

//
Tinny Ng's avatar
Tinny Ng committed
//
void DOMLSSerializerImpl::procUnrepCharInCdataSection(const XMLCh*   const nodeValue
{
    //
    //  We have to check each character and see if it could be represented.
    //  As long as it can, we just keep up with where we started and how
    //  many chars we've checked. When we hit an unrepresentable one, we
    //  stop, transcode everything we've collected, then start handling
    //  the unrepresentables via char refs. We repeat this until we get all
    //  the chars done.
    //
    const XMLCh*    srcPtr = nodeValue;
    const XMLCh*    endPtr = nodeValue +  XMLString::stringLen(nodeValue);

    // Set up the common part of the buffer that we build char refs into
    XMLCh tmpBuf[32];
    tmpBuf[0] = chAmpersand;
    tmpBuf[1] = chPound;
    tmpBuf[2] = chLatin_x;

    while (srcPtr < endPtr)
    {
        const XMLCh* tmpPtr = srcPtr;
        while (tmpPtr < endPtr)
        {
            if (fFormatter->getTranscoder()->canTranscodeTo(*tmpPtr))
                tmpPtr++;
            else
                break;
        }

        if (tmpPtr > srcPtr)
        {
            TRY_CATCH_THROW
                *fFormatter << XMLFormatter::NoEscapes << gStartCDATA;
            )

            // We got at least some chars that can be done normally
            fFormatter->formatBuf
            (
                srcPtr
                , tmpPtr - srcPtr
                , XMLFormatter::UnRep_Fail
            );

            TRY_CATCH_THROW
            (
                *fFormatter << XMLFormatter::NoEscapes << gEndCDATA;
            )

            // Update the source pointer to our new spot
            srcPtr = tmpPtr;
        }
            //
            //  We hit something unrepresentable. So continue forward doing
            //  char refs until we hit something representable again or the
            //  end of input.
            //
            // one warning for consective unrep chars
            reportError(nodeToWrite, DOMError::DOM_SEVERITY_WARNING, XMLDOMMsg::Writer_NotRepresentChar);

            while (srcPtr < endPtr)
            {
                // Build a char ref for the current char
                XMLString::binToText(*srcPtr, &tmpBuf[3], 8, 16, fMemoryManager);
                const XMLSize_t bufLen = XMLString::stringLen(tmpBuf);
                tmpBuf[bufLen] = chSemiColon;
                tmpBuf[bufLen+1] = chNull;

                // And now call recursively back to our caller to format this
                fFormatter->formatBuf
                (
                    tmpBuf
                    , bufLen + 1
                    , XMLFormatter::NoEscapes
                    , XMLFormatter::UnRep_Fail
                );

                // Move up the source pointer and break out if needed
                srcPtr++;
                if (fFormatter->getTranscoder()->canTranscodeTo(*srcPtr))
                    break;
            }
        }
    }
void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite)
bool DOMLSSerializerImpl::canSetFeature(const int featureId
    return featuresSupported[2*featureId + (val? 0: 1)];
void DOMLSSerializerImpl::printNewLine()
    if (getFeature(FORMAT_PRETTY_PRINT_ID))
        *fFormatter << fNewLineUsed;
void DOMLSSerializerImpl::printIndent(unsigned int level)
Alberto Massari's avatar
Alberto Massari committed
        if (fLastWhiteSpaceInTextNode)
PeiYong Zhang's avatar
PeiYong Zhang committed
        {
            unsigned int indentLevel = fLastWhiteSpaceInTextNode/2; // two chSpaces equals one indent level
Alberto Massari's avatar
Alberto Massari committed
            fLastWhiteSpaceInTextNode = 0;
            // if fLastWhiteSpaceInTextNode/2 is greater than level, then
PeiYong Zhang's avatar
PeiYong Zhang committed
            // it means too many spaces have been written to the
            // output stream and we can no longer indent properly
            if(indentLevel < level)
                level -= indentLevel;
            else
                level = 0;
        for(unsigned int i = 0; i < level; i++)
Tinny Ng's avatar
Tinny Ng committed

void DOMLSSerializerImpl::release()
Tinny Ng's avatar
Tinny Ng committed
{
    DOMLSSerializerImpl* writer = (DOMLSSerializerImpl*) this;
Tinny Ng's avatar
Tinny Ng committed
    delete writer;
}

void DOMLSSerializerImpl::processBOM()
{
    // if the feature is not set, don't output bom
    if (!getFeature(BYTE_ORDER_MARK_ID))
        return;

    if ((XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF8EncodingString)  == 0) ||
        (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF8EncodingString2) == 0)  )
    {
        fFormatter->writeBOM(BOM_utf8, 3);
    }
    else if ((XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16LEncodingString)  == 0) ||
        (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16LEncodingString2) == 0)  )
    {
        fFormatter->writeBOM(BOM_utf16le, 2);
    }
    else if ((XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16BEncodingString)  == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16BEncodingString2) == 0)  )
    {
        fFormatter->writeBOM(BOM_utf16be, 2);
    }
    else if ((XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString)  == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString2) == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString3) == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString4) == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString5) == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString6) == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString7) == 0)  )
            fFormatter->writeBOM(BOM_utf16be, 2);
    else if ((XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4LEncodingString)  == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4LEncodingString2) == 0)  )
    {
        fFormatter->writeBOM(BOM_ucs4le, 4);
    }
    else if ((XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4BEncodingString)  == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4BEncodingString2) == 0)  )
    {
        fFormatter->writeBOM(BOM_ucs4be, 4);
    }
    else if ((XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString)  == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString2) == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString3) == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString4) == 0) ||
             (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString5) == 0)  )
        if (XMLPlatformUtils::fgXMLChBigEndian)
            fFormatter->writeBOM(BOM_ucs4be, 4);
        else
            fFormatter->writeBOM(BOM_ucs4le, 4);
bool DOMLSSerializerImpl::isDefaultNamespacePrefixDeclared() const
{
    for(XMLSize_t i=fNamespaceStack->size();i>0;i--)
        RefHashTableOf<XMLCh>* curNamespaceMap=fNamespaceStack->elementAt(i-1);
        const XMLCh* thisUri=curNamespaceMap->get((void*)XMLUni::fgZeroLenString);
        if(thisUri)
            return true;
    }
    return false;
}

bool DOMLSSerializerImpl::isNamespaceBindingActive(const XMLCh* prefix, const XMLCh* uri) const
{
    for(XMLSize_t i=fNamespaceStack->size();i>0;i--)
        RefHashTableOf<XMLCh>* curNamespaceMap=fNamespaceStack->elementAt(i-1);
        const XMLCh* thisUri=curNamespaceMap->get((void*)prefix);
        // if the prefix has been declared, check if it binds to the correct namespace, otherwise, reports it isn't bound
        if(thisUri)
            return XMLString::equals(thisUri,uri);
    }
    return false;
}

void DOMLSSerializerImpl::ensureValidString(const DOMNode* nodeToWrite, const XMLCh* string)
{
    // XERCESC-1854: prevent illegal characters from being written
Alberto Massari's avatar
Alberto Massari committed
    if(string==0)
        return;
    const XMLCh* cursor=string;
    while(*cursor!=0)
    {
        if((fIsXml11 && !XMLChar1_1::isXMLChar(*cursor)) || (!fIsXml11 && !XMLChar1_0::isXMLChar(*cursor)))
            reportError(nodeToWrite, DOMError::DOM_SEVERITY_FATAL_ERROR, XMLDOMMsg::INVALID_CHARACTER_ERR);
Alberto Massari's avatar
Alberto Massari committed
        cursor++;
    }
Tinny Ng's avatar
Tinny Ng committed
XERCES_CPP_NAMESPACE_END