From 0993487ababbeeafb6122032bdbf1993dbaced64 Mon Sep 17 00:00:00 2001
From: Alberto Massari <amassari@apache.org>
Date: Tue, 3 Jan 2012 17:34:38 +0000
Subject: [PATCH] Ensure that the characters being written are valid XML
 characters (XERCESC-1854)

git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@1226891 13f79535-47bb-0310-9956-ffa450edef68
---
 src/xercesc/dom/impl/DOMLSSerializerImpl.cpp | 92 +++++++++++++-------
 src/xercesc/dom/impl/DOMLSSerializerImpl.hpp |  6 +-
 src/xercesc/framework/XMLFormatter.cpp       |  3 +-
 3 files changed, 66 insertions(+), 35 deletions(-)

diff --git a/src/xercesc/dom/impl/DOMLSSerializerImpl.cpp b/src/xercesc/dom/impl/DOMLSSerializerImpl.cpp
index 8212f47c9..8bd591dfd 100644
--- a/src/xercesc/dom/impl/DOMLSSerializerImpl.cpp
+++ b/src/xercesc/dom/impl/DOMLSSerializerImpl.cpp
@@ -277,6 +277,7 @@ DOMLSSerializerImpl::DOMLSSerializerImpl(MemoryManager* const manager)
 ,fCurrentLine(0)
 ,fLineFeedInTextNodePrinted(false)
 ,fLastWhiteSpaceInTextNode(0)
+,fIsXml11(false)
 ,fNamespaceStack(0)
 ,fMemoryManager(manager)
 {
@@ -486,6 +487,7 @@ bool DOMLSSerializerImpl::write(const DOMNode* nodeToWrite,
      *  get Document Version
      */
     fDocumentVersion = (docu && docu->getXmlVersion() && *(docu->getXmlVersion()))?docu->getXmlVersion():XMLUni::fgVersion1_0;
+    fIsXml11 = XMLString::equals(fDocumentVersion, XMLUni::fgVersion1_1);
 
     fErrorCount = 0;
 
@@ -646,6 +648,7 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
             if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
                 break;
 
+            ensureValidString(nodeToWrite, nodeValue);
             if (getFeature(FORMAT_PRETTY_PRINT_ID))
             {
                 fLineFeedInTextNodePrinted = false;
@@ -697,6 +700,9 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
             if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
                 break;
 
+            ensureValidString(nodeToWrite, nodeName);
+            ensureValidString(nodeToWrite, nodeValue);
+
             if(level == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID))
                 printNewLine();
 
@@ -882,11 +888,11 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
                                 namespaceMap=new (fMemoryManager) RefHashTableOf<XMLCh>(12, false, fMemoryManager);
                                 fNamespaceStack->addElement(namespaceMap);
                             }
-			                const XMLCh* nsPrefix = attribute->getLocalName();
+                            const XMLCh* nsPrefix = attribute->getLocalName();
                             if(XMLString::equals(attribute->getNodeName(),XMLUni::fgXMLNSString))
-								nsPrefix = XMLUni::fgZeroLenString;
-							if(namespaceMap->containsKey((void*)nsPrefix))
-								continue;
+                                nsPrefix = XMLUni::fgZeroLenString;
+                            if(namespaceMap->containsKey((void*)nsPrefix))
+                                continue;
                             namespaceMap->put((void*)attribute->getLocalName(),(XMLCh*)attribute->getNodeValue());
                         }
                         else if(!XMLString::equals(ns, XMLUni::fgXMLURIName))
@@ -927,7 +933,10 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
                             while( child != 0)
                             {
                                 if(child->getNodeType()==DOMNode::TEXT_NODE)
+                                {
+                                    ensureValidString(attribute, child->getNodeValue());
                                     *fFormatter  << child->getNodeValue();
+                                }
                                 else if(child->getNodeType()==DOMNode::ENTITY_REFERENCE_NODE)
                                     *fFormatter << XMLFormatter::NoEscapes
                                                 << chAmpersand << child->getNodeName() << chSemiColon
@@ -936,7 +945,10 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
                             }
                         }
                         else
+                        {
+                            ensureValidString(attribute, attribute->getNodeValue());
                             *fFormatter  << attribute->getNodeValue();
+                        }
                         *fFormatter  << XMLFormatter::NoEscapes
                                      << chDoubleQuote;
                     }
@@ -1041,7 +1053,10 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
                 while( child != 0)
                 {
                     if(child->getNodeType()==DOMNode::TEXT_NODE)
+                    {
+                        ensureValidString(nodeToWrite, child->getNodeValue());
                         *fFormatter  << child->getNodeValue();
+                    }
                     else if(child->getNodeType()==DOMNode::ENTITY_REFERENCE_NODE)
                         *fFormatter << XMLFormatter::NoEscapes
                                     << chAmpersand << child->getNodeName() << chSemiColon
@@ -1050,7 +1065,10 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
                 }
             }
             else
+            {
+                ensureValidString(nodeToWrite, nodeValue);
                 *fFormatter  << nodeValue;
+            }
             *fFormatter  << XMLFormatter::NoEscapes
                          << chDoubleQuote;
 
@@ -1124,8 +1142,9 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
             }
             else
             {
+                ensureValidString(nodeToWrite, nodeValue);
                 // search for "]]>", the node value is not supposed to have this
-                if (XMLString::patternMatch((XMLCh*) nodeValue, gEndCDATA) != -1)
+                if (XMLString::patternMatch(nodeValue, gEndCDATA) != -1)
                 {
                     reportError(nodeToWrite, DOMError::DOM_SEVERITY_FATAL_ERROR, XMLDOMMsg::Writer_NestedCDATA);
                 }
@@ -1145,6 +1164,8 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
             if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT)
                 break;
 
+            ensureValidString(nodeToWrite, nodeValue);
+
             // Figure out if we want pretty-printing for this comment.
             // If this comment node does not have any element siblings
             // (i.e., it is a text node) then we don't want to add any
@@ -1156,40 +1177,40 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve
 
             if (!pretty)
             {
-              // See if we have any element siblings.
-              //
-              const DOMNode* s = nodeToWrite->getNextSibling ();
-
-              while (s != 0 && s->getNodeType () != DOMNode::ELEMENT_NODE)
-                s = s->getNextSibling ();
-
-              if (s != 0)
-                pretty = true;
-              else
-              {
-                s = nodeToWrite->getPreviousSibling ();
+                // See if we have any element siblings.
+                //
+                const DOMNode* s = nodeToWrite->getNextSibling ();
 
                 while (s != 0 && s->getNodeType () != DOMNode::ELEMENT_NODE)
-                  s = s->getPreviousSibling ();
+                    s = s->getNextSibling ();
 
                 if (s != 0)
-                  pretty = true;
-              }
+                    pretty = true;
+                else
+                {
+                    s = nodeToWrite->getPreviousSibling ();
+
+                    while (s != 0 && s->getNodeType () != DOMNode::ELEMENT_NODE)
+                        s = s->getPreviousSibling ();
+
+                    if (s != 0)
+                       pretty = true;
+                }
             }
 
             if (pretty)
             {
-              if(level == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID))
-                printNewLine();
+                if(level == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID))
+                    printNewLine();
 
-              printNewLine();
-              printIndent(level);
+                printNewLine();
+                printIndent(level);
             }
 
             TRY_CATCH_THROW
             (
                 *fFormatter << XMLFormatter::NoEscapes << gStartComment
-                << nodeValue << gEndComment;
+                            << nodeValue << gEndComment;
             )
             break;
         }
@@ -1423,7 +1444,7 @@ bool DOMLSSerializerImpl::reportError(const DOMNode* const    errorNode
         fErrorCount++;
 
     if (errorType == DOMError::DOM_SEVERITY_FATAL_ERROR || !toContinueProcess)
-        throw toEmit;
+        throw DOMLSException(DOMLSException::SERIALIZE_ERR, toEmit, fMemoryManager);
 
     return toContinueProcess;
 }
@@ -1665,7 +1686,7 @@ void DOMLSSerializerImpl::processBOM()
              (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString6) == 0) ||
              (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString7) == 0)  )
     {
-    	if (XMLPlatformUtils::fgXMLChBigEndian)
+        if (XMLPlatformUtils::fgXMLChBigEndian)
             fFormatter->writeBOM(BOM_utf16be, 2);
         else
             fFormatter->writeBOM(BOM_utf16le, 2);
@@ -1686,10 +1707,10 @@ void DOMLSSerializerImpl::processBOM()
              (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString4) == 0) ||
              (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString5) == 0)  )
     {
-		if (XMLPlatformUtils::fgXMLChBigEndian)
-	        fFormatter->writeBOM(BOM_ucs4be, 4);
-	    else
-			fFormatter->writeBOM(BOM_ucs4le, 4);
+        if (XMLPlatformUtils::fgXMLChBigEndian)
+            fFormatter->writeBOM(BOM_ucs4be, 4);
+        else
+            fFormatter->writeBOM(BOM_ucs4le, 4);
     }
 }
 
@@ -1718,4 +1739,41 @@ bool DOMLSSerializerImpl::isNamespaceBindingActive(const XMLCh* prefix, const XM
     return false;
 }
 
+// XERCESC-1854: prevent illegal characters from being written.
+//
+// Scans 'string' and reports a fatal INVALID_CHARACTER_ERR against
+// 'nodeToWrite' for the first UTF-16 code unit that is not a legal XML
+// character for the version being serialized (fIsXml11 selects the 1.1
+// rules, otherwise 1.0). A null 'string' is treated as empty.
+//
+// A character outside the BMP is stored as a surrogate pair; each half fails
+// the single code unit test, so a lead surrogate is re-checked together with
+// the code unit that follows it and the pair is skipped when well formed.
+void DOMLSSerializerImpl::ensureValidString(const DOMNode* nodeToWrite, const XMLCh* string)
+{
+    if(string == 0)
+        return;
+
+    for(const XMLCh* cursor = string; *cursor != 0; cursor++)
+    {
+        const XMLCh ch = *cursor;
+        if(fIsXml11 ? XMLChar1_1::isXMLChar(ch) : XMLChar1_0::isXMLChar(ch))
+            continue;
+
+        if(ch >= 0xD800 && ch <= 0xDBFF)
+        {
+            // possible lead surrogate: valid only with a trailing surrogate
+            const XMLCh trail = cursor[1];
+            if(trail != 0 && (fIsXml11 ? XMLChar1_1::isXMLChar(ch, trail) : XMLChar1_0::isXMLChar(ch, trail)))
+            {
+                cursor++;   // the pair is one character; step over its second half
+                continue;
+            }
+        }
+
+        // fatal errors make reportError throw, so this ends the scan
+        reportError(nodeToWrite, DOMError::DOM_SEVERITY_FATAL_ERROR, XMLDOMMsg::INVALID_CHARACTER_ERR);
+    }
+}
+
 XERCES_CPP_NAMESPACE_END
diff --git a/src/xercesc/dom/impl/DOMLSSerializerImpl.hpp b/src/xercesc/dom/impl/DOMLSSerializerImpl.hpp
index ce449784c..af115fda5 100644
--- a/src/xercesc/dom/impl/DOMLSSerializerImpl.hpp
+++ b/src/xercesc/dom/impl/DOMLSSerializerImpl.hpp
@@ -70,8 +70,8 @@ public:
     virtual bool                    writeToURI(const DOMNode*   nodeToWrite,
                                                const XMLCh*     uri);
     /**
-	  *  The caller is responsible for the release of the returned string
-	  */
+      *  The caller is responsible for the release of the returned string
+      */
     virtual XMLCh*                  writeToString(const DOMNode* nodeToWrite, MemoryManager* manager = NULL);
     virtual void                    release();
 
@@ -134,6 +134,7 @@ protected:
     void                          setURCharRef();
     bool                          isDefaultNamespacePrefixDeclared() const;
     bool                          isNamespaceBindingActive(const XMLCh* prefix, const XMLCh* uri) const;
+    void                          ensureValidString(const DOMNode* nodeToWrite, const XMLCh* string);
 
 
     void printIndent(unsigned int level);
@@ -201,6 +202,7 @@ protected:
     int                           fCurrentLine;
     bool                          fLineFeedInTextNodePrinted;
     unsigned int                  fLastWhiteSpaceInTextNode;
+    bool                          fIsXml11;
 
     RefVectorOf< RefHashTableOf<XMLCh> >* fNamespaceStack;
     MemoryManager*               fMemoryManager;
diff --git a/src/xercesc/framework/XMLFormatter.cpp b/src/xercesc/framework/XMLFormatter.cpp
index 170647f8e..96a5d4ef5 100644
--- a/src/xercesc/framework/XMLFormatter.cpp
+++ b/src/xercesc/framework/XMLFormatter.cpp
@@ -241,7 +241,6 @@ XMLFormatter::XMLFormatter( const   XMLCh* const            outEncoding
     // Copy the encoding string
     fOutEncoding = XMLString::replicate(outEncoding, fMemoryManager);
 
-
     fIsXML11 = XMLString::equals(docVersion, XMLUni::fgVersion1_1);
 }
 
@@ -418,7 +417,7 @@ XMLFormatter::formatBuf(const   XMLCh* const    toFormat
      else
     {
         //
-        //  Escape chars that require it according tot he scale flags
+        //  Escape chars that require it according to the scale flags
         //  we were given. For the others, try to accumulate them and
         //  format them in as big as bulk as we can.
         //
-- 
GitLab