From 0993487ababbeeafb6122032bdbf1993dbaced64 Mon Sep 17 00:00:00 2001 From: Alberto Massari <amassari@apache.org> Date: Tue, 3 Jan 2012 17:34:38 +0000 Subject: [PATCH] Ensure that the characters being written are valid XML characters (XERCESC-1854) git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@1226891 13f79535-47bb-0310-9956-ffa450edef68 --- src/xercesc/dom/impl/DOMLSSerializerImpl.cpp | 92 +++++++++++++------- src/xercesc/dom/impl/DOMLSSerializerImpl.hpp | 6 +- src/xercesc/framework/XMLFormatter.cpp | 3 +- 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/src/xercesc/dom/impl/DOMLSSerializerImpl.cpp b/src/xercesc/dom/impl/DOMLSSerializerImpl.cpp index 8212f47c9..8bd591dfd 100644 --- a/src/xercesc/dom/impl/DOMLSSerializerImpl.cpp +++ b/src/xercesc/dom/impl/DOMLSSerializerImpl.cpp @@ -277,6 +277,7 @@ DOMLSSerializerImpl::DOMLSSerializerImpl(MemoryManager* const manager) ,fCurrentLine(0) ,fLineFeedInTextNodePrinted(false) ,fLastWhiteSpaceInTextNode(0) +,fIsXml11(false) ,fNamespaceStack(0) ,fMemoryManager(manager) { @@ -486,6 +487,7 @@ bool DOMLSSerializerImpl::write(const DOMNode* nodeToWrite, * get Document Version */ fDocumentVersion = (docu && docu->getXmlVersion() && *(docu->getXmlVersion()))?docu->getXmlVersion():XMLUni::fgVersion1_0; + fIsXml11 = XMLString::equals(fDocumentVersion, XMLUni::fgVersion1_1); fErrorCount = 0; @@ -646,6 +648,7 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT) break; + ensureValidString(nodeToWrite, nodeValue); if (getFeature(FORMAT_PRETTY_PRINT_ID)) { fLineFeedInTextNodePrinted = false; @@ -697,6 +700,9 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT) break; + ensureValidString(nodeToWrite, nodeName); + ensureValidString(nodeToWrite, nodeValue); + if(level == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID)) printNewLine(); @@ -882,11 +888,11 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve namespaceMap=new (fMemoryManager) RefHashTableOf<XMLCh>(12, false, fMemoryManager); fNamespaceStack->addElement(namespaceMap); } - const XMLCh* nsPrefix = attribute->getLocalName(); + const XMLCh* nsPrefix = attribute->getLocalName(); if(XMLString::equals(attribute->getNodeName(),XMLUni::fgXMLNSString)) - nsPrefix = XMLUni::fgZeroLenString; - if(namespaceMap->containsKey((void*)nsPrefix)) - continue; + nsPrefix = XMLUni::fgZeroLenString; + if(namespaceMap->containsKey((void*)nsPrefix)) + continue; namespaceMap->put((void*)attribute->getLocalName(),(XMLCh*)attribute->getNodeValue()); } else if(!XMLString::equals(ns, XMLUni::fgXMLURIName)) @@ -927,7 +933,10 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve while( child != 0) { if(child->getNodeType()==DOMNode::TEXT_NODE) + { + ensureValidString(attribute, child->getNodeValue()); *fFormatter << child->getNodeValue(); + } else if(child->getNodeType()==DOMNode::ENTITY_REFERENCE_NODE) *fFormatter << XMLFormatter::NoEscapes << chAmpersand << child->getNodeName() << chSemiColon @@ -936,7 +945,10 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve } } else + { + ensureValidString(attribute, attribute->getNodeValue()); *fFormatter << attribute->getNodeValue(); + } *fFormatter << XMLFormatter::NoEscapes << chDoubleQuote; } @@ -1041,7 +1053,10 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve while( child != 0) { if(child->getNodeType()==DOMNode::TEXT_NODE) + { + ensureValidString(nodeToWrite, child->getNodeValue()); *fFormatter << child->getNodeValue(); + } else if(child->getNodeType()==DOMNode::ENTITY_REFERENCE_NODE) *fFormatter << XMLFormatter::NoEscapes << chAmpersand << child->getNodeName() << chSemiColon @@ -1050,7 +1065,10 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve } } else + { + ensureValidString(nodeToWrite, nodeValue); *fFormatter << nodeValue; + } *fFormatter << XMLFormatter::NoEscapes << chDoubleQuote; @@ -1124,8 +1142,9 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve } else { + ensureValidString(nodeToWrite, nodeValue); // search for "]]>", the node value is not supposed to have this - if (XMLString::patternMatch((XMLCh*) nodeValue, gEndCDATA) != -1) + if (XMLString::patternMatch(nodeValue, gEndCDATA) != -1) { reportError(nodeToWrite, DOMError::DOM_SEVERITY_FATAL_ERROR, XMLDOMMsg::Writer_NestedCDATA); } @@ -1145,6 +1164,8 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve if (checkFilter(nodeToWrite) != DOMNodeFilter::FILTER_ACCEPT) break; + ensureValidString(nodeToWrite, nodeValue); + // Figure out if we want pretty-printing for this comment. // If this comment node does not have any element siblings // (i.e., it is a text node) then we don't want to add any @@ -1156,40 +1177,40 @@ void DOMLSSerializerImpl::processNode(const DOMNode* const nodeToWrite, int leve if (!pretty) { - // See if we have any element siblings. - // - const DOMNode* s = nodeToWrite->getNextSibling (); - - while (s != 0 && s->getNodeType () != DOMNode::ELEMENT_NODE) - s = s->getNextSibling (); - - if (s != 0) - pretty = true; - else - { - s = nodeToWrite->getPreviousSibling (); + // See if we have any element siblings. + // + const DOMNode* s = nodeToWrite->getNextSibling (); while (s != 0 && s->getNodeType () != DOMNode::ELEMENT_NODE) - s = s->getPreviousSibling (); + s = s->getNextSibling (); if (s != 0) - pretty = true; - } + pretty = true; + else + { + s = nodeToWrite->getPreviousSibling (); + + while (s != 0 && s->getNodeType () != DOMNode::ELEMENT_NODE) + s = s->getPreviousSibling (); + + if (s != 0) + pretty = true; + } } if (pretty) { - if(level == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID)) - printNewLine(); + if(level == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID)) + printNewLine(); - printNewLine(); - printIndent(level); + printNewLine(); + printIndent(level); } TRY_CATCH_THROW ( *fFormatter << XMLFormatter::NoEscapes << gStartComment - << nodeValue << gEndComment; + << nodeValue << gEndComment; ) break; } @@ -1423,7 +1444,7 @@ bool DOMLSSerializerImpl::reportError(const DOMNode* const errorNode fErrorCount++; if (errorType == DOMError::DOM_SEVERITY_FATAL_ERROR || !toContinueProcess) - throw toEmit; + throw DOMLSException(DOMLSException::SERIALIZE_ERR, toEmit, fMemoryManager); return toContinueProcess; } @@ -1665,7 +1686,7 @@ void DOMLSSerializerImpl::processBOM() (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString6) == 0) || (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUTF16EncodingString7) == 0) ) { - if (XMLPlatformUtils::fgXMLChBigEndian) + if (XMLPlatformUtils::fgXMLChBigEndian) fFormatter->writeBOM(BOM_utf16be, 2); else fFormatter->writeBOM(BOM_utf16le, 2); @@ -1686,10 +1707,10 @@ void DOMLSSerializerImpl::processBOM() (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString4) == 0) || (XMLString::compareIStringASCII(fEncodingUsed, XMLUni::fgUCS4EncodingString5) == 0) ) { - if (XMLPlatformUtils::fgXMLChBigEndian) - fFormatter->writeBOM(BOM_ucs4be, 4); - else - fFormatter->writeBOM(BOM_ucs4le, 4); + if (XMLPlatformUtils::fgXMLChBigEndian) + fFormatter->writeBOM(BOM_ucs4be, 4); + else + fFormatter->writeBOM(BOM_ucs4le, 4); } } @@ -1718,4 +1739,13 @@ bool DOMLSSerializerImpl::isNamespaceBindingActive(const XMLCh* prefix, const XM return false; } +void DOMLSSerializerImpl::ensureValidString(const DOMNode* nodeToWrite, const XMLCh* string) +{ + // XERCESC-1854: prevent illegal characters from being written + XMLSize_t count=XMLString::stringLen(string); + for(XMLSize_t i=0;i<count;i++) + if((fIsXml11 && !XMLChar1_1::isXMLChar(string[i])) || (!fIsXml11 && !XMLChar1_0::isXMLChar(string[i]))) + reportError(nodeToWrite, DOMError::DOM_SEVERITY_FATAL_ERROR, XMLDOMMsg::INVALID_CHARACTER_ERR); +} + XERCES_CPP_NAMESPACE_END diff --git a/src/xercesc/dom/impl/DOMLSSerializerImpl.hpp b/src/xercesc/dom/impl/DOMLSSerializerImpl.hpp index ce449784c..af115fda5 100644 --- a/src/xercesc/dom/impl/DOMLSSerializerImpl.hpp +++ b/src/xercesc/dom/impl/DOMLSSerializerImpl.hpp @@ -70,8 +70,8 @@ public: virtual bool writeToURI(const DOMNode* nodeToWrite, const XMLCh* uri); /** - * The caller is responsible for the release of the returned string - */ + * The caller is responsible for the release of the returned string + */ virtual XMLCh* writeToString(const DOMNode* nodeToWrite, MemoryManager* manager = NULL); virtual void release(); @@ -134,6 +134,7 @@ protected: void setURCharRef(); bool isDefaultNamespacePrefixDeclared() const; bool isNamespaceBindingActive(const XMLCh* prefix, const XMLCh* uri) const; + void ensureValidString(const DOMNode* nodeToWrite, const XMLCh* string); void printIndent(unsigned int level); @@ -201,6 +202,7 @@ protected: int fCurrentLine; bool fLineFeedInTextNodePrinted; unsigned int fLastWhiteSpaceInTextNode; + bool fIsXml11; RefVectorOf< RefHashTableOf<XMLCh> >* fNamespaceStack; MemoryManager* fMemoryManager; diff --git a/src/xercesc/framework/XMLFormatter.cpp b/src/xercesc/framework/XMLFormatter.cpp index 170647f8e..96a5d4ef5 100644 --- a/src/xercesc/framework/XMLFormatter.cpp +++ b/src/xercesc/framework/XMLFormatter.cpp @@ -241,7 +241,6 @@ XMLFormatter::XMLFormatter( const XMLCh* const outEncoding // Copy the encoding string fOutEncoding = XMLString::replicate(outEncoding, fMemoryManager); - fIsXML11 = XMLString::equals(docVersion, XMLUni::fgVersion1_1); } @@ -418,7 +417,7 @@ XMLFormatter::formatBuf(const XMLCh* const toFormat else { // - // Escape chars that require it according tot he scale flags + // Escape chars that require it according to the scale flags // we were given. For the others, try to accumulate them and // format them in as big as bulk as we can. // -- GitLab