Skip to content
Snippets Groups Projects
Commit a454ca92 authored by PeiYong Zhang's avatar PeiYong Zhang
Browse files

Bug#17983 Formatter does not escape control characters

git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@174864 13f79535-47bb-0310-9956-ffa450edef68
parent 2ef67cfb
No related branches found
No related tags found
No related merge requests found
...@@ -69,6 +69,9 @@ ...@@ -69,6 +69,9 @@
#include <xercesc/util/TranscodingException.hpp> #include <xercesc/util/TranscodingException.hpp>
#include <xercesc/util/XMLExceptMsgs.hpp> #include <xercesc/util/XMLExceptMsgs.hpp>
#include <xercesc/framework/XMLFormatter.hpp> #include <xercesc/framework/XMLFormatter.hpp>
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/XMLChar.hpp>
#include <string.h> #include <string.h>
XERCES_CPP_NAMESPACE_BEGIN XERCES_CPP_NAMESPACE_BEGIN
...@@ -124,8 +127,8 @@ static const XMLCh gEscapeChars[XMLFormatter::EscapeFlags_Count][kEscapeCount] = ...@@ -124,8 +127,8 @@ static const XMLCh gEscapeChars[XMLFormatter::EscapeFlags_Count][kEscapeCount] =
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Local methods // Local methods
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
static inline bool inEscapeList(const XMLFormatter::EscapeFlags escStyle bool XMLFormatter::inEscapeList(const XMLFormatter::EscapeFlags escStyle
, const XMLCh toCheck) , const XMLCh toCheck)
{ {
const XMLCh* escList = gEscapeChars[escStyle]; const XMLCh* escList = gEscapeChars[escStyle];
while (*escList) while (*escList)
...@@ -133,7 +136,42 @@ static inline bool inEscapeList(const XMLFormatter::EscapeFlags escStyle ...@@ -133,7 +136,42 @@ static inline bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
if (*escList++ == toCheck) if (*escList++ == toCheck)
return true; return true;
} }
return false;
/***
* XML1.1
*
* Finally, there is considerable demand to define a standard representation of
* arbitrary Unicode characters in XML documents. Therefore, XML 1.1 allows the
* use of character references to the control characters #x1 through #x1F,
* most of which are forbidden in XML 1.0. For reasons of robustness, however,
* these characters still cannot be used directly in documents.
* In order to improve the robustness of character encoding detection, the
* additional control characters #x7F through #x9F, which were freely allowed in
* XML 1.0 documents, now must also appear only as character references.
* (Whitespace characters are of course exempt.) The minor sacrifice of backward
* compatibility is considered not significant.
* Due to potential problems with APIs, #x0 is still forbidden both directly and
* as a character reference.
*
***/
if (fIsXML11)
{
// for XML11
if ( XMLChar1_1::isControlChar(toCheck, 0) &&
!XMLChar1_1::isWhitespace(toCheck, 0) )
{
return true;
}
else
{
return false;
}
}
else
{
return false;
}
} }
...@@ -141,24 +179,26 @@ static inline bool inEscapeList(const XMLFormatter::EscapeFlags escStyle ...@@ -141,24 +179,26 @@ static inline bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
// XMLFormatter: Constructors and Destructor // XMLFormatter: Constructors and Destructor
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
XMLFormatter::XMLFormatter( const char* const outEncoding XMLFormatter::XMLFormatter( const char* const outEncoding
, const char* const docVersion
, XMLFormatTarget* const target , XMLFormatTarget* const target
, const EscapeFlags escapeFlags , const EscapeFlags escapeFlags
, const UnRepFlags unrepFlags) : , const UnRepFlags unrepFlags)
fEscapeFlags(escapeFlags) : fEscapeFlags(escapeFlags)
, fOutEncoding(0) , fOutEncoding(0)
, fTarget(target) , fTarget(target)
, fUnRepFlags(unrepFlags) , fUnRepFlags(unrepFlags)
, fXCoder(0) , fXCoder(0)
, fAposRef(0) , fAposRef(0)
, fAmpRef(0)
, fGTRef(0)
, fLTRef(0)
, fQuoteRef(0)
, fAposLen(0) , fAposLen(0)
, fAmpRef(0)
, fAmpLen(0) , fAmpLen(0)
, fGTRef(0)
, fGTLen(0) , fGTLen(0)
, fLTRef(0)
, fLTLen(0) , fLTLen(0)
, fQuoteRef(0)
, fQuoteLen(0) , fQuoteLen(0)
, fIsXML11(false)
{ {
// Transcode the encoding string // Transcode the encoding string
fOutEncoding = XMLString::transcode(outEncoding); fOutEncoding = XMLString::transcode(outEncoding);
...@@ -183,24 +223,34 @@ XMLFormatter::XMLFormatter( const char* const outEncoding ...@@ -183,24 +223,34 @@ XMLFormatter::XMLFormatter( const char* const outEncoding
, outEncoding , outEncoding
); );
} }
XMLCh* const tmpDocVer = XMLString::transcode(docVersion);
ArrayJanitor<XMLCh> jname(tmpDocVer);
fIsXML11 = XMLString::equals(tmpDocVer, XMLUni::fgVersion1_1);
} }
XMLFormatter::XMLFormatter( const XMLCh* const outEncoding XMLFormatter::XMLFormatter( const XMLCh* const outEncoding
, const XMLCh* const docVersion
, XMLFormatTarget* const target , XMLFormatTarget* const target
, const EscapeFlags escapeFlags , const EscapeFlags escapeFlags
, const UnRepFlags unrepFlags) : , const UnRepFlags unrepFlags)
fEscapeFlags(escapeFlags) : fEscapeFlags(escapeFlags)
, fOutEncoding(0) , fOutEncoding(0)
, fTarget(target) , fTarget(target)
, fUnRepFlags(unrepFlags) , fUnRepFlags(unrepFlags)
, fXCoder(0) , fXCoder(0)
, fAposRef(0) , fAposRef(0)
, fAmpRef(0) , fAposLen(0)
, fAmpRef(0)
, fAmpLen(0)
, fGTRef(0) , fGTRef(0)
, fGTLen(0)
, fLTRef(0) , fLTRef(0)
, fLTLen(0)
, fQuoteRef(0) , fQuoteRef(0)
, fQuoteLen(0)
, fIsXML11(false)
{ {
// Copy the encoding string // Copy the encoding string
fOutEncoding = XMLString::replicate(outEncoding); fOutEncoding = XMLString::replicate(outEncoding);
...@@ -225,6 +275,8 @@ XMLFormatter::XMLFormatter( const XMLCh* const outEncoding ...@@ -225,6 +275,8 @@ XMLFormatter::XMLFormatter( const XMLCh* const outEncoding
, outEncoding , outEncoding
); );
} }
fIsXML11 = XMLString::equals(docVersion, XMLUni::fgVersion1_1);
} }
XMLFormatter::~XMLFormatter() XMLFormatter::~XMLFormatter()
...@@ -323,32 +375,33 @@ XMLFormatter::formatBuf(const XMLCh* const toFormat ...@@ -323,32 +375,33 @@ XMLFormatter::formatBuf(const XMLCh* const toFormat
const XMLByte * theChars; const XMLByte * theChars;
switch (*srcPtr) { switch (*srcPtr) {
case chAmpersand : case chAmpersand :
theChars = getCharRef(fAmpLen, fAmpRef, gAmpRef); theChars = getCharRef(fAmpLen, fAmpRef, gAmpRef);
fTarget->writeChars(theChars, fAmpLen, this); fTarget->writeChars(theChars, fAmpLen, this);
break; break;
case chSingleQuote : case chSingleQuote :
theChars = getCharRef(fAposLen, fAposRef, gAposRef); theChars = getCharRef(fAposLen, fAposRef, gAposRef);
fTarget->writeChars(theChars, fAposLen, this); fTarget->writeChars(theChars, fAposLen, this);
break; break;
case chDoubleQuote : case chDoubleQuote :
theChars = getCharRef(fQuoteLen, fQuoteRef, gQuoteRef); theChars = getCharRef(fQuoteLen, fQuoteRef, gQuoteRef);
fTarget->writeChars(theChars, fQuoteLen, this); fTarget->writeChars(theChars, fQuoteLen, this);
break; break;
case chCloseAngle : case chCloseAngle :
theChars = getCharRef(fGTLen, fGTRef, gGTRef); theChars = getCharRef(fGTLen, fGTRef, gGTRef);
fTarget->writeChars(theChars, fGTLen, this); fTarget->writeChars(theChars, fGTLen, this);
break; break;
case chOpenAngle : case chOpenAngle :
theChars = getCharRef(fLTLen, fLTRef, gLTRef); theChars = getCharRef(fLTLen, fLTRef, gLTRef);
fTarget->writeChars(theChars, fLTLen, this); fTarget->writeChars(theChars, fLTLen, this);
break; break;
default: default:
// <TBD> This is obviously an error // control characters
writeCharRef(*srcPtr);
break; break;
} }
srcPtr++; srcPtr++;
...@@ -357,7 +410,7 @@ XMLFormatter::formatBuf(const XMLCh* const toFormat ...@@ -357,7 +410,7 @@ XMLFormatter::formatBuf(const XMLCh* const toFormat
} }
} }
unsigned int unsigned int
XMLFormatter::handleUnEscapedChars(const XMLCh * srcPtr, XMLFormatter::handleUnEscapedChars(const XMLCh * srcPtr,
const unsigned int oCount, const unsigned int oCount,
...@@ -432,29 +485,52 @@ void XMLFormatter::writeBOM(const XMLByte* const toFormat ...@@ -432,29 +485,52 @@ void XMLFormatter::writeBOM(const XMLByte* const toFormat
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// XMLFormatter: Private helper methods // XMLFormatter: Private helper methods
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
const void XMLFormatter::writeCharRef(const XMLCh &toWrite)
{
XMLCh tmpBuf[32];
tmpBuf[0] = chAmpersand;
tmpBuf[1] = chPound;
tmpBuf[2] = chLatin_x;
// Build a char ref for the current char
XMLString::binToText(toWrite, &tmpBuf[3], 8, 16);
const unsigned int bufLen = XMLString::stringLen(tmpBuf);
tmpBuf[bufLen] = chSemiColon;
tmpBuf[bufLen+1] = chNull;
// write it out
formatBuf(tmpBuf
, bufLen + 1
, XMLFormatter::NoEscapes
, XMLFormatter::UnRep_Fail);
}
const XMLByte* XMLFormatter::getCharRef(unsigned int & count, const XMLByte* XMLFormatter::getCharRef(unsigned int & count,
XMLByte * ref, XMLByte * ref,
const XMLCh * stdRef) const XMLCh * stdRef)
{ {
if (!ref) { if (!ref) {
unsigned int charsEaten;
const unsigned int outBytes unsigned int charsEaten;
= fXCoder->transcodeTo(stdRef, XMLString::stringLen(stdRef), const unsigned int outBytes =
fXCoder->transcodeTo(stdRef, XMLString::stringLen(stdRef),
fTmpBuf, kTmpBufSize, charsEaten, fTmpBuf, kTmpBufSize, charsEaten,
XMLTranscoder::UnRep_Throw); XMLTranscoder::UnRep_Throw);
fTmpBuf[outBytes] = 0; fTmpBuf[outBytes + 1] = 0; fTmpBuf[outBytes] = 0;
fTmpBuf[outBytes + 2] = 0; fTmpBuf[outBytes + 3] = 0; fTmpBuf[outBytes + 1] = 0;
fTmpBuf[outBytes + 2] = 0;
fTmpBuf[outBytes + 3] = 0;
ref = new XMLByte[outBytes + 4]; ref = new XMLByte[outBytes + 4];
memcpy(ref, fTmpBuf, outBytes + 4); memcpy(ref, fTmpBuf, outBytes + 4);
count = outBytes; count = outBytes;
} }
return ref; return ref;
} }
void XMLFormatter::specialFormat(const XMLCh* const toFormat void XMLFormatter::specialFormat(const XMLCh* const toFormat
, const unsigned int count , const unsigned int count
, const EscapeFlags escapeFlags) , const EscapeFlags escapeFlags)
...@@ -470,12 +546,6 @@ void XMLFormatter::specialFormat(const XMLCh* const toFormat ...@@ -470,12 +546,6 @@ void XMLFormatter::specialFormat(const XMLCh* const toFormat
const XMLCh* srcPtr = toFormat; const XMLCh* srcPtr = toFormat;
const XMLCh* endPtr = toFormat + count; const XMLCh* endPtr = toFormat + count;
// Set up the common part of the buffer that we build char refs into
XMLCh tmpBuf[32];
tmpBuf[0] = chAmpersand;
tmpBuf[1] = chPound;
tmpBuf[2] = chLatin_x;
while (srcPtr < endPtr) while (srcPtr < endPtr)
{ {
const XMLCh* tmpPtr = srcPtr; const XMLCh* tmpPtr = srcPtr;
...@@ -510,20 +580,7 @@ void XMLFormatter::specialFormat(const XMLCh* const toFormat ...@@ -510,20 +580,7 @@ void XMLFormatter::specialFormat(const XMLCh* const toFormat
// //
while (srcPtr < endPtr) while (srcPtr < endPtr)
{ {
// Build a char ref for the current char writeCharRef(*srcPtr);
XMLString::binToText(*srcPtr, &tmpBuf[3], 8, 16);
const unsigned int bufLen = XMLString::stringLen(tmpBuf);
tmpBuf[bufLen] = chSemiColon;
tmpBuf[bufLen+1] = chNull;
// And now call recursively back to our caller to format this
formatBuf
(
tmpBuf
, bufLen + 1
, XMLFormatter::NoEscapes
, XMLFormatter::UnRep_Fail
);
// Move up the source pointer and break out if needed // Move up the source pointer and break out if needed
srcPtr++; srcPtr++;
......
...@@ -56,6 +56,9 @@ ...@@ -56,6 +56,9 @@
/* /*
* $Log$ * $Log$
* Revision 1.11 2003/03/16 06:00:43 peiyongz
* Bug#17983 Formatter does not escape control characters
*
* Revision 1.10 2003/03/11 12:58:36 tng * Revision 1.10 2003/03/11 12:58:36 tng
* Fix compilation error on AIX. * Fix compilation error on AIX.
* *
...@@ -273,6 +276,7 @@ public: ...@@ -273,6 +276,7 @@ public:
XMLFormatter XMLFormatter
( (
const XMLCh* const outEncoding const XMLCh* const outEncoding
, const XMLCh* const docVersion
, XMLFormatTarget* const target , XMLFormatTarget* const target
, const EscapeFlags escapeFlags = NoEscapes , const EscapeFlags escapeFlags = NoEscapes
, const UnRepFlags unrepFlags = UnRep_Fail , const UnRepFlags unrepFlags = UnRep_Fail
...@@ -281,6 +285,7 @@ public: ...@@ -281,6 +285,7 @@ public:
XMLFormatter XMLFormatter
( (
const char* const outEncoding const char* const outEncoding
, const char* const docVersion
, XMLFormatTarget* const target , XMLFormatTarget* const target
, const EscapeFlags escapeFlags = NoEscapes , const EscapeFlags escapeFlags = NoEscapes
, const UnRepFlags unrepFlags = UnRep_Fail , const UnRepFlags unrepFlags = UnRep_Fail
...@@ -411,9 +416,15 @@ private : ...@@ -411,9 +416,15 @@ private :
XMLByte * ref, XMLByte * ref,
const XMLCh * stdRef); const XMLCh * stdRef);
unsigned int handleUnEscapedChars(const XMLCh * srcPtr, const void writeCharRef(const XMLCh &toWrite);
const unsigned int count,
const UnRepFlags unrepFlags); bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
, const XMLCh toCheck);
unsigned int handleUnEscapedChars(const XMLCh * srcPtr,
const unsigned int count,
const UnRepFlags unrepFlags);
void specialFormat void specialFormat
( (
...@@ -458,6 +469,11 @@ private : ...@@ -458,6 +469,11 @@ private :
// These are character refs for the standard char refs, in the // These are character refs for the standard char refs, in the
// output encoding. They are faulted in as required, by transcoding // output encoding. They are faulted in as required, by transcoding
// them from fixed Unicode versions. // them from fixed Unicode versions.
//
// fIsXML11
// For performance reasons, we do not store the actual version string
// and repeat the string comparison again and again; the result of the
// comparison against the XML 1.1 version string is kept in this flag.
//
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------
EscapeFlags fEscapeFlags; EscapeFlags fEscapeFlags;
XMLCh* fOutEncoding; XMLCh* fOutEncoding;
...@@ -476,6 +492,9 @@ private : ...@@ -476,6 +492,9 @@ private :
unsigned int fLTLen; unsigned int fLTLen;
XMLByte* fQuoteRef; XMLByte* fQuoteRef;
unsigned int fQuoteLen; unsigned int fQuoteLen;
bool fIsXML11;
}; };
...@@ -493,7 +512,7 @@ public: ...@@ -493,7 +512,7 @@ public:
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------
virtual void writeChars virtual void writeChars
( (
const XMLByte* const toWrite const XMLByte* const toWrite
, const unsigned int count , const unsigned int count
, XMLFormatter* const formatter , XMLFormatter* const formatter
) = 0; ) = 0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment