From 31e7708fd4affa327a7ae701f0e07cd34b1e08e7 Mon Sep 17 00:00:00 2001
From: "Unknown (roddey)" <dev-null@apache.org>
Date: Thu, 6 Apr 2000 23:50:38 +0000
Subject: [PATCH] Now the low level formatter handles doing char refs for
 unrepresentable chars (in addition to the replacement char style already
 done.)

git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@172031 13f79535-47bb-0310-9956-ffa450edef68
---
 src/framework/XMLFormatter.cpp | 97 ++++++++++++++++++++++++++++++++++
 src/framework/XMLFormatter.hpp | 11 ++++
 2 files changed, 108 insertions(+)

diff --git a/src/framework/XMLFormatter.cpp b/src/framework/XMLFormatter.cpp
index 9651f56ce..d509146e0 100644
--- a/src/framework/XMLFormatter.cpp
+++ b/src/framework/XMLFormatter.cpp
@@ -56,6 +56,11 @@
 
 /**
  * $Log$
+ * Revision 1.4  2000/04/06 23:50:38  roddey
+ * Now the low level formatter handles doing char refs for
+ * unrepresentable chars (in addition to the replacement char style
+ * already done.)
+ *
  * Revision 1.3  2000/04/06 19:09:21  roddey
  * Some more improvements to output formatting. Now it will correctly
  * handle doing the 'replacement char' style of dealing with chars
@@ -269,6 +274,17 @@ XMLFormatter::formatBuf(const XMLCh* const toFormat
     const UnRepFlags  actualUnRep = (unrepFlags == DefaultUnRep)
                                     ? fUnRepFlags : unrepFlags;
 
+    //
+    //  If the actual unrep action is that they want to provide char refs
+    //  for unrepresentable chars, then this one is a much more difficult
+    //  one to do cleanly, and we handle it separately.
+    //
+    if (actualUnRep == UnRep_CharRef)
+    {
+        specialFormat(toFormat, count, actualEsc);
+        return;
+    }
+
     //
     //  Use that to figure out what I should pass to the transcoder. If we
     //  are doing character references or failing for unrepresentable chars,
@@ -428,6 +444,7 @@ XMLFormatter::formatBuf(const XMLCh* const toFormat
                     srcPtr++;
                 }
             }
+
         }
     }
 }
@@ -564,3 +581,83 @@ const XMLByte* XMLFormatter::getQuoteRef()
     memcpy(fQuoteRef, fTmpBuf, outBytes + 1);
     return fQuoteRef;
 }
+
+
+void XMLFormatter::specialFormat(const  XMLCh* const    toFormat
+                                , const unsigned int    count
+                                , const EscapeFlags     escapeFlags)
+{
+    //
+    //  We have to check each character and see if it could be represented.
+    //  As long as it can, we just keep up with where we started and how
+    //  many chars we've checked. When we hit an unrepresentable one, we
+    //  stop, transcode everything we've collected, then start handling
+    //  the unrepresentables via char refs. We repeat this until we get all
+    //  the chars done.
+    //
+    const XMLCh*    srcPtr = toFormat;
+    const XMLCh*    endPtr = toFormat + count;
+
+    // Set up the common part of the buffer that we build char refs into
+    XMLCh tmpBuf[32];
+    tmpBuf[0] = chAmpersand;
+    tmpBuf[1] = chPound;
+    tmpBuf[2] = chLatin_x;
+
+    while (srcPtr < endPtr)
+    {
+        const XMLCh* tmpPtr = srcPtr;
+        while (tmpPtr < endPtr)
+        {
+            if (fXCoder->canTranscodeTo(*tmpPtr))
+                tmpPtr++;
+            else
+                break;
+        }
+
+        if (tmpPtr > srcPtr)
+        {
+            // We got at least some chars that can be done normally
+            formatBuf
+            (
+                srcPtr
+                , tmpPtr - srcPtr
+                , escapeFlags
+                , XMLFormatter::UnRep_Fail
+            );
+
+            // Update the source pointer to our new spot
+            srcPtr = tmpPtr;
+        }
+         else
+        {
+            //
+            //  We hit something unrepresentable. So continue forward doing
+            //  char refs until we hit something representable again or the
+            //  end of input.
+            //
+            while (srcPtr < endPtr)
+            {
+                // Build a char ref for the current char
+                XMLString::binToText(*srcPtr, &tmpBuf[3], 8, 16);
+                const unsigned int bufLen = XMLString::stringLen(tmpBuf);
+                tmpBuf[bufLen] = chSemiColon;
+                tmpBuf[bufLen+1] = chNull;
+
+                // And now call recursively back to our caller to format this
+                formatBuf
+                (
+                    tmpBuf
+                    , bufLen + 1
+                    , XMLFormatter::NoEscapes
+                    , XMLFormatter::UnRep_Fail
+                );
+
+                // Move up; break at a representable char, but never read *endPtr
+                srcPtr++;
+                if ((srcPtr < endPtr) && fXCoder->canTranscodeTo(*srcPtr))
+                    break;
+            }
+        }
+    }
+}
diff --git a/src/framework/XMLFormatter.hpp b/src/framework/XMLFormatter.hpp
index 6d7ab02fb..3e1722854 100644
--- a/src/framework/XMLFormatter.hpp
+++ b/src/framework/XMLFormatter.hpp
@@ -56,6 +56,11 @@
 
 /*
  * $Log$
+ * Revision 1.4  2000/04/06 23:50:38  roddey
+ * Now the low level formatter handles doing char refs for
+ * unrepresentable chars (in addition to the replacement char style
+ * already done.)
+ *
  * Revision 1.3  2000/04/06 19:09:21  roddey
  * Some more improvements to output formatting. Now it will correctly
  * handle doing the 'replacement char' style of dealing with chars
@@ -216,6 +221,12 @@ private :
     const XMLByte* getLTRef();
     const XMLByte* getQuoteRef();
 
+    void specialFormat
+    (
+        const   XMLCh* const    toFormat
+        , const unsigned int    count
+        , const EscapeFlags     escapeFlags
+    );
 
 
     // -----------------------------------------------------------------------
-- 
GitLab