From 31e7708fd4affa327a7ae701f0e07cd34b1e08e7 Mon Sep 17 00:00:00 2001
From: "Unknown (roddey)" <dev-null@apache.org>
Date: Thu, 6 Apr 2000 23:50:38 +0000
Subject: [PATCH] Now the low level formatter handles doing char refs for
 unrepresentable chars (in addition to the replacement char style already
 done.)

git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@172031 13f79535-47bb-0310-9956-ffa450edef68
---
 src/framework/XMLFormatter.cpp | 97 ++++++++++++++++++++++++++++++++++
 src/framework/XMLFormatter.hpp | 11 ++++
 2 files changed, 108 insertions(+)

diff --git a/src/framework/XMLFormatter.cpp b/src/framework/XMLFormatter.cpp
index 9651f56ce..d509146e0 100644
--- a/src/framework/XMLFormatter.cpp
+++ b/src/framework/XMLFormatter.cpp
@@ -56,6 +56,11 @@
 
 /**
  * $Log$
+ * Revision 1.4  2000/04/06 23:50:38  roddey
+ * Now the low level formatter handles doing char refs for
+ * unrepresentable chars (in addition to the replacement char style
+ * already done.)
+ *
  * Revision 1.3  2000/04/06 19:09:21  roddey
  * Some more improvements to output formatting. Now it will correctly
  * handle doing the 'replacement char' style of dealing with chars
@@ -269,6 +274,17 @@ XMLFormatter::formatBuf(const   XMLCh* const    toFormat
     const UnRepFlags  actualUnRep = (unrepFlags == DefaultUnRep)
                                     ? fUnRepFlags : unrepFlags;
 
+    //
+    //  If the actual unrep action is that they want to provide char refs
+    //  for unrepresentable chars, then this one is a much more difficult
+    //  one to do cleanly, and we handle it separately.
+    //
+    if (actualUnRep == UnRep_CharRef)
+    {
+        specialFormat(toFormat, count, actualEsc);
+        return;
+    }
+
     //
     //  Use that to figure out what I should pass to the transcoder. If we
     //  are doing character references or failing for unrepresentable chars,
@@ -428,6 +444,7 @@ XMLFormatter::formatBuf(const   XMLCh* const    toFormat
                         srcPtr++;
                 }
             }
+
         }
     }
 }
@@ -564,3 +581,83 @@ const XMLByte* XMLFormatter::getQuoteRef()
     memcpy(fQuoteRef, fTmpBuf, outBytes + 1);
     return fQuoteRef;
 }
+
+
+void XMLFormatter::specialFormat(const  XMLCh* const    toFormat
+                                , const unsigned int    count
+                                , const EscapeFlags     escapeFlags)
+{
+    //
+    //  Called by formatBuf() when the unrep action is UnRep_CharRef. We
+    //  check each of the 'count' chars to see if it can be represented.
+    //  Runs of representable chars are handed back to formatBuf() in one
+    //  call, using the caller's escape flags. Each unrepresentable char
+    //  is sent out as a hex char ref (&#x...;) with escaping turned off.
+    //  We repeat this until we get all the chars done.
+    //
+    const XMLCh*    srcPtr = toFormat;
+    const XMLCh*    endPtr = toFormat + count;
+
+    // Set up the common part of the buffer that we build char refs into
+    XMLCh tmpBuf[32];
+    tmpBuf[0] = chAmpersand;
+    tmpBuf[1] = chPound;
+    tmpBuf[2] = chLatin_x;
+
+    while (srcPtr < endPtr)
+    {
+        const XMLCh* tmpPtr = srcPtr;
+        while (tmpPtr < endPtr)
+        {
+            if (fXCoder->canTranscodeTo(*tmpPtr))
+                tmpPtr++;
+            else
+                break;
+        }
+
+        if (tmpPtr > srcPtr)
+        {
+            // We got at least some chars that can be done normally
+            formatBuf
+            (
+                srcPtr
+                , tmpPtr - srcPtr
+                , escapeFlags
+                , XMLFormatter::UnRep_Fail
+            );
+
+            // Update the source pointer to our new spot
+            srcPtr = tmpPtr;
+        }
+        else
+        {
+            //
+            //  We hit something unrepresentable. So continue forward doing
+            //  char refs until we hit something representable again or the
+            //  end of input.
+            //
+            while (srcPtr < endPtr)
+            {
+                // Build a char ref for the current char
+                XMLString::binToText(*srcPtr, &tmpBuf[3], 8, 16);
+                const unsigned int bufLen = XMLString::stringLen(tmpBuf);
+                tmpBuf[bufLen] = chSemiColon;
+                tmpBuf[bufLen+1] = chNull;
+
+                // UnRep_Fail keeps this call from re-entering specialFormat
+                formatBuf
+                (
+                    tmpBuf
+                    , bufLen + 1
+                    , XMLFormatter::NoEscapes
+                    , XMLFormatter::UnRep_Fail
+                );
+
+                // Move up; never read *srcPtr once we are at end of input
+                srcPtr++;
+                if ((srcPtr >= endPtr) || fXCoder->canTranscodeTo(*srcPtr))
+                    break;
+            }
+        }
+    }
+}
diff --git a/src/framework/XMLFormatter.hpp b/src/framework/XMLFormatter.hpp
index 6d7ab02fb..3e1722854 100644
--- a/src/framework/XMLFormatter.hpp
+++ b/src/framework/XMLFormatter.hpp
@@ -56,6 +56,11 @@
 
 /*
  * $Log$
+ * Revision 1.4  2000/04/06 23:50:38  roddey
+ * Now the low level formatter handles doing char refs for
+ * unrepresentable chars (in addition to the replacement char style
+ * already done.)
+ *
  * Revision 1.3  2000/04/06 19:09:21  roddey
  * Some more improvements to output formatting. Now it will correctly
  * handle doing the 'replacement char' style of dealing with chars
@@ -216,6 +221,12 @@ private :
     const XMLByte* getLTRef();
     const XMLByte* getQuoteRef();
 
+    void specialFormat                      // formatBuf() path for UnRep_CharRef
+    (
+        const   XMLCh* const    toFormat    // chars to be formatted
+        , const unsigned int    count       // number of chars in toFormat
+        , const EscapeFlags     escapeFlags // escaping used for representable runs
+    );
 
 
     // -----------------------------------------------------------------------
-- 
GitLab