From 6a2be9c7ba819dee32a369fab72700493d7d09ac Mon Sep 17 00:00:00 2001
From: "Unknown (roddey)" <dev-null@apache.org>
Date: Fri, 7 Apr 2000 01:02:00 +0000
Subject: [PATCH] Fixed an error message so that it indicated the correct radix
 for the rep token. Get all of the basic output formatting functionality in
 place for at least ICU and Win32 transcoders.

git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@172034 13f79535-47bb-0310-9956-ffa450edef68
---
 src/NLS/EN_US/XMLErrList_EN_US.Xml            |  2 +-
 src/framework/XMLFormatter.cpp                | 77 +++++++++----------
 src/framework/XMLFormatter.hpp                | 12 ++-
 .../MsgLoaders/InMemory/CppErrMsgs_EN_US.hpp  |  2 +-
 src/util/Platforms/Win32/Version.rc           |  2 +-
 src/util/Transcoders/ICU/ICUTransService.cpp  | 72 +++++++++++++----
 6 files changed, 107 insertions(+), 60 deletions(-)

diff --git a/src/NLS/EN_US/XMLErrList_EN_US.Xml b/src/NLS/EN_US/XMLErrList_EN_US.Xml
index 28080f17d..74c2a3554 100644
--- a/src/NLS/EN_US/XMLErrList_EN_US.Xml
+++ b/src/NLS/EN_US/XMLErrList_EN_US.Xml
@@ -249,7 +249,7 @@
             <Message Id="StrPool_IllegalId" Text="String pool id was not legal"/>
             <Message Id="Trans_CouldNotCreateDefCvtr" Text="Could not create a default transcoder"/>
             <Message Id="Trans_InvalidSizeReq" Text="The maximum size to xlat is larger than the declared block size"/>
-            <Message Id="Trans_Unrepresentable" Text="Unicode char {0} is not representable in encoding {1}"/>
+            <Message Id="Trans_Unrepresentable" Text="Unicode char 0x{0} is not representable in encoding {1}"/>
             <Message Id="Trans_NotValidForEncoding" Text="Character {0} is not valid for encoding {1}"/>
             <Message Id="Trans_BadBlockSize" Text="The requested block size is not equal to the size set during construction"/>
             <Message Id="Trans_BadSrcSeq" Text="An invalid multi-byte source text sequence was encountered"/>
diff --git a/src/framework/XMLFormatter.cpp b/src/framework/XMLFormatter.cpp
index d509146e0..f22ff29db 100644
--- a/src/framework/XMLFormatter.cpp
+++ b/src/framework/XMLFormatter.cpp
@@ -56,6 +56,11 @@
 
 /**
  * $Log$
+ * Revision 1.5  2000/04/07 01:01:55  roddey
+ * Fixed an error message so that it indicated the correct radix for the rep
+ * token. Get all of the basic output formatting functionality in place for
+ * at least ICU and Win32 transcoders.
+ *
  * Revision 1.4  2000/04/06 23:50:38  roddey
  * Now the low level formatter handles doing char refs for
  * unrepresentable chars (in addition to the replacement char style
@@ -295,7 +300,6 @@ XMLFormatter::formatBuf(const   XMLCh* const    toFormat
     const XMLTranscoder::UnRepOpts unRepOpts = (actualUnRep == UnRep_Replace)
                                              ? XMLTranscoder::UnRep_RepChar
                                              : XMLTranscoder::UnRep_Throw;
-
     //
     //  If we don't have any escape flags set, then we can do the most
     //  efficient loop, else we have to do it the hard way.
@@ -347,12 +351,9 @@ XMLFormatter::formatBuf(const   XMLCh* const    toFormat
      else
     {
         //
-        //  This one just escapes the standard set of XML defined character
-        //  refs: apos, amp, lt, gt, and quot.
-        //
-        //  For now, just whimp out and do it the simple but slow way in
-        //  order to get this concept out for evaluation. Come back later
-        //  and spiff it up.
+        //  Escape chars that require it according to the escape flags we were
+        //  given. For the others, try to accumulate them and format them in
+        //  as big a bulk as we can.
         //
         while (srcPtr < endPtr)
         {
@@ -406,45 +407,37 @@ XMLFormatter::formatBuf(const   XMLCh* const    toFormat
              else if (tmpPtr < endPtr)
             {
                 //
-                //  Ok, so we've hit a char that must be escaped. So loop
-                //  until we hit the end or a non-escaped char and put out
-                //  char refs for these.
+                //  Ok, so we've hit a char that must be escaped. So do
+                //  this one specially.
                 //
-                bool done = false;
-                while ((srcPtr < endPtr) && !done)
+                switch(*srcPtr)
                 {
-                    switch(*srcPtr)
-                    {
-                        case chAmpersand :
-                            fTarget->writeChars(getAmpRef());
-                            break;
-
-                        case chSingleQuote :
-                            fTarget->writeChars(getAposRef());
-                            break;
-
-                        case chDoubleQuote :
-                            fTarget->writeChars(getQuoteRef());
-                            break;
-
-                        case chCloseAngle :
-                            fTarget->writeChars(getGTRef());
-                            break;
-
-                        case chOpenAngle :
-                            fTarget->writeChars(getLTRef());
-                            break;
-
-                        default:
-                            done = true;
-                            break;
-                    }
-
-                    if (!done)
-                        srcPtr++;
+                    case chAmpersand :
+                        fTarget->writeChars(getAmpRef());
+                        break;
+
+                    case chSingleQuote :
+                        fTarget->writeChars(getAposRef());
+                        break;
+
+                    case chDoubleQuote :
+                        fTarget->writeChars(getQuoteRef());
+                        break;
+
+                    case chCloseAngle :
+                        fTarget->writeChars(getGTRef());
+                        break;
+
+                    case chOpenAngle :
+                        fTarget->writeChars(getLTRef());
+                        break;
+
+                    default:
+                        // <TBD> This is obviously an error
+                        break;
                 }
+                srcPtr++;
             }
-
         }
     }
 }
diff --git a/src/framework/XMLFormatter.hpp b/src/framework/XMLFormatter.hpp
index 3e1722854..56b276278 100644
--- a/src/framework/XMLFormatter.hpp
+++ b/src/framework/XMLFormatter.hpp
@@ -56,6 +56,11 @@
 
 /*
  * $Log$
+ * Revision 1.5  2000/04/07 01:01:56  roddey
+ * Fixed an error message so that it indicated the correct radix for the rep
+ * token. Get all of the basic output formatting functionality in place for
+ * at least ICU and Win32 transcoders.
+ *
  * Revision 1.4  2000/04/06 23:50:38  roddey
  * Now the low level formatter handles doing char refs for
  * unrepresentable chars (in addition to the replacement char style
@@ -208,7 +213,7 @@ private :
     // -----------------------------------------------------------------------
     enum Constants
     {
-        kTmpBufSize     = 8192
+        kTmpBufSize     = 16 * 1024
     };
 
 
@@ -241,7 +246,6 @@ private :
     //      This the name of the output encoding. Saved mainly for meaningful
     //      error messages.
     //
-    //  fRef
     //  fTarget
     //      This is the target object for the formatting operation.
     //
@@ -253,6 +257,10 @@ private :
     //      This the transcoder that we will use. It is created using the
     //      encoding name we were told to use.
     //
+    //  fTmpBuf
+    //      An output buffer that we use to transcode chars into before we
+    //      send them off to be output.
+    //
     //  fAposRef
     //  fAmpRef
     //  fGTRef
diff --git a/src/util/MsgLoaders/InMemory/CppErrMsgs_EN_US.hpp b/src/util/MsgLoaders/InMemory/CppErrMsgs_EN_US.hpp
index 8fd2f4f3a..0cb61a361 100644
--- a/src/util/MsgLoaders/InMemory/CppErrMsgs_EN_US.hpp
+++ b/src/util/MsgLoaders/InMemory/CppErrMsgs_EN_US.hpp
@@ -255,7 +255,7 @@ const XMLCh gXMLExceptArray[][128] =
   , { 0x0053,0x0074,0x0072,0x0069,0x006E,0x0067,0x0020,0x0070,0x006F,0x006F,0x006C,0x0020,0x0069,0x0064,0x0020,0x0077,0x0061,0x0073,0x0020,0x006E,0x006F,0x0074,0x0020,0x006C,0x0065,0x0067,0x0061,0x006C,0x00 }
   , { 0x0043,0x006F,0x0075,0x006C,0x0064,0x0020,0x006E,0x006F,0x0074,0x0020,0x0063,0x0072,0x0065,0x0061,0x0074,0x0065,0x0020,0x0061,0x0020,0x0064,0x0065,0x0066,0x0061,0x0075,0x006C,0x0074,0x0020,0x0074,0x0072,0x0061,0x006E,0x0073,0x0063,0x006F,0x0064,0x0065,0x0072,0x00 }
   , { 0x0054,0x0068,0x0065,0x0020,0x006D,0x0061,0x0078,0x0069,0x006D,0x0075,0x006D,0x0020,0x0073,0x0069,0x007A,0x0065,0x0020,0x0074,0x006F,0x0020,0x0078,0x006C,0x0061,0x0074,0x0020,0x0069,0x0073,0x0020,0x006C,0x0061,0x0072,0x0067,0x0065,0x0072,0x0020,0x0074,0x0068,0x0061,0x006E,0x0020,0x0074,0x0068,0x0065,0x0020,0x0064,0x0065,0x0063,0x006C,0x0061,0x0072,0x0065,0x0064,0x0020,0x0062,0x006C,0x006F,0x0063,0x006B,0x0020,0x0073,0x0069,0x007A,0x0065,0x00 }
-  , { 0x0055,0x006E,0x0069,0x0063,0x006F,0x0064,0x0065,0x0020,0x0063,0x0068,0x0061,0x0072,0x0020,0x007B,0x0030,0x007D,0x0020,0x0069,0x0073,0x0020,0x006E,0x006F,0x0074,0x0020,0x0072,0x0065,0x0070,0x0072,0x0065,0x0073,0x0065,0x006E,0x0074,0x0061,0x0062,0x006C,0x0065,0x0020,0x0069,0x006E,0x0020,0x0065,0x006E,0x0063,0x006F,0x0064,0x0069,0x006E,0x0067,0x0020,0x007B,0x0031,0x007D,0x00 }
+  , { 0x0055,0x006E,0x0069,0x0063,0x006F,0x0064,0x0065,0x0020,0x0063,0x0068,0x0061,0x0072,0x0020,0x0030,0x0078,0x007B,0x0030,0x007D,0x0020,0x0069,0x0073,0x0020,0x006E,0x006F,0x0074,0x0020,0x0072,0x0065,0x0070,0x0072,0x0065,0x0073,0x0065,0x006E,0x0074,0x0061,0x0062,0x006C,0x0065,0x0020,0x0069,0x006E,0x0020,0x0065,0x006E,0x0063,0x006F,0x0064,0x0069,0x006E,0x0067,0x0020,0x007B,0x0031,0x007D,0x00 }
   , { 0x0043,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x0020,0x007B,0x0030,0x007D,0x0020,0x0069,0x0073,0x0020,0x006E,0x006F,0x0074,0x0020,0x0076,0x0061,0x006C,0x0069,0x0064,0x0020,0x0066,0x006F,0x0072,0x0020,0x0065,0x006E,0x0063,0x006F,0x0064,0x0069,0x006E,0x0067,0x0020,0x007B,0x0031,0x007D,0x00 }
   , { 0x0054,0x0068,0x0065,0x0020,0x0072,0x0065,0x0071,0x0075,0x0065,0x0073,0x0074,0x0065,0x0064,0x0020,0x0062,0x006C,0x006F,0x0063,0x006B,0x0020,0x0073,0x0069,0x007A,0x0065,0x0020,0x0069,0x0073,0x0020,0x006E,0x006F,0x0074,0x0020,0x0065,0x0071,0x0075,0x0061,0x006C,0x0020,0x0074,0x006F,0x0020,0x0074,0x0068,0x0065,0x0020,0x0073,0x0069,0x007A,0x0065,0x0020,0x0073,0x0065,0x0074,0x0020,0x0064,0x0075,0x0072,0x0069,0x006E,0x0067,0x0020,0x0063,0x006F,0x006E,0x0073,0x0074,0x0072,0x0075,0x0063,0x0074,0x0069,0x006F,0x006E,0x00 }
   , { 0x0041,0x006E,0x0020,0x0069,0x006E,0x0076,0x0061,0x006C,0x0069,0x0064,0x0020,0x006D,0x0075,0x006C,0x0074,0x0069,0x002D,0x0062,0x0079,0x0074,0x0065,0x0020,0x0073,0x006F,0x0075,0x0072,0x0063,0x0065,0x0020,0x0074,0x0065,0x0078,0x0074,0x0020,0x0073,0x0065,0x0071,0x0075,0x0065,0x006E,0x0063,0x0065,0x0020,0x0077,0x0061,0x0073,0x0020,0x0065,0x006E,0x0063,0x006F,0x0075,0x006E,0x0074,0x0065,0x0072,0x0065,0x0064,0x00 }
diff --git a/src/util/Platforms/Win32/Version.rc b/src/util/Platforms/Win32/Version.rc
index c011fd906..bbf1021cb 100644
--- a/src/util/Platforms/Win32/Version.rc
+++ b/src/util/Platforms/Win32/Version.rc
@@ -342,7 +342,7 @@ BEGIN
     8271              L"\x0053\x0074\x0072\x0069\x006E\x0067\x0020\x0070\x006F\x006F\x006C\x0020\x0069\x0064\x0020\x0077\x0061\x0073\x0020\x006E\x006F\x0074\x0020\x006C\x0065\x0067\x0061\x006C\x00"
     8272              L"\x0043\x006F\x0075\x006C\x0064\x0020\x006E\x006F\x0074\x0020\x0063\x0072\x0065\x0061\x0074\x0065\x0020\x0061\x0020\x0064\x0065\x0066\x0061\x0075\x006C\x0074\x0020\x0074\x0072\x0061\x006E\x0073\x0063\x006F\x0064\x0065\x0072\x00"
     8273              L"\x0054\x0068\x0065\x0020\x006D\x0061\x0078\x0069\x006D\x0075\x006D\x0020\x0073\x0069\x007A\x0065\x0020\x0074\x006F\x0020\x0078\x006C\x0061\x0074\x0020\x0069\x0073\x0020\x006C\x0061\x0072\x0067\x0065\x0072\x0020\x0074\x0068\x0061\x006E\x0020\x0074\x0068\x0065\x0020\x0064\x0065\x0063\x006C\x0061\x0072\x0065\x0064\x0020\x0062\x006C\x006F\x0063\x006B\x0020\x0073\x0069\x007A\x0065\x00"
-    8274              L"\x0055\x006E\x0069\x0063\x006F\x0064\x0065\x0020\x0063\x0068\x0061\x0072\x0020\x007B\x0030\x007D\x0020\x0069\x0073\x0020\x006E\x006F\x0074\x0020\x0072\x0065\x0070\x0072\x0065\x0073\x0065\x006E\x0074\x0061\x0062\x006C\x0065\x0020\x0069\x006E\x0020\x0065\x006E\x0063\x006F\x0064\x0069\x006E\x0067\x0020\x007B\x0031\x007D\x00"
+    8274              L"\x0055\x006E\x0069\x0063\x006F\x0064\x0065\x0020\x0063\x0068\x0061\x0072\x0020\x0030\x0078\x007B\x0030\x007D\x0020\x0069\x0073\x0020\x006E\x006F\x0074\x0020\x0072\x0065\x0070\x0072\x0065\x0073\x0065\x006E\x0074\x0061\x0062\x006C\x0065\x0020\x0069\x006E\x0020\x0065\x006E\x0063\x006F\x0064\x0069\x006E\x0067\x0020\x007B\x0031\x007D\x00"
     8275              L"\x0043\x0068\x0061\x0072\x0061\x0063\x0074\x0065\x0072\x0020\x007B\x0030\x007D\x0020\x0069\x0073\x0020\x006E\x006F\x0074\x0020\x0076\x0061\x006C\x0069\x0064\x0020\x0066\x006F\x0072\x0020\x0065\x006E\x0063\x006F\x0064\x0069\x006E\x0067\x0020\x007B\x0031\x007D\x00"
     8276              L"\x0054\x0068\x0065\x0020\x0072\x0065\x0071\x0075\x0065\x0073\x0074\x0065\x0064\x0020\x0062\x006C\x006F\x0063\x006B\x0020\x0073\x0069\x007A\x0065\x0020\x0069\x0073\x0020\x006E\x006F\x0074\x0020\x0065\x0071\x0075\x0061\x006C\x0020\x0074\x006F\x0020\x0074\x0068\x0065\x0020\x0073\x0069\x007A\x0065\x0020\x0073\x0065\x0074\x0020\x0064\x0075\x0072\x0069\x006E\x0067\x0020\x0063\x006F\x006E\x0073\x0074\x0072\x0075\x0063\x0074\x0069\x006F\x006E\x00"
     8277              L"\x0041\x006E\x0020\x0069\x006E\x0076\x0061\x006C\x0069\x0064\x0020\x006D\x0075\x006C\x0074\x0069\x002D\x0062\x0079\x0074\x0065\x0020\x0073\x006F\x0075\x0072\x0063\x0065\x0020\x0074\x0065\x0078\x0074\x0020\x0073\x0065\x0071\x0075\x0065\x006E\x0063\x0065\x0020\x0077\x0061\x0073\x0020\x0065\x006E\x0063\x006F\x0075\x006E\x0074\x0065\x0072\x0065\x0064\x00"
diff --git a/src/util/Transcoders/ICU/ICUTransService.cpp b/src/util/Transcoders/ICU/ICUTransService.cpp
index b68824bcb..d4905bccd 100644
--- a/src/util/Transcoders/ICU/ICUTransService.cpp
+++ b/src/util/Transcoders/ICU/ICUTransService.cpp
@@ -56,6 +56,11 @@
 
 /*
  * $Log$
+ * Revision 1.17  2000/04/07 01:02:00  roddey
+ * Fixed an error message so that it indicated the correct radix for the rep
+ * token. Get all of the basic output formatting functionality in place for
+ * at least ICU and Win32 transcoders.
+ *
  * Revision 1.16  2000/03/18 00:00:03  roddey
  * Initial updates for two way transcoding support
  *
@@ -129,6 +134,7 @@
 #include <unicode/uloc.h>
 #include <unicode/unicode.h>
 #include <unicode/ucnv.h>
+#include <unicode/ucnv_err.h>
 #include <unicode/ustring.h>
 
 
@@ -563,13 +569,26 @@ ICUTranscoder::transcodeTo( const   XMLCh* const    srcData
     }
     ArrayJanitor<UChar> janTmpBuf(tmpBufPtr);
 
+    //
+    //  Set the appropriate callback so that it will either fail or use
+    //  the rep char. Remember the old one so we can put it back.
+    //
+    UErrorCode  err = U_ZERO_ERROR;
+    UConverterFromUCallback oldCB = ucnv_setFromUCallBack
+    (
+        (UConverter*)&fConverter
+        , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP
+                                   : UCNV_FROM_U_CALLBACK_SUBSTITUTE
+        , &err
+    );
+
     //
     //  Ok, lets transcode as many chars as we we can in one shot. The
     //  ICU API gives enough info not to have to do this one char by char.
     //
-    UErrorCode  err = U_ZERO_ERROR;
     XMLByte*        startTarget = toFill;
     const UChar*    startSrc = srcPtr;
+    err = U_ZERO_ERROR;
     ucnv_fromUnicode
     (
         fConverter
@@ -582,13 +601,24 @@ ICUTranscoder::transcodeTo( const   XMLCh* const    srcData
         , &err
     );
 
-    //
-    // <TBD>
-    //  This is really right yet. We need to differentiate between
-    //  just an error and the use of a replacement char.
-    //
-    if (err != U_ZERO_ERROR)
+    // Remember the status before we possibly overwrite the error code
+    const bool res = (err == U_ZERO_ERROR);
+
+    // Put the old handler back
+    err = U_ZERO_ERROR;
+    ucnv_setFromUCallBack(fConverter, oldCB, &err);
+
+    if (!res)
     {
+        XMLCh tmpBuf[16];
+        XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16);
+        ThrowXML2
+        (
+            TranscodingException
+            , XMLExcepts::Trans_Unrepresentable
+            , tmpBuf
+            , getEncodingName()
+        );
     }
 
     // Fill in the chars we ate from the input
@@ -619,12 +649,24 @@ bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck) const
         srcBuf[0] = UChar(toCheck);
     }
 
-    char tmpBuf[64];
-
-
+    //
+    //  Set the callback so that it will fail instead of using the rep char.
+    //  Remember the old one so we can put it back.
+    //
     UErrorCode  err = U_ZERO_ERROR;
+    UConverterFromUCallback oldCB = ucnv_setFromUCallBack
+    (
+        (UConverter*)&fConverter
+        , UCNV_FROM_U_CALLBACK_STOP
+        , &err
+    );
+
+    // Set up a temp buffer to format into. Make it more than big enough
+    char            tmpBuf[64];
     char*           startTarget = tmpBuf;
     const UChar*    startSrc = srcBuf;
+
+    err = U_ZERO_ERROR;
     ucnv_fromUnicode
     (
         fConverter
@@ -637,10 +679,14 @@ bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck) const
         , &err
     );
 
-    if (err != U_ZERO_ERROR)
-        return false;
+    // Save the result before we overwrite the error code
+    const bool res = (err == U_ZERO_ERROR);
 
-    return true;
+    // Put the old handler back
+    err = U_ZERO_ERROR;
+    ucnv_setFromUCallBack(fConverter, oldCB, &err);
+
+    return res;
 }
 
 
-- 
GitLab