/*
* Copyright (c) 1999-2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* Revision 1.19 2000/05/11 23:13:31 rahulj
* Works with latest revision of ICU which provides a hard
* linked data DLL. i.e. icudata.dll will be loaded when xerces-c is
* loaded.
*
* Revision 1.18 2000/04/12 18:41:28 roddey
* Fixed a small 'one off' problem in the calls to ICU.
*
* Revision 1.17 2000/04/07 01:02:00 roddey
* Fixed an error message so that it indicated the correct radix for the rep
* token. Get all of the basic output formatting functionality in place for
* at least ICU and Win32 transcoders.
*
* Revision 1.16 2000/03/18 00:00:03 roddey
* Initial updates for two way transcoding support
*
* Revision 1.15 2000/03/02 19:55:34 roddey
* This checkin includes many changes done while waiting for the
* 1.1.0 code to be finished. I can't list them all here, but a list is
* available elsewhere.
*
* Revision 1.14 2000/02/10 21:28:53 aruna1
* Problem solved for converting UChar to XMLCh. This problem showed up on solaris as UChar was != XMLCh
*
* Revision 1.13 2000/02/06 07:48:32 rahulj
* Year 2K copyright swat.
*
* Revision 1.12 2000/01/25 22:49:56 roddey
* Moved the supportsSrcOfs() method from the individual transcoder to the
* transcoding service, where it should have been to begin with.
*
* Revision 1.11 2000/01/25 19:19:07 roddey
* Simple addition of a getId() method to the xcode and netacess abstractions to
* allow each impl to give back an id string.
*
* Revision 1.10 2000/01/24 20:39:47 roddey
* Fixed a bug introduced in the recent move to always have XMLCh be
* the same as wchar_t.
*
* Revision 1.9 2000/01/21 23:59:06 roddey
* Added code to deal with system configurations where XMLCh is not
* the same size as ICU's UChar.
*
* Revision 1.8 2000/01/19 23:21:11 abagchi
* Made this file compatible with ICU 1.4
*
* Revision 1.7 2000/01/19 00:58:07 roddey
* Update to support new ICU 1.4 release.
*
* Revision 1.6 1999/12/18 00:22:32 roddey
* Changes to support the new, completely orthagonal, transcoder architecture.
*
* Revision 1.5 1999/12/15 19:43:45 roddey
* Now implements the new transcoding abstractions, with separate interface
* classes for XML transcoders and local code page transcoders.
*
* Revision 1.4 1999/12/07 23:08:41 roddey
* Add in code to test for some control characters and report them as whitespace.
* ICU is not doing this currently, so we need to do it until they get that fixed.
*
* Revision 1.3 1999/11/18 20:16:52 abagchi
* Now works with ICU 1.3.1
*
* Revision 1.2 1999/11/17 22:36:41 rahulj
* Code works with ICU transcoding service
*
* Revision 1.1.1.1 1999/11/09 01:06:07 twl
* Initial checkin
*
* Revision 1.3 1999/11/08 20:45:33 rahul
* Swat for adding in Product name and CVS comment log variable.
*
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <string.h>
#include <util/Janitor.hpp>
#include <util/XMLString.hpp>
#include <unicode/uloc.h>
#include <unicode/unicode.h>
#include <unicode/ucnv.h>
#include <unicode/ucnv_err.h>
#include <unicode/udata.h>
#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX)
// Forward reference the symbol which points to the ICU converter data.
// It is handed to udata_setCommonData() in the ICUTransService constructor,
// which is compiled under the same platform guard.
extern "C" const uint8_t U_IMPORT icudata_dat[];
#endif
// ---------------------------------------------------------------------------
// Local, const data
// ---------------------------------------------------------------------------
// Identifier for this transcoding service: the null terminated XMLCh
// sequence 'I', 'C', 'U'. This is the string handed back by getId().
static const XMLCh gMyServiceId[] =
{
chLatin_I, chLatin_C, chLatin_U, chNull
};
// ---------------------------------------------------------------------------
// Local functions
// ---------------------------------------------------------------------------
//
// When XMLCh and ICU's UChar are not the same size, we have to do a temp
// conversion of all strings. These local helper methods make that easier.
//
//
//  Makes a heap allocated, null terminated UChar copy of an XMLCh string.
//
//  toConvert   The source text. It is dereferenced, so it must not be null.
//  srcLen      The number of source chars to copy, or zero (the default) to
//              have the length calculated from the null terminator.
//
//  Returns a buffer allocated with new [], which the caller must release
//  with delete [].
//
static UChar* convertToUChar(   const   XMLCh* const    toConvert
                                , const unsigned int    srcLen = 0)
{
    const unsigned int actualLen = srcLen
                                   ? srcLen : XMLString::stringLen(toConvert);

    //
    //  Size the buffer from the actual length. The srcLen parameter is zero
    //  when defaulted, so it cannot be used to size the allocation.
    //
    UChar* tmpBuf = new UChar[actualLen + 1];

    // Copy at most actualLen chars, stopping early if we hit a null
    unsigned int index = 0;
    for (; (index < actualLen) && toConvert[index]; index++)
        tmpBuf[index] = UChar(toConvert[index]);
    tmpBuf[index] = 0;

    return tmpBuf;
}
//
//  Makes a heap allocated, null terminated XMLCh copy of a null terminated
//  UChar string. The caller must release the result with delete [].
//
static XMLCh* convertToXMLCh(const UChar* const toConvert)
{
    const unsigned int srcLen = u_strlen(toConvert);
    XMLCh* retBuf = new XMLCh[srcLen + 1];

    // Widen or narrow each code unit individually, then cap off the result
    for (unsigned int index = 0; index < srcLen; index++)
        retBuf[index] = XMLCh(toConvert[index]);
    retBuf[srcLen] = 0;

    return retBuf;
}
// ---------------------------------------------------------------------------
// ICUTransService: Constructors and Destructor
// ---------------------------------------------------------------------------
//
//  On every platform except OS/390, AS/400 and HP-UX, the constructor hands
//  ICU the address of its converter data.
//
ICUTransService::ICUTransService()
{
#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX)
    //
    //  Giving ICU the address of its converter data forces the ICU data DLL
    //  to be loaded once the Xerces-C DLL has been loaded. The price is an
    //  explicit link against the ICU converter DLL. The gain is that we no
    //  longer rely on demand loading of DLLs, which is highly system
    //  dependent and was a constant source of support calls.
    //
    //  The status reported by ICU is not examined.
    //
    UErrorCode status = U_ZERO_ERROR;
    udata_setCommonData((void*)icudata_dat, &status);
#endif
}
// No cleanup is performed when the service is destroyed
ICUTransService::~ICUTransService() {}
// ---------------------------------------------------------------------------
// ICUTransService: The virtual transcoding service API
// ---------------------------------------------------------------------------
int ICUTransService::compareIString(const XMLCh* const comp1
, const XMLCh* const comp2)
{
const XMLCh* psz1 = comp1;
const XMLCh* psz2 = comp2;
unsigned int curCount = 0;
while (true)
{
//
// If an inequality, then return the difference. Note that the XMLCh
// might be bigger physically than UChar, but it won't hold anything
// larger than 0xFFFF, so our cast here will work for both possible
// sizes of XMLCh.
//
if (Unicode::toUpperCase(UChar(*psz1)) != Unicode::toUpperCase(UChar(*psz2)))
return int(*psz1) - int(*psz2);
// If either has ended, then they both ended, so equal
if (!*psz1 || !*psz2)
break;
// Move upwards for the next round
psz1++;
psz2++;
}
return 0;
}
int ICUTransService::compareNIString(const XMLCh* const comp1
, const XMLCh* const comp2
, const unsigned int maxChars)
{
const XMLCh* psz1 = comp1;
const XMLCh* psz2 = comp2;
unsigned int curCount = 0;
while (true)
{
//
// If an inequality, then return the difference. Note that the XMLCh
// might be bigger physically than UChar, but it won't hold anything
// larger than 0xFFFF, so our cast here will work for both possible
// sizes of XMLCh.
//
if (Unicode::toUpperCase(UChar(*psz1)) != Unicode::toUpperCase(UChar(*psz2)))
return int(*psz1) - int(*psz2);
// If either ended, then both ended, so equal
if (!*psz1 || !*psz2)
break;
// Move upwards to next chars
psz1++;
psz2++;
//
// Bump the count of chars done. If it equals the count then we
// are equal for the requested count, so break out and return
// equal.
//
curCount++;
if (maxChars == curCount)
break;
}
return 0;
}
Unknown (roddey)
committed
// Hands back the file level service id string. The caller does not own it.
const XMLCh* ICUTransService::getId() const
{
    const XMLCh* const serviceId = gMyServiceId;
    return serviceId;
}
//
//  Reports whether the passed char is whitespace. Tab, line feed and
//  carriage return are answered here. Everything else is left to ICU.
//
bool ICUTransService::isSpace(const XMLCh toCheck) const
{
    //
    //  <TBD>
    //  For now, we short circuit some of the control chars because ICU
    //  is not correctly reporting them as space. Later, when they change
    //  this, we can get rid of this special case.
    //
    if ((toCheck == 0x09)
    ||  (toCheck == 0x0A)
    ||  (toCheck == 0x0D))
    {
        return true;
    }
    return (Unicode::isSpaceChar(UChar(toCheck)) != 0);
}
//
//  Creates a local code page transcoder wrapped around ICU's default
//  converter. Returns a null pointer if the converter cannot be opened.
//
XMLLCPTranscoder* ICUTransService::makeNewLCPTranscoder()
{
    //
    //  Try to create a default converter. If it fails, return a null
    //  pointer which will basically cause the system to give up because
    //  we really can't do anything without one.
    //
    UErrorCode uerr = U_ZERO_ERROR;
    UConverter* converter = ucnv_open(NULL, &uerr);
    if (!converter)
        return 0;

    // That went ok, so create an ICU LCP transcoder wrapper and return it
    return new ICULCPTranscoder(converter);
}
// Source offset information is available from this implementation
bool ICUTransService::supportsSrcOfs() const
{
    const bool offsetsSupported = true;
    return offsetsSupported;
}
Unknown (roddey)
committed
void ICUTransService::upperCase(XMLCh* const toUpperCase) const
{
XMLCh* outPtr = toUpperCase;
while (*outPtr)
{
*outPtr = XMLCh(Unicode::toUpperCase(UChar(*outPtr)));
Unknown (roddey)
committed
outPtr++;
}
}
// ---------------------------------------------------------------------------
// ICUTransService: The protected virtual transcoding service API
// ---------------------------------------------------------------------------
//
//  Creates a transcoder for the named encoding.
//
//  encodingName    The name of the encoding, which is handed to ICU.
//  resValue        Set to UnsupportedEncoding if ICU cannot open a
//                  converter for the name. Not touched on success.
//  blockSize       Passed on to the new transcoder.
//
//  Returns the new transcoder, or a null pointer on failure.
//
XMLTranscoder* ICUTransService::
makeNewXMLTranscoder(const  XMLCh* const            encodingName
                    ,       XMLTransService::Codes& resValue
                    , const unsigned int            blockSize)
{
    //
    //  If UChar and XMLCh are not the same size, then we have premassage the
    //  encoding name into a UChar type string.
    //
    const UChar* actualName;
    UChar* tmpName = 0;
    if (sizeof(UChar) == sizeof(XMLCh))
    {
        actualName = (const UChar*)encodingName;
    }
    else
    {
        tmpName = convertToUChar(encodingName);
        actualName = tmpName;
    }

    // Cleans up the temp name, if any, on every exit path
    ArrayJanitor<UChar> janTmp(tmpName);

    UErrorCode uerr = U_ZERO_ERROR;
    UConverter* converter = ucnv_openU(actualName, &uerr);
    if (!converter)
    {
        resValue = XMLTransService::UnsupportedEncoding;
        return 0;
    }
    return new ICUTranscoder(encodingName, converter, blockSize);
}
// ---------------------------------------------------------------------------
// ICUTranscoder: Constructors and Destructor
// ---------------------------------------------------------------------------
//
//  Adopts the passed ICU converter and, if there is a block size, allocates
//  the source offset array used by transcodeFrom().
//
//  encodingName    The name of the encoding, passed to the base class.
//  toAdopt         The ICU converter. It is closed by the destructor.
//  blockSize       Passed to the base class and used to size fSrcOffsets.
//
ICUTranscoder::ICUTranscoder(const  XMLCh* const        encodingName
                            ,       UConverter* const   toAdopt
                            , const unsigned int        blockSize) :

    XMLTranscoder(encodingName, blockSize)
    , fConverter(toAdopt)
    , fFixed(false)
    , fSrcOffsets(0)
{
    // If there is a block size, then allocate our source offset array
    if (blockSize)
        fSrcOffsets = new XMLUInt32[blockSize];

    // Remember if its a fixed size encoding
    fFixed = (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter));
}

// Releases the source offset array and closes the adopted converter
ICUTranscoder::~ICUTranscoder()
{
    delete [] fSrcOffsets;

    // If there is a converter, ask ICU to clean it up
    if (fConverter)
    {
        // <TBD> Does this actually delete the structure???
        ucnv_close(fConverter);
        fConverter = 0;
    }
}
// ---------------------------------------------------------------------------
// ICUTranscoder: The virtual transcoder API
// ---------------------------------------------------------------------------
unsigned int
ICUTranscoder::transcodeFrom(const XMLByte* const srcData
, const unsigned int srcCount
, XMLCh* const toFill
, const unsigned int maxChars
, unsigned int& bytesEaten
, unsigned char* const charSizes)
// If debugging, insure the block size is legal
#if defined(XERCES_DEBUG)
checkBlockSize(maxChars);
#endif
// Set up pointers to the start and end of the source buffer
const XMLByte* startSrc = srcData;
const XMLByte* endSrc = srcData + srcCount;
//
// And now do the target buffer. This works differently according to
// whether XMLCh and UChar are the same size or not.
//
UChar* startTarget;
if (sizeof(XMLCh) == sizeof(UChar))
startTarget = (UChar*)toFill;
else
startTarget = new UChar[maxChars];
UChar* orgTarget = startTarget;
//
// Transoode the buffer. Buffer overflow errors are normal, occuring
// when the raw input buffer holds more characters than will fit in
// the Unicode output buffer.
//
UErrorCode err = U_ZERO_ERROR;
ucnv_toUnicode
, &startTarget
, startTarget + maxChars
, (const char**)&startSrc
, (const char*)endSrc
, (fFixed ? 0 : (int32_t*)fSrcOffsets)
, false
if ((err != U_ZERO_ERROR) && (err != U_INDEX_OUTOFBOUNDS_ERROR))
{
if (orgTarget != (UChar*)toFill)
delete [] orgTarget;
if (fFixed)
{
XMLCh tmpBuf[16];
XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16);
ThrowXML2
(
TranscodingException
, XMLExcepts::Trans_BadSrcCP
, tmpBuf
, getEncodingName()
);
}
else
{
ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq);
}
// Calculate the bytes eaten and store in caller's param
bytesEaten = startSrc - srcData;
// And the characters decoded
const unsigned int charsDecoded = startTarget - orgTarget;
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
//
// Translate the array of char offsets into an array of character
// sizes, which is what the transcoder interface semantics requires.
// If its fixed, then we can optimize it.
//
if (fFixed)
{
const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);;
memset(charSizes, fillSize, maxChars);
}
else
{
//
// We have to convert the series of offsets into a series of
// sizes. If just one char was decoded, then its the total bytes
// eaten. Otherwise, do a loop and subtract out each element from
// its previous element.
//
if (charsDecoded == 1)
{
charSizes[0] = (unsigned char)bytesEaten;
}
else
{
// <TBD> Does ICU return an extra element to allow us to figure
// out the last char size? It better!!
unsigned int index;
for (index = 0; index < charsDecoded; index++)
{
charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]
- fSrcOffsets[index]);
}
}
}
//
// If XMLCh and UChar are not the same size, then we need to copy over
// the temp buffer to the new one.
//
if (sizeof(UChar) != sizeof(XMLCh))
{
XMLCh* outPtr = toFill;
startTarget = orgTarget;
for (unsigned int index = 0; index < charsDecoded; index++)
*outPtr++ = XMLCh(*startTarget++);
// And delete the temp buffer
delete [] orgTarget;
}
// Return the chars we put into the target buffer
return charsDecoded;
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
unsigned int
ICUTranscoder::transcodeTo( const XMLCh* const srcData
, const unsigned int srcCount
, XMLByte* const toFill
, const unsigned int maxBytes
, unsigned int& charsEaten
, const UnRepOpts options)
{
//
// Get a pointer to the buffer to transcode. If UChar and XMLCh are
// the same size here, then use the original. Else, create a temp
// one and put a janitor on it.
//
const UChar* srcPtr;
UChar* tmpBufPtr = 0;
if (sizeof(XMLCh) == sizeof(UChar))
{
srcPtr = (const UChar*)srcData;
}
else
{
tmpBufPtr = convertToUChar(srcData, srcCount);
srcPtr = tmpBufPtr;
}
ArrayJanitor<UChar> janTmpBuf(tmpBufPtr);
Unknown (roddey)
committed
//
// Set the appropriate callback so that it will either fail or use
// the rep char. Remember the old one so we can put it back.
//
UErrorCode err = U_ZERO_ERROR;
UConverterFromUCallback oldCB = ucnv_setFromUCallBack
(
(UConverter*)&fConverter
, (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP
: UCNV_FROM_U_CALLBACK_SUBSTITUTE
, &err
);
//
// Ok, lets transcode as many chars as we we can in one shot. The
// ICU API gives enough info not to have to do this one char by char.
//
XMLByte* startTarget = toFill;
const UChar* startSrc = srcPtr;
Unknown (roddey)
committed
err = U_ZERO_ERROR;
ucnv_fromUnicode
(
fConverter
, (char**)&startTarget
, (char*)(startTarget + maxBytes)
, &startSrc
, srcPtr + srcCount
, 0
, false
, &err
);
Unknown (roddey)
committed
// Rememember the status before we possibly overite the error code
const bool res = (err == U_ZERO_ERROR);
// Put the old handler back
err = U_ZERO_ERROR;
ucnv_setFromUCallBack(fConverter, oldCB, &err);
if (!res)
Unknown (roddey)
committed
XMLCh tmpBuf[16];
XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16);
ThrowXML2
(
TranscodingException
, XMLExcepts::Trans_Unrepresentable
, tmpBuf
, getEncodingName()
);
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
}
// Fill in the chars we ate from the input
charsEaten = startSrc - srcPtr;
// Return the chars we stored
return startTarget - toFill;
}
bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck) const
{
//
// If the passed value is really a surrogate embedded together, then
// we need to break it out into its two chars. Else just one. While
// we are ate it, convert them to UChar format if required.
//
UChar srcBuf[2];
unsigned int srcCount = 1;
if (toCheck & 0xFFFF0000)
{
srcBuf[0] = UChar((toCheck >> 10) + 0xD800);
srcBuf[1] = UChar(toCheck & 0x3FF) + 0xDC00;
srcCount++;
}
else
{
srcBuf[0] = UChar(toCheck);
}
Unknown (roddey)
committed
//
// Set the callback so that it will fail instead of using the rep char.
// Remember the old one so we can put it back.
//
UErrorCode err = U_ZERO_ERROR;
Unknown (roddey)
committed
UConverterFromUCallback oldCB = ucnv_setFromUCallBack
(
(UConverter*)&fConverter
, UCNV_FROM_U_CALLBACK_STOP
, &err
);
// Set upa temp buffer to format into. Make it more than big enough
char tmpBuf[64];
char* startTarget = tmpBuf;
const UChar* startSrc = srcBuf;
Unknown (roddey)
committed
err = U_ZERO_ERROR;
ucnv_fromUnicode
(
fConverter
, &startTarget
, startTarget + 64
, &startSrc
, srcBuf + srcCount
, 0
, false
, &err
);
Unknown (roddey)
committed
// Save the result before we overight the error code
const bool res = (err == U_ZERO_ERROR);
Unknown (roddey)
committed
// Put the old handler back
err = U_ZERO_ERROR;
ucnv_setFromUCallBack(fConverter, oldCB, &err);
return res;
// ---------------------------------------------------------------------------
// ICULCPTranscoder: Constructors and Destructor
// ---------------------------------------------------------------------------
// Adopts the passed ICU converter, which is closed by the destructor
ICULCPTranscoder::ICULCPTranscoder(UConverter* const toAdopt) :

    fConverter(toAdopt)
{
}
// Closes the adopted converter, if we have one
ICULCPTranscoder::~ICULCPTranscoder()
{
    // Nothing to do unless a converter was adopted
    if (!fConverter)
        return;

    // <TBD> Does this actually delete the structure???
    ucnv_close(fConverter);
    fConverter = 0;
}
// ---------------------------------------------------------------------------
// ICULCPTranscoder: Implementation of the transcoding API
// ---------------------------------------------------------------------------
unsigned int ICULCPTranscoder::calcRequiredSize(const XMLCh* const srcText)
{
if (!srcText)
//
// We do two different versions of this, according to whether XMLCh
// is the same size as UChar or not.
//
UErrorCode err = U_ZERO_ERROR;
int32_t targetCap;
if (sizeof(XMLCh) == sizeof(UChar))
{
// Use a faux scope to synchronize while we do this
{
XMLMutexLock lockConverter(&fMutex);
targetCap = ucnv_fromUChars
(
fConverter
, 0
, 0
, (const UChar*)srcText
, &err
);
}
}
else
{
// Copy the source to a local temp
UChar* tmpBuf = convertToUChar(srcText);
ArrayJanitor<UChar> janTmp(tmpBuf);
// Use a faux scope to synchronize while we do this
{
XMLMutexLock lockConverter(&fMutex);
targetCap = ucnv_fromUChars
(
fConverter
, 0
, 0
, tmpBuf
, &err
);
}
}
if (err != U_BUFFER_OVERFLOW_ERROR)
return 0;
return (unsigned int)targetCap;
}
unsigned int ICULCPTranscoder::calcRequiredSize(const char* const srcText)
{
if (!srcText)
return 0;
int32_t targetCap;
// Use a faux scope to synchronize while we do this
{
XMLMutexLock lockConverter(&fMutex);
targetCap = ucnv_toUChars
(
fConverter
, 0
, 0
, srcText
, strlen(srcText)
, &err
);
}
if (err != U_BUFFER_OVERFLOW_ERROR)
// Subtract one since it includes the terminator space
return (unsigned int)(targetCap - 1);
char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode)
{
char* retBuf = 0;
// Check for a couple of special cases
if (!toTranscode)
return retBuf;
if (!*toTranscode)
{
retBuf = new char[1];
retBuf[0] = 0;
return retBuf;
}
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
//
// Get the length of the source string since we'll have to use it in
// a couple places below.
//
const unsigned int srcLen = XMLString::stringLen(toTranscode);
//
// If XMLCh and UChar are not the same size, then we have to make a
// temp copy of the text to pass to ICU.
//
const UChar* actualSrc;
UChar* ncActual = 0;
if (sizeof(XMLCh) == sizeof(UChar))
{
actualSrc = (const UChar*)toTranscode;
}
else
{
// Allocate a non-const temp buf, but store it also in the actual
ncActual = convertToUChar(toTranscode);
actualSrc = ncActual;
}
// Insure that the temp buffer, if any, gets cleaned up via the nc pointer
ArrayJanitor<UChar> janTmp(ncActual);
// Caculate a return buffer size not too big, but less likely to overflow
int32_t targetLen = (int32_t)(srcLen * 1.25);
// Allocate the return buffer
retBuf = new char[targetLen + 1];
// Lock now while we call the converter. Use a faux block to do the
// lock so that it unlocks immediately afterwards.
int32_t targetCap;
{
XMLMutexLock lockConverter(&fMutex);
targetCap = ucnv_fromUChars
(
fConverter
, retBuf
, targetLen + 1
, actualSrc
, &err
);
}
// If targetLen is not enough then buffer overflow might occur
if (err == U_BUFFER_OVERFLOW_ERROR)
//
// Reset the error, delete the old buffer, allocate a new one,
// and try again.
//
retBuf = new char[targetCap + 1];
// Lock again before we retry
XMLMutexLock lockConverter(&fMutex);
targetCap = ucnv_fromUChars
(
fConverter
, retBuf
, targetCap
, actualSrc
, &err
);
{
delete [] retBuf;
return 0;
}
// Cap it off and return
retBuf[targetCap] = 0;
return retBuf;
}
XMLCh* ICULCPTranscoder::transcode(const char* const toTranscode)
{
// Watch for a few pyscho corner cases
if (!toTranscode)
return 0;
if (!*toTranscode)
{
XMLCh* retVal = new XMLCh[1];
retVal[0] = 0;
return retVal;
}
//
// Get the length of the string to transcode. The Unicode string will
// almost always be no more chars than were in the source, so this is
// the best guess as to the storage needed.
//
const int32_t srcLen = (int32_t)strlen(toTranscode);
// We need a target buffer of UChars to fill in
UChar* targetBuf = 0;
// Now lock while we do these calculations
int32_t targetCap;
{
XMLMutexLock lockConverter(&fMutex);
//
// Here we don't know what the target length will be so use 0 and
// expect an U_BUFFER_OVERFLOW_ERROR in which case it'd get resolved
// by the correct capacity value.
//
targetCap = ucnv_toUChars
(
fConverter
, 0
, 0
, toTranscode
, srcLen
, &err
);
if (err != U_BUFFER_OVERFLOW_ERROR)
return 0;
err = U_ZERO_ERROR;
targetBuf = new UChar[targetCap + 1];
ucnv_toUChars
(
fConverter
, targetBuf
, targetCap
, toTranscode
, srcLen
, &err
);
}
{
// Clean up if we got anything allocated
delete [] targetBuf;
}
// Cap it off to make sure
targetBuf[targetCap] = 0;
//
// If XMLCh and UChar are the same size, then we can return retVal
// as is. Else, we have to allocate another buffer and copy the data