From 97bcc64e89d78c50f2cfc8bc3edbcdd874445e74 Mon Sep 17 00:00:00 2001 From: Tinny Ng <tng@apache.org> Date: Tue, 11 Dec 2001 15:10:28 +0000 Subject: [PATCH] More changes to IconvFBSDTransService. Allow using "old" TransService implementation (via '-t native' option to runConfigure) or to employ libiconv (it is a part of FreeBSD ports-collection) services. By Max Gotlib. git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@173371 13f79535-47bb-0310-9956-ffa450edef68 --- src/runConfigure | 8 +- .../IconvFBSD/IconvFBSDTransService.cpp | 1010 ++++++++++++++--- .../IconvFBSD/IconvFBSDTransService.hpp | 276 ++++- 3 files changed, 1133 insertions(+), 161 deletions(-) diff --git a/src/runConfigure b/src/runConfigure index 0f9f88354..c11dadc7b 100755 --- a/src/runConfigure +++ b/src/runConfigure @@ -376,10 +376,12 @@ case $transcoder in transcodingDefines="-DXML_USE_ICONV390_TRANSCODER" ; TRANSCODER=Iconv390 ;; IconvFBSD) - TRANSCODER=IconvFBSD ;; + TRANSCODER=IconvFBSD ; + transcodingDefines="-DXML_USE_LIBICONV -I/usr/local/include" ;; native) - ;; - + if test $platform = "freebsd"; then + TRANSCODER=IconvFBSD ; + fi ;; *) echo "I do not recognize the transcoder option '$transcoder'. Please type '${0} -h' for help." exit ${ERROR_EXIT_CODE};; diff --git a/src/util/Transcoders/IconvFBSD/IconvFBSDTransService.cpp b/src/util/Transcoders/IconvFBSD/IconvFBSDTransService.cpp index 14688b252..46ad95bc3 100644 --- a/src/util/Transcoders/IconvFBSD/IconvFBSDTransService.cpp +++ b/src/util/Transcoders/IconvFBSD/IconvFBSDTransService.cpp @@ -56,32 +56,125 @@ /* * $Log$ + * Revision 1.2 2001/12/11 15:10:14 tng + * More changes to IconvFBSDTransService. Allow using "old" TransService implementation (via '-t native' option to runConfigure) or + * to employ libiconv (it is a part of FreeBSD ports-collection) services. By Max Gotlib. + * * Revision 1.1 2001/12/03 14:45:11 tng * FreeBSD native transcoder (IconvFBSD) added by Max Gotlib. 
* */ - // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- +#include <ctype.h> + +#ifdef XML_USE_LIBICONV +# include <locale.h> +# include <iconv.h> +# include <errno.h> +# include <machine/endian.h> + +// --------------------------------------------------------------------------- +// Description of encoding schemas, supported by iconv() +// --------------------------------------------------------------------------- +typedef struct __IconvFBSDEncoding { + const char* fSchema; // schema name + size_t fUChSize; // size of the character + unsigned int fUBO; // byte order, relative to the host +} IconvFBSDEncoding; + +static const IconvFBSDEncoding gIconvFBSDEncodings[] = { + { "ucs-2-internal", 2, LITTLE_ENDIAN }, + { "ucs-4-internal", 4, LITTLE_ENDIAN }, + { "iso-10646-ucs-2", 4, BIG_ENDIAN }, + { "iso-10646-ucs-4", 4, BIG_ENDIAN }, + { "iso-10646-utf-16", 2, BIG_ENDIAN }, + { NULL, 0, 0 } +}; + +//-------------------------------------------------- +// Macro-definitions to translate "native unicode" +// characters <-> XMLCh with different host byte order +// and encoding schemas. 
+ +# if BYTE_ORDER == LITTLE_ENDIAN +# define IXMLCh2WC16(x,w) \ + *(w) = ((*(x)) >> 8) & 0xFF; \ + *((w)+1) = (*(x)) & 0xFF +# define IWC162XMLCh(w,x) *(x) = ((*(w)) << 8) | (*((w)+1)) +# define XMLCh2WC16(x,w) \ + *(w) = (*(x)) & 0xFF; \ + *((w)+1) = ((*(x)) >> 8) & 0xFF +# define WC162XMLCh(w,x) *(x) = ((*((w)+1)) << 8) | (*(w)) + +# define IXMLCh2WC32(x,w) \ + *(w) = ((*(x)) >> 24) & 0xFF; \ + *((w)+1) = ((*(x)) >> 16) & 0xFF; \ + *((w)+2) = ((*(x)) >> 8) & 0xFF; \ + *((w)+3) = (*(x)) & 0xFF +# define IWC322XMLCh(w,x) \ + *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) | \ + ((*((w)+2)) << 8) | (*((w)+3)) +# define XMLCh2WC32(x,w) \ + *((w)+3) = ((*(x)) >> 24) & 0xFF; \ + *((w)+2) = ((*(x)) >> 16) & 0xFF; \ + *((w)+1) = ((*(x)) >> 8) & 0xFF; \ + *(w) = (*(x)) & 0xFF +# define WC322XMLCh(w,x) \ + *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) | \ + ((*((w)+1)) << 8) | (*(w)) + +# else /* BYTE_ORDER != LITTLE_ENDIAN */ + +# define XMLCh2WC16(x,w) \ + *(w) = ((*(x)) >> 8) & 0xFF; \ + *((w)+1) = (*(x)) & 0xFF +# define WC162XMLCh(w,x) *(x) = ((*(w)) << 8) | (*((w)+1)) +# define IXMLCh2WC16(x,w) \ + *(w) = (*(x)) & 0xFF; \ + *((w)+1) = ((*(x)) >> 8) & 0xFF +# define IWC162XMLCh(w,x) *(x) = ((*((w)+1)) << 8) | (*(w)) + +# define XMLCh2WC32(x,w) \ + *(w) = ((*(x)) >> 24) & 0xFF; \ + *((w)+1) = ((*(x)) >> 16) & 0xFF; \ + *((w)+2) = ((*(x)) >> 8) & 0xFF; \ + *((w)+3) = (*(x)) & 0xFF +# define WC322XMLCh(w,x) \ + *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) | \ + ((*((w)+2)) << 8) | (*((w)+3)) +# define IXMLCh2WC32(x,w) \ + *((w)+3) = ((*(x)) >> 24) & 0xFF; \ + *((w)+2) = ((*(x)) >> 16) & 0xFF; \ + *((w)+1) = ((*(x)) >> 8) & 0xFF; \ + *(w) = (*(x)) & 0xFF +# define IWC322XMLCh(w,x) \ + *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) | \ + ((*((w)+1)) << 8) | (*(w)) +# endif /* BYTE_ORDER == LITTLE_ENDIAN */ + +#else /* !XML_USE_LIBICONV */ + +# if __FreeBSD_cc_version > 430000 +# include <wchar.h> +# else +# define wint_t XMLCh +# endif + +#endif /* XML_USE_LIBICONV */ + 
#include <util/XMLUniDefs.hpp> #include <util/XMLUni.hpp> +#include <util/PlatformUtils.hpp> +#include <util/TranscodingException.hpp> #include "IconvFBSDTransService.hpp" -#include <ctype.h> - -#if __FreeBSD_cc_version > 430000 -# include <wchar.h> -#else -# define wint_t XMLCh -#endif - #include <string.h> #include <stdlib.h> #include <stdio.h> - // --------------------------------------------------------------------------- // Local, const data // --------------------------------------------------------------------------- @@ -107,6 +200,8 @@ static unsigned int getWideCharLength(const XMLCh* const src) return len; } +#ifndef XML_USE_LIBICONV + // --------------------------------------------------------------------------- // FreeBSD got the wide-characters support since 4.0 version. But (at least // up to the 4.4) this support differs from "others" in that the xxstoyys() @@ -123,7 +218,7 @@ static size_t fbsd_wcstombs(char *dest, const wchar_t *src, size_t n) char* ptr; size_t slen; wchar_t* wptr; - + if (dest) return ::wcstombs(dest, src, n); if (!src) @@ -164,54 +259,428 @@ static size_t fbsd_mbstowcs(wchar_t *dest, const char *src, size_t n) static wint_t fbsd_towupper(wint_t ch) { - char buf[16]; - wcstombs(buf, (wchar_t*) &ch, 1); - return toupper(ch); + if (ch < 0x7F) + return toupper(ch); + char buf[16]; + wchar_t wc = wchar_t(ch); + wcstombs(buf, &wc, 1); + return toupper(*buf); +} + +#else /* XML_USE_LIBICONV */ + +//---------------------------------------------------------------------------- +// There is implementation of the libiconv for FreeBSD (available through the +// ports collection). The following is a wrapper around the iconv(). 
+//---------------------------------------------------------------------------- + +IconvFBSDCD::IconvFBSDCD () + : fTmpXMLBuf(0), fTmpXMLSize(0), fTmpUBuf(0), fTmpUSize(0), + fUChSize(0), fUBO(LITTLE_ENDIAN), + fCDTo((iconv_t)-1), fCDFrom((iconv_t)-1) +{ +} + +IconvFBSDCD::IconvFBSDCD ( iconv_t cd_from, + iconv_t cd_to, + size_t uchsize, + unsigned int ubo ) + : fTmpXMLBuf(0), fTmpXMLSize(0), + fTmpUBuf(0), fTmpUSize(0), + fUChSize(uchsize), fUBO(ubo), + fCDTo(cd_to), fCDFrom(cd_from) +{ + if (fCDFrom == (iconv_t) -1 || fCDTo == (iconv_t) -1) { + XMLPlatformUtils::panic (XMLPlatformUtils::Panic_NoTransService); + } +} + +IconvFBSDCD::~IconvFBSDCD() +{ + if (fTmpXMLBuf) + delete [] fTmpXMLBuf; + if (fTmpUBuf) + delete [] fTmpUBuf; +} + +// Convert "native unicode" character into XMLCh +void IconvFBSDCD::mbcToXMLCh (const char *mbc, XMLCh *toRet) const +{ + if (fUBO == LITTLE_ENDIAN) { + if (fUChSize == sizeof(XMLCh)) + *toRet = *((XMLCh*) mbc); + else if (fUChSize == 2) { + WC162XMLCh( mbc, toRet ); + } else { + WC322XMLCh( mbc, toRet ); + } + } else { + if (fUChSize == 2) { + IWC162XMLCh( mbc, toRet ); + } else { + IWC322XMLCh( mbc, toRet ); + } + } +} + +// Convert XMLCh into "native unicode" character +void IconvFBSDCD::xmlChToMbc (XMLCh xch, char *mbc) const +{ + if (fUBO == LITTLE_ENDIAN) { + if (fUChSize == sizeof(XMLCh)) { + memcpy (mbc, &xch, fUChSize); + return; + } + if (fUChSize == 2) { + XMLCh2WC16( &xch, mbc ); + } else { + XMLCh2WC32( &xch, mbc ); + } + } else { + if (fUChSize == 2) { + IXMLCh2WC16( &xch, mbc ); + } else { + IXMLCh2WC32( &xch, mbc ); + } + } +} + +// Return uppercase equivalent for XMLCh +XMLCh IconvFBSDCD::toUpper (const XMLCh ch) const +{ + if (ch < 0x7F) + return toupper(ch); + + char wcbuf[fUChSize * 2]; + char tmpArr[4]; + + xmlChToMbc (ch, wcbuf); + char* ptr = wcbuf; + size_t len = fUChSize; + char *pTmpArr = tmpArr; + size_t bLen = 2; + { + XMLMutexLock lockConverter(&fMutex); + + if (::iconv (fCDTo, (const char**) &ptr, &len, 
+ &pTmpArr, &bLen) == (size_t) -1) + return 0; + tmpArr[1] = ::toupper (*tmpArr); + *tmpArr = tmpArr[1]; + len = 1; + pTmpArr = wcbuf; + bLen = fUChSize; + ptr = tmpArr; + if (::iconv (fCDFrom, (const char **)&ptr, &len, + &pTmpArr, &bLen) == (size_t) -1) + return 0; + } + mbcToXMLCh (wcbuf, (XMLCh*) &ch); + return ch; +} + +// Check if passed characters belongs to the :space: class +bool IconvFBSDCD::isSpace(const XMLCh toCheck) const +{ + if (toCheck < 0x7F) + return toupper(toCheck); + char wcbuf[fUChSize * 2]; + char tmpArr[4]; + xmlChToMbc (toCheck, wcbuf); + char* ptr = wcbuf; + size_t len = fUChSize; + char *pTmpArr = tmpArr; + size_t bLen = 2; + { + XMLMutexLock lockConverter(&fMutex); + + if (::iconv (fCDTo, (const char**) &ptr, &len, + &pTmpArr, &bLen) == (size_t) -1) + return 0; + } + return isspace(*tmpArr); +} + +// Allocate internal buffer space, large enough to hold 'cnt' +// XMLCh characters, and fill it with data, supplyed in the array +// of "native unicode" characters. +XMLCh* IconvFBSDCD::xmlFromMbs (const char *str, size_t cnt ) +{ + if (str == NULL && cnt == 0) + return NULL; + if (cnt > fTmpXMLSize) { + delete [] fTmpXMLBuf; + fTmpXMLBuf = NULL; + fTmpXMLSize = 0; + } + if (fTmpXMLBuf == NULL) { + fTmpXMLBuf = new XMLCh[cnt]; + if (fTmpXMLBuf == NULL) + return NULL; + fTmpXMLSize = cnt; + } + if (str == NULL) + return fTmpXMLBuf; + return mbsToXML (str, cnt, fTmpXMLBuf, cnt); +} + +// Fill array of XMLCh characters with data, supplyed in the array +// of "native unicode" characters. +XMLCh* IconvFBSDCD::mbsToXML +( + const char* mbs_str + , size_t mbs_cnt + , XMLCh* xml_str + , size_t xml_cnt +) const +{ + if (mbs_str == NULL || mbs_cnt == 0 || xml_str == NULL || xml_cnt == 0) + return NULL; + size_t cnt = (mbs_cnt < xml_cnt) ? 
mbs_cnt : xml_cnt; + if (fUBO == LITTLE_ENDIAN) { + if (fUChSize == sizeof(XMLCh)) { + // null-transformation + memcpy (xml_str, mbs_str, fUChSize * cnt); + return xml_str; + } + if (fUChSize == 2) + for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { + WC162XMLCh( mbs_str, xml_str + i); + } + else + for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { + WC322XMLCh( mbs_str, xml_str + i ); + } + } else { + if (fUChSize == 2) + for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { + IWC162XMLCh( mbs_str, xml_str + i ); + } + else + for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { + IWC322XMLCh( mbs_str, xml_str + i ); + } + } + return xml_str; +} + +// Allocate internal buffer space, large enough to hold 'cnt' +// "native unicode" characters, and fill it with data, supplyed +// in the array of XMLCh characters. +char* IconvFBSDCD::mbsFromXML (const XMLCh *str, size_t cnt) +{ + if (str == NULL && cnt == 0) + return NULL; + if (cnt > fTmpUSize) { + delete [] fTmpUBuf; + fTmpUBuf = NULL; + fTmpUSize = 0; + } + if (fTmpUBuf == NULL) { + fTmpUBuf = new char[cnt * fUChSize]; + if (fTmpUBuf == NULL) + return NULL; + fTmpUSize = cnt; + } + if (str == NULL) + return fTmpUBuf; + return xmlToMbs (str, cnt, fTmpUBuf, cnt); +} + +// Fill array of "native unicode" characters with data, supplyed +// in the array of XMLCh characters. +char* IconvFBSDCD::xmlToMbs +( + const XMLCh* xml_str + , size_t xml_cnt + , char* mbs_str + , size_t mbs_cnt +) const +{ + if (mbs_str == NULL || mbs_cnt == 0 || xml_str == NULL || xml_cnt == 0) + return NULL; + size_t cnt = (mbs_cnt < xml_cnt) ? 
mbs_cnt : xml_cnt; + char *toReturn = mbs_str; + if (fUBO == LITTLE_ENDIAN) { + if (fUChSize == sizeof(XMLCh)) { + // null-transformation + memcpy (mbs_str, xml_str, fUChSize * cnt); + return toReturn; + } + if (fUChSize == 2) + for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { + XMLCh2WC16( xml_str, mbs_str ); + } + else + for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { + XMLCh2WC32( xml_str, mbs_str ); + } + } else { + if (fUChSize == 2) + for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { + IXMLCh2WC16( xml_str, mbs_str ); + } + else + for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { + IXMLCh2WC32( xml_str, mbs_str ); + } + } + return toReturn; } +size_t IconvFBSDCD::iconvFrom ( const char *fromPtr, + size_t *fromLen, + char **toPtr, + size_t toLen ) const +{ + XMLMutexLock lockConverter(&fMutex); + return ::iconv (fCDFrom, &fromPtr, fromLen, toPtr, &toLen); +} + +size_t IconvFBSDCD::iconvTo ( const char *fromPtr, + size_t *fromLen, + char **toPtr, + size_t toLen ) const +{ + XMLMutexLock lockConverter(&fMutex); + return ::iconv (fCDTo, &fromPtr, fromLen, toPtr, &toLen); +} + +#endif /* !XML_USE_LIBICONV */ + + // --------------------------------------------------------------------------- // IconvFBSDTransService: Constructors and Destructor // --------------------------------------------------------------------------- + IconvFBSDTransService::IconvFBSDTransService() +#ifdef XML_USE_LIBICONV + : IconvFBSDCD(), fUnicodeCP(0) +#endif /* XML_USE_LIBICONV */ { +#ifdef XML_USE_LIBICONV + + // Try to obtain local (host) characterset through the environment + char* fLocalCP = setlocale (LC_CTYPE, ""); + if (fLocalCP == NULL) + fLocalCP = "iso-8859-1"; // fallback locale + else { + char *ptr = strchr (fLocalCP, '.'); + if (ptr == NULL) + fLocalCP = "iso-8859-1"; // fallback locale + else + fLocalCP = ptr + 1; + } + + // Select the native unicode characters encoding schema + const IconvFBSDEncoding 
*eptr; + // first - try to use the schema with character size, equil to XMLCh + for (eptr = gIconvFBSDEncodings; eptr->fSchema; eptr++) { + if (eptr->fUChSize != sizeof(XMLCh)) + continue; + // try to create conversion descriptor + iconv_t cd_to = iconv_open(fLocalCP, eptr->fSchema); + if (cd_to == (iconv_t)-1) + continue; + iconv_t cd_from = iconv_open(eptr->fSchema, fLocalCP); + if (cd_to == (iconv_t)-1) { + iconv_close (cd_to); + continue; + } + // got it + setUChSize(eptr->fUChSize); + setUBO(eptr->fUBO); + setCDTo(cd_to); + setCDFrom(cd_from); + fUnicodeCP = eptr->fSchema; + break; + } + if (fUnicodeCP == NULL) + // try to use any known schema + for (eptr = gIconvFBSDEncodings; eptr->fSchema; eptr++) { + // try to create conversion descriptor + iconv_t cd_to = iconv_open(fLocalCP, eptr->fSchema); + if (cd_to == (iconv_t)-1) + continue; + iconv_t cd_from = iconv_open(eptr->fSchema, fLocalCP); + if (cd_to == (iconv_t)-1) { + iconv_close (cd_to); + continue; + } + // got it + setUChSize(eptr->fUChSize); + setUBO(eptr->fUBO); + setCDTo(cd_to); + setCDFrom(cd_from); + fUnicodeCP = eptr->fSchema; + break; + } + + if (fUnicodeCP == NULL || cdTo() == (iconv_t)-1 || cdFrom() == (iconv_t)-1) + XMLPlatformUtils::panic (XMLPlatformUtils::Panic_NoTransService); + +#endif /* XML_USE_LIBICONV */ } IconvFBSDTransService::~IconvFBSDTransService() { +#ifdef XML_USE_LIBICONV + if (cdTo() != (iconv_t) -1) + iconv_close (cdTo()); + if (cdFrom() != (iconv_t) -1) + iconv_close (cdFrom()); +#endif /* XML_USE_LIBICONV */ } - // --------------------------------------------------------------------------- // IconvFBSDTransService: The virtual transcoding service API // --------------------------------------------------------------------------- -int IconvFBSDTransService::compareIString( const XMLCh* const comp1 - , const XMLCh* const comp2) +int IconvFBSDTransService::compareIString(const XMLCh* const comp1 + , const XMLCh* const comp2) { const XMLCh* cptr1 = comp1; const XMLCh* cptr2 = 
comp2; - while ( (*cptr1 != 0) && (*cptr2 != 0) ) - { +#ifndef XML_USE_LIBICONV + + while ( (*cptr1 != 0) && (*cptr2 != 0) ) { wint_t wch1 = fbsd_towupper(*cptr1); wint_t wch2 = fbsd_towupper(*cptr2); if (wch1 != wch2) break; - cptr1++; cptr2++; } return (int) ( fbsd_towupper(*cptr1) - fbsd_towupper(*cptr2) ); + +#else /* XML_USE_LIBICONV */ + + XMLCh c1 = 0; + XMLCh c2 = 0; + while ( (*cptr1 != 0) && (*cptr2 != 0) ) { + c1 = toUpper(*cptr1); + c2 = toUpper(*cptr2); + if (c1 != c2) + break; + cptr1++; + cptr2++; + } + return (int) ( c1 - c2 ); + +#endif /* !XML_USE_LIBICONV */ } -int IconvFBSDTransService::compareNIString( const XMLCh* const comp1 - , const XMLCh* const comp2 - , const unsigned int maxChars) +int IconvFBSDTransService::compareNIString(const XMLCh* const comp1 + , const XMLCh* const comp2 + , const unsigned int maxChars) { unsigned int n = 0; const XMLCh* cptr1 = comp1; const XMLCh* cptr2 = comp2; +#ifndef XML_USE_LIBICONV + while (true && maxChars) { wint_t wch1 = fbsd_towupper(*cptr1); @@ -235,6 +704,33 @@ int IconvFBSDTransService::compareNIString( const XMLCh* const comp1 break; } +#else /* XML_USE_LIBICONV */ + + while (true && maxChars) + { + XMLCh c1 = toUpper(*cptr1); + XMLCh c2 = toUpper(*cptr2); + + if (c1 != c2) + return (int) (c1 - c2); + + // If either ended, then both ended, so equal + if (!*cptr1 || !*cptr2) + break; + + cptr1++; + cptr2++; + + // Bump the count of chars done. If it equals the count then we + // are equal for the requested count, so break out and return + // equal. 
+ n++; + if (n == maxChars) + break; + } + +#endif /* !XML_USE_LIBICONV */ + return 0; } @@ -247,16 +743,27 @@ const XMLCh* IconvFBSDTransService::getId() const bool IconvFBSDTransService::isSpace(const XMLCh toCheck) const { +#ifndef XML_USE_LIBICONV + if (toCheck < 0x7F) + return toupper(toCheck); char buf[16]; - mbstowcs( (wchar_t*) &toCheck, buf, 1 ); + wchar_t wc = wchar_t(toCheck); + mbstowcs( &wc, buf, 1 ); return (isspace(*buf) != 0); +#else /* XML_USE_LIBICONV */ + return IconvFBSDCD::isSpace(toCheck); +#endif /* !XML_USE_LIBICONV */ } XMLLCPTranscoder* IconvFBSDTransService::makeNewLCPTranscoder() { // Just allocate a new transcoder of our type +#ifndef XML_USE_LIBICONV return new IconvFBSDLCPTranscoder; +#else /* XML_USE_LIBICONV */ + return new IconvFBSDLCPTranscoder (cdFrom(), cdTo(), uChSize(), UBO()); +#endif /* !XML_USE_LIBICONV */ } bool IconvFBSDTransService::supportsSrcOfs() const @@ -264,15 +771,18 @@ bool IconvFBSDTransService::supportsSrcOfs() const return true; } - // --------------------------------------------------------------------------- // IconvFBSDTransService: The protected virtual transcoding service API // --------------------------------------------------------------------------- XMLTranscoder* -IconvFBSDTransService::makeNewXMLTranscoder(const XMLCh* const encodingName - , XMLTransService::Codes& resValue - , const unsigned int ) +IconvFBSDTransService::makeNewXMLTranscoder +( + const XMLCh* const encodingName + , XMLTransService::Codes& resValue + , const unsigned int blockSize +) { +#ifndef XML_USE_LIBICONV // // NOTE: We don't use the block size here // @@ -283,6 +793,43 @@ IconvFBSDTransService::makeNewXMLTranscoder(const XMLCh* const enco // resValue = XMLTransService::UnsupportedEncoding; return 0; +#else /* XML_USE_LIBICONV */ + + resValue = XMLTransService::UnsupportedEncoding; + + IconvFBSDTranscoder *newTranscoder = NULL; + size_t wLent = getWideCharLength (encodingName); + char* encWName = mbsFromXML (encodingName, 
wLent); + char encLocal[wLent+1]; + char* pEnclocal = encLocal; + size_t len = wLent; + wLent *= uChSize(); + if (iconvTo (encWName, &wLent, &pEnclocal, len) == (size_t) -1 && + errno != E2BIG) + return 0; + encLocal[len] = 0; + + iconv_t cd_from, cd_to; + cd_from = iconv_open (fUnicodeCP, encLocal); + if (cd_from == (iconv_t)-1) { + resValue = XMLTransService::SupportFilesNotFound; + return NULL; + } + cd_to = iconv_open (encLocal, fUnicodeCP); + if (cd_to == (iconv_t)-1) { + resValue = XMLTransService::SupportFilesNotFound; + iconv_close (cd_from); + return NULL; + } + newTranscoder = new IconvFBSDTranscoder (encodingName, + blockSize, + cd_from, cd_to, + uChSize(), UBO()); + if (newTranscoder) + resValue = XMLTransService::Ok; + return newTranscoder; + +#endif /* !XML_USE_LIBICONV */ } void IconvFBSDTransService::upperCase(XMLCh* const toUpperCase) const @@ -290,34 +837,70 @@ void IconvFBSDTransService::upperCase(XMLCh* const toUpperCase) const XMLCh* outPtr = toUpperCase; while (*outPtr) { +#ifndef XML_USE_LIBICONV *outPtr = fbsd_towupper(*outPtr); +#else /* XML_USE_LIBICONV */ + *outPtr = toUpper(*outPtr); +#endif /* !XML_USE_LIBICONV */ outPtr++; - } + } } - // --------------------------------------------------------------------------- // IconvFBSDLCPTranscoder: The virtual transcoder API // --------------------------------------------------------------------------- -unsigned int IconvFBSDLCPTranscoder::calcRequiredSize(const char* const srcText) +unsigned int +IconvFBSDLCPTranscoder::calcRequiredSize (const char* const srcText) { if (!srcText) return 0; + +#ifndef XML_USE_LIBICONV - const unsigned int retVal = fbsd_mbstowcs(NULL, srcText, 0); - + unsigned int retVal = fbsd_mbstowcs(NULL, srcText, 0); if (retVal == ~0) return 0; return retVal; + +#else /* XML_USE_LIBICONV */ + + char *tmpWideArr = mbsFromXML (NULL, gTempBuffArraySize); + if (tmpWideArr == NULL) + return 0; + size_t len, srcLen; + size_t totalLen = 0; + + len = srcLen = strlen(srcText); + if 
(len == 0) + return 0; + + size_t bLen = gTempBuffArraySize * uChSize(); + for (;;) { + char *pTmpArr = tmpWideArr; + const char *ptr = srcText + srcLen - len; + size_t rc = iconvFrom(ptr, &len, &pTmpArr, bLen); + if (rc == (size_t) -1 && errno != E2BIG) + return 0; + size_t sz = pTmpArr - (char *) tmpWideArr; + totalLen += sz; + if (sz == 0 || len == 0) + break; + } + return totalLen / uChSize(); + +#endif /* XML_USE_LIBICONV */ } -unsigned int IconvFBSDLCPTranscoder::calcRequiredSize(const XMLCh* const srcText) +unsigned int +IconvFBSDLCPTranscoder::calcRequiredSize(const XMLCh* const srcText) { if (!srcText) return 0; +#ifndef XML_USE_LIBICONV + unsigned int wLent = getWideCharLength(srcText); wchar_t tmpWideCharArr[gTempBuffArraySize]; wchar_t* allocatedArray = 0; @@ -336,11 +919,39 @@ unsigned int IconvFBSDLCPTranscoder::calcRequiredSize(const XMLCh* const srcText const unsigned int retVal = fbsd_wcstombs(NULL, wideCharBuf, 0); if (allocatedArray) - delete [] allocatedArray; + delete [] allocatedArray; if (retVal == ~0) return 0; return retVal; + +#else /* XML_USE_LIBICONV */ + + size_t wLent = getWideCharLength(srcText); + if (wLent == 0) + return 0; + char *wBuf = mbsFromXML (srcText, wLent); + if (wBuf == NULL) + return 0; + char tmpBuff[ gTempBuffArraySize ]; + size_t len; + size_t totalLen = 0; + + len = wLent * uChSize(); + for (;;) { + char *pTmpArr = tmpBuff; + const char *ptr = wBuf + wLent * uChSize() - len; + size_t rc = iconvTo(ptr, &len, &pTmpArr, gTempBuffArraySize); + if (rc == (size_t) -1 && errno != E2BIG) + return 0; + size_t sz = pTmpArr - tmpBuff; + totalLen += sz; + if (sz == 0 || len == 0) + break; + } + return totalLen; + +#endif /* !XML_USE_LIBICONV */ } @@ -354,6 +965,8 @@ char* IconvFBSDLCPTranscoder::transcode(const XMLCh* const toTranscode) { unsigned int wLent = getWideCharLength(toTranscode); +#ifndef XML_USE_LIBICONV + wchar_t tmpWideCharArr[gTempBuffArraySize]; wchar_t* allocatedArray = 0; wchar_t* wideCharBuf = 0; @@ -373,75 
+986,109 @@ char* IconvFBSDLCPTranscoder::transcode(const XMLCh* const toTranscode) const size_t neededLen = fbsd_wcstombs(NULL, wideCharBuf, 0); if (neededLen == -1) { - if (allocatedArray) - delete [] allocatedArray; + if (allocatedArray) + delete [] allocatedArray; return 0; } retVal = new char[neededLen + 1]; fbsd_wcstombs(retVal, wideCharBuf, neededLen); retVal[neededLen] = 0; - if (allocatedArray) - delete [] allocatedArray; + if (allocatedArray) + delete [] allocatedArray; + +#else /* XML_USE_LIBICONV */ + + // Calc needed size. + const size_t neededLen = calcRequiredSize (toTranscode); + if (neededLen == 0) + return 0; + // allocate output buffer + retVal = new char[neededLen + 1]; + if (retVal == NULL) + return 0; + // prepare the original + char *wideCharBuf = mbsFromXML (toTranscode, wLent); + if (wideCharBuf == NULL) + return 0; + // perform conversion + wLent *= uChSize(); + char *ptr = retVal; + if (iconvTo(wideCharBuf, &wLent, &ptr, neededLen) == (size_t)-1) + return 0; + retVal[neededLen] = 0; + +#endif /* !XML_USE_LIBICONV */ + } else { retVal = new char[1]; + if (retVal == NULL) + return 0; retVal[0] = 0; } return retVal; } -bool IconvFBSDLCPTranscoder::transcode( const XMLCh* const toTranscode - , char* const toFill - , const unsigned int maxBytes) +bool IconvFBSDLCPTranscoder::transcode( const XMLCh* const toTranscode + , char* const toFill + , const unsigned int maxBytes) { // Watch for a couple of pyscho corner cases - if (!toTranscode || !maxBytes) - { + if (!toTranscode || !maxBytes) { toFill[0] = 0; return true; } - - if (!*toTranscode) - { + if (!*toTranscode) { toFill[0] = 0; return true; } unsigned int wLent = getWideCharLength(toTranscode); + if (wLent > maxBytes) + wLent = maxBytes; + +#ifndef XML_USE_LIBICONV + wchar_t tmpWideCharArr[gTempBuffArraySize]; wchar_t* allocatedArray = 0; wchar_t* wideCharBuf = 0; - if (wLent > maxBytes) { - wLent = maxBytes; - } - if (maxBytes >= gTempBuffArraySize) wideCharBuf = allocatedArray = new 
wchar_t[maxBytes + 1]; else wideCharBuf = tmpWideCharArr; for (unsigned int i = 0; i < wLent; i++) - { wideCharBuf[i] = toTranscode[i]; - } wideCharBuf[wLent] = 0x00; // Ok, go ahead and try the transcoding. If it fails, then ... - if (fbsd_wcstombs(toFill, wideCharBuf, maxBytes) == -1) - { - if (allocatedArray) - delete [] allocatedArray; + if (fbsd_wcstombs(toFill, wideCharBuf, maxBytes) == -1) { + if (allocatedArray) + delete [] allocatedArray; return false; } + if (allocatedArray) + delete [] allocatedArray; + +#else /* XML_USE_LIBICONV */ + + // Fill the "unicode" string + char *wideCharBuf = mbsFromXML (toTranscode, wLent); + + // Ok, go ahead and try the transcoding. If it fails, then ... + char *ptr = toFill; + size_t len = wLent * uChSize(); + if (iconvTo(wideCharBuf, &len, &ptr, maxBytes) == (size_t)-1) + return false; + +#endif /* !XML_USE_LIBICONV */ // Cap it off just in case toFill[wLent] = 0; - if (allocatedArray) - delete [] allocatedArray; return true; } @@ -453,16 +1100,16 @@ XMLCh* IconvFBSDLCPTranscoder::transcode(const char* const toTranscode) return 0; XMLCh* retVal = 0; - if (*toTranscode) - { + if (*toTranscode) { const unsigned int len = calcRequiredSize(toTranscode); - if (len == 0) - { + if (len == 0) { retVal = new XMLCh[1]; retVal[0] = 0; return retVal; } +#ifndef XML_USE_LIBICONV + wchar_t tmpWideCharArr[gTempBuffArraySize]; wchar_t* allocatedArray = 0; wchar_t* wideCharBuf = 0; @@ -475,25 +1122,45 @@ XMLCh* IconvFBSDLCPTranscoder::transcode(const char* const toTranscode) fbsd_mbstowcs(wideCharBuf, toTranscode, len); retVal = new XMLCh[len + 1]; for (unsigned int i = 0; i < len; i++) - { retVal[i] = (XMLCh) wideCharBuf[i]; - } retVal[len] = 0x00; - if (allocatedArray) - delete [] allocatedArray; + if (allocatedArray) + delete [] allocatedArray; + +#else /* XML_USE_LIBICONV */ + + char *wideCharBuf = mbsFromXML(NULL, len + 1); + if (wideCharBuf == NULL) + return 0; + size_t flen = strlen(toTranscode); + char *ptr = wideCharBuf; + if 
(iconvFrom(toTranscode, &flen, &ptr, len*uChSize()) == (size_t) -1) + return 0; + retVal = xmlFromMbs (wideCharBuf, len + 1); + if (retVal == NULL) { + if (wideCharBuf) + delete [] wideCharBuf; + return 0; + } + setTmpXMLBuf(NULL, 0); + retVal[len] = 0x00; + +#endif /* !XML_USE_LIBICONV */ + } - else - { + else { retVal = new XMLCh[1]; + if (retVal == NULL ) + return 0; retVal[0] = 0; } return retVal; } -bool IconvFBSDLCPTranscoder::transcode( const char* const toTranscode - , XMLCh* const toFill - , const unsigned int maxChars) +bool IconvFBSDLCPTranscoder::transcode(const char* const toTranscode + , XMLCh* const toFill + , const unsigned int maxChars) { // Check for a couple of psycho corner cases if (!toTranscode || !maxChars) @@ -508,115 +1175,190 @@ bool IconvFBSDLCPTranscoder::transcode( const char* const toTranscode return true; } - unsigned int len = calcRequiredSize(toTranscode); + size_t len = calcRequiredSize(toTranscode); + if (len > maxChars) + len = maxChars; + +#ifndef XML_USE_LIBICONV + wchar_t tmpWideCharArr[gTempBuffArraySize]; wchar_t* allocatedArray = 0; wchar_t* wideCharBuf = 0; - if (len > maxChars) { - len = maxChars; - } - if (maxChars >= gTempBuffArraySize) - wideCharBuf = allocatedArray = new wchar_t[maxChars + 1]; + wideCharBuf = allocatedArray = new wchar_t[len + 1]; else wideCharBuf = tmpWideCharArr; - if (fbsd_mbstowcs(wideCharBuf, toTranscode, maxChars) == -1) - { - if (allocatedArray) - delete [] allocatedArray; + if (fbsd_mbstowcs(wideCharBuf, toTranscode, len) == -1) { + if (allocatedArray) + delete [] allocatedArray; return false; } - for (unsigned int i = 0; i < len; i++) - { toFill[i] = (XMLCh) wideCharBuf[i]; - } - toFill[len] = 0x00; if (allocatedArray) - delete [] allocatedArray; + delete [] allocatedArray; + +#else /* XML_USE_LIBICONV */ + + char *wideCharBuf = mbsFromXML (toFill, maxChars); + if (wideCharBuf == NULL) + return 0; + size_t flen = len; + char *ptr = wideCharBuf; + if (iconvFrom(toTranscode, &flen, &ptr, 
maxChars*uChSize()) == (size_t)-1) + return false; + mbsToXML (wideCharBuf, maxChars, toFill, maxChars); + +#endif /* !XML_USE_LIBICONV */ + + toFill[maxChars] = 0x00; return true; } - // --------------------------------------------------------------------------- // IconvFBSDLCPTranscoder: Constructors and Destructor // --------------------------------------------------------------------------- + +#ifdef XML_USE_LIBICONV + +IconvFBSDLCPTranscoder::IconvFBSDLCPTranscoder (iconv_t cd_from, + iconv_t cd_to, + size_t uchsize, + unsigned int ubo) + : IconvFBSDCD (cd_from, cd_to, uchsize, ubo) +{ +} + +#endif /* XML_USE_LIBICONV */ + + +#ifndef XML_USE_LIBICONV + IconvFBSDLCPTranscoder::IconvFBSDLCPTranscoder() { } +#endif /* !XML_USE_LIBICONV */ + IconvFBSDLCPTranscoder::~IconvFBSDLCPTranscoder() { } + +#ifdef XML_USE_LIBICONV + // --------------------------------------------------------------------------- // IconvFBSDTranscoder: Constructors and Destructor // --------------------------------------------------------------------------- -IconvFBSDTranscoder::IconvFBSDTranscoder(const XMLCh* const encodingName - , const unsigned int blockSize) : - - XMLTranscoder(encodingName, blockSize) +IconvFBSDTranscoder::IconvFBSDTranscoder (const XMLCh* const encodingName + , const unsigned int blockSize + , iconv_t cd_from + , iconv_t cd_to + , size_t uchsize + , unsigned int ubo + ) + : XMLTranscoder(encodingName, blockSize) + , IconvFBSDCD (cd_from, cd_to, uchsize, ubo) { } IconvFBSDTranscoder::~IconvFBSDTranscoder() { + if (cdTo() != (iconv_t)-1) + iconv_close (cdTo()); + if (cdFrom() != (iconv_t)-1) + iconv_close (cdFrom()); } - // --------------------------------------------------------------------------- // IconvFBSDTranscoder: Implementation of the virtual transcoder API // --------------------------------------------------------------------------- -XMLCh IconvFBSDTranscoder::transcodeOne(const XMLByte* const srcData - , const unsigned int srcBytes - , unsigned int& 
bytesEaten) +unsigned int IconvFBSDTranscoder::transcodeFrom +( + const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes ) { - wchar_t toFill; - int eaten = ::mbtowc(&toFill, (const char*)srcData, srcBytes); - if (eaten == -1) - { - bytesEaten = 0; - return 0; + // Transcode TO XMLCh + + const char* startSrc = (const char*) srcData; + const char* endSrc = (const char*) srcData + srcCount; + char* startTarget = mbsFromXML (NULL, maxChars); + if (startTarget == NULL) + return 0; + char* orgTarget = startTarget; + + // Do character-by-character transcoding + size_t srcLen = srcCount; + unsigned int toReturn = 0; + bytesEaten = 0; + for (size_t cnt = 0; cnt < maxChars; cnt++) { + size_t rc = iconvFrom(startSrc, &srcLen, &orgTarget, uChSize()); + if (rc == (size_t)-1) { + if (errno != E2BIG) + ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq); + break; + } + charSizes[cnt] = endSrc - srcLen - startSrc; + bytesEaten += charSizes[cnt]; + startSrc = endSrc - srcLen; + toReturn++; } - - // Return the bytes we ate and the resulting char.
- bytesEaten = eaten; - return toFill; + mbsToXML (startTarget, maxChars, toFill, maxChars); + return toReturn; } +unsigned int IconvFBSDTranscoder::transcodeTo +( + const XMLCh* const srcData + , const unsigned int srcCount + , XMLByte* const toFill + , const unsigned int maxBytes + , unsigned int& charsEaten + , const UnRepOpts options ) +{ + // Transcode FROM XMLCh + + const char* startSrc = mbsFromXML(srcData, srcCount); + char* startTarget = (char *) toFill; + size_t srcLen = srcCount * uChSize(); // byte length: the buffer holds uChSize()-byte units + size_t rc = iconvTo (startSrc, &srcLen, &startTarget, maxBytes); + if (rc == (size_t)-1 && errno != E2BIG) + ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq); + charsEaten = srcCount - srcLen / uChSize(); // unconverted bytes back to a character count + return startTarget - (char *)toFill; +} -unsigned int -IconvFBSDTranscoder::transcodeXML( const XMLByte* const srcData - , const unsigned int srcCount - , XMLCh* const toFill - , const unsigned int maxChars - , unsigned int& bytesEaten - , unsigned char* const charSizes) +bool IconvFBSDTranscoder::canTranscodeTo +( + const unsigned int toCheck +) const { // - // For this one, because we have to maintain the offset table, we have - // to do them one char at a time until we run out of source data. + // If the passed value is really a surrogate embedded together, then + // we need to break it out into its two chars. Else just one.
// - unsigned int countIn = 0; - unsigned int countOut = 0; - while (countOut < maxChars) - { - wchar_t oneWideChar; - const int bytesEaten = - ::mbtowc(&oneWideChar, (const char*)&srcData[countIn], srcCount - countIn); - - // We are done, so break out - if (bytesEaten == -1) - break; - toFill[countOut] = (XMLCh) oneWideChar; - countIn += (unsigned int) bytesEaten; - countOut++; - } - - // Give back the counts of eaten and transcoded - bytesEaten = countIn; - return countOut; + char srcBuf[2 * uChSize()]; + unsigned int srcCount = 1; + if (toCheck & 0xFFFF0000) { + XMLCh ch1 = ((toCheck - 0x10000) >> 10) + 0xD800; // high (leading) surrogate + XMLCh ch2 = (toCheck & 0x3FF) + 0xDC00; // low (trailing) surrogate; mask before adding + xmlToMbs(&ch1, 1, srcBuf, 1); + xmlToMbs(&ch2, 1, srcBuf + uChSize(), 1); + srcCount++; + } else + xmlToMbs((const XMLCh*) &toCheck, 1, srcBuf, 1); + size_t len = srcCount * uChSize(); + char tmpBuf[64]; + char* pTmpBuf = tmpBuf; + size_t rc = iconvTo( srcBuf, &len, &pTmpBuf, 64); + return (rc != (size_t)-1) && (len == 0); } + +#endif /* XML_USE_LIBICONV */ diff --git a/src/util/Transcoders/IconvFBSD/IconvFBSDTransService.hpp b/src/util/Transcoders/IconvFBSD/IconvFBSDTransService.hpp index ab97b9963..7981513d3 100644 --- a/src/util/Transcoders/IconvFBSD/IconvFBSDTransService.hpp +++ b/src/util/Transcoders/IconvFBSD/IconvFBSDTransService.hpp @@ -1,37 +1,37 @@ /* * The Apache Software License, Version 1.1 - * + * * Copyright (c) 2001 The Apache Software Foundation. All rights * reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * + * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * + * notice, this list of conditions and the following disclaimer. + * * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * + * * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: + * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. - * + * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this - * software without prior written permission. For written + * software without prior written permission. For written * permission, please contact apache\@apache.org. - * + * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE @@ -45,7 +45,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== - * + * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation, and was * originally based on software copyright (c) 2001, International @@ -56,6 +56,10 @@ /* * $Log$ + * Revision 1.2 2001/12/11 15:10:14 tng + * More changes to IconvFBSDTransService. Allow using "old" TransServece implementation (via '-t native' option to runConfigure) or + * to employ libiconv (it is a part of FreeBSD ports-collection) services. 
By Max Gotlib. + * * Revision 1.1 2001/12/03 14:45:11 tng * FreeBSD native transcoder (IconvFBSD) added by Max Gotlib. * @@ -66,7 +70,171 @@ #include <util/TransService.hpp> +#ifdef XML_USE_LIBICONV + +# include <util/Mutexes.hpp> +# include <iconv.h> + +// --------------------------------------------------------------------------- +// Libiconv wrapper (low-level conversion utilities collection) +// --------------------------------------------------------------------------- + +class XMLUTIL_EXPORT IconvFBSDCD +{ +public: + // ----------------------------------------------------------------------- + // Constructors and Destructor + // ----------------------------------------------------------------------- + IconvFBSDCD + ( + iconv_t cd_from, + iconv_t cd_to, + size_t uchsize, + unsigned int ubo + ); + virtual ~IconvFBSDCD(); + + // Convert "native unicode" character into XMLCh + void mbcToXMLCh (const char *mbc, XMLCh *toRet) const; + + // Convert XMLCh into "native unicode" character + void xmlChToMbc (XMLCh xch, char *mbc) const; + + // Return uppercase equivalent for XMLCh + XMLCh toUpper (const XMLCh ch) const; + + // Check if the passed character belongs to the :space: class + virtual bool isSpace(const XMLCh toCheck) const; + + // Allocate internal buffer space, large enough to hold 'cnt' + // XMLCh characters, and fill it with data, supplied in the array + // of "native unicode" characters. + XMLCh* xmlFromMbs + ( + const char* str, + size_t cnt + ); + + // Fill array of XMLCh characters with data, supplied in the array + // of "native unicode" characters. + XMLCh* mbsToXML ( + const char* mbs_str, + size_t mbs_cnt, + XMLCh* xml_str, + size_t xml_cnt + ) const; + + // Allocate internal buffer space, large enough to hold 'cnt' + // "native unicode" characters, and fill it with data, supplied + // in the array of XMLCh characters.
+ char* mbsFromXML + ( + const XMLCh* str, + size_t cnt + ); + + // Fill array of "native unicode" characters with data, supplied + // in the array of XMLCh characters. + char* xmlToMbs + ( + const XMLCh* xml_str, + size_t xml_cnt, + char* mbs_str, + size_t mbs_cnt + ) const; + + // Wrapper around the iconv() for transcoding from the local charset + size_t iconvFrom + ( + const char *fromPtr, + size_t *fromLen, + char **toPtr, + size_t toLen + ) const; + + // Wrapper around the iconv() for transcoding to the local charset + size_t iconvTo + ( + const char *fromPtr, + size_t *fromLen, + char **toPtr, + size_t toLen + ) const; + + // Private data accessors + inline iconv_t cdTo () const { return fCDTo; } + inline iconv_t cdFrom () const { return fCDFrom; } + inline size_t uChSize () const { return fUChSize; } + inline unsigned int UBO () const { return fUBO; } + +protected: + + // Hidden default constructor + IconvFBSDCD(); + + // Private data accessors + inline void setCDTo (iconv_t cd) { fCDTo = cd; } + inline void setCDFrom (iconv_t cd) { fCDFrom = cd; } + inline void setUChSize (size_t sz) { fUChSize = sz; } + inline void setUBO (unsigned int u) { fUBO = u; } + inline void setTmpXMLBuf (XMLCh* b, size_t s) { + fTmpXMLBuf = b; fTmpXMLSize = s; + } + +private: + // ----------------------------------------------------------------------- + // Unimplemented constructors and operators + // ----------------------------------------------------------------------- + IconvFBSDCD(const IconvFBSDCD&); + void operator=(const IconvFBSDCD&); + + // ----------------------------------------------------------------------- + // Private data members + // + // fTmpXMLBuf + // Temporary buffer for holding arrays of XMLCh characters + // fTmpXMLSize + // Size of the XMLCh temporary buffer + // fTmpUBuf + // Temporary buffer for holding arrays of "native unicode" characters + // fTmpUSize + // Size of the "native unicode" temporary buffer + // fCDTo + // Characterset conversion descriptor
TO the local-host encoding + // fCDFrom + // Characterset conversion descriptor FROM the local-host encoding + // fTmpUSize + // Size of the "native unicode" temporary buffer + // fUChSize + // Size of the "native unicode" character in bytes + // fUBO + // "Native unicode" characters byte order + // fMutex + // We have to synchronize threaded calls to the converter. + // ----------------------------------------------------------------------- + XMLCh* fTmpXMLBuf; + size_t fTmpXMLSize; + char* fTmpUBuf; + size_t fTmpUSize; + size_t fUChSize; + unsigned int fUBO; + iconv_t fCDTo; + iconv_t fCDFrom; + mutable XMLMutex fMutex; +}; + +#endif /* XML_USE_LIBICONV */ + + + +// --------------------------------------------------------------------------- +// FreeBSD-specific Transcoding Service implementation +// --------------------------------------------------------------------------- + class XMLUTIL_EXPORT IconvFBSDTransService : public XMLTransService +#ifdef XML_USE_LIBICONV +, IconvFBSDCD +#endif { public : // ----------------------------------------------------------------------- @@ -113,39 +281,56 @@ protected : , const unsigned int blockSize ); - + private : // ----------------------------------------------------------------------- // Unimplemented constructors and operators // ----------------------------------------------------------------------- IconvFBSDTransService(const IconvFBSDTransService&); void operator=(const IconvFBSDTransService&); -}; +#ifdef XML_USE_LIBICONV + + // ----------------------------------------------------------------------- + // Private data members + // + // fLocalCP + // Local (host) character set name + // fUnicodeCP + // Unicode encoding schema name + // ----------------------------------------------------------------------- + const char* fUnicodeCP; + +#endif /* XML_USE_LIBICONV */ +}; +#ifdef XML_USE_LIBICONV +//---------------------------------------------------------------------------- +// Implementation of the transcoders for
arbitrary input characterset is +// supported ONLY through libiconv interface +//---------------------------------------------------------------------------- -class XMLUTIL_EXPORT IconvFBSDTranscoder : public XMLTranscoder +class XMLUTIL_EXPORT IconvFBSDTranscoder : public XMLTranscoder, IconvFBSDCD { public : // ----------------------------------------------------------------------- // Constructors and Destructor // ----------------------------------------------------------------------- - IconvFBSDTranscoder(const XMLCh* const encodingName, const unsigned int blockSize); + IconvFBSDTranscoder(const XMLCh* const encodingName + , const unsigned int blockSize + , iconv_t cd_from + , iconv_t cd_to + , size_t uchsize + , unsigned int ubo + ); ~IconvFBSDTranscoder(); // ----------------------------------------------------------------------- // Implementation of the virtual transcoder interface // ----------------------------------------------------------------------- - virtual XMLCh transcodeOne - ( - const XMLByte* const srcData - , const unsigned int srcBytes - , unsigned int& bytesEaten - ); - - virtual unsigned int transcodeXML + virtual unsigned int transcodeFrom ( const XMLByte* const srcData , const unsigned int srcCount @@ -155,24 +340,67 @@ public : , unsigned char* const charSizes ); + virtual unsigned int transcodeTo + ( + const XMLCh* const srcData + , const unsigned int srcCount + , XMLByte* const toFill + , const unsigned int maxBytes + , unsigned int& charsEaten + , const UnRepOpts options + ); + + virtual bool canTranscodeTo + ( + const unsigned int toCheck + ) const; private : // ----------------------------------------------------------------------- // Unimplemented constructors and operators // ----------------------------------------------------------------------- + IconvFBSDTranscoder(); IconvFBSDTranscoder(const IconvFBSDTranscoder&); void operator=(const IconvFBSDTranscoder&); }; +#endif /* XML_USE_LIBICONV */ + +// 
--------------------------------------------------------------------------- +// FreeBSD-specific XMLCh <-> local (host) characterset transcoder +// --------------------------------------------------------------------------- class XMLUTIL_EXPORT IconvFBSDLCPTranscoder : public XMLLCPTranscoder +#ifdef XML_USE_LIBICONV +, IconvFBSDCD +#endif { public : // ----------------------------------------------------------------------- // Constructors and Destructor // ----------------------------------------------------------------------- +#ifdef XML_USE_LIBICONV + + IconvFBSDLCPTranscoder + ( + iconv_t from, + iconv_t to, + size_t uchsize, + unsigned int ubo + ); + +protected: + IconvFBSDLCPTranscoder(); // Unimplemented + +public: + +#else /* !XML_USE_LIBICONV */ + IconvFBSDLCPTranscoder(); + +#endif /* XML_USE_LIBICONV */ + ~IconvFBSDLCPTranscoder(); -- GitLab