diff --git a/src/util/TransService.cpp b/src/util/TransService.cpp new file mode 100644 index 0000000000000000000000000000000000000000..65a5f309339c4cd3246abc4c15ef6570b6ad739a --- /dev/null +++ b/src/util/TransService.cpp @@ -0,0 +1,126 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + + +// --------------------------------------------------------------------------- +// Includes +// --------------------------------------------------------------------------- +#include <util/TransService.hpp> +#include <util/XMLString.hpp> + + + +// --------------------------------------------------------------------------- +// XMLTransService: Constructors and destructor +// --------------------------------------------------------------------------- +XMLTransService::XMLTransService() +{ +} + +XMLTransService::~XMLTransService() +{ +} + + +// --------------------------------------------------------------------------- +// XMLTranscoder: Public Destructor +// --------------------------------------------------------------------------- +XMLTranscoder::~XMLTranscoder() +{ + delete [] fEncodingName; +} + + +// --------------------------------------------------------------------------- +// XMLTranscoder: Hidden Constructors +// 
--------------------------------------------------------------------------- +XMLTranscoder::XMLTranscoder(const XMLCh* const encodingName + , const unsigned int blockSize) : + fEncodingName(0) + , fBlockSize(blockSize) +{ + fEncodingName = XMLString::replicate(encodingName); +} + + +// --------------------------------------------------------------------------- +// XMLTranscoder: Protected helpers +// --------------------------------------------------------------------------- +void XMLTranscoder::checkBlockSize(const unsigned int toCheck) +{ + if (toCheck > fBlockSize) + { + // <TBD> Throw an exception here; for now an oversized block is silently accepted + } +} + + + + +// --------------------------------------------------------------------------- +// XMLLCPTranscoder: Constructor +// --------------------------------------------------------------------------- +XMLLCPTranscoder::XMLLCPTranscoder() +{ +} + + +// --------------------------------------------------------------------------- +// XMLLCPTranscoder: Destructor +// --------------------------------------------------------------------------- +XMLLCPTranscoder::~XMLLCPTranscoder() +{ +} diff --git a/src/util/XML88591Transcoder.cpp b/src/util/XML88591Transcoder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8e80df209dc4b17442a7dbb0c8dfbab672a21ff6 --- /dev/null +++ b/src/util/XML88591Transcoder.cpp @@ -0,0 +1,145 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + + +// --------------------------------------------------------------------------- +// Includes +// --------------------------------------------------------------------------- +#include <util/XML88591Transcoder.hpp> +#include <util/TranscodingException.hpp> +#include <memory.h> + + + +// --------------------------------------------------------------------------- +// XML88591Transcoder: Constructors and Destructor +// --------------------------------------------------------------------------- +XML88591Transcoder::XML88591Transcoder( const XMLCh* const encodingName + , const unsigned int blockSize) : + + XMLTranscoder(encodingName, blockSize) +{ +} + + +XML88591Transcoder::~XML88591Transcoder() +{ +} + + +// --------------------------------------------------------------------------- +// XML88591Transcoder: Implementation of the transcoder API +// --------------------------------------------------------------------------- +bool XML88591Transcoder::supportsSrcOfs() const +{ + // Yes, we support this + return true; +} + + +XMLCh +XML88591Transcoder::transcodeOne(const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten) +{ + // If there are no source bytes, return zero (bytesEaten is not updated) + if (!srcBytes) + return 0; + + // We are going to eat one byte + bytesEaten = 1; + + // And return the value, cast to an XMLCh + return XMLCh(*srcData); +} + + +unsigned int +XML88591Transcoder::transcodeXML(const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const 
toFill + , const unsigned int maxChars + , unsigned int& 
bytesEaten + , unsigned char* const charSizes) +{ + // If debugging, make sure that the block size is legal + #if defined(XML4C_DEBUG) + checkBlockSize(maxChars); + #endif + + // + // Calculate the max chars we can do here. It's the lesser of the + // max output chars and the number of chars in the source. + // + const unsigned int countToDo = srcCount < maxChars ? srcCount : maxChars; + + // + // Loop through the bytes to do and convert over each byte. It's just + // a cast to the wide char type. + // + const XMLByte* srcPtr = srcData; + XMLCh* destPtr = toFill; + const XMLByte* srcEnd = srcPtr + countToDo; + while (srcPtr < srcEnd) + *destPtr++ = XMLCh(*srcPtr++); + + // Set the bytes eaten, and set the char size array to the fixed size + bytesEaten = countToDo; + memset(charSizes, 1, countToDo); + + // Return the chars we transcoded + return countToDo; +} diff --git a/src/util/XML88591Transcoder.hpp b/src/util/XML88591Transcoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f5d9de5c5a743cbc9ecd24d7f4905b0d34b27b97 --- /dev/null +++ b/src/util/XML88591Transcoder.hpp @@ -0,0 +1,118 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . 
For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +#ifndef XML88591TRANSCODER_HPP +#define XML88591TRANSCODER_HPP + +#include <util/XML4CDefs.hpp> +#include <util/TransService.hpp> + + +// +// This class provides an implementation of the XMLTranscoder interface +// for a simple 8859-1 transcoder. The parser does some encodings +// intrinsically without depending upon external transcoding services. +// To make everything more orthogonal, we implement these internal +// transcoders using the same transcoder abstraction as the pluggable +// transcoding services do. +// +class XMLUTIL_EXPORT XML88591Transcoder : public XMLTranscoder +{ +public : + // ----------------------------------------------------------------------- + // Public constructors and destructor + // ----------------------------------------------------------------------- + XML88591Transcoder + ( + const XMLCh* const encodingName + , const unsigned int blockSize + ); + + virtual ~XML88591Transcoder(); + + + // ----------------------------------------------------------------------- + // Implementation of the XMLTranscoder interface + // ----------------------------------------------------------------------- + virtual bool supportsSrcOfs() const; + + virtual XMLCh transcodeOne + ( + const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten + ); + + virtual unsigned int transcodeXML + ( + const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes + ); + + +private : + // ----------------------------------------------------------------------- + // Unimplemented constructors and operators + // ----------------------------------------------------------------------- + XML88591Transcoder(const XML88591Transcoder&); + void operator=(const XML88591Transcoder&); +}; + +#endif diff --git 
a/src/util/XMLASCIITranscoder.cpp b/src/util/XMLASCIITranscoder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..24f01018f3294802fbd28f9fbf0a8a6751546666 --- /dev/null +++ b/src/util/XMLASCIITranscoder.cpp @@ -0,0 +1,179 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + + +// --------------------------------------------------------------------------- +// Includes +// --------------------------------------------------------------------------- +#include <util/XMLASCIITranscoder.hpp> +#include <util/XMLString.hpp> +#include <util/TranscodingException.hpp> +#include <memory.h> + + + +// --------------------------------------------------------------------------- +// XMLASCIITranscoder: Constructors and Destructor +// --------------------------------------------------------------------------- +XMLASCIITranscoder::XMLASCIITranscoder(const unsigned int blockSize) : + + XMLTranscoder(XMLUni::fgUSASCIIEncodingString, blockSize) +{ +} + + +XMLASCIITranscoder::~XMLASCIITranscoder() +{ +} + + +// --------------------------------------------------------------------------- +// XMLASCIITranscoder: Implementation of the transcoder API +// --------------------------------------------------------------------------- +bool XMLASCIITranscoder::supportsSrcOfs() const +{ + 
// Yes, we support this + return true; +} + + +XMLCh +XMLASCIITranscoder::transcodeOne(const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten) +{ + // If no source, then give up (bytesEaten is not updated) + if (!srcBytes) + return 0; + + // If the source byte is above 0x7F (not 7-bit ASCII), then give up + if (*srcData > 0x7F) + { + ThrowXML1 + ( + TranscodingException + , XML4CExcepts::Trans_NotInSourceSet + , XMLUni::fgUSASCIIEncodingString + ); + } + + // We eat one source byte and just cast the ASCII char to XMLCh + bytesEaten = 1; + return XMLCh(*srcData); +} + + +unsigned int +XMLASCIITranscoder::transcodeXML(const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes) +{ + // If debugging, make sure that the block size is legal + #if defined(XML4C_DEBUG) + checkBlockSize(maxChars); + #endif + + // + // Calculate the max chars we can do here. It's the lesser of the + // max output chars and the source count. + // + const unsigned int countToDo = srcCount < maxChars ? srcCount : maxChars; + + // + // Now loop through that many source chars and just cast each one + // over to the XMLCh format. Check that each source byte is really a + // valid ASCII char. + // + const XMLByte* inPtr = srcData; + XMLCh* outPtr = toFill; + unsigned int countDone = 0; + for (; countDone < countToDo; countDone++) + { + // Do the optimistic work up front + if (*inPtr < 0x80) + { + *outPtr++ = XMLCh(*inPtr++); + continue; + } + + // + // We got a char outside the source encoding. If we got more than 32 chars, + // then just break out. We'll come back here later to hit this again + // and give an error much closer to the real source position. 
+ // + if (countDone > 32) + break; + + ThrowXML1 + ( + TranscodingException + , XML4CExcepts::Trans_NotInSourceSet + , XMLUni::fgUSASCIIEncodingString + ); + } + + // Set the bytes we ate + bytesEaten = countDone; + + // Set the char sizes to the fixed size + memset(charSizes, 1, countDone); + + // Return the chars we transcoded + return countDone; +} diff --git a/src/util/XMLASCIITranscoder.hpp b/src/util/XMLASCIITranscoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..77640a104de82cdd326329e949cfd3b45080fc30 --- /dev/null +++ b/src/util/XMLASCIITranscoder.hpp @@ -0,0 +1,117 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +#ifndef XMLASCIITRANSCODER_HPP +#define XMLASCIITRANSCODER_HPP + +#include <util/XML4CDefs.hpp> +#include <util/TransService.hpp> + + +// +// This class provides an implementation of the XMLTranscoder interface +// for a simple ASCII transcoder. The parser does some encodings +// intrinsically without depending upon external transcoding services. +// To make everything more orthogonal, we implement these internal +// transcoders using the same transcoder abstraction as the pluggable +// transcoding services do. 
+// +class XMLUTIL_EXPORT XMLASCIITranscoder : public XMLTranscoder +{ +public : + // ----------------------------------------------------------------------- + // Constructors and destructor + // ----------------------------------------------------------------------- + XMLASCIITranscoder + ( + const unsigned int blockSize + ); + + virtual ~XMLASCIITranscoder(); + + + // ----------------------------------------------------------------------- + // Implementation of the XMLTranscoder interface + // ----------------------------------------------------------------------- + virtual bool supportsSrcOfs() const; + + virtual XMLCh transcodeOne + ( + const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten + ); + + virtual unsigned int transcodeXML + ( + const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes + ); + + +private : + // ----------------------------------------------------------------------- + // Unimplemented constructors and operators + // ----------------------------------------------------------------------- + XMLASCIITranscoder(const XMLASCIITranscoder&); + void operator=(const XMLASCIITranscoder&); +}; + +#endif diff --git a/src/util/XMLUCS4Transcoder.hpp b/src/util/XMLUCS4Transcoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..027a852d3eda839df5b737066e67cd8b38a5ce37 --- /dev/null +++ b/src/util/XMLUCS4Transcoder.hpp @@ -0,0 +1,135 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +#ifndef XMLUCS4TRANSCODER_HPP +#define XMLUCS4TRANSCODER_HPP + +#include <util/XML4CDefs.hpp> +#include <util/TransService.hpp> + + +// +// This class provides an implementation of the XMLTranscoder interface +// for a simple UCS4 transcoder. The parser does some encodings +// intrinsically without depending upon external transcoding services. +// To make everything more orthogonal, we implement these internal +// transcoders using the same transcoder abstraction as the pluggable +// transcoding services do. +// +class XMLUTIL_EXPORT XMLUCS4Transcoder : public XMLTranscoder +{ +public : + // ----------------------------------------------------------------------- + // Public constructors and destructor + // ----------------------------------------------------------------------- + XMLUCS4Transcoder + ( + const XMLCh* const encodingName + , const unsigned int blockSize + , const bool swapped + ); + + virtual ~XMLUCS4Transcoder(); + + + // ----------------------------------------------------------------------- + // Implementation of the XMLTranscoder interface + // ----------------------------------------------------------------------- + virtual bool supportsSrcOfs() const; + + virtual XMLCh transcodeOne + ( + const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten + ); + + virtual unsigned int transcodeXML + ( + const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes + ); + + 
+private : + // ----------------------------------------------------------------------- + // Unimplemented constructors and operators + // ----------------------------------------------------------------------- + XMLUCS4Transcoder(const XMLUCS4Transcoder&); + void operator=(const XMLUCS4Transcoder&); + + + // ----------------------------------------------------------------------- + // Private data members + // + // fSpareCh + // If we decode a surrogate pair, and only have space for one of + // the values in the output, then we have to store the trailing + // value until the next time. + // + // fSwapped + // This tells us if our input is going to be in the same endianness + // as the local host or swapped. + // ----------------------------------------------------------------------- + XMLCh fSpareCh; + bool fSwapped; +}; + +#endif diff --git a/src/util/XMLUCSTranscoder.cpp b/src/util/XMLUCSTranscoder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f6d46469eae1128d6d35a4fda1c4fbaf9b030ce8 --- /dev/null +++ b/src/util/XMLUCSTranscoder.cpp @@ -0,0 +1,249 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." 
+ * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ */ + + +// --------------------------------------------------------------------------- +// Includes +// --------------------------------------------------------------------------- +#include <util/BitOps.hpp> +#include <util/XMLUCS4Transcoder.hpp> +#include <util/TranscodingException.hpp> +#include <memory.h> + + + +// --------------------------------------------------------------------------- +// XMLUCS4Transcoder: Constructors and Destructor +// --------------------------------------------------------------------------- +XMLUCS4Transcoder::XMLUCS4Transcoder(const XMLCh* const encodingName + , const unsigned int blockSize + , const bool swapped) : + + XMLTranscoder(encodingName, blockSize) + , fSpareCh(0) + , fSwapped(swapped) +{ +} + + +XMLUCS4Transcoder::~XMLUCS4Transcoder() +{ +} + + +// --------------------------------------------------------------------------- +// XMLUCS4Transcoder: Implementation of the transcoder API +// --------------------------------------------------------------------------- +bool XMLUCS4Transcoder::supportsSrcOfs() const +{ + // Yes we support this + return true; +} + + +XMLCh +XMLUCS4Transcoder::transcodeOne(const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten) +{ + // If there is a spare char, return it first + if (fSpareCh) + { + const XMLCh retCh = fSpareCh; + fSpareCh = 0; + return retCh; + } + + // If not enough bytes for an input char, return zero + if (srcBytes < sizeof(UCS4Ch)) + return 0; + + // + // Get the next char. If the source is swapped from native format, + // unswap it + // + UCS4Ch nextCh = *((const UCS4Ch*)srcData); + if (fSwapped) + nextCh = BitOps::swapBytes(nextCh); + + // + // See if this requires a surrogate pair to store. If so, then we + // return the first and save the next. Else just return the one + // resulting char. 
+ // + XMLCh retCh; + if (nextCh & 0xFFFF0000) + { + const XMLCh ch1 = XMLCh(((nextCh - 0x10000) >> 10) + 0xD800); + const XMLCh ch2 = XMLCh(((nextCh - 0x10000) & 0x3FF) + 0xDC00); + + retCh = ch1; + fSpareCh = ch2; + } + else + { + // No surrogate, so just cast it to the correct type + retCh = XMLCh(nextCh); + } + + // We ate one UCS4 char's worth of bytes + bytesEaten = sizeof(UCS4Ch); + return retCh; +} + + +unsigned int +XMLUCS4Transcoder::transcodeXML(const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes) +{ + // If debugging, make sure that the block size is legal + #if defined(XML4C_DEBUG) + checkBlockSize(maxChars); + #endif + + // This will be the index into the output buffer and init the bytes eaten + unsigned int charsRead = 0; + bytesEaten = 0; + + // + // If there is a spare character, then we have to take that one first + // being sure to bump the chars read. + // + if (fSpareCh) + { + // + // This char was a trailing surrogate so it took no space from + // the original source itself. The leading surrogate accounted + // for all the source eaten. + // + charSizes[charsRead] = 0; + toFill[charsRead++] = fSpareCh; + fSpareCh = chNull; + } + + // + // Calculate how many UCS-4 characters could possibly be pulled out + // of the raw buffer right now. If none, return zero. Shouldn't happen + // unless an odd number of bytes is in the buffer, since an empty + // buffer would have been refilled before we were called. + // + const unsigned int charsAvail = (srcCount / sizeof(UCS4Ch)); + if (!charsAvail) + return 0; + + // + // Calculate the maximum chars we can do. Its the lesser of the chars + // requested and the UCS-4 chars available in the buffer. We have to + // account for the possibility that we used one target position on + // the trailing surrogate above. 
+ // + const unsigned int charsToDo = ((maxChars - charsRead) < charsAvail) ? + (maxChars - charsRead): charsAvail; + + // Just loop until we get the max chars we need or run out of source + const UCS4Ch* asUCS4 = (const UCS4Ch*)srcData; + while (charsRead < charsToDo) + { + // Get the next int out of the buffer + UCS4Ch nextVal = *asUCS4++; + bytesEaten += sizeof(UCS4Ch); + + // If it needs to be swapped, then do it + if (fSwapped) + nextVal = BitOps::swapBytes(nextVal); + + // Handle a surrogate pair if needed + if (nextVal & 0xFFFF0000) + { + const XMLCh ch1 = XMLCh(((nextVal - 0x10000) >> 10) + 0xD800); + const XMLCh ch2 = XMLCh(((nextVal - 0x10000) & 0x3FF) + 0xDC00); + + // + // If we have room for two chars, then put them both in and bump + // the chars read by two. Otherwise, put one in and store the + // other in the spare char for the next round. + // + if (charsRead + 1 == maxChars) + { + charSizes[charsRead] = sizeof(UCS4Ch); + toFill[charsRead++] = ch1; + fSpareCh = ch2; + } + else + { + // + // We have room so store them both. But note that the + // second one took up no source bytes! + // + charSizes[charsRead] = sizeof(UCS4Ch); + toFill[charsRead++] = ch1; + charSizes[charsRead] = 0; + toFill[charsRead++] = ch2; + } + } + else + { + // No surrogate, so just store it and bump the count + charSizes[charsRead] = sizeof(UCS4Ch); + toFill[charsRead++] = XMLCh(nextVal); + } + } + return charsRead; +} diff --git a/src/util/XMLUTF16Transcoder.cpp b/src/util/XMLUTF16Transcoder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7abc7b4d532bab1040cd6efebf1c4752a3ab309d --- /dev/null +++ b/src/util/XMLUTF16Transcoder.cpp @@ -0,0 +1,185 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + + +// --------------------------------------------------------------------------- +// Includes +// --------------------------------------------------------------------------- +#include <util/BitOps.hpp> +#include <util/XMLUTF16Transcoder.hpp> +#include <util/TranscodingException.hpp> +#include <memory.h> + + + +// --------------------------------------------------------------------------- +// XMLUTF16Transcoder: Constructors and Destructor +// --------------------------------------------------------------------------- +XMLUTF16Transcoder::XMLUTF16Transcoder( const XMLCh* const encodingName + , const unsigned int blockSize + , const bool swapped) : + + XMLTranscoder(encodingName, blockSize) + , fSwapped(swapped) +{ +} + + +XMLUTF16Transcoder::~XMLUTF16Transcoder() +{ +} + + +// --------------------------------------------------------------------------- +// XMLUTF16Transcoder: Implementation of the transcoder API +// --------------------------------------------------------------------------- +bool XMLUTF16Transcoder::supportsSrcOfs() const +{ + // Yes we support this + return true; +} + + +XMLCh +XMLUTF16Transcoder::transcodeOne(const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten) +{ + // If not enough source bytes, return zero + if (srcBytes < sizeof(UTF16Ch)) + return 0; + + // We are going to eat one UTF16 char's worth of bytes + bytesEaten = sizeof(UTF16Ch); + + // Get the byte out and swap it if needed + UTF16Ch nextCh = *((const UTF16Ch*)srcData); + if (fSwapped) + nextCh = 
BitOps::swapBytes(nextCh); + + // And return the value, cast to an XMLCh + return XMLCh(nextCh); +} + + +unsigned int +XMLUTF16Transcoder::transcodeXML(const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes) +{ + // If debugging, make sure that the block size is legal + #if defined(XML4C_DEBUG) + checkBlockSize(maxChars); + #endif + + // + // Calculate the max chars we can do here. Its the lesser of the + // max output chars and the number of chars in the source. + // + const unsigned int srcChars = srcCount / sizeof(UTF16Ch); + const unsigned int countToDo = srcChars < maxChars ? srcChars : maxChars; + + // Look at the source data as UTF16 chars + const UTF16Ch* asUTF16 = (const UTF16Ch*)srcData; + + // + // If its swapped, we have to do a char by char swap and cast. Else + // we have to check whether our XMLCh and UTF16Ch types are the same + // size or not. If so, we can optimize by just doing a buffer copy. + // + if (fSwapped) + { + // + // And then do the swapping loop for the count we precalculated. Note + // that this also handles size conversion as well if XMLCh is not the + // same size as UTF16Char. + // + for (unsigned int index = 0; index < countToDo; index++) + toFill[index] = BitOps::swapBytes(*asUTF16++); + } + else + { + // + // If the XMLCh type is the same size as a UTF16 value on this + // platform, then we can do just a buffer copy straight to the target + // buffer since our source chars are UTF-16 chars. If its not, then + // we still have to do a loop and assign each one, in order to + // implicitly convert. + // + if (sizeof(XMLCh) == sizeof(UTF16Ch)) + { + // Notice we convert char count to byte count here!!! 
+ memcpy(toFill, srcData, countToDo * sizeof(UTF16Ch)); + } + else + { + for (unsigned int index = 0; index < countToDo; index++) + toFill[index] = XMLCh(*asUTF16++); + } + } + + // Set the bytes eaten + bytesEaten = countToDo * sizeof(UTF16Ch); + + // Set the character sizes to the fixed size + memset(charSizes, sizeof(UTF16Ch), countToDo); + + // Return the chars we transcoded + return countToDo; +} diff --git a/src/util/XMLUTF16Transcoder.hpp b/src/util/XMLUTF16Transcoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ff4f43d8d82cbeef515656b527fe5b0daa589691 --- /dev/null +++ b/src/util/XMLUTF16Transcoder.hpp @@ -0,0 +1,129 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +#ifndef XMLUTF16TRANSCODER_HPP +#define XMLUTF16TRANSCODER_HPP + +#include <util/XML4CDefs.hpp> +#include <util/TransService.hpp> + + +// +// This class provides an implementation of the XMLTranscoder interface +// for a simple UTF16 transcoder. The parser does some encodings +// intrinsically without depending upon external transcoding services. +// To make everything more orthagonal, we implement these internal +// transcoders using the same transcoder abstraction as the pluggable +// transcoding services do. 
+// +class XMLUTIL_EXPORT XMLUTF16Transcoder : public XMLTranscoder +{ +public : + // ----------------------------------------------------------------------- + // Public constructors and destructor + // ----------------------------------------------------------------------- + XMLUTF16Transcoder + ( + const XMLCh* const encodingName + , const unsigned int blockSize + , const bool swapped + ); + + virtual ~XMLUTF16Transcoder(); + + + // ----------------------------------------------------------------------- + // Implementation of the XMLTranscoder interface + // ----------------------------------------------------------------------- + virtual bool supportsSrcOfs() const; + + virtual XMLCh transcodeOne + ( + const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten + ); + + virtual unsigned int transcodeXML + ( + const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes + ); + + +private : + // ----------------------------------------------------------------------- + // Unimplemented constructors and operators + // ----------------------------------------------------------------------- + XMLUTF16Transcoder(const XMLUTF16Transcoder&); + void operator=(const XMLUTF16Transcoder&); + + + // ----------------------------------------------------------------------- + // Private data members + // + // fSwapped + // Indicates whether the encoding is of the opposite endianness from + // the local host. 
+ // ----------------------------------------------------------------------- + bool fSwapped; +}; + +#endif diff --git a/src/util/XMLUTF8Transcoder.cpp b/src/util/XMLUTF8Transcoder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..878e9c3bfbbc520c0a192d9d4d8c4db03761a584 --- /dev/null +++ b/src/util/XMLUTF8Transcoder.cpp @@ -0,0 +1,270 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + + +// --------------------------------------------------------------------------- +// Includes +// --------------------------------------------------------------------------- +#include <util/XMLUTF8Transcoder.hpp> +#include <util/UTFDataFormatException.hpp> + + +// --------------------------------------------------------------------------- +// Local static data +// +// gUTFBytes +// A list of counts of trailing bytes for each initial byte in the input. +// +// gUTFOffsets +// A list of values to offset each result char type, according to how +// many source bytes when into making it. 
+// --------------------------------------------------------------------------- +static const XMLByte gUTFBytes[256] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + , 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + , 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 +}; + +static const XMLUInt32 gUTFOffsets[6] = +{ + 0, 0x3080, 0xE2080, 0x3C82080, 0xFA082080, 0x82022080 +}; + + + + +// --------------------------------------------------------------------------- +// XMLUTF8Transcoder: Constructors and Destructor +// --------------------------------------------------------------------------- +XMLUTF8Transcoder::XMLUTF8Transcoder(const unsigned int blockSize) : + + XMLTranscoder(XMLUni::fgUTF8EncodingString, blockSize) + , fSpareCh(0) +{ +} + +XMLUTF8Transcoder::~XMLUTF8Transcoder() +{ +} + + +// --------------------------------------------------------------------------- +// XMLUTF8Transcoder: Implementation of the transcoder API +// --------------------------------------------------------------------------- +bool XMLUTF8Transcoder::supportsSrcOfs() const +{ + // Yes we support this + return true; +} + + +XMLCh +XMLUTF8Transcoder::transcodeOne(const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten) +{ + // If there is a spare char, then do that first + if (fSpareCh) + { + const 
XMLCh retCh = fSpareCh; + fSpareCh = 0; + return retCh; + } + + // If there are no bytes, then give up now + if (!srcBytes) + return 0; + + // Just call the other version with a max of one output character + XMLCh chTarget; + unsigned char dummy; + if (!transcodeXML(srcData, srcBytes, &chTarget, 1, bytesEaten, &dummy)) + return 0; + + return chTarget; +} + + +unsigned int +XMLUTF8Transcoder::transcodeXML(const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes) +{ + // If debugging, make sure that the block size is legal + #if defined(XML4C_DEBUG) + checkBlockSize(maxChars); + #endif + + // This will track the target chars we've transcoded out so far + unsigned int charsRead = 0; + + // + // If we have a spare, then store it first and zero out the spare. + // Be sure to bump the count of chars read. + // + if (fSpareCh) + { + // + // A spare is a trailing surrogate. So it actually takes up no + // space in the source data. Its leading surrogate accounted for + // all the bytes eaten. + // + charSizes[charsRead] = 0; + toFill[charsRead++] = fSpareCh; + fSpareCh = 0; + } + + // + // Just loop until we run out of input or hit the max chars that + // was requested. + // + bytesEaten = 0; + const XMLByte* srcPtr = srcData; + while (charsRead < maxChars) + { + // See how many trailing src bytes this sequence is going to require + const unsigned int trailingBytes = gUTFBytes[*srcPtr]; + + // + // If there are not enough source bytes to do this one, then we + // are done. 
+ // + if (bytesEaten + trailingBytes >= srcCount) + break; + + // Looks ok, so lets build up the value + XMLUInt32 tmpVal = 0; + switch(trailingBytes) + { + case 5 : tmpVal += *srcPtr++; tmpVal <<= 6; + case 4 : tmpVal += *srcPtr++; tmpVal <<= 6; + case 3 : tmpVal += *srcPtr++; tmpVal <<= 6; + case 2 : tmpVal += *srcPtr++; tmpVal <<= 6; + case 1 : tmpVal += *srcPtr++; tmpVal <<= 6; + case 0 : tmpVal += *srcPtr++; + } + tmpVal -= gUTFOffsets[trailingBytes]; + + // + // If it will fit into a single char, then put it in. Otherwise + // encode it as a surrogate pair. If its not valid, use the + // replacement char. + // + if (!(tmpVal & 0xFFFF0000)) + { + charSizes[charsRead] = trailingBytes + 1; + toFill[charsRead++] = XMLCh(tmpVal); + } + else if (tmpVal > 0x10FFFF) + { + // + // If we've gotten more than 32 chars so far, then just break + // out for now and lets process those. When we come back in + // here again, we'll get no chars and throw an exception. This + // way, the error will have a line and col number closer to + // the real problem area. + // + if (charsRead > 32) + break; + ThrowXML(UTFDataFormatException, XML4CExcepts::Reader_BadUTF8Seq); + } + else + { + // Store the leading surrogate char + tmpVal -= 0x10000; + charSizes[charsRead] = trailingBytes + 1; + toFill[charsRead++] = XMLCh((tmpVal >> 10) + 0xD800); + + // + // If we don't have room for the trailing one, then store + // it in the spare char. Else store it in the buffer. 
+ // + if (charsRead >= maxChars) + { + fSpareCh = XMLCh(tmpVal & 0x3FF) + 0xDC00; + } + else + { + // This one accounts for no bytes eaten + charSizes[charsRead] = 0; + toFill[charsRead++] = XMLCh(tmpVal & 0x3FF) + 0xDC00; + } + } + + // Update the bytes eaten + bytesEaten += trailingBytes + 1; + } + + // Return the characters read + return charsRead; +} diff --git a/src/util/XMLUTF8Transcoder.hpp b/src/util/XMLUTF8Transcoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..24432711897431eb1a07b81b8484e885d049ff58 --- /dev/null +++ b/src/util/XMLUTF8Transcoder.hpp @@ -0,0 +1,127 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xerces" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache\@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation, and was + * originally based on software copyright (c) 1999, International + * Business Machines, Inc., http://www.ibm.com . For more information + * on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +#ifndef XMLUTF8TRANSCODER_HPP +#define XMLUTF8TRANSCODER_HPP + +#include <util/XML4CDefs.hpp> +#include <util/TransService.hpp> + +// +// This class provides an implementation of the XMLTranscoder interface +// for a simple UTF8 transcoder. The parser does some encodings +// intrinsically without depending upon external transcoding services. +// To make everything more orthagonal, we implement these internal +// transcoders using the same transcoder abstraction as the pluggable +// transcoding services do. 
+// +class XMLUTIL_EXPORT XMLUTF8Transcoder : public XMLTranscoder +{ +public : + // ----------------------------------------------------------------------- + // Public constructors and destructor + // ----------------------------------------------------------------------- + XMLUTF8Transcoder + ( + const unsigned int blockSize + ); + + virtual ~XMLUTF8Transcoder(); + + + // ----------------------------------------------------------------------- + // Implementation of the XMLTranscoder interface + // ----------------------------------------------------------------------- + virtual bool supportsSrcOfs() const; + + virtual XMLCh transcodeOne + ( + const XMLByte* const srcData + , const unsigned int srcBytes + , unsigned int& bytesEaten + ); + + virtual unsigned int transcodeXML + ( + const XMLByte* const srcData + , const unsigned int srcCount + , XMLCh* const toFill + , const unsigned int maxChars + , unsigned int& bytesEaten + , unsigned char* const charSizes + ); + + +private : + // ----------------------------------------------------------------------- + // Unimplemented constructors and operators + // ----------------------------------------------------------------------- + XMLUTF8Transcoder(const XMLUTF8Transcoder&); + void operator=(const XMLUTF8Transcoder&); + + + // ----------------------------------------------------------------------- + // Private data members + // + // fSpareCh + // Sometimes, when we decode a surrogate pair, there isn't enough + // space to store the second one. So we have to have somewhere to + // put it until the next time. + // ----------------------------------------------------------------------- + XMLCh fSpareCh; +}; + +#endif