/* * The Apache Software License, Version 1.1 * * Copyright (c) 1999-2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache\@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation, and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.ibm.com . For more information * on the Apache Software Foundation, please see * <http://www.apache.org/>. */ /* * $Id$ */ // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- #include <xercesc/util/Janitor.hpp> #include <xercesc/util/RefHashTableOf.hpp> #include <xercesc/util/RefVectorOf.hpp> #include <xercesc/util/XML88591Transcoder.hpp> #include <xercesc/util/XMLASCIITranscoder.hpp> #include <xercesc/util/XMLChTranscoder.hpp> #include <xercesc/util/XMLEBCDICTranscoder.hpp> #include <xercesc/util/XMLIBM1140Transcoder.hpp> #include <xercesc/util/XMLUCS4Transcoder.hpp> #include <xercesc/util/XMLUTF8Transcoder.hpp> #include <xercesc/util/XMLUTF16Transcoder.hpp> #include <xercesc/util/XMLWin1252Transcoder.hpp> #include <xercesc/util/XMLUniDefs.hpp> #include <xercesc/util/XMLUni.hpp> #include <xercesc/util/TransENameMap.hpp> #include <xercesc/util/EncodingValidator.hpp> #include <xercesc/util/XMLRegisterCleanup.hpp> #include <xercesc/util/PlatformUtils.hpp> XERCES_CPP_NAMESPACE_BEGIN // --------------------------------------------------------------------------- // Local, static data // // gMappings // This is a hash table of ENameMap objects. It is created and filled // in when the platform init calls our initTransService() method. // // gMappingsRecognizer // This is an array of ENameMap objects, predefined for those // already recognized by XMLRecognizer::Encodings. // // gStrictIANAEncoding // A flag to control whether strict IANA encoding names checking should // be done // // --------------------------------------------------------------------------- static RefHashTableOf<ENameMap>* gMappings = 0; static RefVectorOf<ENameMap>* gMappingsRecognizer = 0; static bool gStrictIANAEncoding = false; // ----------------------------------------------------------------------- // Notification that lazy data has been deleted // ----------------------------------------------------------------------- static void reinitMappings() { delete gMappings; // The contents of the gMappings hash table are owned by gMappings = 0; // the it, and so will be deleted by gMapping's destructor. } static void reinitMappingsRecognizer() { delete gMappingsRecognizer; gMappingsRecognizer = 0; } // --------------------------------------------------------------------------- // XMLTransService: Constructors and destructor // --------------------------------------------------------------------------- XMLTransService::XMLTransService() { static XMLRegisterCleanup mappingsCleanup; static XMLRegisterCleanup mappingsRecognizerCleanup; if (!gMappings) { RefHashTableOf<ENameMap>* t = new RefHashTableOf<ENameMap>(103); if (XMLPlatformUtils::compareAndSwap((void **)&gMappings, t, 0) != 0) { delete t; } else { mappingsCleanup.registerCleanup(reinitMappings); } } if (!gMappingsRecognizer) { RefVectorOf<ENameMap>* t = new RefVectorOf<ENameMap>(XMLRecognizer::Encodings_Count); if (XMLPlatformUtils::compareAndSwap((void **)&gMappingsRecognizer, t, 0) != 0) { delete t; } else { mappingsRecognizerCleanup.registerCleanup(reinitMappingsRecognizer); } } } XMLTransService::~XMLTransService() { } // --------------------------------------------------------------------------- // Allow user specific encodings to be added to the mappings table. // Should be called after platform init // --------------------------------------------------------------------------- void XMLTransService::addEncoding(const XMLCh* const encoding, ENameMap* const ownMapping) { gMappings->put((void *) encoding, ownMapping); } // --------------------------------------------------------------------------- // XMLTransService: Non-virtual API // --------------------------------------------------------------------------- XMLTranscoder* XMLTransService::makeNewTranscoderFor( const char* const encodingName , XMLTransService::Codes& resValue , const unsigned int blockSize) { XMLCh* tmpName = XMLString::transcode(encodingName); ArrayJanitor<XMLCh> janName(tmpName); return makeNewTranscoderFor(tmpName, resValue, blockSize); } XMLTranscoder* XMLTransService::makeNewTranscoderFor( const XMLCh* const encodingName , XMLTransService::Codes& resValue , const unsigned int blockSize) { // // If strict IANA encoding flag is set, validate encoding name // if (gStrictIANAEncoding) { if (!EncodingValidator::instance()->isValidEncoding(encodingName)) { resValue = XMLTransService::UnsupportedEncoding; return 0; } } // // First try to find it in our list of mappings to intrinsically // supported encodings. We have to upper case the passed encoding // name because we use a hash table and we stored all our mappings // in all uppercase. // const unsigned int bufSize = 2048; XMLCh upBuf[bufSize + 1]; if (!XMLString::copyNString(upBuf, encodingName, bufSize)) { resValue = XMLTransService::InternalFailure; return 0; } XMLString::upperCase(upBuf); ENameMap* ourMapping = gMappings->get(upBuf); // If we found it, then call the factory method for it if (ourMapping) { XMLTranscoder* temp = ourMapping->makeNew(blockSize); resValue = temp ? XMLTransService::Ok : XMLTransService::InternalFailure; return temp; } // // It wasn't an intrinsic and it wasn't disallowed, so pass it on // to the trans service to see if he can make anything of it. // XMLTranscoder* temp = makeNewXMLTranscoder(encodingName, resValue, blockSize); // if successful, set resValue to OK // if failed, the makeNewXMLTranscoder has already set the proper failing resValue if (temp) resValue = XMLTransService::Ok; return temp; } XMLTranscoder* XMLTransService::makeNewTranscoderFor( XMLRecognizer::Encodings encodingEnum , XMLTransService::Codes& resValue , const unsigned int blockSize) { // // We can only make transcoder if the passed encodingEnum is under this range // if (encodingEnum < XMLRecognizer::Encodings_Min || encodingEnum > XMLRecognizer::Encodings_Max) { resValue = XMLTransService::InternalFailure; return 0; } ENameMap* ourMapping = gMappingsRecognizer->elementAt(encodingEnum); // If we found it, then call the factory method for it if (ourMapping) { XMLTranscoder* temp = ourMapping->makeNew(blockSize); resValue = temp ? XMLTransService::Ok : XMLTransService::InternalFailure; return temp; } else { XMLTranscoder* temp = makeNewXMLTranscoder(XMLRecognizer::nameForEncoding(encodingEnum), resValue, blockSize); // if successful, set resValue to OK // if failed, the makeNewXMLTranscoder has already set the proper failing resValue if (temp) resValue = XMLTransService::Ok; return temp; } } // --------------------------------------------------------------------------- // XMLTransTransService: Hidden Init Method // // This is called by platform utils during startup. // --------------------------------------------------------------------------- void XMLTransService::initTransService() { // // A stupid way to increment the fCurCount inside the RefVectorOf // for (unsigned int i = 0; i < XMLRecognizer::Encodings_Count; i++) gMappingsRecognizer->addElement(0); // // Add in the magical mapping for the native XMLCh transcoder. This // is used for internal entities. // gMappingsRecognizer->setElementAt(new ENameMapFor<XMLChTranscoder>(XMLUni::fgXMLChEncodingString), XMLRecognizer::XERCES_XMLCH); gMappings->put((void*)XMLUni::fgXMLChEncodingString, new ENameMapFor<XMLChTranscoder>(XMLUni::fgXMLChEncodingString)); // // Add in our mappings for ASCII. // gMappingsRecognizer->setElementAt(new ENameMapFor<XMLASCIITranscoder>(XMLUni::fgUSASCIIEncodingString), XMLRecognizer::US_ASCII); gMappings->put((void*)XMLUni::fgUSASCIIEncodingString, new ENameMapFor<XMLASCIITranscoder>(XMLUni::fgUSASCIIEncodingString)); gMappings->put((void*)XMLUni::fgUSASCIIEncodingString2, new ENameMapFor<XMLASCIITranscoder>(XMLUni::fgUSASCIIEncodingString2)); gMappings->put((void*)XMLUni::fgUSASCIIEncodingString3, new ENameMapFor<XMLASCIITranscoder>(XMLUni::fgUSASCIIEncodingString3)); gMappings->put((void*)XMLUni::fgUSASCIIEncodingString4, new ENameMapFor<XMLASCIITranscoder>(XMLUni::fgUSASCIIEncodingString4)); // // Add in our mappings for UTF-8 // gMappingsRecognizer->setElementAt(new ENameMapFor<XMLUTF8Transcoder>(XMLUni::fgUTF8EncodingString), XMLRecognizer::UTF_8); gMappings->put((void*)XMLUni::fgUTF8EncodingString, new ENameMapFor<XMLUTF8Transcoder>(XMLUni::fgUTF8EncodingString)); gMappings->put((void*)XMLUni::fgUTF8EncodingString2, new ENameMapFor<XMLUTF8Transcoder>(XMLUni::fgUTF8EncodingString2)); // // Add in our mappings for Latin1 // gMappings->put((void*)XMLUni::fgISO88591EncodingString, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString)); gMappings->put((void*)XMLUni::fgISO88591EncodingString2, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString2)); gMappings->put((void*)XMLUni::fgISO88591EncodingString3, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString3)); gMappings->put((void*)XMLUni::fgISO88591EncodingString4, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString4)); gMappings->put((void*)XMLUni::fgISO88591EncodingString5, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString5)); gMappings->put((void*)XMLUni::fgISO88591EncodingString6, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString6)); gMappings->put((void*)XMLUni::fgISO88591EncodingString7, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString7)); gMappings->put((void*)XMLUni::fgISO88591EncodingString8, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString8)); gMappings->put((void*)XMLUni::fgISO88591EncodingString9, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString9)); gMappings->put((void*)XMLUni::fgISO88591EncodingString10, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString10)); gMappings->put((void*)XMLUni::fgISO88591EncodingString11, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString11)); gMappings->put((void*)XMLUni::fgISO88591EncodingString12, new ENameMapFor<XML88591Transcoder>(XMLUni::fgISO88591EncodingString12)); // // Add in our mappings for UTF-16 and UCS-4, little endian // bool swapped = false; #if defined(ENDIANMODE_BIG) swapped = true; #endif gMappingsRecognizer->setElementAt(new EEndianNameMapFor<XMLUTF16Transcoder>(XMLUni::fgUTF16LEncodingString, swapped), XMLRecognizer::UTF_16L); gMappings->put ( (void*)XMLUni::fgUTF16LEncodingString, new EEndianNameMapFor<XMLUTF16Transcoder> ( XMLUni::fgUTF16LEncodingString , swapped ) ); gMappings->put ( (void*)XMLUni::fgUTF16LEncodingString2, new EEndianNameMapFor<XMLUTF16Transcoder> ( XMLUni::fgUTF16LEncodingString2 , swapped ) ); gMappingsRecognizer->setElementAt(new EEndianNameMapFor<XMLUCS4Transcoder>(XMLUni::fgUCS4LEncodingString, swapped), XMLRecognizer::UCS_4L); gMappings->put ( (void*)XMLUni::fgUCS4LEncodingString, new EEndianNameMapFor<XMLUCS4Transcoder> ( XMLUni::fgUCS4LEncodingString , swapped ) ); gMappings->put ( (void*)XMLUni::fgUCS4LEncodingString2, new EEndianNameMapFor<XMLUCS4Transcoder> ( XMLUni::fgUCS4LEncodingString2 , swapped ) ); // // Add in our mappings for UTF-16 and UCS-4, big endian // swapped = false; #if defined(ENDIANMODE_LITTLE) swapped = true; #endif gMappingsRecognizer->setElementAt(new EEndianNameMapFor<XMLUTF16Transcoder>(XMLUni::fgUTF16BEncodingString, swapped), XMLRecognizer::UTF_16B); gMappings->put ( (void*)XMLUni::fgUTF16BEncodingString, new EEndianNameMapFor<XMLUTF16Transcoder> ( XMLUni::fgUTF16BEncodingString , swapped ) ); gMappings->put ( (void*)XMLUni::fgUTF16BEncodingString2, new EEndianNameMapFor<XMLUTF16Transcoder> ( XMLUni::fgUTF16BEncodingString2 , swapped ) ); gMappingsRecognizer->setElementAt(new EEndianNameMapFor<XMLUCS4Transcoder>(XMLUni::fgUCS4BEncodingString, swapped), XMLRecognizer::UCS_4B); gMappings->put ( (void*)XMLUni::fgUCS4BEncodingString, new EEndianNameMapFor<XMLUCS4Transcoder> ( XMLUni::fgUCS4BEncodingString , swapped ) ); gMappings->put ( (void*)XMLUni::fgUCS4BEncodingString2, new EEndianNameMapFor<XMLUCS4Transcoder> ( XMLUni::fgUCS4BEncodingString2 , swapped ) ); // // Add in our mappings for UTF-16 and UCS-4 which does not indicate endian // assumes the same endian encoding as the OS // gMappings->put ( (void*)XMLUni::fgUTF16EncodingString, new EEndianNameMapFor<XMLUTF16Transcoder> ( XMLUni::fgUTF16EncodingString , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString2, new EEndianNameMapFor<XMLUTF16Transcoder> ( XMLUni::fgUTF16EncodingString2 , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString3, new EEndianNameMapFor<XMLUTF16Transcoder> ( XMLUni::fgUTF16EncodingString3 , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString4, new EEndianNameMapFor<XMLUTF16Transcoder> ( XMLUni::fgUTF16EncodingString4 , false ) ); gMappings->put ( (void*)XMLUni::fgUCS4EncodingString, new EEndianNameMapFor<XMLUCS4Transcoder> ( XMLUni::fgUCS4EncodingString , false ) ); gMappings->put ( (void*)XMLUni::fgUCS4EncodingString2, new EEndianNameMapFor<XMLUCS4Transcoder> ( XMLUni::fgUCS4EncodingString2 , false ) ); gMappings->put ( (void*)XMLUni::fgUCS4EncodingString3, new EEndianNameMapFor<XMLUCS4Transcoder> ( XMLUni::fgUCS4EncodingString3 , false ) ); // // Add in our mappings for IBM037, and the one alias we support for // it, which is EBCDIC-CP-US. // gMappingsRecognizer->setElementAt(new ENameMapFor<XMLEBCDICTranscoder>(XMLUni::fgEBCDICEncodingString), XMLRecognizer::EBCDIC); gMappings->put((void*)XMLUni::fgIBM037EncodingString, new ENameMapFor<XMLEBCDICTranscoder>(XMLUni::fgIBM037EncodingString)); gMappings->put((void*)XMLUni::fgIBM037EncodingString2, new ENameMapFor<XMLEBCDICTranscoder>(XMLUni::fgIBM037EncodingString2)); // // Add in our mappings for IBM037 with Euro update, i.e. IBM1140. It // has alias IBM01140, the one suggested by IANA // gMappings->put((void*)XMLUni::fgIBM1140EncodingString, new ENameMapFor<XMLIBM1140Transcoder>(XMLUni::fgIBM1140EncodingString)); gMappings->put((void*)XMLUni::fgIBM1140EncodingString2, new ENameMapFor<XMLIBM1140Transcoder>(XMLUni::fgIBM1140EncodingString2)); gMappings->put((void*)XMLUni::fgIBM1140EncodingString3, new ENameMapFor<XMLIBM1140Transcoder>(XMLUni::fgIBM1140EncodingString3)); gMappings->put((void*)XMLUni::fgIBM1140EncodingString4, new ENameMapFor<XMLIBM1140Transcoder>(XMLUni::fgIBM1140EncodingString4)); // // Add in our mappings for Windows-1252. We don't have any aliases for // this one, so there is just one mapping. // gMappings->put((void*)XMLUni::fgWin1252EncodingString, new ENameMapFor<XMLWin1252Transcoder>(XMLUni::fgWin1252EncodingString)); } // --------------------------------------------------------------------------- // XMLTransService: IANA encoding setting // --------------------------------------------------------------------------- void XMLTransService::strictIANAEncoding(const bool newState) { gStrictIANAEncoding = newState; } bool XMLTransService::isStrictIANAEncoding() { return gStrictIANAEncoding; } // --------------------------------------------------------------------------- // XMLTranscoder: Public Destructor // --------------------------------------------------------------------------- XMLTranscoder::~XMLTranscoder() { delete [] fEncodingName; } // --------------------------------------------------------------------------- // XMLTranscoder: Hidden Constructors // --------------------------------------------------------------------------- XMLTranscoder::XMLTranscoder(const XMLCh* const encodingName , const unsigned int blockSize) : fEncodingName(0) , fBlockSize(blockSize) { fEncodingName = XMLString::replicate(encodingName); } // --------------------------------------------------------------------------- // XMLTranscoder: Protected helpers // --------------------------------------------------------------------------- void XMLTranscoder::checkBlockSize(const unsigned int toCheck) { // if (toCheck > fBlockSize) // ThrowXML(TranscodingException, XMLExcepts::Trans_BadBlockSize); } // --------------------------------------------------------------------------- // XMLLCPTranscoder: Public Destructor // --------------------------------------------------------------------------- XMLLCPTranscoder::XMLLCPTranscoder() { } // --------------------------------------------------------------------------- // XMLLCPTranscoder: Hidden Constructors // --------------------------------------------------------------------------- XMLLCPTranscoder::~XMLLCPTranscoder() { } XERCES_CPP_NAMESPACE_END