From 80b03acfa067ab2a4e4f724373211ff1320d33f2 Mon Sep 17 00:00:00 2001 From: Khaled Noaman <knoaman@apache.org> Date: Tue, 7 Dec 2004 19:45:43 +0000 Subject: [PATCH] An option to ignore a cached DTD grammar when a document contains an internal and external subset. git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@176232 13f79535-47bb-0310-9956-ffa450edef68 --- src/xercesc/internal/DGXMLScanner.cpp | 5 +++- src/xercesc/internal/IGXMLScanner.cpp | 7 +++-- src/xercesc/internal/XMLScanner.cpp | 4 ++- src/xercesc/internal/XMLScanner.hpp | 17 +++++++++++++ src/xercesc/parsers/DOMBuilderImpl.cpp | 12 +++++++-- src/xercesc/parsers/SAX2XMLReaderImpl.cpp | 10 ++++++++ src/xercesc/parsers/SAXParser.cpp | 14 ++++++++++ src/xercesc/parsers/SAXParser.hpp | 31 ++++++++++++++++++++++- src/xercesc/parsers/XercesDOMParser.cpp | 9 +++++++ src/xercesc/parsers/XercesDOMParser.hpp | 25 ++++++++++++++++++ src/xercesc/util/XMLUni.cpp | 15 +++++++++++ src/xercesc/util/XMLUni.hpp | 1 + 12 files changed, 143 insertions(+), 7 deletions(-) diff --git a/src/xercesc/internal/DGXMLScanner.cpp b/src/xercesc/internal/DGXMLScanner.cpp index 60d8b0cc2..f6cedef24 100644 --- a/src/xercesc/internal/DGXMLScanner.cpp +++ b/src/xercesc/internal/DGXMLScanner.cpp @@ -972,7 +972,10 @@ void DGXMLScanner::scanDocTypeDecl() InputSource* srcUsed=0; Janitor<InputSource> janSrc(srcUsed); - if (fUseCachedGrammar) + // If we had an internal subset and we're using the cached grammar, it + // means that the ignoreCachedDTD is set, so we ignore the cached + // grammar + if (fUseCachedGrammar && !hasIntSubset) { srcUsed = resolveSystemId(sysId, pubId); janSrc.reset(srcUsed); diff --git a/src/xercesc/internal/IGXMLScanner.cpp b/src/xercesc/internal/IGXMLScanner.cpp index faf8a25b6..47d09fa12 100644 --- a/src/xercesc/internal/IGXMLScanner.cpp +++ b/src/xercesc/internal/IGXMLScanner.cpp @@ -1266,7 +1266,7 @@ void IGXMLScanner::scanDocTypeDecl() Janitor<DTDElementDecl> rootDeclJanitor(rootDecl); 
rootDecl->setCreateReason(DTDElementDecl::AsRootElem); rootDecl->setExternalElemDeclaration(true); - if(!fUseCachedGrammar) + if(!fUseCachedGrammar) { // this will break getRootElemId on DTDGrammar when // cached grammars are in use, but @@ -1423,7 +1423,10 @@ void IGXMLScanner::scanDocTypeDecl() InputSource* srcUsed=0; Janitor<InputSource> janSrc(srcUsed); - if (fUseCachedGrammar) + // If we had an internal subset and we're using the cached grammar, it + // means that the ignoreCachedDTD is set, so we ignore the cached + // grammar + if (fUseCachedGrammar && !hasIntSubset) { srcUsed = resolveSystemId(sysId, pubId); janSrc.reset(srcUsed); diff --git a/src/xercesc/internal/XMLScanner.cpp b/src/xercesc/internal/XMLScanner.cpp index b46f4387c..d0f25980f 100644 --- a/src/xercesc/internal/XMLScanner.cpp +++ b/src/xercesc/internal/XMLScanner.cpp @@ -160,6 +160,7 @@ XMLScanner::XMLScanner(XMLValidator* const valToAdopt, , fNormalizeData(true) , fGenerateSyntheticAnnotations(false) , fValidateAnnotations(false) + , fIgnoreCachedDTD(false) , fErrorCount(0) , fEntityExpansionLimit(0) , fEntityExpansionCount(0) @@ -243,6 +244,7 @@ XMLScanner::XMLScanner( XMLDocumentHandler* const docHandler , fNormalizeData(true) , fGenerateSyntheticAnnotations(false) , fValidateAnnotations(false) + , fIgnoreCachedDTD(false) , fErrorCount(0) , fEntityExpansionLimit(0) , fEntityExpansionCount(0) @@ -1752,7 +1754,7 @@ void XMLScanner::checkInternalDTD(bool hasExtSubset if (fToCacheGrammar) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager); - if (fUseCachedGrammar && hasExtSubset ) + if (fUseCachedGrammar && hasExtSubset && !fIgnoreCachedDTD) { InputSource* sysIdSrc = resolveSystemId(sysId, pubId); Janitor<InputSource> janSysIdSrc(sysIdSrc); diff --git a/src/xercesc/internal/XMLScanner.hpp b/src/xercesc/internal/XMLScanner.hpp index 51b5e3d9a..2b55544b2 100644 --- a/src/xercesc/internal/XMLScanner.hpp +++ b/src/xercesc/internal/XMLScanner.hpp @@ -16,6 +16,10 @@ 
/* * $Log$ + * Revision 1.45 2004/12/07 19:45:43 knoaman + * An option to ignore a cached DTD grammar when a document contains an + * internal and external subset. + * * Revision 1.44 2004/12/03 19:40:30 cargilld * Change call to resolveEntity to pass in public id so that only one call to resolveEntity is needed (a follow-on to Alberto's fix). * @@ -546,6 +550,7 @@ public : bool getGenerateSyntheticAnnotations() const; bool getValidateAnnotations() const; + bool getIgnoreCachedDTD() const; // ----------------------------------------------------------------------- // Getter methods @@ -643,6 +648,7 @@ public : void setGenerateSyntheticAnnotations(const bool newValue); void setValidateAnnotations(const bool newValue); + void setIgnoredCachedDTD(const bool newValue); // ----------------------------------------------------------------------- // Mutator methods @@ -1010,6 +1016,7 @@ protected: bool fNormalizeData; bool fGenerateSyntheticAnnotations; bool fValidateAnnotations; + bool fIgnoreCachedDTD; int fErrorCount; unsigned int fEntityExpansionLimit; unsigned int fEntityExpansionCount; @@ -1366,6 +1373,11 @@ inline bool XMLScanner::getValidateAnnotations() const return fValidateAnnotations; } +inline bool XMLScanner::getIgnoreCachedDTD() const +{ + return fIgnoreCachedDTD; +} + // --------------------------------------------------------------------------- // XMLScanner: Setter methods // --------------------------------------------------------------------------- @@ -1535,6 +1547,11 @@ inline void XMLScanner::setInputBufferSize(const size_t bufferSize) fCDataBuf.setFullHandler(this, fBufferSize); } +inline void XMLScanner::setIgnoredCachedDTD(const bool newValue) +{ + fIgnoreCachedDTD = newValue; +} + // --------------------------------------------------------------------------- // XMLScanner: Mutator methods // --------------------------------------------------------------------------- diff --git a/src/xercesc/parsers/DOMBuilderImpl.cpp 
b/src/xercesc/parsers/DOMBuilderImpl.cpp index 7a69254f9..a978290ba 100644 --- a/src/xercesc/parsers/DOMBuilderImpl.cpp +++ b/src/xercesc/parsers/DOMBuilderImpl.cpp @@ -243,7 +243,10 @@ void DOMBuilderImpl::setFeature(const XMLCh* const name, const bool state) { getScanner()->setIdentityConstraintChecking(state); } - + else if (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0) + { + getScanner()->setIgnoredCachedDTD(state); + } else { throw DOMException(DOMException::NOT_FOUND_ERR, 0, getMemoryManager()); } @@ -342,6 +345,10 @@ bool DOMBuilderImpl::getFeature(const XMLCh* const name) const { return getScanner()->getValidateAnnotations(); } + else if (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0) + { + return getScanner()->getIgnoreCachedDTD(); + } else { throw DOMException(DOMException::NOT_FOUND_ERR, 0, getMemoryManager()); } @@ -365,7 +372,8 @@ bool DOMBuilderImpl::canSetFeature(const XMLCh* const name, const bool state) co (XMLString::compareIString(name, XMLUni::fgXercesDOMHasPSVIInfo) == 0) || (XMLString::compareIString(name, XMLUni::fgXercesValidateAnnotations) == 0) || (XMLString::compareIString(name, XMLUni::fgXercesGenerateSyntheticAnnotations) == 0) || - (XMLString::compareIString(name, XMLUni::fgXercesIdentityConstraintChecking) == 0) + (XMLString::compareIString(name, XMLUni::fgXercesIdentityConstraintChecking) == 0) || + (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0) ) { return true; } diff --git a/src/xercesc/parsers/SAX2XMLReaderImpl.cpp b/src/xercesc/parsers/SAX2XMLReaderImpl.cpp index d439c4c93..e0a3709cb 100644 --- a/src/xercesc/parsers/SAX2XMLReaderImpl.cpp +++ b/src/xercesc/parsers/SAX2XMLReaderImpl.cpp @@ -16,6 +16,10 @@ /* * $Log$ + * Revision 1.41 2004/12/07 19:45:43 knoaman + * An option to ignore a cached DTD grammar when a document contains an + * internal and external subset. 
+ * * Revision 1.40 2004/10/04 11:30:51 amassari * As start/endPrefixMapping doesn't use the XMLBufMgr variable, we need only one XMLBuffer * @@ -1573,6 +1577,10 @@ void SAX2XMLReaderImpl::setFeature(const XMLCh* const name, const bool value) { fScanner->setValidateAnnotations(value); } + else if (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0) + { + fScanner->setIgnoredCachedDTD(value); + } else throw SAXNotRecognizedException("Unknown Feature", fMemoryManager); } @@ -1611,6 +1619,8 @@ bool SAX2XMLReaderImpl::getFeature(const XMLCh* const name) const return fScanner->getGenerateSyntheticAnnotations(); else if (XMLString::compareIString(name, XMLUni::fgXercesValidateAnnotations) == 0) return fScanner->getValidateAnnotations(); + else if (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0) + return fScanner->getIgnoreCachedDTD(); else throw SAXNotRecognizedException("Unknown Feature", fMemoryManager); diff --git a/src/xercesc/parsers/SAXParser.cpp b/src/xercesc/parsers/SAXParser.cpp index 1395d1ef0..e00bff31b 100644 --- a/src/xercesc/parsers/SAXParser.cpp +++ b/src/xercesc/parsers/SAXParser.cpp @@ -16,6 +16,10 @@ /* * $Log$ + * Revision 1.37 2004/12/07 19:45:43 knoaman + * An option to ignore a cached DTD grammar when a document contains an + * internal and external subset. 
+ * * Revision 1.36 2004/09/29 19:00:29 peiyongz * [jira1207] --patch from Dan Rosen * @@ -568,6 +572,11 @@ unsigned int SAXParser::getSrcOffset() const return fScanner->getSrcOffset(); } +bool SAXParser::getIgnoreCachedDTD() const +{ + return fScanner->getIgnoreCachedDTD(); +} + // --------------------------------------------------------------------------- // SAXParser: Setter methods // --------------------------------------------------------------------------- @@ -704,6 +713,11 @@ void SAXParser::setInputBufferSize(const size_t bufferSize) fScanner->setInputBufferSize(bufferSize); } +void SAXParser::setIgnoreCachedDTD(const bool newValue) +{ + fScanner->setIgnoredCachedDTD(newValue); +} + // --------------------------------------------------------------------------- // SAXParser: Overrides of the SAX Parser interface // --------------------------------------------------------------------------- diff --git a/src/xercesc/parsers/SAXParser.hpp b/src/xercesc/parsers/SAXParser.hpp index 83f06962f..c194dc776 100644 --- a/src/xercesc/parsers/SAXParser.hpp +++ b/src/xercesc/parsers/SAXParser.hpp @@ -16,6 +16,10 @@ /* * $Log$ + * Revision 1.37 2004/12/07 19:45:43 knoaman + * An option to ignore a cached DTD grammar when a document contains an + * internal and external subset. + * * Revision 1.36 2004/09/29 19:28:12 cargilld * Mark SAXParser as deprecated. * @@ -680,6 +684,15 @@ public : */ bool getValidateAnnotations() const; + /** Get the 'ignore cached DTD grammar' flag + * + * @return true, if the parser is currently configured to + * ignore cached DTD, false otherwise. + * + * @see #setIgnoreCachedDTD + */ + bool getIgnoreCachedDTD() const; + //@} @@ -948,7 +961,7 @@ public : * instead of building the grammar from scratch, to validate XML * documents. * - * If the 'Grammar caching' flag is set to true, this mehod ignore the + * If the 'Grammar caching' flag is set to true, this method ignores the * value passed in. * * The parser's default state is: false. 
@@ -1008,6 +1021,22 @@ public : */ void setInputBufferSize(const size_t bufferSize); + /** Set the 'ignore cached DTD grammar' flag + * + * This method gives users the option to ignore a cached DTD grammar, when + * an XML document contains both an internal and external DTD, and the 'use + * cached grammar in parse' option is enabled. Currently, we do not allow + * using cached DTD grammar when an internal subset is present in the + * document. This option will only affect the behavior of the parser when + * an internal and external DTD both exist in a document (i.e. it has no effect + * if the document has no internal subset). + * + * The parser's default state is: false. + * + * @param newValue The state to set + */ + void setIgnoreCachedDTD(const bool newValue); + //@} diff --git a/src/xercesc/parsers/XercesDOMParser.cpp b/src/xercesc/parsers/XercesDOMParser.cpp index 416e09248..8e54f7cbd 100644 --- a/src/xercesc/parsers/XercesDOMParser.cpp +++ b/src/xercesc/parsers/XercesDOMParser.cpp @@ -98,6 +98,11 @@ unsigned int XercesDOMParser::getSrcOffset() const return getScanner()->getSrcOffset(); } +bool XercesDOMParser::getIgnoreCachedDTD() const +{ + return getScanner()->getIgnoreCachedDTD(); +} + // --------------------------------------------------------------------------- // XercesDOMParser: Setter methods // --------------------------------------------------------------------------- @@ -153,6 +158,10 @@ void XercesDOMParser::useCachedGrammarInParse(const bool newState) getScanner()->useCachedGrammarInParse(newState); } +void XercesDOMParser::setIgnoreCachedDTD(const bool newValue) +{ + getScanner()->setIgnoredCachedDTD(newValue); +} // --------------------------------------------------------------------------- // XercesDOMParser: Utilities diff --git a/src/xercesc/parsers/XercesDOMParser.hpp b/src/xercesc/parsers/XercesDOMParser.hpp index 22f1f2d16..88bbe2310 100644 --- a/src/xercesc/parsers/XercesDOMParser.hpp +++ b/src/xercesc/parsers/XercesDOMParser.hpp @@ -200,6 +200,15
@@ public : */ unsigned int getSrcOffset() const; + /** Get the 'ignore cached DTD grammar' flag + * + * @return true, if the parser is currently configured to + * ignore cached DTD, false otherwise. + * + * @see #setIgnoreCachedDTD + */ + bool getIgnoreCachedDTD() const; + //@} @@ -301,6 +310,22 @@ public : */ void useCachedGrammarInParse(const bool newState); + /** Set the 'ignore cached DTD grammar' flag + * + * This method gives users the option to ignore a cached DTD grammar, when + * an XML document contains both an internal and external DTD, and the 'use + * cached grammar in parse' option is enabled. Currently, we do not allow + * using cached DTD grammar when an internal subset is present in the + * document. This option will only affect the behavior of the parser when + * an internal and external DTD both exist in a document (i.e. it has no effect + * if the document has no internal subset). + * + * The parser's default state is: false. + * + * @param newValue The state to set + */ + void setIgnoreCachedDTD(const bool newValue); + //@} // ----------------------------------------------------------------------- diff --git a/src/xercesc/util/XMLUni.cpp b/src/xercesc/util/XMLUni.cpp index ab0823015..b2382a368 100644 --- a/src/xercesc/util/XMLUni.cpp +++ b/src/xercesc/util/XMLUni.cpp @@ -1258,6 +1258,21 @@ const XMLCh XMLUni::fgXercesUseCachedGrammarInParse[] = , chLatin_s, chLatin_e, chNull }; +//Xerces: http://apache.org/xml/features/validation/ignoreCachedDTD +const XMLCh XMLUni::fgXercesIgnoreCachedDTD[] = +{ + chLatin_h, chLatin_t, chLatin_t, chLatin_p, chColon, chForwardSlash + , chForwardSlash, chLatin_a, chLatin_p, chLatin_a, chLatin_c, chLatin_h + , chLatin_e, chPeriod, chLatin_o, chLatin_r, chLatin_g, chForwardSlash + , chLatin_x, chLatin_m, chLatin_l, chForwardSlash, chLatin_f, chLatin_e + , chLatin_a, chLatin_t, chLatin_u, chLatin_r, chLatin_e, chLatin_s + , chForwardSlash, chLatin_v, chLatin_a, chLatin_l, chLatin_i, chLatin_d + , chLatin_a, chLatin_t,
chLatin_i, chLatin_o, chLatin_n, chForwardSlash + , chLatin_i, chLatin_g, chLatin_n, chLatin_o, chLatin_r, chLatin_e + , chLatin_C, chLatin_a, chLatin_c, chLatin_h, chLatin_e, chLatin_d + , chLatin_D, chLatin_T, chLatin_D, chNull +}; + //Introduced in DOM Level 3 const XMLCh XMLUni::fgDOMCanonicalForm[] = diff --git a/src/xercesc/util/XMLUni.hpp b/src/xercesc/util/XMLUni.hpp index db029d422..001d33108 100644 --- a/src/xercesc/util/XMLUni.hpp +++ b/src/xercesc/util/XMLUni.hpp @@ -223,6 +223,7 @@ public : static const XMLCh fgXercesDOMHasPSVIInfo[]; static const XMLCh fgXercesGenerateSyntheticAnnotations[]; static const XMLCh fgXercesValidateAnnotations[]; + static const XMLCh fgXercesIgnoreCachedDTD[]; // SAX2 features/properties names -- GitLab