From 80b03acfa067ab2a4e4f724373211ff1320d33f2 Mon Sep 17 00:00:00 2001
From: Khaled Noaman <knoaman@apache.org>
Date: Tue, 7 Dec 2004 19:45:43 +0000
Subject: [PATCH] An option to ignore a cached DTD grammar when a document
 contains an internal and external subset.

git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@176232 13f79535-47bb-0310-9956-ffa450edef68
---
 src/xercesc/internal/DGXMLScanner.cpp     |  5 +++-
 src/xercesc/internal/IGXMLScanner.cpp     |  7 +++--
 src/xercesc/internal/XMLScanner.cpp       |  4 ++-
 src/xercesc/internal/XMLScanner.hpp       | 17 +++++++++++++
 src/xercesc/parsers/DOMBuilderImpl.cpp    | 12 +++++++--
 src/xercesc/parsers/SAX2XMLReaderImpl.cpp | 10 ++++++++
 src/xercesc/parsers/SAXParser.cpp         | 14 ++++++++++
 src/xercesc/parsers/SAXParser.hpp         | 31 ++++++++++++++++++++++-
 src/xercesc/parsers/XercesDOMParser.cpp   |  9 +++++++
 src/xercesc/parsers/XercesDOMParser.hpp   | 25 ++++++++++++++++++
 src/xercesc/util/XMLUni.cpp               | 15 +++++++++++
 src/xercesc/util/XMLUni.hpp               |  1 +
 12 files changed, 143 insertions(+), 7 deletions(-)

diff --git a/src/xercesc/internal/DGXMLScanner.cpp b/src/xercesc/internal/DGXMLScanner.cpp
index 60d8b0cc2..f6cedef24 100644
--- a/src/xercesc/internal/DGXMLScanner.cpp
+++ b/src/xercesc/internal/DGXMLScanner.cpp
@@ -972,7 +972,10 @@ void DGXMLScanner::scanDocTypeDecl()
 
         InputSource* srcUsed=0;
         Janitor<InputSource> janSrc(srcUsed);
-        if (fUseCachedGrammar)
+        // An internal subset combined with fUseCachedGrammar implies that the
+        // ignoreCachedDTD option is set, so in that case we ignore the cached
+        // grammar.
+        if (fUseCachedGrammar && !hasIntSubset)
         {
             srcUsed = resolveSystemId(sysId, pubId);
             janSrc.reset(srcUsed);
diff --git a/src/xercesc/internal/IGXMLScanner.cpp b/src/xercesc/internal/IGXMLScanner.cpp
index faf8a25b6..47d09fa12 100644
--- a/src/xercesc/internal/IGXMLScanner.cpp
+++ b/src/xercesc/internal/IGXMLScanner.cpp
@@ -1266,7 +1266,7 @@ void IGXMLScanner::scanDocTypeDecl()
     Janitor<DTDElementDecl> rootDeclJanitor(rootDecl);    
     rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
     rootDecl->setExternalElemDeclaration(true);
-    if(!fUseCachedGrammar) 
+    if(!fUseCachedGrammar)
     {
         // this will break getRootElemId on DTDGrammar when
         // cached grammars are in use, but 
@@ -1423,7 +1423,10 @@ void IGXMLScanner::scanDocTypeDecl()
 
         InputSource* srcUsed=0;
         Janitor<InputSource> janSrc(srcUsed);
-        if (fUseCachedGrammar)
+        // An internal subset combined with fUseCachedGrammar implies that the
+        // ignoreCachedDTD option is set, so in that case we ignore the cached
+        // grammar.
+        if (fUseCachedGrammar && !hasIntSubset)
         {
             srcUsed = resolveSystemId(sysId, pubId);
             janSrc.reset(srcUsed);
diff --git a/src/xercesc/internal/XMLScanner.cpp b/src/xercesc/internal/XMLScanner.cpp
index b46f4387c..d0f25980f 100644
--- a/src/xercesc/internal/XMLScanner.cpp
+++ b/src/xercesc/internal/XMLScanner.cpp
@@ -160,6 +160,7 @@ XMLScanner::XMLScanner(XMLValidator* const valToAdopt,
     , fNormalizeData(true)
     , fGenerateSyntheticAnnotations(false)
     , fValidateAnnotations(false)
+    , fIgnoreCachedDTD(false)
     , fErrorCount(0)
     , fEntityExpansionLimit(0)
     , fEntityExpansionCount(0)
@@ -243,6 +244,7 @@ XMLScanner::XMLScanner( XMLDocumentHandler* const  docHandler
     , fNormalizeData(true)
     , fGenerateSyntheticAnnotations(false)
     , fValidateAnnotations(false)
+    , fIgnoreCachedDTD(false)
     , fErrorCount(0)
     , fEntityExpansionLimit(0)
     , fEntityExpansionCount(0)
@@ -1752,7 +1754,7 @@ void XMLScanner::checkInternalDTD(bool hasExtSubset
     if (fToCacheGrammar)
         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);
 
-    if (fUseCachedGrammar && hasExtSubset )
+    if (fUseCachedGrammar && hasExtSubset && !fIgnoreCachedDTD)
     {
         InputSource* sysIdSrc = resolveSystemId(sysId, pubId);
         Janitor<InputSource> janSysIdSrc(sysIdSrc);
diff --git a/src/xercesc/internal/XMLScanner.hpp b/src/xercesc/internal/XMLScanner.hpp
index 51b5e3d9a..2b55544b2 100644
--- a/src/xercesc/internal/XMLScanner.hpp
+++ b/src/xercesc/internal/XMLScanner.hpp
@@ -16,6 +16,10 @@
 
 /*
  * $Log$
+ * Revision 1.45  2004/12/07 19:45:43  knoaman
+ * An option to ignore a cached DTD grammar when a document contains an
+ * internal and external subset.
+ *
  * Revision 1.44  2004/12/03 19:40:30  cargilld
  * Change call to resolveEntity to pass in public id so that only one call to resolveEntity is needed (a follow-on to Alberto's fix).
  *
@@ -546,6 +550,7 @@ public :
 
     bool getGenerateSyntheticAnnotations() const;
     bool getValidateAnnotations() const;
+    bool getIgnoreCachedDTD() const;
 
     // -----------------------------------------------------------------------
     //  Getter methods
@@ -643,6 +648,7 @@ public :
 
     void setGenerateSyntheticAnnotations(const bool newValue);
     void setValidateAnnotations(const bool newValue);
+    void setIgnoredCachedDTD(const bool newValue);
 
     // -----------------------------------------------------------------------
     //  Mutator methods
@@ -1010,6 +1016,7 @@ protected:
     bool                        fNormalizeData;
     bool                        fGenerateSyntheticAnnotations;
     bool                        fValidateAnnotations;
+    bool                        fIgnoreCachedDTD;
     int                         fErrorCount;
     unsigned int                fEntityExpansionLimit;
     unsigned int                fEntityExpansionCount;
@@ -1366,6 +1373,11 @@ inline bool XMLScanner::getValidateAnnotations() const
     return fValidateAnnotations;
 }
 
+inline bool XMLScanner::getIgnoreCachedDTD() const
+{
+    return fIgnoreCachedDTD;    // set via setIgnoredCachedDTD(); the constructors initialize it to false
+}
+
 // ---------------------------------------------------------------------------
 //  XMLScanner: Setter methods
 // ---------------------------------------------------------------------------
@@ -1535,6 +1547,11 @@ inline void XMLScanner::setInputBufferSize(const size_t bufferSize)
     fCDataBuf.setFullHandler(this, fBufferSize);
 }
 
+inline void XMLScanner::setIgnoredCachedDTD(const bool newValue)
+{   // NOTE(review): spelled "Ignored" here, unlike getIgnoreCachedDTD() and the parsers' setIgnoreCachedDTD().
+    fIgnoreCachedDTD = newValue;    // consulted by checkInternalDTD()
+}
+
 // ---------------------------------------------------------------------------
 //  XMLScanner: Mutator methods
 // ---------------------------------------------------------------------------
diff --git a/src/xercesc/parsers/DOMBuilderImpl.cpp b/src/xercesc/parsers/DOMBuilderImpl.cpp
index 7a69254f9..a978290ba 100644
--- a/src/xercesc/parsers/DOMBuilderImpl.cpp
+++ b/src/xercesc/parsers/DOMBuilderImpl.cpp
@@ -243,7 +243,10 @@ void DOMBuilderImpl::setFeature(const XMLCh* const name, const bool state)
     {
         getScanner()->setIdentityConstraintChecking(state);
     }
-
+    else if (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0)
+    {
+        getScanner()->setIgnoredCachedDTD(state);
+    }
     else {
         throw DOMException(DOMException::NOT_FOUND_ERR, 0, getMemoryManager());
     }
@@ -342,6 +345,10 @@ bool DOMBuilderImpl::getFeature(const XMLCh* const name) const
     {
         return getScanner()->getValidateAnnotations();
     }
+    else if (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0)
+    {
+        return getScanner()->getIgnoreCachedDTD();
+    }
     else {
         throw DOMException(DOMException::NOT_FOUND_ERR, 0, getMemoryManager());
     }
@@ -365,7 +372,8 @@ bool DOMBuilderImpl::canSetFeature(const XMLCh* const name, const bool state) co
         (XMLString::compareIString(name, XMLUni::fgXercesDOMHasPSVIInfo) == 0) ||
         (XMLString::compareIString(name, XMLUni::fgXercesValidateAnnotations) == 0) ||
         (XMLString::compareIString(name, XMLUni::fgXercesGenerateSyntheticAnnotations) == 0) ||
-        (XMLString::compareIString(name, XMLUni::fgXercesIdentityConstraintChecking) == 0)
+        (XMLString::compareIString(name, XMLUni::fgXercesIdentityConstraintChecking) == 0) ||
+        (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0)
        ) {
         return true;
     }
diff --git a/src/xercesc/parsers/SAX2XMLReaderImpl.cpp b/src/xercesc/parsers/SAX2XMLReaderImpl.cpp
index d439c4c93..e0a3709cb 100644
--- a/src/xercesc/parsers/SAX2XMLReaderImpl.cpp
+++ b/src/xercesc/parsers/SAX2XMLReaderImpl.cpp
@@ -16,6 +16,10 @@
 
 /*
  * $Log$
+ * Revision 1.41  2004/12/07 19:45:43  knoaman
+ * An option to ignore a cached DTD grammar when a document contains an
+ * internal and external subset.
+ *
  * Revision 1.40  2004/10/04 11:30:51  amassari
  * As start/endPrefixMapping doesn't use the XMLBufMgr variable, we need only one XMLBuffer
  *
@@ -1573,6 +1577,10 @@ void SAX2XMLReaderImpl::setFeature(const XMLCh* const name, const bool value)
     {
         fScanner->setValidateAnnotations(value);
     }
+    else if (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0)
+    {
+        fScanner->setIgnoredCachedDTD(value);
+    }
     else
        throw SAXNotRecognizedException("Unknown Feature", fMemoryManager);
 }
@@ -1611,6 +1619,8 @@ bool SAX2XMLReaderImpl::getFeature(const XMLCh* const name) const
         return fScanner->getGenerateSyntheticAnnotations();
     else if (XMLString::compareIString(name, XMLUni::fgXercesValidateAnnotations) == 0)
         return fScanner->getValidateAnnotations();
+    else if (XMLString::compareIString(name, XMLUni::fgXercesIgnoreCachedDTD) == 0)
+        return fScanner->getIgnoreCachedDTD();
     else
        throw SAXNotRecognizedException("Unknown Feature", fMemoryManager);
 
diff --git a/src/xercesc/parsers/SAXParser.cpp b/src/xercesc/parsers/SAXParser.cpp
index 1395d1ef0..e00bff31b 100644
--- a/src/xercesc/parsers/SAXParser.cpp
+++ b/src/xercesc/parsers/SAXParser.cpp
@@ -16,6 +16,10 @@
 
 /*
  * $Log$
+ * Revision 1.37  2004/12/07 19:45:43  knoaman
+ * An option to ignore a cached DTD grammar when a document contains an
+ * internal and external subset.
+ *
  * Revision 1.36  2004/09/29 19:00:29  peiyongz
  * [jira1207] --patch from Dan Rosen
  *
@@ -568,6 +572,11 @@ unsigned int SAXParser::getSrcOffset() const
     return fScanner->getSrcOffset();
 }
 
+bool SAXParser::getIgnoreCachedDTD() const
+{
+    return fScanner->getIgnoreCachedDTD();    // the flag is held by the scanner; this just forwards
+}
+
 // ---------------------------------------------------------------------------
 //  SAXParser: Setter methods
 // ---------------------------------------------------------------------------
@@ -704,6 +713,11 @@ void SAXParser::setInputBufferSize(const size_t bufferSize)
     fScanner->setInputBufferSize(bufferSize);
 }
 
+void SAXParser::setIgnoreCachedDTD(const bool newValue)
+{
+    fScanner->setIgnoredCachedDTD(newValue);    // forwards to the scanner; its setter is spelled "Ignored"
+}
+
 // ---------------------------------------------------------------------------
 //  SAXParser: Overrides of the SAX Parser interface
 // ---------------------------------------------------------------------------
diff --git a/src/xercesc/parsers/SAXParser.hpp b/src/xercesc/parsers/SAXParser.hpp
index 83f06962f..c194dc776 100644
--- a/src/xercesc/parsers/SAXParser.hpp
+++ b/src/xercesc/parsers/SAXParser.hpp
@@ -16,6 +16,10 @@
 
 /*
  * $Log$
+ * Revision 1.37  2004/12/07 19:45:43  knoaman
+ * An option to ignore a cached DTD grammar when a document contains an
+ * internal and external subset.
+ *
  * Revision 1.36  2004/09/29 19:28:12  cargilld
  * Mark SAXParser as deprecated.
  *
@@ -680,6 +684,15 @@ public :
       */
     bool getValidateAnnotations() const;
 
+    /** Get the 'ignore cached DTD grammar' flag
+      *
+      * @return true, if the parser is currently configured to
+      *         ignore a cached DTD grammar, false otherwise.
+      *
+      * @see #setIgnoreCachedDTD
+      */
+    bool getIgnoreCachedDTD() const;
+
     //@}
 
 
@@ -948,7 +961,7 @@ public :
       * instead of building the grammar from scratch, to validate XML
       * documents.
       *
-      * If the 'Grammar caching' flag is set to true, this mehod ignore the
+      * If the 'Grammar caching' flag is set to true, this method ignores the
       * value passed in.
       *
       * The parser's default state is: false.
@@ -1008,6 +1021,22 @@ public :
       */
     void setInputBufferSize(const size_t bufferSize);
 
+    /** Set the 'ignore cached DTD grammar' flag
+      *
+      * This method gives users the option to ignore a cached DTD grammar when
+      * an XML document contains both an internal and an external DTD subset
+      * and the 'use cached grammar in parse' option is enabled. Currently, we
+      * do not allow using a cached DTD grammar when an internal subset is
+      * present in the document. This option only affects the behavior of the
+      * parser when both an internal and an external subset exist in a document
+      * (i.e. it has no effect if the document has no internal subset).
+      *
+      * The parser's default state is: false.
+      *
+      * @param newValue The state to set
+      */
+    void setIgnoreCachedDTD(const bool newValue);
+
     //@}
 
 
diff --git a/src/xercesc/parsers/XercesDOMParser.cpp b/src/xercesc/parsers/XercesDOMParser.cpp
index 416e09248..8e54f7cbd 100644
--- a/src/xercesc/parsers/XercesDOMParser.cpp
+++ b/src/xercesc/parsers/XercesDOMParser.cpp
@@ -98,6 +98,11 @@ unsigned int XercesDOMParser::getSrcOffset() const
     return getScanner()->getSrcOffset();
 }
 
+bool XercesDOMParser::getIgnoreCachedDTD() const
+{
+    return getScanner()->getIgnoreCachedDTD();    // the flag is held by the scanner; this just forwards
+}
+
 // ---------------------------------------------------------------------------
 //  XercesDOMParser: Setter methods
 // ---------------------------------------------------------------------------
@@ -153,6 +158,10 @@ void XercesDOMParser::useCachedGrammarInParse(const bool newState)
         getScanner()->useCachedGrammarInParse(newState);
 }
 
+void XercesDOMParser::setIgnoreCachedDTD(const bool newValue)
+{
+    getScanner()->setIgnoredCachedDTD(newValue);    // forwards to the scanner; its setter is spelled "Ignored"
+}
 
 // ---------------------------------------------------------------------------
 //  XercesDOMParser: Utilities
diff --git a/src/xercesc/parsers/XercesDOMParser.hpp b/src/xercesc/parsers/XercesDOMParser.hpp
index 22f1f2d16..88bbe2310 100644
--- a/src/xercesc/parsers/XercesDOMParser.hpp
+++ b/src/xercesc/parsers/XercesDOMParser.hpp
@@ -200,6 +200,15 @@ public :
      */
     unsigned int getSrcOffset() const;
 
+    /** Get the 'ignore cached DTD grammar' flag
+      *
+      * @return true, if the parser is currently configured to
+      *         ignore a cached DTD grammar, false otherwise.
+      *
+      * @see #setIgnoreCachedDTD
+      */
+    bool getIgnoreCachedDTD() const;
+
     //@}
 
 
@@ -301,6 +310,22 @@ public :
       */
     void useCachedGrammarInParse(const bool newState);
 
+    /** Set the 'ignore cached DTD grammar' flag
+      *
+      * This method gives users the option to ignore a cached DTD grammar when
+      * an XML document contains both an internal and an external DTD subset
+      * and the 'use cached grammar in parse' option is enabled. Currently, we
+      * do not allow using a cached DTD grammar when an internal subset is
+      * present in the document. This option only affects the behavior of the
+      * parser when both an internal and an external subset exist in a document
+      * (i.e. it has no effect if the document has no internal subset).
+      *
+      * The parser's default state is: false.
+      *
+      * @param newValue The state to set
+      */
+    void setIgnoreCachedDTD(const bool newValue);
+
     //@}
 
     // -----------------------------------------------------------------------
diff --git a/src/xercesc/util/XMLUni.cpp b/src/xercesc/util/XMLUni.cpp
index ab0823015..b2382a368 100644
--- a/src/xercesc/util/XMLUni.cpp
+++ b/src/xercesc/util/XMLUni.cpp
@@ -1258,6 +1258,21 @@ const XMLCh XMLUni::fgXercesUseCachedGrammarInParse[] =
     ,   chLatin_s, chLatin_e, chNull
 };
 
+//Xerces: http://apache.org/xml/features/validation/ignoreCachedDTD
+const XMLCh XMLUni::fgXercesIgnoreCachedDTD[] =    // feature name compared in the parsers' setFeature/getFeature
+{
+        chLatin_h, chLatin_t, chLatin_t, chLatin_p, chColon, chForwardSlash
+    ,   chForwardSlash, chLatin_a, chLatin_p, chLatin_a, chLatin_c, chLatin_h
+    ,   chLatin_e, chPeriod, chLatin_o, chLatin_r, chLatin_g, chForwardSlash
+    ,   chLatin_x, chLatin_m, chLatin_l, chForwardSlash, chLatin_f, chLatin_e
+    ,   chLatin_a, chLatin_t, chLatin_u, chLatin_r, chLatin_e, chLatin_s
+    ,   chForwardSlash, chLatin_v, chLatin_a, chLatin_l, chLatin_i, chLatin_d
+    ,   chLatin_a, chLatin_t, chLatin_i, chLatin_o, chLatin_n, chForwardSlash
+    ,   chLatin_i, chLatin_g, chLatin_n, chLatin_o, chLatin_r, chLatin_e
+    ,   chLatin_C, chLatin_a, chLatin_c, chLatin_h, chLatin_e, chLatin_d
+    ,   chLatin_D, chLatin_T, chLatin_D, chNull    // NUL terminator ends the XMLCh string
+};
+
 
 //Introduced in DOM Level 3
 const XMLCh XMLUni::fgDOMCanonicalForm[] =
diff --git a/src/xercesc/util/XMLUni.hpp b/src/xercesc/util/XMLUni.hpp
index db029d422..001d33108 100644
--- a/src/xercesc/util/XMLUni.hpp
+++ b/src/xercesc/util/XMLUni.hpp
@@ -223,6 +223,7 @@ public :
     static const XMLCh fgXercesDOMHasPSVIInfo[];
     static const XMLCh fgXercesGenerateSyntheticAnnotations[];
     static const XMLCh fgXercesValidateAnnotations[];
+    static const XMLCh fgXercesIgnoreCachedDTD[];
 
 
     // SAX2 features/properties names
-- 
GitLab