From e3d099013df3c6c8fbf220666dc88e70ac66aadd Mon Sep 17 00:00:00 2001
From: PeiYong Zhang <peiyongz@apache.org>
Date: Wed, 7 Apr 2004 14:15:12 +0000
Subject: [PATCH] allow internalDTD (conditionally) with grammar reusing

git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@175860 13f79535-47bb-0310-9956-ffa450edef68
---
 src/xercesc/internal/DGXMLScanner.cpp |  8 +++----
 src/xercesc/internal/IGXMLScanner.cpp |  8 +++----
 src/xercesc/internal/XMLScanner.cpp   | 30 +++++++++++++++++++++++++++
 src/xercesc/internal/XMLScanner.hpp   |  7 +++++++
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/xercesc/internal/DGXMLScanner.cpp b/src/xercesc/internal/DGXMLScanner.cpp
index 867c364fb..dba1050e0 100644
--- a/src/xercesc/internal/DGXMLScanner.cpp
+++ b/src/xercesc/internal/DGXMLScanner.cpp
@@ -935,9 +935,7 @@ void DGXMLScanner::scanDocTypeDecl()
         // Eat the opening square bracket
         fReaderMgr.getNextChar();
 
-        // We can't have any internal subset if we are reusing the validator
-        if (fUseCachedGrammar || fToCacheGrammar)
-            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);
+        checkInternalDTD(hasExtSubset, sysId);
 
         //  And try to scan the internal subset. If we fail, try to recover
         //  by skipping forward tot he close angle and returning.
@@ -1048,7 +1046,7 @@ void DGXMLScanner::scanDocTypeDecl()
                 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
 
                 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
-                ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setRootName(sysIdStr);
+                ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
                 fGrammarResolver->putGrammar(fGrammar);
             }
 
@@ -1817,7 +1815,7 @@ Grammar* DGXMLScanner::loadDTDGrammar(const InputSource& src,
         const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
 
         fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
-        ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setRootName(sysIdStr);
+        ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
         fGrammarResolver->putGrammar(fGrammar);
     }
 
diff --git a/src/xercesc/internal/IGXMLScanner.cpp b/src/xercesc/internal/IGXMLScanner.cpp
index 315f8f882..d0badb836 100644
--- a/src/xercesc/internal/IGXMLScanner.cpp
+++ b/src/xercesc/internal/IGXMLScanner.cpp
@@ -1485,9 +1485,7 @@ void IGXMLScanner::scanDocTypeDecl()
         // Eat the opening square bracket
         fReaderMgr.getNextChar();
 
-        // We can't have any internal subset if we are reusing the validator
-        if (fUseCachedGrammar || fToCacheGrammar)
-            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);
+        checkInternalDTD(hasExtSubset, sysId);
 
         //  And try to scan the internal subset. If we fail, try to recover
         //  by skipping forward tot he close angle and returning.
@@ -1596,7 +1594,7 @@ void IGXMLScanner::scanDocTypeDecl()
                 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
 
                 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
-                ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setRootName(sysIdStr);
+                ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
                 fGrammarResolver->putGrammar(fGrammar);
             }
 
@@ -3380,7 +3378,7 @@ Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src,
         const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
               
         fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
-        ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setRootName(sysIdStr);
+        ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
         fGrammarResolver->putGrammar(fGrammar);
     }
 
diff --git a/src/xercesc/internal/XMLScanner.cpp b/src/xercesc/internal/XMLScanner.cpp
index 097c4a512..6c6a8290f 100644
--- a/src/xercesc/internal/XMLScanner.cpp
+++ b/src/xercesc/internal/XMLScanner.cpp
@@ -1736,6 +1736,36 @@ void XMLScanner::setURIStringPool(XMLStringPool* const stringPool)
 //  XMLScanner: Private helper methods
 // ---------------------------------------------------------------------------
 
+/***
+ * Decide whether an internal DTD subset is permitted while grammars are being
+ * reused (either caching the grammar from this parse, or using a cached one).
+ *
+ * - Caching the grammar from this parse: an internal subset is never allowed.
+ * - Using a cached grammar: it is rejected only when an external subset is
+ *   present and the grammar resolver already holds a DTD grammar for its system id.
+ *
+ * Throws RuntimeException (Val_CantHaveIntSS) when the subset is not allowed.
+ * NOTE(review): a null result from resolveSystemId() is treated as "not cached".
+ ***/
+void XMLScanner::checkInternalDTD(bool hasExtSubset, const XMLCh* const sysId)
+{
+    if (fToCacheGrammar)
+        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);
+
+    if (fUseCachedGrammar && hasExtSubset)
+    {
+        // The resolved source is owned by the caller (us); hand it to a janitor.
+        InputSource* sysIdSrc = resolveSystemId(sysId);
+        Janitor<InputSource> janSysIdSrc(sysIdSrc);
+
+        // Guard the dereference: without a resolved source there is nothing to look up.
+        Grammar* grammar = sysIdSrc ? fGrammarResolver->getGrammar(sysIdSrc->getSystemId()) : 0;
+
+        if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType)
+            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);
+    }
+}
+
 //  This method is called after the content scan to insure that all the
 //  ID/IDREF attributes match up (i.e. that all IDREFs refer to IDs.) This is
 //  an XML 1.0 rule, so we can do here in the core.
diff --git a/src/xercesc/internal/XMLScanner.hpp b/src/xercesc/internal/XMLScanner.hpp
index 4569d7954..4eeaa6f3f 100644
--- a/src/xercesc/internal/XMLScanner.hpp
+++ b/src/xercesc/internal/XMLScanner.hpp
@@ -56,6 +56,9 @@
 
 /*
  * $Log$
+ * Revision 1.33  2004/04/07 14:15:12  peiyongz
+ * allow internalDTD (conditionally) with grammar reusing
+ *
  * Revision 1.32  2003/12/31 15:40:00  cargilld
  * Release memory when an error is encountered.
  *
@@ -715,6 +718,9 @@ protected:
     virtual void scanReset(const InputSource& src) = 0;
     virtual void sendCharData(XMLBuffer& toSend) = 0;
 
+    // The returned InputSource is owned by the caller.
+    virtual InputSource* resolveSystemId(const XMLCh* const sysId) = 0;
+
     // -----------------------------------------------------------------------
     //  Protected scanning methods
     // -----------------------------------------------------------------------
@@ -729,6 +735,7 @@ protected:
     // -----------------------------------------------------------------------
     //  Private helper methods
     // -----------------------------------------------------------------------
+    void checkInternalDTD(bool hasExtSubset, const XMLCh* const sysId);
     void checkIDRefs();
     bool isLegalToken(const XMLPScanToken& toCheck);
     XMLTokens senseNextToken(unsigned int& orgReader);
-- 
GitLab