diff --git a/doc/program-sax2.xml b/doc/program-sax2.xml index 7311281f3d786de32b525f128491fe50db70f923..5f9359ac8cf4fd03cfb926354d62da34c8e3f980 100644 --- a/doc/program-sax2.xml +++ b/doc/program-sax2.xml @@ -207,6 +207,15 @@ MySAX2Handler::fatalError(const SAXParseException& exception) <p/> + <table> + <tr><td colspan="2"><em>http://apache.org/xml/features/nonvalidating/load-external-dtd</em></td></tr> + <tr><td><em>true:</em></td><td> Load the External DTD (default). </td></tr> + <tr><td><em>false:</em></td><td> Ignore the external DTD completely. This feature is + ignored and DTD is always loaded when validation is on.</td></tr> + </table> + + <p/> + <table> <tr><td colspan="2"><em>http://apache.org/xml/features/validation/reuse-grammar</em></td></tr> <tr><td><em>true:</em></td><td> The parser will reuse grammar information from previous parses in subsequent parses. </td></tr> diff --git a/src/xercesc/internal/XMLScanner.cpp b/src/xercesc/internal/XMLScanner.cpp index 3b0ebbd3baf62f7903fa44b7185143658144aa04..d0354c55b6727e6a0a82833d2e4e73d938d71074 100644 --- a/src/xercesc/internal/XMLScanner.cpp +++ b/src/xercesc/internal/XMLScanner.cpp @@ -221,6 +221,7 @@ XMLScanner::XMLScanner(XMLValidator* const valToAdopt) : , fRootElemName(0) , fExternalSchemaLocation(0) , fExternalNoNamespaceSchemaLocation(0) + , fLoadExternalDTD(true) { commonInit(); @@ -284,6 +285,7 @@ XMLScanner::XMLScanner( XMLDocumentHandler* const docHandler , fRootElemName(0) , fExternalSchemaLocation(0) , fExternalNoNamespaceSchemaLocation(0) + , fLoadExternalDTD(true) { commonInit(); @@ -2215,28 +2217,7 @@ void XMLScanner::scanProlog() } else if (fReaderMgr.skippedString(XMLUni::fgDocTypeString)) { - if (!fReuseGrammar && fValidatorFromUser && !fValidator->handlesDTD()) - { - ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator); - } - - // - // We have a doc type. So, create a DTDScanner and - // switch the Grammar to the emptyNamespace one. 
- // - - if (!switchGrammar(XMLUni::fgZeroLenString) && fValidate) - { - fValidator->emitError - ( - XMLValid::GrammarNotFound - , XMLUni::fgZeroLenString - ); - } - - DTDScanner fDTDScanner((DTDGrammar*)fGrammar, fEntityDeclPool, fDocTypeHandler); - fDTDScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr); - fDTDScanner.scanDocTypeDecl(fReuseGrammar); + scanDocTypeDecl(); // if reusing grammar, this has been validated already in first scan // skip for performance @@ -2297,6 +2278,302 @@ void XMLScanner::scanProlog() } } +// +// This method handles the high level logic of scanning the DOCType +// declaration. This calls the DTDScanner and kicks off both the scanning of +// the internal subset and the scanning of the external subset, if any. +// +// When we get here the '<!DOCTYPE' part has already been scanned, which is +// what told us that we had a doc type decl to parse. +// + +void XMLScanner::scanDocTypeDecl() +{ + if (!fReuseGrammar && fValidatorFromUser && !fValidator->handlesDTD()) + { + ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator); + } + + // + // We have a doc type. So, create a DTDScanner and + // switch the Grammar to the emptyNamespace one. + // + + if (!switchGrammar(XMLUni::fgZeroLenString) && fValidate) + { + fValidator->emitError + ( + XMLValid::GrammarNotFound + , XMLUni::fgZeroLenString + ); + } + + DTDScanner dtdScanner((DTDGrammar*)fGrammar, fEntityDeclPool, fDocTypeHandler); + dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr); + + if (fDocTypeHandler) + fDocTypeHandler->resetDocType(); + + // There must be some space after DOCTYPE + if (!fReaderMgr.skipPastSpaces()) + { + emitError(XMLErrs::ExpectedWhitespace); + + // Just skip the Doctype declaration and return + fReaderMgr.skipPastChar(chCloseAngle); + return; + } + + // Get a buffer for the root element + XMLBufBid bbRootName(&fBufMgr); + + // + // Get a name from the input, which should be the name of the root + // element of the upcoming content. 
+ // + fReaderMgr.getName(bbRootName.getBuffer()); + if (bbRootName.isEmpty()) + { + emitError(XMLErrs::NoRootElemInDOCTYPE); + fReaderMgr.skipPastChar(chCloseAngle); + return; + } + + // + // Store the root element name for later check + // + setRootElemName(bbRootName.getRawBuffer()); + + // + // This element obviously is not going to exist in the element decl + // pool yet, but we need to call docTypeDecl. So force it into + // the element decl pool, marked as being there because it was in + // the DOCTYPE. Later, when it is declared, the status will be updated. + // + // Only do this if we are not reusing the validator! If we are reusing, + // then look it up instead. If the lookup fails, a new declaration is created. + // + DTDElementDecl* rootDecl; + Janitor<DTDElementDecl> janSrc(0); + + if (fReuseGrammar) + { + if (fGrammar->getGrammarType() == Grammar::DTDGrammarType) { + rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); + if (rootDecl) + ((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId()); + else { + rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId); + rootDecl->setCreateReason(DTDElementDecl::AsRootElem); + rootDecl->setExternalElemDeclaration(true); + ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl)); + } + } + else { + rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId); + rootDecl->setCreateReason(DTDElementDecl::AsRootElem); + rootDecl->setExternalElemDeclaration(true); + janSrc.reset(rootDecl); + } + } + else + { + rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId); + rootDecl->setCreateReason(DTDElementDecl::AsRootElem); + rootDecl->setExternalElemDeclaration(true); + ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl)); + } + + // Skip any spaces after the name + fReaderMgr.skipPastSpaces(); + + // + // And now if we are looking at a >, then we are done. 
It is not + // required to have an internal or external subset, though why you + // would not escapes me. + // + if (fReaderMgr.skippedChar(chCloseAngle)) { + // + // If we have a doc type handler and advanced callbacks are enabled, + // call the doctype event. + // + if (fDocTypeHandler) + fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false); + return; + } + + // either internal/external subset + if(!fReuseGrammar) { + if (fValScheme == Val_Auto && !fValidate) + fValidate = true; + } + + + bool hasIntSubset = false; + bool hasExtSubset = false; + XMLCh* sysId = 0; + XMLCh* pubId = 0; + + // + // If the next character is '[' then we have no external subset cause + // there is no system id, just the opening character of the internal + // subset. Else, has to be an id. + // + // Just look at the next char, don't eat it. + if (fReaderMgr.peekNextChar() == chOpenSquare) + { + hasIntSubset = true; + } + else + { + // Indicate we have an external subset + hasExtSubset = true; + fHasNoDTD = false; + + // Get buffers for the ids + XMLBufBid bbPubId(&fBufMgr); + XMLBufBid bbSysId(&fBufMgr); + + // Get the external subset id + if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External)) + { + fReaderMgr.skipPastChar(chCloseAngle); + return; + } + + // Get copies of the ids we got + pubId = XMLString::replicate(bbPubId.getRawBuffer()); + sysId = XMLString::replicate(bbSysId.getRawBuffer()); + + // Skip spaces and check again for the opening of an internal subset + fReaderMgr.skipPastSpaces(); + + // Just look at the next char, don't eat it. + if (fReaderMgr.peekNextChar() == chOpenSquare) { + hasIntSubset = true; + } + } + + // Insure that the ids get cleaned up, if they got allocated + ArrayJanitor<XMLCh> janSysId(sysId); + ArrayJanitor<XMLCh> janPubId(pubId); + + // + // If we have a doc type handler and advanced callbacks are enabled, + // call the doctype event. 
+ // + if (fDocTypeHandler) + fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset); + + // + // Ok, if we had an internal subset, we are positioned at the [ character + // (it was only peeked) and need to parse that first. + // + if (hasIntSubset) + { + // Eat the opening square bracket + fReaderMgr.getNextChar(); + + // We can't have any internal subset if we are reusing the validator + if (fReuseGrammar) + ThrowXML(RuntimeException, XMLExcepts::Val_CantHaveIntSS); + + // + // And try to scan the internal subset. If we fail, try to recover + // by skipping forward to the close angle and returning. + // + if (!dtdScanner.scanInternalSubset()) + { + fReaderMgr.skipPastChar(chCloseAngle); + return; + } + + // + // Do a sanity check that some expanded PE did not propagate out of + // the doctype. This could happen if it was terminated early by bad + // syntax. + // + if (fReaderMgr.getReaderDepth() > 1) + { + emitError(XMLErrs::PEPropogated); + + // Ask the reader manager to pop back down to the main level + fReaderMgr.cleanStackBackTo(1); + } + + fReaderMgr.skipPastSpaces(); + } + + // And that should leave us at the closing > of the DOCTYPE line + if (!fReaderMgr.skippedChar(chCloseAngle)) + { + // + // Do a special check for the common scenario of an extra ] char at + // the end. This is easy to recover from. + // + if (fReaderMgr.skippedChar(chCloseSquare) + && fReaderMgr.skippedChar(chCloseAngle)) + { + emitError(XMLErrs::ExtraCloseSquare); + } + else + { + emitError(XMLErrs::UnterminatedDOCTYPE); + fReaderMgr.skipPastChar(chCloseAngle); + } + } + + // + // If we had an external subset, then we need to deal with that one + // next. If we are reusing the validator, then don't scan it. 
+ // + if (hasExtSubset && !fReuseGrammar && (fLoadExternalDTD || fValidate)) + { + // And now create a reader to read this entity + InputSource* srcUsed; + XMLReader* reader = fReaderMgr.createReader + ( + sysId + , pubId + , false + , XMLReader::RefFrom_NonLiteral + , XMLReader::Type_General + , XMLReader::Source_External + , srcUsed + ); + + // Put a janitor on the input source + Janitor<InputSource> janSrc(srcUsed); + + // + // If it failed then throw an exception + // + if (!reader) + ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId()); + + // + // In order to make the processing work consistently, we have to + // make this look like an external entity. So create an entity + // decl and fill it in and push it with the reader, as happens + // with an external entity. Put a janitor on it to insure it gets + // cleaned up. The reader manager does not adopt them. + // + const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull }; + DTDEntityDecl* declDTD = new DTDEntityDecl(gDTDStr); + declDTD->setSystemId(sysId); + Janitor<DTDEntityDecl> janDecl(declDTD); + + // Mark this one as a throw at end + reader->setThrowAtEnd(true); + + // And push it onto the stack, with its pseudo name + fReaderMgr.pushReader(reader, declDTD); + + // Tell it its not in an include section + dtdScanner.scanExtSubsetDecl(false); + } +} + bool XMLScanner::scanStartTag(bool& gotData) { // diff --git a/src/xercesc/internal/XMLScanner.hpp b/src/xercesc/internal/XMLScanner.hpp index 2350156d2116c2ee9019821e1bbc2435f0fdc573..a3374fc4f04b7ec87e54e74eea91d4e844811682 100644 --- a/src/xercesc/internal/XMLScanner.hpp +++ b/src/xercesc/internal/XMLScanner.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.5 2002/05/30 16:20:57 tng + * Add feature to optionally ignore external DTD. + * * Revision 1.4 2002/05/27 18:42:14 tng * To get ready for 64 bit large file, use XMLSSize_t to represent line and column number. 
* @@ -396,6 +399,7 @@ public : bool getHasNoDTD() const; XMLCh* getExternalSchemaLocation() const; XMLCh* getExternalNoNamespaceSchemaLocation() const; + bool getLoadExternalDTD() const; // ----------------------------------------------------------------------- // Getter methods @@ -492,6 +496,7 @@ public : void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation); void setExternalSchemaLocation(const char* const schemaLocation); void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation); + void setLoadExternalDTD(const bool loadDTD); // ----------------------------------------------------------------------- // Mutator methods @@ -503,7 +508,7 @@ public : // setValidationScheme() instead. // ----------------------------------------------------------------------- bool getDoValidation() const; - void setDoValidation(const bool validate, const bool setValScheme = true); + void setDoValidation(const bool validate); // ----------------------------------------------------------------------- @@ -683,6 +688,7 @@ private : XMLBuffer& toFill , const XMLCh chEndChar ); + void scanDocTypeDecl(); // ----------------------------------------------------------------------- // Private helper methods @@ -908,6 +914,9 @@ private : // The no target namespace XML Schema Location that was specified // externally using setExternalNoNamespaceSchemaLocation. 
// + // fLoadExternalDTD + // This flag indicates whether the external DTD should be loaded or not + // // ----------------------------------------------------------------------- bool fDoNamespaces; bool fExitOnFirstFatal; @@ -964,6 +973,7 @@ private : XMLCh* fRootElemName; XMLCh* fExternalSchemaLocation; XMLCh* fExternalNoNamespaceSchemaLocation; + bool fLoadExternalDTD; }; @@ -1162,6 +1172,11 @@ inline XMLCh* XMLScanner::getExternalNoNamespaceSchemaLocation() const return fExternalNoNamespaceSchemaLocation; } +inline bool XMLScanner::getLoadExternalDTD() const +{ + return fLoadExternalDTD; +} + // --------------------------------------------------------------------------- // XMLScanner: Setter methods // --------------------------------------------------------------------------- @@ -1281,6 +1296,11 @@ inline void XMLScanner::setExternalNoNamespaceSchemaLocation(const char* const n fExternalNoNamespaceSchemaLocation = XMLString::transcode(noNamespaceSchemaLocation); } +inline void XMLScanner::setLoadExternalDTD(const bool loadDTD) +{ + fLoadExternalDTD = loadDTD; +} + // --------------------------------------------------------------------------- // XMLScanner: Mutator methods @@ -1299,15 +1319,13 @@ inline bool XMLScanner::getDoValidation() const return fValidate; } -inline void XMLScanner::setDoValidation(const bool validate, const bool setValScheme) +inline void XMLScanner::setDoValidation(const bool validate) { fValidate = validate; - if (setValScheme) { - if (fValidate) - fValScheme = Val_Always; - else - fValScheme = Val_Never; - } + if (fValidate) + fValScheme = Val_Always; + else + fValScheme = Val_Never; } #endif diff --git a/src/xercesc/parsers/AbstractDOMParser.cpp b/src/xercesc/parsers/AbstractDOMParser.cpp index 5d2c65b84d023621eaae60eed8976a333370303c..ab7335fb2623da97be838debf0a76004fb75f7c6 100644 --- a/src/xercesc/parsers/AbstractDOMParser.cpp +++ b/src/xercesc/parsers/AbstractDOMParser.cpp @@ -227,6 +227,12 @@ XMLCh* 
AbstractDOMParser::getExternalNoNamespaceSchemaLocation() const return fScanner->getExternalNoNamespaceSchemaLocation(); } +bool AbstractDOMParser::getLoadExternalDTD() const +{ + return fScanner->getLoadExternalDTD(); +} + + // --------------------------------------------------------------------------- // AbstractDOMParser: Setter methods // --------------------------------------------------------------------------- @@ -283,6 +289,11 @@ void AbstractDOMParser::setExternalNoNamespaceSchemaLocation(const char* const n fScanner->setExternalNoNamespaceSchemaLocation(noNamespaceSchemaLocation); } +void AbstractDOMParser::setLoadExternalDTD(const bool newState) +{ + fScanner->setLoadExternalDTD(newState); +} + // --------------------------------------------------------------------------- // AbstractDOMParser: Parsing methods diff --git a/src/xercesc/parsers/AbstractDOMParser.hpp b/src/xercesc/parsers/AbstractDOMParser.hpp index 60792b7cdc24d0076be6ed5db9cc6f4db727a315..00a0f05cacb3d3bb6709ce5e2b8b5a5080bf66dd 100644 --- a/src/xercesc/parsers/AbstractDOMParser.hpp +++ b/src/xercesc/parsers/AbstractDOMParser.hpp @@ -341,6 +341,19 @@ public : */ XMLCh* getExternalNoNamespaceSchemaLocation() const; + /** Get the 'Loading External DTD' flag + * + * This method returns the state of the parser's loading external DTD + * flag. + * + * @return false, if the parser is currently configured to + * ignore external DTD completely, true otherwise. + * + * @see #setLoadExternalDTD + * @see #getValidationScheme + */ + bool getLoadExternalDTD() const; + //@} @@ -576,6 +589,24 @@ public : */ void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation); + /** Set the 'Loading External DTD' flag + * + * This method allows users to enable or disable the loading of external DTD. + * When set to false, the parser will ignore any external DTD completely + * if the validationScheme is set to Val_Never. + * + * The parser's default state is: true. 
+ * + * This flag is ignored if the validationScheme is set to Val_Always or Val_Auto. + * + * @param newState The value specifying whether external DTD should + * be loaded or not. + * + * @see #getLoadExternalDTD + * @see #setValidationScheme + */ + void setLoadExternalDTD(const bool newState); + //@} diff --git a/src/xercesc/parsers/SAX2XMLReaderImpl.cpp b/src/xercesc/parsers/SAX2XMLReaderImpl.cpp index 848889370358b96eaee42b4a7b21ae4681d437af..8dfcea67323dc52b874395db86e3406417de41c6 100644 --- a/src/xercesc/parsers/SAX2XMLReaderImpl.cpp +++ b/src/xercesc/parsers/SAX2XMLReaderImpl.cpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.7 2002/05/30 16:20:09 tng + * Add feature to optionally ignore external DTD. + * * Revision 1.6 2002/05/29 21:37:47 knoaman * Add baseURI to resolveEntity to support DOMInputSource. * @@ -1368,6 +1371,11 @@ void SAX2XMLReaderImpl::setFeature(const XMLCh* const name, const bool value) fScanner->setValidationSchemaFullChecking(value); } + else if (XMLString::compareIString(name, XMLUni::fgSAX2XercesLoadExternalDTD) == 0) + { + fScanner->setLoadExternalDTD(value); + } + else throw SAXNotRecognizedException("Unknown Feature"); } @@ -1390,6 +1398,8 @@ bool SAX2XMLReaderImpl::getFeature(const XMLCh* const name) const return getDoSchema(); else if (XMLString::compareIString(name, XMLUni::fgSAX2XercesSchemaFullChecking) == 0) return fScanner->getValidationSchemaFullChecking(); + else if (XMLString::compareIString(name, XMLUni::fgSAX2XercesLoadExternalDTD) == 0) + return fScanner->getLoadExternalDTD(); else throw SAXNotRecognizedException("Unknown Feature"); return false; diff --git a/src/xercesc/parsers/SAX2XMLReaderImpl.hpp b/src/xercesc/parsers/SAX2XMLReaderImpl.hpp index 0bc69782eeff51f1c01bdc4242a53b01bd327e4e..3d94ccba4850f7e5c55024ee39a1b17d43ac0d75 100644 --- a/src/xercesc/parsers/SAX2XMLReaderImpl.hpp +++ b/src/xercesc/parsers/SAX2XMLReaderImpl.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.6 2002/05/30 16:20:09 tng + * Add feature to 
optionally ignore external DTD. + * * Revision 1.5 2002/05/29 21:37:47 knoaman * Add baseURI to resolveEntity to support DOMInputSource. * @@ -697,6 +700,7 @@ public : * <br>http://apache.org/xml/features/validation/reuse-grammar (default: false) * <br>http://apache.org/xml/features/validation/schema (default: true) * <br>http://apache.org/xml/features/validation/schema-full-checking (default: false) + * <br>http://apache.org/xml/features/nonvalidating/load-external-dtd (default: true) * <br>http://apache.org/xml/features/validation/reuse-validator (Deprecated) (default: false) * * @param name The unique identifier (URI) of the feature. diff --git a/src/xercesc/parsers/SAXParser.cpp b/src/xercesc/parsers/SAXParser.cpp index 809875de8fdaa13a029af4497034009a03f196c1..0cb242465f345e3c11d0ce17510e3490263be2fd 100644 --- a/src/xercesc/parsers/SAXParser.cpp +++ b/src/xercesc/parsers/SAXParser.cpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.6 2002/05/30 16:20:09 tng + * Add feature to optionally ignore external DTD. + * * Revision 1.5 2002/05/29 21:37:47 knoaman * Add baseURI to resolveEntity to support DOMInputSource. 
* @@ -367,6 +370,11 @@ XMLCh* SAXParser::getExternalNoNamespaceSchemaLocation() const return fScanner->getExternalNoNamespaceSchemaLocation(); } +bool SAXParser::getLoadExternalDTD() const +{ + return fScanner->getLoadExternalDTD(); +} + // --------------------------------------------------------------------------- // SAXParser: Setter methods @@ -427,6 +435,11 @@ void SAXParser::setExternalNoNamespaceSchemaLocation(const char* const noNamespa fScanner->setExternalNoNamespaceSchemaLocation(noNamespaceSchemaLocation); } +void SAXParser::setLoadExternalDTD(const bool newState) +{ + fScanner->setLoadExternalDTD(newState); +} + // --------------------------------------------------------------------------- // SAXParser: Overrides of the SAX Parser interface diff --git a/src/xercesc/parsers/SAXParser.hpp b/src/xercesc/parsers/SAXParser.hpp index e20da55dda3141be766e90c09acbfcacc11d7b2b..473931ad60e56850bcb454dd7b2e79e21de96cc5 100644 --- a/src/xercesc/parsers/SAXParser.hpp +++ b/src/xercesc/parsers/SAXParser.hpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.6 2002/05/30 16:20:09 tng + * Add feature to optionally ignore external DTD. + * * Revision 1.5 2002/05/29 21:37:47 knoaman * Add baseURI to resolveEntity to support DOMInputSource. * @@ -407,6 +410,20 @@ public : */ XMLCh* getExternalNoNamespaceSchemaLocation() const; + /** Get the 'Loading External DTD' flag + * + * This method returns the state of the parser's loading external DTD + * flag. + * + * @return false, if the parser is currently configured to + * ignore external DTD completely, true otherwise. + * + * @see #setLoadExternalDTD + * @see #getValidationScheme + */ + bool getLoadExternalDTD() const; + + //@} @@ -578,6 +595,25 @@ public : */ void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation); + /** Set the 'Loading External DTD' flag + * + * This method allows users to enable or disable the loading of external DTD. 
+ * When set to false, the parser will ignore any external DTD completely + * if the validationScheme is set to Val_Never. + * + * The parser's default state is: true. + * + * This flag is ignored if the validationScheme is set to Val_Always or Val_Auto. + * + * @param newState The value specifying whether external DTD should + * be loaded or not. + * + * @see #getLoadExternalDTD + * @see #setValidationScheme + */ + void setLoadExternalDTD(const bool newState); + + //@} diff --git a/src/xercesc/sax2/SAX2XMLReader.hpp b/src/xercesc/sax2/SAX2XMLReader.hpp index 295a569313452c8c3f0e62bc07041730f16a74e0..c519713ec63f5f815487a0e452609e8519794e21 100644 --- a/src/xercesc/sax2/SAX2XMLReader.hpp +++ b/src/xercesc/sax2/SAX2XMLReader.hpp @@ -56,8 +56,11 @@ /* * $Log$ - * Revision 1.1 2002/02/01 22:22:09 peiyongz - * Initial revision + * Revision 1.2 2002/05/30 16:22:53 tng + * Add feature to optionally ignore external DTD. + * + * Revision 1.1.1.1 2002/02/01 22:22:09 peiyongz + * sane_include * * Revision 1.18 2002/01/28 18:45:40 knoaman * Update documentation for SAX2 feature 'namespace-prefixes'. @@ -519,6 +522,7 @@ public: * <br>http://apache.org/xml/features/validation/reuse-grammar (default: false) * <br>http://apache.org/xml/features/validation/schema (default: true) * <br>http://apache.org/xml/features/validation/schema-full-checking (default: false) + * <br>http://apache.org/xml/features/nonvalidating/load-external-dtd (default: true) * <br>http://apache.org/xml/features/validation/reuse-validator (Deprecated) (default: false) * * @param name The unique identifier (URI) of the feature. 
diff --git a/src/xercesc/util/XMLUni.cpp b/src/xercesc/util/XMLUni.cpp index 799a667c53f8c387a152cea8646e7d36ffeafa94..74032aaa7e80684956e60083c86e15213e11d2c8 100644 --- a/src/xercesc/util/XMLUni.cpp +++ b/src/xercesc/util/XMLUni.cpp @@ -914,6 +914,22 @@ const XMLCh XMLUni::fgSAX2XercesSchemaFullChecking[] = , chLatin_e, chLatin_c, chLatin_k, chLatin_i, chLatin_n, chLatin_g, chNull }; +//Xerces: http://apache.org/xml/features/nonvalidating/load-external-dtd +const XMLCh XMLUni::fgSAX2XercesLoadExternalDTD[] = +{ + chLatin_h, chLatin_t, chLatin_t, chLatin_p, chColon, chForwardSlash + , chForwardSlash, chLatin_a, chLatin_p, chLatin_a, chLatin_c, chLatin_h + , chLatin_e, chPeriod, chLatin_o, chLatin_r, chLatin_g, chForwardSlash + , chLatin_x, chLatin_m, chLatin_l, chForwardSlash, chLatin_f, chLatin_e + , chLatin_a, chLatin_t, chLatin_u, chLatin_r, chLatin_e, chLatin_s + , chForwardSlash, chLatin_n, chLatin_o, chLatin_n + , chLatin_v, chLatin_a, chLatin_l, chLatin_i, chLatin_d + , chLatin_a, chLatin_t, chLatin_i, chLatin_n, chLatin_g, chForwardSlash + , chLatin_l, chLatin_o, chLatin_a, chLatin_d, chDash + , chLatin_e, chLatin_x, chLatin_t, chLatin_e, chLatin_r, chLatin_n, chLatin_a, chLatin_l, chDash + , chLatin_d, chLatin_t, chLatin_d, chNull +}; + //deprecated //Xerces: http://apache.org/xml/features/validation/reuse-validator const XMLCh XMLUni::fgSAX2XercesReuseValidator[] = @@ -977,7 +993,7 @@ const XMLCh XMLUni::fgDOMCDATASections[] = , chLatin_e, chLatin_c, chLatin_t, chLatin_i, chLatin_o, chLatin_n, chLatin_s, chNull }; -const XMLCh XMLUni::fgDOMComments[] = +const XMLCh XMLUni::fgDOMComments[] = { chLatin_c, chLatin_o, chLatin_m, chLatin_m, chLatin_e, chLatin_n, chLatin_t , chLatin_s, chNull diff --git a/src/xercesc/util/XMLUni.hpp b/src/xercesc/util/XMLUni.hpp index 04fa7e224f7a39e7b786aab0b40c437d3110aaf3..d21cb9b15bb970dc941f58c108d0c5bf9ef742c2 100644 --- a/src/xercesc/util/XMLUni.hpp +++ b/src/xercesc/util/XMLUni.hpp @@ -227,6 +227,7 @@ public : static 
const XMLCh fgSAX2XercesSchemaFullChecking[]; static const XMLCh fgSAX2XercesSchemaExternalSchemaLocation[]; static const XMLCh fgSAX2XercesSchemaExternalNoNameSpaceSchemaLocation[]; + static const XMLCh fgSAX2XercesLoadExternalDTD[]; // DOMBuilder features static const XMLCh fgDOMCanonicalForm[]; diff --git a/src/xercesc/validators/DTD/DTDScanner.cpp b/src/xercesc/validators/DTD/DTDScanner.cpp index 48af45383f23412b896ca2357730dc757ebd7622..e49c899ef1f8b936a7cda359fdc518f2bacf1793 100644 --- a/src/xercesc/validators/DTD/DTDScanner.cpp +++ b/src/xercesc/validators/DTD/DTDScanner.cpp @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.5 2002/05/30 16:17:19 tng + * Add feature to optionally ignore external DTD. + * * Revision 1.4 2002/05/03 14:51:16 peiyongz * Bug#8769: UMR detected by memory tool - patch from Kenneth Palsson * @@ -1621,288 +1624,6 @@ void DTDScanner::scanDefaultDecl(DTDAttDef& toFill) } -// -// This method handles the high level logic of scanning the DOCType -// declaration. This kicks off both the scanning of the internal subset and -// the scanning of the external subset, if any. -// -// When we get here the '<!DOCTYPE' part has already been scanned, which is -// what told us that we had a doc type decl to parse. -// - -void DTDScanner::scanDocTypeDecl(const bool reuseGrammar) -{ - if (fDocTypeHandler) - fDocTypeHandler->resetDocType(); - - // There must be some space after DOCTYPE - if (!fReaderMgr->skipPastSpaces()) - { - fScanner->emitError(XMLErrs::ExpectedWhitespace); - - // Just skip the Doctype declaration and return - fReaderMgr->skipPastChar(chCloseAngle); - return; - } - - // Get a buffer for the root element - XMLBufBid bbRootName(fBufMgr); - - // - // Get a name from the input, which should be the name of the root - // element of the upcoming content. 
- // - fReaderMgr->getName(bbRootName.getBuffer()); - if (bbRootName.isEmpty()) - { - fScanner->emitError(XMLErrs::NoRootElemInDOCTYPE); - fReaderMgr->skipPastChar(chCloseAngle); - return; - } - - // - // Store the root element name for later check - // - fScanner->setRootElemName(bbRootName.getRawBuffer()); - - // - // This element obviously is not going to exist in the element decl - // pool yet, but we need to call docTypeDecl. So force it into - // the element decl pool, marked as being there because it was in - // the DOCTYPE. Later, when its declared, the status will be updated. - // - // Only do this if we are not reusing the validator! If we are reusing, - // then look it up instead. It has to exist! - // - DTDElementDecl* rootDecl; - Janitor<DTDElementDecl> janSrc(0); - - if (reuseGrammar) - { - Grammar* fGrammar = fDTDGrammar; - if (fGrammar->getGrammarType() == Grammar::DTDGrammarType) { - rootDecl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); - if (rootDecl) - fDTDGrammar->setRootElemId(rootDecl->getId()); - else { - rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId); - rootDecl->setCreateReason(DTDElementDecl::AsRootElem); - rootDecl->setExternalElemDeclaration(isReadingExternalEntity()); - fDTDGrammar->setRootElemId(fDTDGrammar->putElemDecl(rootDecl)); - } - } - else { - rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId); - rootDecl->setCreateReason(DTDElementDecl::AsRootElem); - rootDecl->setExternalElemDeclaration(isReadingExternalEntity()); - janSrc.reset(rootDecl); - } - } - else - { - rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId); - rootDecl->setCreateReason(DTDElementDecl::AsRootElem); - rootDecl->setExternalElemDeclaration(isReadingExternalEntity()); - fDTDGrammar->setRootElemId(fDTDGrammar->putElemDecl(rootDecl)); - } - - // Skip any spaces after the name - fReaderMgr->skipPastSpaces(); - - 
// - // And now if we are looking at a >, then we are done. It is not - // required to have an internal or external subset, though why you - // would not escapes me. - // - if (fReaderMgr->skippedChar(chCloseAngle)) { - // - // If we have a doc type handler and advanced callbacks are enabled, - // call the doctype event. - // - if (fDocTypeHandler) - fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false); - return; - } - - // either internal/external subset - if(!reuseGrammar) { - if (fScanner->getValidationScheme() == XMLScanner::Val_Auto) - fScanner->setDoValidation(true, false); - } - - - bool hasIntSubset = false; - bool hasExtSubset = false; - XMLCh* sysId = 0; - XMLCh* pubId = 0; - - // - // If the next character is '[' then we have no external subset cause - // there is no system id, just the opening character of the internal - // subset. Else, has to be an id. - // - // Just look at the next char, don't eat it. - if (fReaderMgr->peekNextChar() == chOpenSquare) - { - hasIntSubset = true; - } - else - { - // Indicate we have an external subset - hasExtSubset = true; - fScanner->setHasNoDTD(false); - - // Get buffers for the ids - XMLBufBid bbPubId(fBufMgr); - XMLBufBid bbSysId(fBufMgr); - - // Get the external subset id - if (!scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), IDType_External)) - { - fReaderMgr->skipPastChar(chCloseAngle); - return; - } - - // Get copies of the ids we got - pubId = XMLString::replicate(bbPubId.getRawBuffer()); - sysId = XMLString::replicate(bbSysId.getRawBuffer()); - - // Skip spaces and check again for the opening of an internal subset - fReaderMgr->skipPastSpaces(); - - // Just look at the next char, don't eat it. 
- if (fReaderMgr->peekNextChar() == chOpenSquare) { - hasIntSubset = true; - } - } - - // Insure that the ids get cleaned up, if they got allocated - ArrayJanitor<XMLCh> janSysId(sysId); - ArrayJanitor<XMLCh> janPubId(pubId); - - // - // If we have a doc type handler and advanced callbacks are enabled, - // call the doctype event. - // - if (fDocTypeHandler) - fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset); - - // - // Ok, if we had an internal subset, we are just past the [ character - // and need to parse that first. - // - if (hasIntSubset) - { - // Eat the opening square bracket - fReaderMgr->getNextChar(); - - // We can't have any internal subset if we are reusing the validator - if (reuseGrammar) - ThrowXML(RuntimeException, XMLExcepts::Val_CantHaveIntSS); - - // Indicate we are in the internal subset now - FlagJanitor<bool> janContentFlag(&fInternalSubset, true); - - // - // And try to scan the internal subset. If we fail, try to recover - // by skipping forward tot he close angle and returning. - // - if (!scanInternalSubset()) - { - fReaderMgr->skipPastChar(chCloseAngle); - return; - } - - // - // Do a sanity check that some expanded PE did not propogate out of - // the doctype. This could happen if it was terminated early by bad - // syntax. - // - if (fReaderMgr->getReaderDepth() > 1) - { - fScanner->emitError(XMLErrs::PEPropogated); - - // Ask the reader manager to pop back down to the main level - fReaderMgr->cleanStackBackTo(1); - } - - fReaderMgr->skipPastSpaces(); - } - - // And that should leave us at the closing > of the DOCTYPE line - if (!fReaderMgr->skippedChar(chCloseAngle)) - { - // - // Do a special check for the common scenario of an extra ] char at - // the end. This is easy to recover from. 
- // - if (fReaderMgr->skippedChar(chCloseSquare) - && fReaderMgr->skippedChar(chCloseAngle)) - { - fScanner->emitError(XMLErrs::ExtraCloseSquare); - } - else - { - fScanner->emitError(XMLErrs::UnterminatedDOCTYPE); - fReaderMgr->skipPastChar(chCloseAngle); - } - } - - // - // If we had an external subset, then we need to deal with that one - // next. If we are reusing the validator, then don't scan it. - // - if (hasExtSubset && !reuseGrammar) - { - // Indicate we are in the external subset now - FlagJanitor<bool> janContentFlag(&fInternalSubset, false); - - // And now create a reader to read this entity - InputSource* srcUsed; - XMLReader* reader = fReaderMgr->createReader - ( - sysId - , pubId - , false - , XMLReader::RefFrom_NonLiteral - , XMLReader::Type_General - , XMLReader::Source_External - , srcUsed - ); - - // Put a janitor on the input source - Janitor<InputSource> janSrc(srcUsed); - - // - // If it failed then throw an exception - // - if (!reader) - ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId()); - - // - // In order to make the processing work consistently, we have to - // make this look like an external entity. So create an entity - // decl and fill it in and push it with the reader, as happens - // with an external entity. Put a janitor on it to insure it gets - // cleaned up. The reader manager does not adopt them. - // - const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull }; - DTDEntityDecl* declDTD = new DTDEntityDecl(gDTDStr); - declDTD->setSystemId(sysId); - Janitor<DTDEntityDecl> janDecl(declDTD); - - // Mark this one as a throw at end - reader->setThrowAtEnd(true); - - // And push it onto the stack, with its pseudo name - fReaderMgr->pushReader(reader, declDTD); - - // Tell it its not in an include section - scanExtSubsetDecl(false); - } -} - - // // This is called after seeing '<!ELEMENT' which indicates that an element // markup is starting. 
This guy scans the rest of it and adds it to the @@ -2680,6 +2401,10 @@ bool DTDScanner::scanEq() // void DTDScanner::scanExtSubsetDecl(const bool inIncludeSect) { + // Indicate we are in the external subset now + FlagJanitor<bool> janContentFlag(&fInternalSubset, false); + + bool bAcceptDecl = !inIncludeSect; // Get a buffer for whitespace @@ -3095,6 +2820,9 @@ void DTDScanner::scanIgnoredSection() // bool DTDScanner::scanInternalSubset() { + // Indicate we are in the internal subset now + FlagJanitor<bool> janContentFlag(&fInternalSubset, true); + // If we have a doc type handler, tell it the internal subset starts if (fDocTypeHandler) fDocTypeHandler->startIntSubset(); diff --git a/src/xercesc/validators/DTD/DTDScanner.hpp b/src/xercesc/validators/DTD/DTDScanner.hpp index aa4e91da74b6b72de69179ecf20a18c049803ccb..77261e2f92191890c35a8fb2066232b98f7db90e 100644 --- a/src/xercesc/validators/DTD/DTDScanner.hpp +++ b/src/xercesc/validators/DTD/DTDScanner.hpp @@ -56,8 +56,11 @@ /* * $Log$ - * Revision 1.1 2002/02/01 22:22:44 peiyongz - * Initial revision + * Revision 1.2 2002/05/30 16:17:19 tng + * Add feature to optionally ignore external DTD. + * + * Revision 1.1.1.1 2002/02/01 22:22:44 peiyongz + * sane_include * * Revision 1.4 2001/06/21 14:25:56 knoaman * Fix for bug 1946 @@ -92,12 +95,12 @@ public: // ----------------------------------------------------------------------- // Class specific types // - // NOTE: This should really be private, but some of the compilers we - // have to support cannot understand that. - // // EntityExpRes // Returned from scanEntityRef() to indicate how the expanded text // was treated. 
+ // + // IDTypes + // Passed to scanId() to select which kind of ID is scanned // ----------------------------------------------------------------------- enum EntityExpRes { @@ -106,6 +109,14 @@ public: , EntityExp_Returned }; + enum IDTypes + { + IDType_Public + , IDType_External + , IDType_Either + }; + + // ----------------------------------------------------------------------- // Constructors and Destructor @@ -137,20 +148,16 @@ public: DocTypeHandler* const handlerToSet ); - void scanDocTypeDecl(const bool reuseGrammar); + void scanExtSubsetDecl(const bool inIncludeSect); + bool scanInternalSubset(); + bool scanId + ( + XMLBuffer& pubIdToFill + , XMLBuffer& sysIdToFill + , const IDTypes whatKind + ); private: - // ----------------------------------------------------------------------- - // Private class types - // ----------------------------------------------------------------------- - enum IDTypes - { - IDType_Public - , IDType_External - , IDType_Either - }; - - // ----------------------------------------------------------------------- // Private DTD scanning methods. These are all in XMLValidator2.cpp // ----------------------------------------------------------------------- @@ -199,15 +206,7 @@ private: , const bool notation ); bool scanEq(); - void scanExtSubsetDecl(const bool inIncludeSect); - bool scanId - ( - XMLBuffer& pubIdToFill - , XMLBuffer& sysIdToFill - , const IDTypes whatKind - ); void scanIgnoredSection(); - bool scanInternalSubset(); void scanMarkupDecl(const bool parseTextDecl); bool scanMixed(DTDElementDecl& toFill); void scanNotationDecl();