diff --git a/src/xercesc/dom/DOMLSParser.hpp b/src/xercesc/dom/DOMLSParser.hpp index 74afe14f0434b612580ed9e4b591ddb7048ba4ed..6a2f099fa172f3a8ef740065d1b646db1830a4d5 100644 --- a/src/xercesc/dom/DOMLSParser.hpp +++ b/src/xercesc/dom/DOMLSParser.hpp @@ -24,6 +24,7 @@ #define XERCESC_INCLUDE_GUARD_DOMLSPARSER_HPP #include <xercesc/dom/DOMConfiguration.hpp> +#include <xercesc/dom/DOMLSParserFilter.hpp> #include <xercesc/util/XercesDefs.hpp> #include <xercesc/validators/common/Grammar.hpp> @@ -32,7 +33,6 @@ XERCES_CPP_NAMESPACE_BEGIN class DOMErrorHandler; class DOMLSInput; -class DOMLSParserFilter; class DOMNode; class DOMDocument; diff --git a/src/xercesc/parsers/DOMLSParserImpl.cpp b/src/xercesc/parsers/DOMLSParserImpl.cpp index de53fa103e83f40a7eebeae622d5af0d6b8566b4..c729f86132491a7df4dd971add6d4d2e423632bb 100644 --- a/src/xercesc/parsers/DOMLSParserImpl.cpp +++ b/src/xercesc/parsers/DOMLSParserImpl.cpp @@ -83,6 +83,8 @@ AbstractDOMParser(valToAdopt, manager, gramPool) , fCharsetOverridesXMLEncoding(true) , fUserAdoptsDocument(false) , fSupportedParameters(0) +, fFilterAction(0) +, fFilterDelayedTextNodes(0) { // dom spec has different default from scanner's default, so set explicitly getScanner()->setNormalizeData(false); @@ -144,6 +146,8 @@ AbstractDOMParser(valToAdopt, manager, gramPool) DOMLSParserImpl::~DOMLSParserImpl() { delete fSupportedParameters; + delete fFilterAction; + delete fFilterDelayedTextNodes; } @@ -723,6 +727,10 @@ DOMDocument* DOMLSParserImpl::parse(const DOMLSInput* source) // remove the abort filter, if present if(fFilter==&g_AbortFilter) fFilter=0; + if(fFilterAction) + fFilterAction->removeAll(); + if(fFilterDelayedTextNodes) + fFilterDelayedTextNodes->removeAll(); Wrapper4DOMLSInput isWrapper((DOMLSInput*)source, fEntityResolver, false, getMemoryManager()); @@ -744,6 +752,10 @@ DOMDocument* DOMLSParserImpl::parseURI(const XMLCh* const systemId) // remove the abort filter, if present if(fFilter==&g_AbortFilter) fFilter=0; + if(fFilterAction) + fFilterAction->removeAll(); + if(fFilterDelayedTextNodes) + fFilterDelayedTextNodes->removeAll(); AbstractDOMParser::parse(systemId); if(getErrorCount()!=0) @@ -763,6 +775,10 @@ DOMDocument* DOMLSParserImpl::parseURI(const char* const systemId) // remove the abort filter, if present if(fFilter==&g_AbortFilter) fFilter=0; + if(fFilterAction) + fFilterAction->removeAll(); + if(fFilterDelayedTextNodes) + fFilterDelayedTextNodes->removeAll(); AbstractDOMParser::parse(systemId); if(getErrorCount()!=0) @@ -781,6 +797,15 @@ void DOMLSParserImpl::parseWithContext(const DOMLSInput*, if (getParseInProgress()) throw DOMException(DOMException::INVALID_STATE_ERR, XMLDOMMsg::LSParser_ParseInProgress, fMemoryManager); + // remove the abort filter, if present + if(fFilter==&g_AbortFilter) + fFilter=0; + if(fFilterAction) + fFilterAction->removeAll(); + if(fFilterDelayedTextNodes) + fFilterDelayedTextNodes->removeAll(); + + // TODO throw DOMException(DOMException::NOT_SUPPORTED_ERR, 0, getMemoryManager()); } @@ -988,6 +1013,28 @@ XMLFilePos DOMLSParserImpl::getSrcOffset() const return getScanner()->getSrcOffset(); } +void DOMLSParserImpl::applyFilter(DOMNode* node) +{ + DOMLSParserFilter::FilterAction action; + // if the parent was already rejected, reject this too + if(fFilterAction && fFilterAction->containsKey(fCurrentParent) && fFilterAction->get(fCurrentParent)==DOMLSParserFilter::FILTER_REJECT) + action = DOMLSParserFilter::FILTER_REJECT; + else + action = fFilter->acceptNode(node); + + switch(action) + { + case DOMLSParserFilter::FILTER_ACCEPT: break; + case DOMLSParserFilter::FILTER_REJECT: + case DOMLSParserFilter::FILTER_SKIP: if(node==fCurrentNode) + fCurrentNode = (node->getPreviousSibling()?node->getPreviousSibling():fCurrentParent); + fCurrentParent->removeChild(node); + node->release(); + break; + case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager); + } +} + void DOMLSParserImpl::docCharacters(const XMLCh* const chars , const XMLSize_t length , const bool cdataSection) @@ -995,70 +1042,89 @@ void DOMLSParserImpl::docCharacters(const XMLCh* const chars AbstractDOMParser::docCharacters(chars, length, cdataSection); if(fFilter) { + // send the notification for the previous text node + if(fFilterDelayedTextNodes && fCurrentNode->getPreviousSibling() && fFilterDelayedTextNodes->containsKey(fCurrentNode->getPreviousSibling())) + { + DOMNode* textNode = fCurrentNode->getPreviousSibling(); + fFilterDelayedTextNodes->removeKey(textNode); + applyFilter(textNode); + } DOMNodeFilter::ShowType whatToShow=fFilter->getWhatToShow(); - if(cdataSection && (whatToShow & DOMNodeFilter::SHOW_CDATA_SECTION) || - !cdataSection && (whatToShow & DOMNodeFilter::SHOW_TEXT)) + if(cdataSection && (whatToShow & DOMNodeFilter::SHOW_CDATA_SECTION)) { - DOMLSParserFilter::FilterAction action = - fFilter->acceptNode(fCurrentNode); - - switch(action) - { - case DOMLSParserFilter::FILTER_ACCEPT: break; - case DOMLSParserFilter::FILTER_REJECT: - case DOMLSParserFilter::FILTER_SKIP: fCurrentParent->removeChild(fCurrentNode); - break; - case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager); - } + applyFilter(fCurrentNode); + } + else if(!cdataSection && (whatToShow & DOMNodeFilter::SHOW_TEXT)) + { + if(fFilterDelayedTextNodes==0) + fFilterDelayedTextNodes=new (fMemoryManager) ValueHashTableOf<bool, PtrHasher>(7, fMemoryManager); + fFilterDelayedTextNodes->put(fCurrentNode, true); } } } void DOMLSParserImpl::docComment(const XMLCh* const comment) { + if(fFilter) + { + // send the notification for the previous text node + if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode)) + { + fFilterDelayedTextNodes->removeKey(fCurrentNode); + applyFilter(fCurrentNode); + } + } + AbstractDOMParser::docComment(comment); if(fFilter) { DOMNodeFilter::ShowType whatToShow=fFilter->getWhatToShow(); if(whatToShow & DOMNodeFilter::SHOW_COMMENT) - { - DOMLSParserFilter::FilterAction action = - fFilter->acceptNode(fCurrentNode); - - switch(action) - { - case DOMLSParserFilter::FILTER_ACCEPT: break; - case DOMLSParserFilter::FILTER_REJECT: - case DOMLSParserFilter::FILTER_SKIP: fCurrentParent->removeChild(fCurrentNode); - break; - case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager); - } - } + applyFilter(fCurrentNode); } } void DOMLSParserImpl::docPI(const XMLCh* const target , const XMLCh* const data) { + if(fFilter) + { + // send the notification for the previous text node + if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode)) + { + fFilterDelayedTextNodes->removeKey(fCurrentNode); + applyFilter(fCurrentNode); + } + } + AbstractDOMParser::docPI(target, data); if(fFilter) { DOMNodeFilter::ShowType whatToShow=fFilter->getWhatToShow(); if(whatToShow & DOMNodeFilter::SHOW_PROCESSING_INSTRUCTION) - { - DOMLSParserFilter::FilterAction action = - fFilter->acceptNode(fCurrentNode); + applyFilter(fCurrentNode); + } +} - switch(action) - { - case DOMLSParserFilter::FILTER_ACCEPT: break; - case DOMLSParserFilter::FILTER_REJECT: - case DOMLSParserFilter::FILTER_SKIP: fCurrentParent->removeChild(fCurrentNode); - break; - case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager); - } +void DOMLSParserImpl::startEntityReference(const XMLEntityDecl& entDecl) +{ + if(fCreateEntityReferenceNodes && fFilter) + { + // send the notification for the previous text node + if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode)) + { + fFilterDelayedTextNodes->removeKey(fCurrentNode); + applyFilter(fCurrentNode); } } + + DOMNode* origParent = fCurrentParent; + AbstractDOMParser::startEntityReference(entDecl); + if (fCreateEntityReferenceNodes && fFilter) + { + if(fFilterAction && fFilterAction->containsKey(origParent) && fFilterAction->get(origParent)==DOMLSParserFilter::FILTER_REJECT) + fFilterAction->put(fCurrentNode, DOMLSParserFilter::FILTER_REJECT); + } } void DOMLSParserImpl::endElement(const XMLElementDecl& elemDecl @@ -1066,31 +1132,49 @@ void DOMLSParserImpl::endElement(const XMLElementDecl& elemDecl , const bool isRoot , const XMLCh* const elemPrefix) { - DOMNode* origParent=fCurrentParent; - DOMNode* origNode=fCurrentNode; + if(fFilter) + { + // send the notification for the previous text node + if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode)) + { + fFilterDelayedTextNodes->removeKey(fCurrentNode); + applyFilter(fCurrentNode); + } + } + AbstractDOMParser::endElement(elemDecl, urlId, isRoot, elemPrefix); if(fFilter) { DOMNodeFilter::ShowType whatToShow=fFilter->getWhatToShow(); if(whatToShow & DOMNodeFilter::SHOW_ELEMENT) { - DOMLSParserFilter::FilterAction action = - fFilter->acceptNode(origNode); - + DOMNode* thisNode = fCurrentNode; + DOMLSParserFilter::FilterAction action; + if(fFilterAction && fFilterAction->containsKey(thisNode)) + { + action = fFilterAction->get(thisNode); + fFilterAction->removeKey(thisNode); + } + else + action = fFilter->acceptNode(thisNode); switch(action) { case DOMLSParserFilter::FILTER_ACCEPT: break; - case DOMLSParserFilter::FILTER_REJECT: origParent->removeChild(origNode); + case DOMLSParserFilter::FILTER_REJECT: fCurrentNode = (thisNode->getPreviousSibling()?thisNode->getPreviousSibling():fCurrentParent); + fCurrentParent->removeChild(thisNode); + thisNode->release(); break; case DOMLSParserFilter::FILTER_SKIP: { - DOMNode* child=origNode->getFirstChild(); + DOMNode* child=thisNode->getFirstChild(); while(child) { DOMNode* next=child->getNextSibling(); - origParent->appendChild(child); + fCurrentParent->appendChild(child); child=next; } - origParent->removeChild(origNode); + fCurrentNode = (thisNode->getPreviousSibling()?thisNode->getPreviousSibling():fCurrentParent); + fCurrentParent->removeChild(thisNode); + thisNode->release(); } break; case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager); @@ -1107,21 +1191,37 @@ void DOMLSParserImpl::startElement(const XMLElementDecl& elemDecl , const bool isEmpty , const bool isRoot) { - AbstractDOMParser::startElement(elemDecl, urlId, elemPrefix, attrList, attrCount, false, isRoot); if(fFilter) { - DOMLSParserFilter::FilterAction action = - fFilter->startElement((DOMElement*)fCurrentNode); + // send the notification for the previous text node + if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode)) + { + fFilterDelayedTextNodes->removeKey(fCurrentNode); + applyFilter(fCurrentNode); + } + } - switch(action) + DOMNode* origParent = fCurrentParent; + AbstractDOMParser::startElement(elemDecl, urlId, elemPrefix, attrList, attrCount, false, isRoot); + if(fFilter) + { + // if the parent was already rejected, reject this too + if(fFilterAction && fFilterAction->containsKey(origParent) && fFilterAction->get(origParent)==DOMLSParserFilter::FILTER_REJECT) + fFilterAction->put(fCurrentNode, DOMLSParserFilter::FILTER_REJECT); + else { - case DOMLSParserFilter::FILTER_ACCEPT: break; - case DOMLSParserFilter::FILTER_REJECT: // TODO: reject also the children - case DOMLSParserFilter::FILTER_SKIP: fCurrentParent=fCurrentNode->getParentNode(); - fCurrentParent->removeChild(fCurrentNode); - fCurrentNode=fCurrentParent; - break; - case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager); + DOMLSParserFilter::FilterAction action = fFilter->startElement((DOMElement*)fCurrentNode); + + switch(action) + { + case DOMLSParserFilter::FILTER_ACCEPT: break; + case DOMLSParserFilter::FILTER_REJECT: + case DOMLSParserFilter::FILTER_SKIP: if(fFilterAction==0) + fFilterAction=new (fMemoryManager) ValueHashTableOf<DOMLSParserFilter::FilterAction, PtrHasher>(7, fMemoryManager); + fFilterAction->put(fCurrentNode, action); + break; + case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager); + } } } if(isEmpty) diff --git a/src/xercesc/parsers/DOMLSParserImpl.hpp b/src/xercesc/parsers/DOMLSParserImpl.hpp index e2a3c7db9e151c9d1b630bb1a9ff6f7f60d67391..9f053ab849e101f41d74e342960f33ebbb9eb13a 100644 --- a/src/xercesc/parsers/DOMLSParserImpl.hpp +++ b/src/xercesc/parsers/DOMLSParserImpl.hpp @@ -29,6 +29,7 @@ #include <xercesc/dom/DOMConfiguration.hpp> #include <xercesc/util/XercesDefs.hpp> #include <xercesc/util/RefVectorOf.hpp> +#include <xercesc/util/ValueHashTableOf.hpp> XERCES_CPP_NAMESPACE_BEGIN @@ -555,6 +556,10 @@ public : const XMLCh* const target , const XMLCh* const data ); + virtual void startEntityReference + ( + const XMLEntityDecl& entDecl + ); virtual void endElement ( const XMLElementDecl& elemDecl @@ -580,6 +585,11 @@ private : // ----------------------------------------------------------------------- void resetParse(); + // ----------------------------------------------------------------------- + // Helper methods + // ----------------------------------------------------------------------- + void applyFilter(DOMNode* node); + // ----------------------------------------------------------------------- // Private data members // @@ -607,6 +617,16 @@ private : // A list of the parameters that can be set, including the ones // specific of Xerces // + // fFilterAction + // A map of elements rejected by the DOMLSParserFilter::startElement + // callback, used to avoid invoking DOMLSParserFilter::acceptNode + // on its children + // + // fFilterDelayedTextNodes + // As text nodes are filled incrementally, store them in a map + // so that we ask DOMLSParserFilter::acceptNode only once, when it + // is completely created + // //----------------------------------------------------------------------- DOMLSResourceResolver* fEntityResolver; XMLEntityResolver* fXMLEntityResolver; @@ -615,6 +635,8 @@ private : bool fCharsetOverridesXMLEncoding; bool fUserAdoptsDocument; DOMStringListImpl* fSupportedParameters; + ValueHashTableOf<DOMLSParserFilter::FilterAction, PtrHasher>* fFilterAction; + ValueHashTableOf<bool, PtrHasher>* fFilterDelayedTextNodes; // ----------------------------------------------------------------------- // Unimplemented constructors and operators diff --git a/tests/src/DOM/DOMTest/DTest.cpp b/tests/src/DOM/DOMTest/DTest.cpp index f09b0b7d2155545af81b504cffb757e23ff53d3a..c99aa120430a7b9e9e6939139b098e15f7fff692 100644 --- a/tests/src/DOM/DOMTest/DTest.cpp +++ b/tests/src/DOM/DOMTest/DTest.cpp @@ -4915,7 +4915,28 @@ public: DOMLSInput* m_input; }; +class ParserSkipper : public DOMLSParserFilter +{ +public: + ParserSkipper() : fCallbackCalls(0) { } + + virtual FilterAction acceptNode(DOMNode* node) { fCallbackCalls++; return DOMLSParserFilter::FILTER_ACCEPT;} + virtual FilterAction startElement(DOMElement* node) + { + XMLCh elem[]={chLatin_e, chLatin_l, chLatin_e, chLatin_m, chNull }; + if(XMLString::equals(node->getNodeName(), elem)) + return DOMLSParserFilter::FILTER_REJECT; + else + return DOMLSParserFilter::FILTER_ACCEPT; + } + virtual DOMNodeFilter::ShowType getWhatToShow() const { return DOMNodeFilter::SHOW_ALL; } + + unsigned int fCallbackCalls; +}; + bool DOMTest::testLSExceptions() { + bool OK = true; + const char* sXml="<?xml version='1.0'?>" "<!DOCTYPE root[" "<!ENTITY ent1 'Dallas. &ent3; #5668'>" @@ -4929,7 +4950,6 @@ bool DOMTest::testLSExceptions() { "<elem>Home </elem>" "<elem>Test: &ent5;</elem>" "</root>"; - MemBufInputSource is((XMLByte*)sXml, strlen(sXml), "bufId"); static const XMLCh gLS[] = { chLatin_L, chLatin_S, chNull }; DOMImplementationLS *impl = (DOMImplementationLS*)DOMImplementationRegistry::getDOMImplementation(gLS); @@ -4944,14 +4964,14 @@ bool DOMTest::testLSExceptions() { DOMDocument* doc=domBuilder->parse(input); fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__); - return false; + OK=false; } catch(DOMLSException& e) { if(e.code!=DOMLSException::PARSE_ERR) { fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__); - return false; + OK=false; } } @@ -4962,20 +4982,70 @@ bool DOMTest::testLSExceptions() { DOMDocument* doc=domBuilder->parse(input); fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__); - return false; + OK=false; } catch(DOMException& e) { if(e.code!=DOMException::INVALID_STATE_ERR) { fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__); - return false; + OK=false; + } + } + + try + { + ParserSkipper skipper; + domBuilder->setFilter(&skipper); + domBuilder->getDomConfig()->setParameter(XMLUni::fgDOMEntities, false); + DOMDocument* doc=domBuilder->parse(input); + + // verify that we get only 3 calls: for the text node, the CDATA section and the root element + if(doc==NULL || doc->getDocumentElement()==NULL || doc->getDocumentElement()->getChildElementCount()!=0 || skipper.fCallbackCalls!=3) + { + fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__); + OK=false; + } + } + catch(DOMException&) + { + fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__); + OK=false; + } + + // this XML should trigger reuse of DOMElement + const char* sXml2="<?xml version='1.0'?>" + "<root>" + "<elem>Home</elem>" + "<elem2>Test</elem2>" + "<elem>Home</elem>" + "<elem2>Test</elem2>" + "</root>"; + XMLString::transcode(sXml2, tempStr, 3999); + input->setStringData(tempStr); + try + { + ParserSkipper skipper; + domBuilder->setFilter(&skipper); + DOMDocument* doc=domBuilder->parse(input); + + // verify that we get only 5 calls: for the root element, the two elem2 and the two text nodes under them + if(doc==NULL || doc->getDocumentElement()==NULL || doc->getDocumentElement()->getChildElementCount()!=2 || skipper.fCallbackCalls!=5) + { + fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__); + OK=false; } } + catch(DOMException&) + { + fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__); + OK=false; + } + input->release(); domBuilder->release(); - return true; + return OK; } bool DOMTest::testElementTraversal() {