From 21cfa9606db88dac85cbe3271ab255ba5dd132a4 Mon Sep 17 00:00:00 2001 From: Tinny Ng <tng@apache.org> Date: Thu, 8 Aug 2002 14:18:09 +0000 Subject: [PATCH] DOM Fix: Recycle node value buffer to avoid memory growth. git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@174097 13f79535-47bb-0310-9956-ffa450edef68 --- doc/program-dom.xml | 81 +++++++++++- src/xercesc/dom/impl/DOMCDATASectionImpl.cpp | 9 +- src/xercesc/dom/impl/DOMCharacterDataImpl.cpp | 65 ++++++---- src/xercesc/dom/impl/DOMCharacterDataImpl.hpp | 7 +- src/xercesc/dom/impl/DOMCommentImpl.cpp | 4 +- src/xercesc/dom/impl/DOMDocumentImpl.cpp | 28 ++++- src/xercesc/dom/impl/DOMDocumentImpl.hpp | 117 +++++++++--------- src/xercesc/dom/impl/DOMDocumentTypeImpl.hpp | 1 - src/xercesc/dom/impl/DOMElementImpl.cpp | 4 +- src/xercesc/dom/impl/DOMStringPool.cpp | 78 ++++++++++++ src/xercesc/dom/impl/DOMStringPool.hpp | 95 ++++++++++++++ src/xercesc/dom/impl/DOMTextImpl.cpp | 12 +- tests/DOM/DOMMemTest/DOMMemTest.cpp | 31 ++++- 13 files changed, 423 insertions(+), 109 deletions(-) diff --git a/doc/program-dom.xml b/doc/program-dom.xml index b9ca0828d..ea8e94a8d 100644 --- a/doc/program-dom.xml +++ b/doc/program-dom.xml @@ -164,14 +164,89 @@ <anchor name="XMLCh"/> <s3 title="String Type"> <p>The C++ DOM uses the plain, null-terminated (XMLCh *) utf-16 strings - as the String type. The (XMLCh*) utf-16 type string has low overhead. - All the string data would remain in memory until the document object is released.</p> - + as the String type. The (XMLCh*) utf-16 type string has low overhead.</p> <source> //C++ DOM const XMLCh* nodeValue = aNode->getNodeValue(); </source> + <p>All the string data would remain in memory until the document object is released. + But such string data may be RECYCLED by the implementation if necessary. 
+ Users should make an appropriate copy of any returned string for safe reference.</p> + + <p>For example, after a DOMNode has been released, the memory allocated for its node value + will be recycled by the implementation.</p> + + <source> + XMLCh xfoo[] = {chLatin_f, chLatin_o, chLatin_o, chNull}; + + // pAttr has node value = "foo" + // fNodeValue has "foo" + pAttr->setNodeValue(xfoo); + const XMLCh* fNodeValue = pAttr->getNodeValue(); + + // fNodeValue has "foo" + // make a copy of the string for future reference + XMLCh* oldNodeValue = XMLString::replicate(fNodeValue); + + // release the node pAttr + pAttr->release(); + + // other operations + : + : + + // the implementation may already have recycled the memory of pAttr, + // so it is not safe to expect fNodeValue to still hold "foo" + if (XMLString::compareString(xfoo, fNodeValue)) + printf("fNodeValue has some other content\n"); + + // should use your own safe copy + if (!XMLString::compareString(xfoo, oldNodeValue)) + printf("Use your own copy of the oldNodeValue if want to reference the string later\n"); + + // delete your own replicated string when done + delete [] oldNodeValue; + + </source> + + <p>Or if DOMNode::setNodeValue() is called to set a new node value, + the implementation will simply overwrite the node value memory area. So any previously returned + pointers will now see the new value automatically. Users should make an appropriate + copy of any previously returned string for safe reference. 
For example:</p> + + <source> + XMLCh xfoo[] = {chLatin_f, chLatin_o, chLatin_o, chNull}; + XMLCh xfee[] = {chLatin_f, chLatin_e, chLatin_e, chNull}; + + // pAttr has node value = "foo" + pAttr->setNodeValue(xfoo); + const XMLCh* fNodeValue = pAttr->getNodeValue(); + + // fNodeValue has "foo" + // make a copy of the string for future reference + XMLCh* oldNodeValue = XMLString::replicate(fNodeValue); + + // now set pAttr with a new node value "fee" + pAttr->setNodeValue(xfee); + + // should not rely on fNodeValue for the old node value; it may no longer compare equal to "foo" + if (XMLString::compareString(xfoo, fNodeValue)) + printf("Should not rely on fNodeValue for the old node value\n"); + + // should use your own safe copy + if (!XMLString::compareString(xfoo, oldNodeValue)) + printf("Use your own copy of the oldNodeValue if want to reference the string later\n"); + + // delete your own replicated string when done + delete [] oldNodeValue; + + </source> + + <p>This is to prevent memory growth when DOMNode::setNodeValue() is called hundreds of + times. 
This design allows users to actively select which returned string should stay + in memory by manually copying the string to application's own heap.</p> + </s3> </s2> diff --git a/src/xercesc/dom/impl/DOMCDATASectionImpl.cpp b/src/xercesc/dom/impl/DOMCDATASectionImpl.cpp index 5da4564cf..92183d168 100644 --- a/src/xercesc/dom/impl/DOMCDATASectionImpl.cpp +++ b/src/xercesc/dom/impl/DOMCDATASectionImpl.cpp @@ -63,6 +63,7 @@ #include "DOMRangeImpl.hpp" #include "DOMDocumentImpl.hpp" #include "DOMCasts.hpp" +#include "DOMStringPool.hpp" #include <xercesc/dom/DOMException.hpp> #include <xercesc/util/XMLUniDefs.hpp> @@ -125,7 +126,7 @@ DOMText *DOMCDATASectionImpl::splitText(XMLSize_t offset) throw DOMException( DOMException::NO_MODIFICATION_ALLOWED_ERR, 0); } - XMLSize_t len = XMLString::stringLen(fCharacterData.fData); + XMLSize_t len = fCharacterData.fDataBuf->getLen(); if (offset > len || offset < 0) throw DOMException(DOMException::INDEX_SIZE_ERR, 0); @@ -137,10 +138,7 @@ DOMText *DOMCDATASectionImpl::splitText(XMLSize_t offset) if (parent != 0) parent->insertBefore(newText, getNextSibling()); - XMLCh *wData = (XMLCh *)(fCharacterData.fData); // Cast off const. - wData[offset] = 0; // revisit - could change a string that - // application code has. Do we want to do this? 
- + fCharacterData.fDataBuf->chop(offset); if (this->getOwnerDocument() != 0) { Ranges* ranges = ((DOMDocumentImpl *)this->getOwnerDocument())->getRanges(); @@ -183,6 +181,7 @@ void DOMCDATASectionImpl::release() if (doc) { fParent.release(); + fCharacterData.releaseBuffer(); doc->release(this, DOMDocumentImpl::CDATA_SECTION_OBJECT); } else { diff --git a/src/xercesc/dom/impl/DOMCharacterDataImpl.cpp b/src/xercesc/dom/impl/DOMCharacterDataImpl.cpp index 360f9d143..cfb6634bd 100644 --- a/src/xercesc/dom/impl/DOMCharacterDataImpl.cpp +++ b/src/xercesc/dom/impl/DOMCharacterDataImpl.cpp @@ -64,19 +64,37 @@ #include "DOMRangeImpl.hpp" #include "DOMDocumentImpl.hpp" #include "DOMCasts.hpp" -#include <xercesc/framework/XMLBuffer.hpp> +#include "DOMStringPool.hpp" #include <xercesc/util/XMLUniDefs.hpp> DOMCharacterDataImpl::DOMCharacterDataImpl(DOMDocument *doc, const XMLCh *dat) + : fDoc(0) + , fDataBuf(0) { - this->fData = ((DOMDocumentImpl *)doc)->getPooledString(dat); + fDoc = (DOMDocumentImpl*)doc; + + fDataBuf = fDoc->popBuffer(); + if (!fDataBuf) + fDataBuf = new (fDoc) DOMBuffer(fDoc, dat); + else + fDataBuf->set(dat); + }; DOMCharacterDataImpl::DOMCharacterDataImpl(const DOMCharacterDataImpl &other) + : fDoc(0) + , fDataBuf(0) { - fData = other.fData; + fDoc = (DOMDocumentImpl*)other.fDoc; + + fDataBuf = fDoc->popBuffer(); + if (!fDataBuf) + fDataBuf = new (fDoc) DOMBuffer(fDoc, other.fDataBuf->getRawBuffer()); + else + fDataBuf->set(other.fDataBuf->getRawBuffer()); + }; @@ -86,7 +104,7 @@ DOMCharacterDataImpl::~DOMCharacterDataImpl() { const XMLCh * DOMCharacterDataImpl::getNodeValue() const { - return fData; + return fDataBuf->getRawBuffer(); }; @@ -95,7 +113,7 @@ void DOMCharacterDataImpl::setNodeValue(const DOMNode *node, const XMLCh *value) if (castToNodeImpl(node)->isReadOnly()) throw DOMException(DOMException::NO_MODIFICATION_ALLOWED_ERR, 0); - fData = ((DOMDocumentImpl *)node->getOwnerDocument())->getPooledString(value); + fDataBuf->set(value); if 
(node->getOwnerDocument() != 0) { Ranges* ranges = ((DOMDocumentImpl *)node->getOwnerDocument())->getRanges(); @@ -117,11 +135,7 @@ void DOMCharacterDataImpl::appendData(const DOMNode *node, const XMLCh *dat) throw DOMException( DOMException::NO_MODIFICATION_ALLOWED_ERR, 0); - - XMLBuffer temp; - temp.set(fData); - temp.append(dat); - fData = ((DOMDocumentImpl *)node->getOwnerDocument())->getPooledString(temp.getRawBuffer()); + fDataBuf->append(dat); }; @@ -134,7 +148,7 @@ void DOMCharacterDataImpl::deleteData(const DOMNode *node, XMLSize_t offset, XML // when parameter values are bad. // - XMLSize_t len = XMLString::stringLen(this->fData); + XMLSize_t len = this->fDataBuf->getLen(); if (offset > len || offset < 0 || count < 0) throw DOMException(DOMException::INDEX_SIZE_ERR, 0); @@ -159,10 +173,10 @@ void DOMCharacterDataImpl::deleteData(const DOMNode *node, XMLSize_t offset, XML else newString = temp; - XMLString::copyNString(newString, fData, offset); - XMLString::copyString(newString+offset, fData+offset+count); + XMLString::copyNString(newString, fDataBuf->getRawBuffer(), offset); + XMLString::copyString(newString+offset, fDataBuf->getRawBuffer()+offset+count); - fData = ((DOMDocumentImpl *)node->getOwnerDocument())->getPooledString(newString); + fDataBuf->set(newString); if (newLen >= 3999) delete[] newString; @@ -188,7 +202,7 @@ void DOMCharacterDataImpl::deleteData(const DOMNode *node, XMLSize_t offset, XML const XMLCh *DOMCharacterDataImpl::getData() const { - return fData; + return fDataBuf->getRawBuffer(); }; @@ -197,7 +211,7 @@ const XMLCh *DOMCharacterDataImpl::getData() const // XMLSize_t DOMCharacterDataImpl::getLength() const { - return XMLString::stringLen(fData); + return fDataBuf->getLen(); }; @@ -212,7 +226,7 @@ void DOMCharacterDataImpl::insertData(const DOMNode *node, XMLSize_t offset, con // when parameter values are bad. 
// - XMLSize_t len = XMLString::stringLen(this->fData); + XMLSize_t len = fDataBuf->getLen(); if (offset > len || offset < 0) throw DOMException(DOMException::INDEX_SIZE_ERR, 0); @@ -227,11 +241,11 @@ void DOMCharacterDataImpl::insertData(const DOMNode *node, XMLSize_t offset, con else newString = temp; - XMLString::copyNString(newString, fData, offset); + XMLString::copyNString(newString, fDataBuf->getRawBuffer(), offset); XMLString::copyNString(newString+offset, dat, datLen); - XMLString::copyString(newString+offset+datLen, fData+offset); + XMLString::copyString(newString+offset+datLen, fDataBuf->getRawBuffer()+offset); - fData = ((DOMDocumentImpl *)node->getOwnerDocument())->getPooledString(newString); + fDataBuf->set(newString); if (newLen >= 3999) delete[] newString; @@ -257,7 +271,7 @@ void DOMCharacterDataImpl::setData(const DOMNode *node, const XMLCh *arg) { if (castToNodeImpl(node)->isReadOnly()) throw DOMException(DOMException::NO_MODIFICATION_ALLOWED_ERR, 0); - fData = ((DOMDocumentImpl *)node->getOwnerDocument())->getPooledString(arg); + fDataBuf->set(arg); }; @@ -273,7 +287,7 @@ const XMLCh * DOMCharacterDataImpl::substringData(const DOMNode *node, XMLSize_t // - XMLSize_t len = XMLString::stringLen(fData); + XMLSize_t len = fDataBuf->getLen(); if (offset > len || offset < 0 || count < 0) throw DOMException(DOMException::INDEX_SIZE_ERR, 0); @@ -286,7 +300,7 @@ const XMLCh * DOMCharacterDataImpl::substringData(const DOMNode *node, XMLSize_t else newString = temp; - XMLString::copyNString(newString, fData+offset, count); + XMLString::copyNString(newString, fDataBuf->getRawBuffer()+offset, count); newString[count] = chNull; const XMLCh* retString = ((DOMDocumentImpl *)node->getOwnerDocument())->getPooledString(newString); @@ -295,6 +309,11 @@ const XMLCh * DOMCharacterDataImpl::substringData(const DOMNode *node, XMLSize_t delete[] newString; return retString; + }; +void DOMCharacterDataImpl::releaseBuffer() { + fDoc->releaseBuffer(fDataBuf); +} + diff 
--git a/src/xercesc/dom/impl/DOMCharacterDataImpl.hpp b/src/xercesc/dom/impl/DOMCharacterDataImpl.hpp index a06faa04c..74c80ad11 100644 --- a/src/xercesc/dom/impl/DOMCharacterDataImpl.hpp +++ b/src/xercesc/dom/impl/DOMCharacterDataImpl.hpp @@ -75,6 +75,8 @@ class DOMNode; class DOMDocument; +class DOMDocumentImpl; +class DOMBuffer; // Instances of DOMCharacterDataImpl appear as members of node types // that implement the DOMCharacterData interfaces. @@ -83,7 +85,9 @@ class DOMDocument; class CDOM_EXPORT DOMCharacterDataImpl { public: - const XMLCh *fData; + DOMBuffer* fDataBuf; + // for the buffer bid + DOMDocumentImpl* fDoc; public: DOMCharacterDataImpl(DOMDocument *doc, const XMLCh *dat); @@ -102,6 +106,7 @@ public: const XMLCh* substringData(const DOMNode *node, XMLSize_t offset, XMLSize_t count) const; + void releaseBuffer(); }; diff --git a/src/xercesc/dom/impl/DOMCommentImpl.cpp b/src/xercesc/dom/impl/DOMCommentImpl.cpp index 894c2b081..a19040741 100644 --- a/src/xercesc/dom/impl/DOMCommentImpl.cpp +++ b/src/xercesc/dom/impl/DOMCommentImpl.cpp @@ -112,8 +112,10 @@ void DOMCommentImpl::release() throw DOMException(DOMException::INVALID_ACCESS_ERR,0); DOMDocumentImpl* doc = (DOMDocumentImpl*) getOwnerDocument(); - if (doc) + if (doc) { + fCharacterData.releaseBuffer(); doc->release(this, DOMDocumentImpl::COMMENT_OBJECT); + } else { // shouldn't reach here throw DOMException(DOMException::INVALID_ACCESS_ERR,0); diff --git a/src/xercesc/dom/impl/DOMDocumentImpl.cpp b/src/xercesc/dom/impl/DOMDocumentImpl.cpp index dce803dfc..5d1451e9e 100644 --- a/src/xercesc/dom/impl/DOMDocumentImpl.cpp +++ b/src/xercesc/dom/impl/DOMDocumentImpl.cpp @@ -123,7 +123,8 @@ DOMDocumentImpl::DOMDocumentImpl() fStandalone(false), fDocumentURI(0), fUserDataTable(0), - fRecycleNodePtr(0) + fRecycleNodePtr(0), + fRecycleBufferPtr(0) { fNamePool = new (this) DOMStringPool(257, this); }; @@ -151,9 +152,11 @@ DOMDocumentImpl::DOMDocumentImpl(const XMLCh *fNamespaceURI, fStandalone(false), 
fDocumentURI(0), fUserDataTable(0), - fRecycleNodePtr(0) + fRecycleNodePtr(0), + fRecycleBufferPtr(0) { fNamePool = new (this) DOMStringPool(257, this); + try { setDocumentType(doctype); @@ -208,6 +211,10 @@ DOMDocumentImpl::~DOMDocumentImpl() delete fRecycleNodePtr; } + if (fRecycleBufferPtr) { + delete fRecycleBufferPtr; + } + // Delete the heap for this document. This uncerimoniously yanks the storage // out from under all of the nodes in the document. Destructors are NOT called. this->deleteHeap(); @@ -690,7 +697,6 @@ int DOMDocumentImpl::changes() const{ // just lying around naked in DocumentImpl. // //----------------------------------------------------------------------- - XMLCh * DOMDocumentImpl::cloneString(const XMLCh *src) { if (!src) return 0; @@ -1215,6 +1221,22 @@ void DOMDocumentImpl::release(DOMNode* object, NodeObjectType type) fRecycleNodePtr->operator[](type)->push(object); } +void DOMDocumentImpl::releaseBuffer(DOMBuffer* buffer) +{ + if (!fRecycleBufferPtr) + fRecycleBufferPtr = new RefStackOf<DOMBuffer> (15, false); + + fRecycleBufferPtr->push(buffer); +} + +DOMBuffer* DOMDocumentImpl::popBuffer() +{ + if (!fRecycleBufferPtr || fRecycleBufferPtr->empty()) + return 0; + + return fRecycleBufferPtr->pop(); +} + void * DOMDocumentImpl::allocate(size_t amount, NodeObjectType type) { diff --git a/src/xercesc/dom/impl/DOMDocumentImpl.hpp b/src/xercesc/dom/impl/DOMDocumentImpl.hpp index f498db693..83ff8e74f 100644 --- a/src/xercesc/dom/impl/DOMDocumentImpl.hpp +++ b/src/xercesc/dom/impl/DOMDocumentImpl.hpp @@ -91,7 +91,6 @@ class DOMCommentImpl; class DOMDeepNodeListImpl; class DOMDocumentFragmentImpl; class DOMDocumentTypeImpl; -class IDDStringPool; class DOMElementImpl; class DOMEntityImpl; class DOMEntityReferenceImpl; @@ -108,6 +107,7 @@ class DOMNodeIDMap; class DOMRangeImpl; class DOMParentNode; class DOMStringPool; +class DOMBuffer; typedef RefVectorOf<DOMRangeImpl> Ranges; typedef KeyRefPair<void, DOMUserDataHandler> DOMUserDataRecord; @@ 
-139,67 +139,10 @@ public: // ----------------------------------------------------------------------- // data // ----------------------------------------------------------------------- - - DOMNodeImpl fNode; // Implements common node functionality. DOMParentNode fParent; // Implements common parent node functionality - - DOMDocumentType* fDocType; - DOMElement* fDocElement; - DOMStringPool* fNamePool; DOMNodeIDMap* fNodeIDMap; // for use by GetElementsById(). - Ranges* fRanges; - - int fChanges; - - bool errorChecking; // Bypass error checking. - - // New data introduced in DOM Level 3 - XMLCh* fActualEncoding; - XMLCh* fEncoding; - bool fStandalone; - XMLCh* fVersion; - XMLCh* fDocumentURI; - - RefHashTableOf<DOMNodeUserDataTable>* fUserDataTable; - - - // Per-Document heap Variables. - // The heap consists of one or more biggish blocks which are - // sub-allocated for individual allocations of nodes, strings, etc. - // The big blocks form a linked list, allowing them to be located for deletion. - // - // There is no provision for deleting suballocated blocks, other than - // deleting the entire heap when the document is deleted. - // - // There is no header on individual sub-allocated blocks. - // The header on big blocks consists only of a single back pointer to - // the previously allocated big block (our linked list of big blocks) - // - // - // revisit - this heap should be encapsulated into its own - // class, rather than hanging naked on Document. 
- // - void* fCurrentBlock; - char* fFreePtr; - XMLSize_t fFreeBytesRemaining; - - // To recycle the DOMNode pointer - RefArrayOf<DOMNodePtr>* fRecycleNodePtr; - - friend class DOMNodeImpl; - friend class DOMNodeIteratorImpl; - friend class DOMTreeWalkerImpl; - friend class DOMRangeImpl; - friend class IDXercesDOMParser; - - - - void setDocumentType(DOMDocumentType *doctype); - - - public: DOMDocumentImpl(); DOMDocumentImpl(const XMLCh* namespaceURI, //DOM Level 2 @@ -207,6 +150,8 @@ public: DOMDocumentType* doctype); virtual ~DOMDocumentImpl(); + void setDocumentType(DOMDocumentType *doctype); + // Add all functions that are pure virutal in DOMNODE DOMNODE_FUNCTIONS; @@ -348,14 +293,14 @@ public: // a document, and is not recovered until the // document itself is deleted. // - void* allocate(size_t amount); void* allocate(size_t amount, NodeObjectType type); XMLCh* cloneString(const XMLCh *src); const XMLCh* getPooledString(const XMLCh *src); void deleteHeap(); void release(DOMNode* object, NodeObjectType type); - + void releaseBuffer(DOMBuffer* buffer); + DOMBuffer* popBuffer(); // Factory methods for getting/creating node lists. // Because nothing is ever deleted, the implementation caches and recycles @@ -371,14 +316,66 @@ private: virtual DOMNode* importNode(DOMNode *source, bool deep, bool cloningNode); private: + // ----------------------------------------------------------------------- + // data + // ----------------------------------------------------------------------- + // New data introduced in DOM Level 3 + XMLCh* fActualEncoding; + XMLCh* fEncoding; + bool fStandalone; + XMLCh* fVersion; + XMLCh* fDocumentURI; + + RefHashTableOf<DOMNodeUserDataTable>* fUserDataTable; + + + // Per-Document heap Variables. + // The heap consists of one or more biggish blocks which are + // sub-allocated for individual allocations of nodes, strings, etc. + // The big blocks form a linked list, allowing them to be located for deletion. 
+ // + // There is no provision for deleting suballocated blocks, other than + // deleting the entire heap when the document is deleted. + // + // There is no header on individual sub-allocated blocks. + // The header on big blocks consists only of a single back pointer to + // the previously allocated big block (our linked list of big blocks) + // + // + // revisit - this heap should be encapsulated into its own + // class, rather than hanging naked on Document. + // + void* fCurrentBlock; + char* fFreePtr; + XMLSize_t fFreeBytesRemaining; + + // To recycle the DOMNode pointer + RefArrayOf<DOMNodePtr>* fRecycleNodePtr; + + // To recycle DOMBuffer pointer + RefStackOf<DOMBuffer>* fRecycleBufferPtr; + + // Pool of DOMNodeList for getElementsByTagName DOMDeepNodeListPool<DOMDeepNodeListImpl>* fNodeListPool; + // Other data + DOMDocumentType* fDocType; + DOMElement* fDocElement; + DOMStringPool* fNamePool; + + Ranges* fRanges; + + int fChanges; + bool errorChecking; // Bypass error checking. + }; +// --------------------------------------------------------------------------- // // Operator new. Global overloaded version, lets any object be allocated on // the heap owned by a document. // +// --------------------------------------------------------------------------- inline void * operator new(size_t amt, DOMDocument *doc, DOMDocumentImpl::NodeObjectType type) { // revist. Probably should be a checked cast. 
diff --git a/src/xercesc/dom/impl/DOMDocumentTypeImpl.hpp b/src/xercesc/dom/impl/DOMDocumentTypeImpl.hpp index c95948c41..ebeea47a8 100644 --- a/src/xercesc/dom/impl/DOMDocumentTypeImpl.hpp +++ b/src/xercesc/dom/impl/DOMDocumentTypeImpl.hpp @@ -73,7 +73,6 @@ #include <xercesc/util/XercesDefs.hpp> -#include <xercesc/framework/XMLBuffer.hpp> #include <xercesc/dom/DOMDocumentType.hpp> #include "DOMNodeImpl.hpp" #include "DOMChildNode.hpp" diff --git a/src/xercesc/dom/impl/DOMElementImpl.cpp b/src/xercesc/dom/impl/DOMElementImpl.cpp index d1d59b589..da23d25fb 100644 --- a/src/xercesc/dom/impl/DOMElementImpl.cpp +++ b/src/xercesc/dom/impl/DOMElementImpl.cpp @@ -83,7 +83,7 @@ DOMElementImpl::DOMElementImpl(DOMDocument *ownerDoc, const XMLCh *eName) : fNode(ownerDoc), fParent(ownerDoc), fAttributes(0), fDefaultAttributes(0) { DOMDocumentImpl *docImpl = (DOMDocumentImpl *)ownerDoc; - fName = docImpl->fNamePool->getPooledString(eName); + fName = docImpl->getPooledString(eName); setupDefaultAttributes(); if (!fDefaultAttributes) { fDefaultAttributes = new (getOwnerDocument()) DOMAttrMapImpl(this); @@ -551,7 +551,7 @@ DOMNode* DOMElementImpl::rename(const XMLCh* namespaceURI, const XMLCh* name) DOMDocumentImpl* doc = (DOMDocumentImpl*) getOwnerDocument(); if (!namespaceURI || !*namespaceURI) { - fName = doc->fNamePool->getPooledString(name); + fName = doc->getPooledString(name); fAttributes->reconcileDefaultAttributes(getDefaultAttributes()); return this; diff --git a/src/xercesc/dom/impl/DOMStringPool.cpp b/src/xercesc/dom/impl/DOMStringPool.cpp index fcb60d4f5..0892c4d40 100644 --- a/src/xercesc/dom/impl/DOMStringPool.cpp +++ b/src/xercesc/dom/impl/DOMStringPool.cpp @@ -140,5 +140,83 @@ const XMLCh *DOMStringPool::getPooledString(const XMLCh *in) }; +// ----------------------------------------------------------------------- +// DOMBuffer: Constructors +// ----------------------------------------------------------------------- +DOMBuffer::DOMBuffer(DOMDocumentImpl *doc, int 
capacity) : + fBuffer(0) + , fIndex(0) + , fCapacity(capacity) + , fDoc(doc) +{ + // Buffer is one larger than capacity, to allow for zero term + fBuffer = (XMLCh*) doc->allocate((fCapacity+1)*sizeof(XMLCh)); + + // Keep it null terminated + fBuffer[0] = XMLCh(0); +} + +DOMBuffer::DOMBuffer(DOMDocumentImpl *doc, const XMLCh* string) : + fBuffer(0) + , fIndex(0) + , fCapacity(0) + , fDoc(doc) +{ + unsigned int actualCount = XMLString::stringLen(string); + fCapacity = actualCount + 15; + + // Buffer is one larger than capacity, to allow for zero term + fBuffer = (XMLCh*) doc->allocate((fCapacity+1)*sizeof(XMLCh)); + + memcpy(fBuffer, string, actualCount * sizeof(XMLCh)); + fIndex = actualCount; + + // Keep it null terminated + fBuffer[fIndex] = 0; +} + +// --------------------------------------------------------------------------- +// DOMBuffer: Buffer management +// --------------------------------------------------------------------------- +void DOMBuffer::append(const XMLCh* const chars, const unsigned int count) +{ + unsigned int actualCount = count; + if (!count) + actualCount = XMLString::stringLen(chars); + if (fIndex + actualCount >= fCapacity) + expandCapacity(actualCount); + memcpy(&fBuffer[fIndex], chars, actualCount * sizeof(XMLCh)); + fIndex += actualCount; +} + +void DOMBuffer::set(const XMLCh* const chars, const unsigned int count) +{ + unsigned int actualCount = count; + if (!count) + actualCount = XMLString::stringLen(chars); + fIndex = 0; + if (fIndex + actualCount >= fCapacity) + expandCapacity(actualCount); + memcpy(fBuffer, chars, actualCount * sizeof(XMLCh)); + fIndex = actualCount; +} +// --------------------------------------------------------------------------- +// DOMBuffer: Private helper methods +// --------------------------------------------------------------------------- +void DOMBuffer::expandCapacity(const unsigned int extraNeeded) +{ + //not enough room. 
Calc new capacity and allocate new buffer + const unsigned int newCap = (unsigned int)((fIndex + extraNeeded) * 1.25); + XMLCh* newBuf = new (fDoc) XMLCh[newCap+1]; + + // Copy over the old stuff + memcpy(newBuf, fBuffer, fCapacity * sizeof(XMLCh)); + + // revisit: Leave the old buffer in document heap, yes, this is a leak, but live with it! + // store new stuff + fBuffer = newBuf; + fCapacity = newCap; +} + diff --git a/src/xercesc/dom/impl/DOMStringPool.hpp b/src/xercesc/dom/impl/DOMStringPool.hpp index defa595b8..3a91a84a9 100644 --- a/src/xercesc/dom/impl/DOMStringPool.hpp +++ b/src/xercesc/dom/impl/DOMStringPool.hpp @@ -103,4 +103,99 @@ private: }; + +// +// DOMBuffer is a lightweight text buffer +// The buffer is not nul terminated until some asks to see the raw buffer +// contents. This also avoids overhead during append operations. +class DOMBuffer +{ +public : + // ----------------------------------------------------------------------- + // Constructors and Destructor + // ----------------------------------------------------------------------- + DOMBuffer(DOMDocumentImpl *doc, int capacity = 31); + + DOMBuffer(DOMDocumentImpl *doc, const XMLCh* string); + + ~DOMBuffer() + { + } + + // ----------------------------------------------------------------------- + // Buffer Management + // ----------------------------------------------------------------------- + void append + ( + const XMLCh* const chars + , const unsigned int count = 0 + ); + + const XMLCh* getRawBuffer() const + { + fBuffer[fIndex] = 0; + return fBuffer; + } + + void reset() + { + fIndex = 0; + fBuffer[0] = 0; + } + + void set + ( + const XMLCh* const chars + , const unsigned int count = 0 + ); + + void chop + ( + const unsigned int count + ) + { + fBuffer[count] = 0; + fIndex = count; + } + + + // ----------------------------------------------------------------------- + // Getters + // ----------------------------------------------------------------------- + unsigned int getLen() const + { + 
return fIndex; + } + + // ----------------------------------------------------------------------- + // Private helpers + // ----------------------------------------------------------------------- + void expandCapacity(const unsigned int extraNeeded); + + +private : + // ----------------------------------------------------------------------- + // Private data members + // + // fBuffer + // The pointer to the buffer data. Its grown as needed. Its always + // one larger than fCapacity, to leave room for the null terminator. + // + // fIndex + // The current index into the buffer, as characters are appended + // to it. If its zero, then the buffer is empty. + // + // fCapacity + // The current capacity of the buffer. Its actually always one + // larger, to leave room for the null terminator. + // + // fDoc + // For allocating memory + // ----------------------------------------------------------------------- + XMLCh* fBuffer; + unsigned int fIndex; + unsigned int fCapacity; + DOMDocumentImpl* fDoc; +}; + #endif diff --git a/src/xercesc/dom/impl/DOMTextImpl.cpp b/src/xercesc/dom/impl/DOMTextImpl.cpp index 4c9d7813b..0c38f9822 100644 --- a/src/xercesc/dom/impl/DOMTextImpl.cpp +++ b/src/xercesc/dom/impl/DOMTextImpl.cpp @@ -65,6 +65,7 @@ #include <xercesc/dom/DOMNode.hpp> #include "DOMDocumentImpl.hpp" +#include "DOMStringPool.hpp" #include "DOMTextImpl.hpp" #include "DOMCharacterDataImpl.hpp" #include "DOMChildNode.hpp" @@ -117,7 +118,7 @@ DOMText *DOMTextImpl::splitText(XMLSize_t offset) throw DOMException( DOMException::NO_MODIFICATION_ALLOWED_ERR, 0); } - XMLSize_t len = XMLString::stringLen(fCharacterData.fData); + XMLSize_t len = fCharacterData.fDataBuf->getLen(); if (offset > len || offset < 0) throw DOMException(DOMException::INDEX_SIZE_ERR, 0); @@ -129,10 +130,7 @@ DOMText *DOMTextImpl::splitText(XMLSize_t offset) if (parent != 0) parent->insertBefore(newText, getNextSibling()); - XMLCh *wData = (XMLCh *)(fCharacterData.fData); // Cast off const. 
- wData[offset] = 0; // revisit - could change a string that - // application code has. Do we want to do this? - + fCharacterData.fDataBuf->chop(offset); if (this->getOwnerDocument() != 0) { Ranges* ranges = ((DOMDocumentImpl *)this->getOwnerDocument())->getRanges(); @@ -185,8 +183,10 @@ void DOMTextImpl::release() throw DOMException(DOMException::INVALID_ACCESS_ERR,0); DOMDocumentImpl* doc = (DOMDocumentImpl*) getOwnerDocument(); - if (doc) + if (doc) { + fCharacterData.releaseBuffer(); doc->release(this, DOMDocumentImpl::TEXT_OBJECT); + } else { // shouldn't reach here throw DOMException(DOMException::INVALID_ACCESS_ERR,0); diff --git a/tests/DOM/DOMMemTest/DOMMemTest.cpp b/tests/DOM/DOMMemTest/DOMMemTest.cpp index aa78f6b37..ed71ffc5c 100644 --- a/tests/DOM/DOMMemTest/DOMMemTest.cpp +++ b/tests/DOM/DOMMemTest/DOMMemTest.cpp @@ -1429,17 +1429,40 @@ void DOMReleaseTests() //simulate setting the attribute value // The setValue and setAttribute should call release internally so that // the overall memory usage is not increased - int i = 0; - for(i=0;i<200000;i++) + int i = 0; + for(i=0;i<20000;i++) { pAttr->setValue(tempStr2); } - for(i=0;i<200000;i++) + for(i=0;i<20000;i++) { //same problem cpRoot->removeAttribute(tempStr); cpRoot->setAttribute(tempStr,tempStr2); } + + //simulate changing node value + // the overall memory usage is not increased + char tempchar[4000]; + for(i=0;i<20000;i++) + { + sprintf(tempchar, "time is %i\n",XMLPlatformUtils::getCurrentMillis()); + int len = strlen(tempchar); + for (int j = len; j < 4000-len; j++) + tempchar[j] = 'a'; + pAttr->setNodeValue(X(tempchar)); + } + + DOMText* text = cpXMLDocument->createTextNode(tempStr3); + for(i=0;i<20000;i++) + { + sprintf(tempchar, "time is %i\n",XMLPlatformUtils::getCurrentMillis()); + int len = strlen(tempchar); + for (int j = len; j < 4000-len; j++) + tempchar[j] = 'a'; + text->setNodeValue(X(tempchar)); + } + cpXMLDocument->release(); } @@ -1477,7 +1500,7 @@ int mymain() }; int main() { - for 
(int i = 0; i<5; i++) + for (int i = 0; i<3; i++) mymain(); printf("Test Run Successfully\n"); -- GitLab