From 90cdf0158bacbf4942022c9c55c29b8ccc82d06d Mon Sep 17 00:00:00 2001 From: Alberto Massari <amassari@apache.org> Date: Tue, 21 Jun 2011 10:49:44 +0000 Subject: [PATCH] Added a version of tokenize that uses a specified character as delimiter git-svn-id: https://svn.apache.org/repos/asf/xerces/c/trunk@1137951 13f79535-47bb-0310-9956-ffa450edef68 --- src/xercesc/util/XMLString.cpp | 48 ++++++++++++++++++++++++++++++++++ src/xercesc/util/XMLString.hpp | 11 ++++++++ 2 files changed, 59 insertions(+) diff --git a/src/xercesc/util/XMLString.cpp b/src/xercesc/util/XMLString.cpp index a13bbe451..8ec1a0b5a 100644 --- a/src/xercesc/util/XMLString.cpp +++ b/src/xercesc/util/XMLString.cpp @@ -1582,6 +1582,54 @@ void XMLString::subString(XMLCh* const targetStr, const XMLCh* const srcStr targetStr[copySize] = 0; } +BaseRefVectorOf<XMLCh>* XMLString::tokenizeString(const XMLCh* const tokenizeSrc + , XMLCh delimiter + , MemoryManager* const manager) +{ + XMLCh* orgText = replicate(tokenizeSrc, manager); + ArrayJanitor<XMLCh> janText(orgText, manager); + XMLCh* tokenizeStr = orgText; + + RefArrayVectorOf<XMLCh>* tokenStack = new (manager) RefArrayVectorOf<XMLCh>(16, true, manager); + + XMLSize_t len = stringLen(tokenizeStr); + XMLSize_t skip; + XMLSize_t index = 0; + + while (index != len) { + // skip any leading delimiters to find the start of the next token + for (skip = index; skip < len; skip++) + { + if (tokenizeStr[skip]!=delimiter) + break; + } + index = skip; + + // advance to the next delimiter (or to the end of the string) + for (; skip < len; skip++) + { + if (tokenizeStr[skip]==delimiter) + break; + } + + // no token was found: only delimiters remained before the end of the string + if (skip == index) + break; + + // these tokens are adopted in the RefVector and will be deleted + // when the vector is deleted by the caller + XMLCh* token = (XMLCh*) manager->allocate + ( + (skip+1-index) * sizeof(XMLCh) + );//new XMLCh[skip+1-index]; + + XMLString::subString(token, tokenizeStr, index, skip, len, manager); + tokenStack->addElement(token); + index = skip; + } + 
return tokenStack; +} + BaseRefVectorOf<XMLCh>* XMLString::tokenizeString(const XMLCh* const tokenizeSrc , MemoryManager* const manager) { diff --git a/src/xercesc/util/XMLString.hpp b/src/xercesc/util/XMLString.hpp index c47c16fb7..57da0bf1e 100644 --- a/src/xercesc/util/XMLString.hpp +++ b/src/xercesc/util/XMLString.hpp @@ -1247,6 +1247,17 @@ public: static BaseRefVectorOf<XMLCh>* tokenizeString(const XMLCh* const tokenizeSrc , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); + /** Break a string into tokens using the given character as delimiter, and + * store them in a string vector. The caller owns the string vector + * that is returned, and is responsible for deleting it. + * @param tokenizeSrc String to be tokenized + * @param delimiter Delimiter character; consecutive delimiters do not produce empty tokens + * @param manager The MemoryManager to use to allocate objects + * @return a vector of all the tokens + */ + static BaseRefVectorOf<XMLCh>* tokenizeString(const XMLCh* const tokenizeSrc + , XMLCh delimiter + , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); //@} /** @name Formatting functions */ -- GitLab