Newer
Older
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Boris Kolpackov
committed
*
Boris Kolpackov
committed
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/framework/XMLBuffer.hpp>
#include <xercesc/framework/XMLElementDecl.hpp>
#include <xercesc/framework/XMLValidator.hpp>
#include <xercesc/validators/common/CMAny.hpp>
#include <xercesc/validators/common/CMBinaryOp.hpp>
#include <xercesc/validators/common/CMLeaf.hpp>
#include <xercesc/validators/common/CMRepeatingLeaf.hpp>
#include <xercesc/validators/common/CMUnaryOp.hpp>
#include <xercesc/validators/common/DFAContentModel.hpp>
#include <xercesc/validators/common/ContentSpecNode.hpp>
#include <xercesc/validators/common/Grammar.hpp>
#include <xercesc/validators/schema/SchemaSymbols.hpp>
#include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
#include <xercesc/validators/schema/XercesElementWildcard.hpp>
#include <xercesc/util/RefHashTableOf.hpp>
#include <xercesc/util/XMLInteger.hpp>
struct CMStateSetHasher
{
XMLSize_t getHashVal(const void *const key, XMLSize_t mod)
{
const CMStateSet* const pkey = (const CMStateSet*) key;
return ((pkey->hashCode()) % mod);
}
bool equals(const void *const key1, const void *const key2)
{
const CMStateSet* const pkey1 = (const CMStateSet*) key1;
const CMStateSet* const pkey2 = (const CMStateSet*) key2;
return (*pkey1==*pkey2);
}
};
// ---------------------------------------------------------------------------
// DFAContentModel: Constructors and Destructor
// ---------------------------------------------------------------------------
DFAContentModel::DFAContentModel( const bool dtd
, ContentSpecNode* const elemContentSpec
, MemoryManager* const manager) :
fElemMap(0)
, fElemMapType(0)
, fElemMapSize(0)
, fEmptyOk(false)
, fEOCPos(0)
, fFinalStateFlags(0)
, fFollowList(0)
, fHeadNode(0)
, fLeafCount(0)
, fLeafList(0)
David Abram Cargill
committed
, fLeafListType(0)
, fCountingStates(0)
, fDTD(dtd)
, fIsMixed(false)
, fLeafNameTypeVector(0)
, fMemoryManager(manager)
{
// And build the DFA data structures
buildDFA(elemContentSpec);
}
DFAContentModel::DFAContentModel( const bool dtd
, ContentSpecNode* const elemContentSpec
, const bool isMixed
, MemoryManager* const manager):
fElemMap(0)
, fElemMapType(0)
, fElemMapSize(0)
, fEmptyOk(false)
, fEOCPos(0)
, fFinalStateFlags(0)
, fFollowList(0)
, fHeadNode(0)
, fLeafCount(0)
, fLeafList(0)
David Abram Cargill
committed
, fLeafListType(0)
, fCountingStates(0)
, fDTD(dtd)
, fIsMixed(isMixed)
, fLeafNameTypeVector(0)
, fMemoryManager(manager)
{
// And build the DFA data structures
buildDFA(elemContentSpec);
}
DFAContentModel::~DFAContentModel()
{
//
// Clean up all the stuff that is not just temporary representation
// data that was cleaned up after building the DFA.
//
fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags;
unsigned int index;
fMemoryManager->deallocate(fTransTable[index]); //delete [] fTransTable[index];
fMemoryManager->deallocate(fTransTable); //delete [] fTransTable;
if(fCountingStates)
{
for (unsigned int j = 0; j < fTransTableSize; ++j)
delete fCountingStates[j];
fMemoryManager->deallocate(fCountingStates);
}
for (index = 0; index < fLeafCount; index++)
delete fElemMap[index];
fMemoryManager->deallocate(fElemMap); //delete [] fElemMap;
fMemoryManager->deallocate(fElemMapType); //delete [] fElemMapType;
fMemoryManager->deallocate(fLeafListType); //delete [] fLeafListType;
Alberto Massari
committed
delete fLeafNameTypeVector;
}
// ---------------------------------------------------------------------------
// DFAContentModel: Implementation of the ContentModel virtual interface
// ---------------------------------------------------------------------------
Alberto Massari
committed
bool
DFAContentModel::validateContent( QName** const children
Alberto Massari
committed
, unsigned int
, XMLSize_t* indexFailingChild
, MemoryManager* const) const
{
//
// If there are no children, then either we fail on the 0th element
// or we return success. It depends upon whether this content model
// accepts empty content, which we determined earlier.
//
if (!childCount)
{
// success
Alberto Massari
committed
if(fEmptyOk)
return true;
*indexFailingChild=0;
return false;
}
//
// Lets loop through the children in the array and move our way
// through the states. Note that we use the fElemMap array to map
// an element index to a state index.
//
unsigned int curState = 0;
unsigned int nextState = 0;
unsigned int loopCount = 0;
unsigned int childIndex = 0;
for (; childIndex < childCount; childIndex++)
{
// Get the current element index out
const QName* curElem = children[childIndex];
const XMLCh* curElemRawName = 0;
if (fDTD)
curElemRawName = curElem->getRawName();
// If this is text in a Schema mixed content model, skip it.
if ( fIsMixed &&
( curElem->getURI() == XMLElementDecl::fgPCDataElemId))
continue;
// Look up this child in our element map
unsigned int elemIndex = 0;
for (; elemIndex < fElemMapSize; elemIndex++)
{
const QName* inElem = fElemMap[elemIndex];
if (fDTD) {
if (XMLString::equals(inElem->getRawName(), curElemRawName)) {
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
break;
}
}
else {
ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
if (type == ContentSpecNode::Leaf)
{
if ((inElem->getURI() == curElem->getURI()) &&
(XMLString::equals(inElem->getLocalPart(), curElem->getLocalPart()))) {
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
break;
}
}
else if ((type & 0x0f)== ContentSpecNode::Any)
{
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
Alberto Massari
committed
break;
}
else if ((type & 0x0f) == ContentSpecNode::Any_NS)
{
if (inElem->getURI() == curElem->getURI())
{
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
break;
}
}
else if ((type & 0x0f) == ContentSpecNode::Any_Other)
{
Boris Kolpackov
committed
// Here we assume that empty string has id 1.
Boris Kolpackov
committed
unsigned int uriId = curElem->getURI();
if (uriId != 1 && uriId != inElem->getURI()) {
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
break;
}
}
}
}//for elemIndex
// If "nextState" is -1, we found a match, but the transition is invalid
if (nextState == XMLContentModel::gInvalidTrans)
Alberto Massari
committed
{
*indexFailingChild=childIndex;
return false;
}
// If we didn't find it, then obviously not valid
if (elemIndex == fElemMapSize)
Alberto Massari
committed
{
*indexFailingChild=childIndex;
return false;
}
Alberto Massari
committed
unsigned int nextLoop = 0;
if(!handleRepetitions(curElem, curState, loopCount, nextState, nextLoop, elemIndex, 0))
Alberto Massari
committed
{
*indexFailingChild=childIndex;
return false;
}
Alberto Massari
committed
loopCount = nextLoop;
nextState = 0;
}//for childIndex
//
// We transitioned all the way through the input list. However, that
// does not mean that we ended in a final state. So check whether
// our ending state is a final state.
//
if (!fFinalStateFlags[curState])
Alberto Massari
committed
{
*indexFailingChild=childIndex;
return false;
}
// verify if we exited before the minOccurs was satisfied
if (fCountingStates != 0) {
Occurence* o = fCountingStates[curState];
if (o != 0 && loopCount < (unsigned int)o->minOccurs) {
// not enough loops on the current state to be considered final.
*indexFailingChild=childIndex;
return false;
}
}
Alberto Massari
committed
return true;
bool DFAContentModel::validateContentSpecial(QName** const children
, XMLSize_t childCount
Alberto Massari
committed
, unsigned int
, XMLStringPool* const pStringPool
, XMLSize_t* indexFailingChild
, MemoryManager* const) const
{
SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool);
if (childCount == 0)
Alberto Massari
committed
{
if(fEmptyOk)
return true;
*indexFailingChild=0;
return false;
}
//
// Lets loop through the children in the array and move our way
// through the states. Note that we use the fElemMap array to map
// an element index to a state index.
//
unsigned int curState = 0;
unsigned int loopCount = 0;
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
unsigned int nextState = 0;
unsigned int childIndex = 0;
for (; childIndex < childCount; childIndex++)
{
// Get the current element index out
QName* curElem = children[childIndex];
// If this is text in a Schema mixed content model, skip it.
if ( fIsMixed &&
( curElem->getURI() == XMLElementDecl::fgPCDataElemId))
continue;
// Look up this child in our element map
unsigned int elemIndex = 0;
for (; elemIndex < fElemMapSize; elemIndex++)
{
QName* inElem = fElemMap[elemIndex];
ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
if (type == ContentSpecNode::Leaf)
{
if (comparator.isEquivalentTo(curElem, inElem) )
{
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
break;
}
}
else if ((type & 0x0f)== ContentSpecNode::Any)
{
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
Alberto Massari
committed
break;
}
else if ((type & 0x0f) == ContentSpecNode::Any_NS)
{
if (inElem->getURI() == curElem->getURI())
{
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
break;
}
}
else if ((type & 0x0f) == ContentSpecNode::Any_Other)
{
Boris Kolpackov
committed
// Here we assume that empty string has id 1.
Alberto Massari
committed
//
Boris Kolpackov
committed
unsigned int uriId = curElem->getURI();
if (uriId != 1 && uriId != inElem->getURI())
{
nextState = fTransTable[curState][elemIndex];
if (nextState != XMLContentModel::gInvalidTrans)
break;
}
}
}//for elemIndex
// If "nextState" is -1, we found a match, but the transition is invalid
if (nextState == XMLContentModel::gInvalidTrans)
Alberto Massari
committed
{
*indexFailingChild=childIndex;
return false;
}
// If we didn't find it, then obviously not valid
if (elemIndex == fElemMapSize)
Alberto Massari
committed
{
*indexFailingChild=childIndex;
return false;
}
Alberto Massari
committed
unsigned int nextLoop = 0;
if(!handleRepetitions(curElem, curState, loopCount, nextState, nextLoop, elemIndex, &comparator))
Alberto Massari
committed
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
{
*indexFailingChild=childIndex;
return false;
}
curState = nextState;
loopCount = nextLoop;
nextState = 0;
}//for childIndex
//
// We transitioned all the way through the input list. However, that
// does not mean that we ended in a final state. So check whether
// our ending state is a final state.
//
if (!fFinalStateFlags[curState])
{
*indexFailingChild=childIndex;
return false;
}
// verify if we exited before the minOccurs was satisfied
if (fCountingStates != 0) {
Occurence* o = fCountingStates[curState];
if (o != 0) {
if (loopCount < (unsigned int)o->minOccurs) {
// not enough loops on the current state.
*indexFailingChild=childIndex;
return false;
}
}
}
//success
return true;
}
bool DFAContentModel::handleRepetitions(const QName* const curElem,
unsigned int curState,
unsigned int currentLoop,
unsigned int& nextState,
unsigned int& nextLoop,
XMLSize_t elemIndex,
SubstitutionGroupComparator * comparator) const
{
nextLoop = 0;
if (fCountingStates != 0) {
nextLoop = currentLoop;
Occurence* o = fCountingStates[curState];
if (o != 0) {
if (curState == nextState) {
if (++nextLoop > (unsigned int)o->maxOccurs && o->maxOccurs != -1) {
// It's likely that we looped too many times on the current state
// however it's possible that we actually matched another particle
// which allows the same name.
//
// Consider:
//
// <xs:sequence>
// <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
// <xs:element name="foo" type="xs:string" fixed="bar"/>
// </xs:sequence>
//
// and
//
// <xs:sequence>
// <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
// <xs:any namespace="##any" processContents="skip"/>
// </xs:sequence>
//
// In the DFA there will be two transitions from the current state which
Alberto Massari
committed
// allow "foo". Note that this is not a UPA violation. The ambiguity of which
// transition to take is resolved by the current value of the counter. Since
Alberto Massari
committed
// we've already seen enough instances of the first "foo" perhaps there is
// another element declaration or wildcard deeper in the element map which
// matches.
unsigned int tempNextState = 0;
Alberto Massari
committed
while (++elemIndex < fElemMapSize) {
QName* inElem = fElemMap[elemIndex];
ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
if (type == ContentSpecNode::Leaf)
{
if(comparator!=0) {
if (comparator->isEquivalentTo(curElem, inElem) )
Alberto Massari
committed
{
tempNextState = fTransTable[curState][elemIndex];
if (tempNextState != XMLContentModel::gInvalidTrans)
break;
}
}
Alberto Massari
committed
else if (fDTD) {
if (XMLString::equals(inElem->getRawName(), curElem->getRawName())) {
Alberto Massari
committed
tempNextState = fTransTable[curState][elemIndex];
if (tempNextState != XMLContentModel::gInvalidTrans)
break;
}
}
Alberto Massari
committed
else {
if ((inElem->getURI() == curElem->getURI()) &&
(XMLString::equals(inElem->getLocalPart(), curElem->getLocalPart()))) {
Alberto Massari
committed
tempNextState = fTransTable[curState][elemIndex];
if (tempNextState != XMLContentModel::gInvalidTrans)
break;
}
}
}
Alberto Massari
committed
else if ((type & 0x0f)== ContentSpecNode::Any)
{
tempNextState = fTransTable[curState][elemIndex];
if (tempNextState != XMLContentModel::gInvalidTrans)
break;
Alberto Massari
committed
}
Alberto Massari
committed
else if ((type & 0x0f) == ContentSpecNode::Any_NS)
{
if (inElem->getURI() == curElem->getURI())
{
tempNextState = fTransTable[curState][elemIndex];
if (tempNextState != XMLContentModel::gInvalidTrans)
break;
}
}
else if ((type & 0x0f) == ContentSpecNode::Any_Other)
{
// Here we assume that empty string has id 1.
//
unsigned int uriId = curElem->getURI();
if (uriId != 1 && uriId != inElem->getURI())
{
tempNextState = fTransTable[curState][elemIndex];
if (tempNextState != XMLContentModel::gInvalidTrans)
break;
}
Alberto Massari
committed
}
}
Alberto Massari
committed
// if we still can't find a match, report the error
if (elemIndex == fElemMapSize)
return false;
// if we found a match, set the next state and reset the
Alberto Massari
committed
// counter if the next state is a counting state.
nextState = tempNextState;
Occurence* o = fCountingStates[nextState];
if (o != 0) {
Alberto Massari
committed
nextLoop = (elemIndex == o->elemIndex) ? 1 : 0;
}
}
}
Alberto Massari
committed
else if (nextLoop < (unsigned int)o->minOccurs) {
// not enough loops on the current state.
return false;
}
else {
Alberto Massari
committed
// Exiting a counting state. If we're entering a new
// counting state, reset the counter.
o = fCountingStates[nextState];
if (o != 0) {
Alberto Massari
committed
nextLoop = (elemIndex == o->elemIndex) ? 1 : 0;
}
}
}
Alberto Massari
committed
else {
o = fCountingStates[nextState];
if (o != 0) {
// Entering a new counting state. Reset the counter.
// If we've already seen one instance of the looping
// particle set the counter to 1, otherwise set it
// to 0.
nextLoop = (elemIndex == o->elemIndex) ? 1 : 0;
}
}
}
Alberto Massari
committed
return true;
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
}
// ---------------------------------------------------------------------------
// DFAContentModel: Private helper methods
// ---------------------------------------------------------------------------
void DFAContentModel::buildDFA(ContentSpecNode* const curNode)
{
unsigned int index;
//
// The first step we need to take is to rewrite the content model using
// our CMNode objects, and in the process get rid of any repetition short
// cuts, converting them into '*' style repetitions or getting rid of
// repetitions altogether.
//
// The conversions done are:
//
// x+ -> (x|x*)
// x? -> (x|epsilon)
//
// This is a relatively complex scenario. What is happening is that we
// create a top level binary node of which the special EOC value is set
// as the right side node. The the left side is set to the rewritten
// syntax tree. The source is the original content model info from the
// decl pool. The rewrite is done by buildSyntaxTree() which recurses the
// decl pool's content of the element and builds a new tree in the
// process.
//
// Note that, during this operation, we set each non-epsilon leaf node's
// DFA state position and count the number of such leafs, which is left
// in the fLeafCount member.
//
fLeafCount=countLeafNodes(curNode);
fEOCPos = fLeafCount++;
// We need to build an array of references to the non-epsilon
// leaf nodes. We will put them in the array according to their position values
fLeafList = (CMLeaf**) fMemoryManager->allocate(fLeafCount*sizeof(CMLeaf*)); //new CMLeaf*[fLeafCount];
fLeafListType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
(
fLeafCount * sizeof(ContentSpecNode::NodeTypes)
); //new ContentSpecNode::NodeTypes[fLeafCount];
//
// And, moving onward... We now need to build the follow position sets
// for all the nodes. So we allocate an array of pointers to state sets,
// one for each leaf node (i.e. each significant DFA position.)
//
fFollowList = (CMStateSet**) fMemoryManager->allocate
(
fLeafCount * sizeof(CMStateSet*)
); //new CMStateSet*[fLeafCount];
fFollowList[index] = new (fMemoryManager) CMStateSet(fLeafCount, fMemoryManager);
// The buildSyntaxTree function will recursively iterate over the ContentSpecNode
// and build the CMNode hierarchy; it will also put every leaf node in the fLeafList
// array, then calculate the first and last position sets of each node. This is
// cached away in each of the nodes.
//
// Along the way we also set the leaf count in each node as the maximum
// state count. They must know this in order to create their first/last
// position sets.
//
unsigned int counter=0;
CMNode* nodeOrgContent = buildSyntaxTree(curNode, counter);
//
// Check to see whether this content model can handle an empty content,
// which is something we need to optimize by looking now before we
// throw away the info that would tell us that.
//
// If the left node of the head (the top level of the original content)
// is nullable, then its true.
//
fEmptyOk = nodeOrgContent->isNullable();
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
//
// And handle specially the EOC node, which also must be numbered and
// counted as a non-epsilon leaf node. It could not be handled in the
// above tree build because it was created before all that started. We
// save the EOC position since its used during the DFA building loop.
//
CMLeaf* nodeEOC = new (fMemoryManager) CMLeaf
(
new (fMemoryManager) QName
(
XMLUni::fgZeroLenString
, XMLUni::fgZeroLenString
, XMLContentModel::gEOCFakeId
, fMemoryManager
)
, fEOCPos
, true
, fLeafCount
, fMemoryManager
);
fHeadNode = new (fMemoryManager) CMBinaryOp
(
ContentSpecNode::Sequence
, nodeOrgContent
, nodeEOC
, fLeafCount
, fMemoryManager
);
// Put also the final EOC node in the leaf array
fLeafList[counter] = new (fMemoryManager) CMLeaf
(
nodeEOC->getElement()
, nodeEOC->getPosition()
, fLeafCount
, fMemoryManager
);
fLeafListType[counter] = ContentSpecNode::Leaf;
//
// Now handle our top level. We use our left child's last pos set and our
// right child's first pos set, so get them now for convenience.
//
const CMStateSet& last = nodeOrgContent->getLastPos();
const CMStateSet& first = nodeEOC->getFirstPos();
//
// Now, for every position which is in our left child's last set
// add all of the states in our right child's first set to the
// follow set for that position.
//
CMStateSetEnumerator enumLast(&last);
while(enumLast.hasMoreElements())
{
XMLSize_t index=enumLast.nextElement();
*fFollowList[index] |= first;
}
//
// And finally the big push... Now we build the DFA using all the states
// and the tree we've built up. First we set up the various data
// structures we are going to use while we do this.
//
// First of all we need an array of unique element ids in our content
// model. For each transition table entry, we need a set of contiguous
// indices to represent the transitions for a particular input element.
// So we need to a zero based range of indexes that map to element types.
// This element map provides that mapping.
//
fElemMap = (QName**) fMemoryManager->allocate
(
fLeafCount * sizeof(QName*)
); //new QName*[fLeafCount];
fElemMapType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
(
fLeafCount * sizeof(ContentSpecNode::NodeTypes)
); //new ContentSpecNode::NodeTypes[fLeafCount];
Occurence** elemOccurenceMap=0;
for (unsigned int outIndex = 0; outIndex < fLeafCount; outIndex++)
{
fElemMap[outIndex] = new (fMemoryManager) QName(fMemoryManager);
if ( (fLeafListType[outIndex] & 0x0f) != ContentSpecNode::Leaf )
if (!fLeafNameTypeVector)
fLeafNameTypeVector = new (fMemoryManager) ContentLeafNameTypeVector(fMemoryManager);
CMLeaf* leaf=fLeafList[outIndex];
const QName* element = leaf->getElement();
const XMLCh* elementRawName = 0;
if (fDTD && element)
elementRawName = element->getRawName();
// See if the current leaf node's element index is in the list
unsigned int inIndex = 0;
for (; inIndex < fElemMapSize; inIndex++)
{
const QName* inElem = fElemMap[inIndex];
if (fDTD) {
if (XMLString::equals(inElem->getRawName(), elementRawName)) {
break;
}
}
else {
if ((fElemMapType[inIndex] == fLeafListType[outIndex]) &&
(inElem->getURI() == element->getURI()) &&
(XMLString::equals(inElem->getLocalPart(), element->getLocalPart()))) {
break;
}
}
}
// If it was not in the list, then add it and bump the map size
if (inIndex == fElemMapSize)
{
fElemMap[fElemMapSize]->setValues(*element);
if(leaf->isRepeatableLeaf())
{
if (elemOccurenceMap == 0) {
elemOccurenceMap = (Occurence**)fMemoryManager->allocate(fLeafCount*sizeof(Occurence*));
memset(elemOccurenceMap, 0, fLeafCount*sizeof(Occurence*));
}
elemOccurenceMap[fElemMapSize] = new (fMemoryManager) Occurence(((CMRepeatingLeaf*)leaf)->getMinOccurs(), ((CMRepeatingLeaf*)leaf)->getMaxOccurs(), fElemMapSize);
}
fElemMapType[fElemMapSize] = fLeafListType[outIndex];
++fElemMapSize;
}
}
// set up the fLeafNameTypeVector object if there is one.
if (fLeafNameTypeVector) {
fLeafNameTypeVector->setValues(fElemMap, fElemMapType, fElemMapSize);
}
/***
* Optimization(Jan, 2001); We sort fLeafList according to
* elemIndex which is *uniquely* associated to each leaf.
* We are *assuming* that each element appears in at least one leaf.
**/
// don't forget to delete it
#ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
int *fLeafSorter = (int*) fMemoryManager->allocate
(
(fLeafCount + fElemMapSize) * sizeof(int)
); //new int[fLeafCount + fElemMapSize];
unsigned int fSortCount = 0;
for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
{
const QName* element = fElemMap[elemIndex];
const XMLCh* elementRawName = 0;
if (fDTD && element)
elementRawName = element->getRawName();
for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
{
Boris Kolpackov
committed
const QName* leaf = fLeafList[leafIndex]->getElement();
if (XMLString::equals(leaf->getRawName(), elementRawName)) {
fLeafSorter[fSortCount++] = leafIndex;
}
}
else {
if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) &&
(leaf->getURI() == element->getURI()) &&
(XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) {
fLeafSorter[fSortCount++] = leafIndex;
}
}
}
fLeafSorter[fSortCount++] = -1;
}
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
#endif
// instead of using a single array with -1 to separate elements, use a bidimensional map
unsigned int** fLeafSorter = (unsigned int**)fMemoryManager->allocate(fElemMapSize * sizeof(unsigned int*));
unsigned int* tmpSorter = (unsigned int*)fMemoryManager->allocate(fLeafCount * sizeof(unsigned int));
for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
{
const QName* element = fElemMap[elemIndex];
const XMLCh* elementRawName = 0;
if (fDTD && element)
elementRawName = element->getRawName();
unsigned int fSortCount=0;
for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
{
const QName* leaf = fLeafList[leafIndex]->getElement();
if (fDTD) {
if (XMLString::equals(leaf->getRawName(), elementRawName)) {
tmpSorter[fSortCount++] = leafIndex;
}
}
else {
if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) &&
(leaf->getURI() == element->getURI()) &&
(XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) {
tmpSorter[fSortCount++] = leafIndex;
}
}
}
fLeafSorter[elemIndex]=(unsigned int*)fMemoryManager->allocate((fSortCount+1) * sizeof(unsigned int));
fLeafSorter[elemIndex][0]=fSortCount;
for (unsigned int index=0;index<fSortCount;index++)
fLeafSorter[elemIndex][index+1]=tmpSorter[index];
}
fMemoryManager->deallocate(tmpSorter);
//
// Next lets create some arrays, some that that hold transient info
// during the DFA build and some that are permament. These are kind of
// sticky since we cannot know how big they will get, but we don't want
// to use any collection type classes because of performance.
//
// Basically they will probably be about fLeafCount*2 on average, but can
// be as large as 2^(fLeafCount*2), worst case. So we start with
// fLeafCount*4 as a middle ground. This will be very unlikely to ever
// have to expand though, it if does, the overhead will be somewhat ugly.
//
unsigned int curArraySize = fLeafCount * 4;
Alberto Massari
committed
CMStateSet** statesToDo = (CMStateSet**)
fMemoryManager->allocate
(
Alberto Massari
committed
curArraySize * sizeof(CMStateSet*)
); //new const CMStateSet*[curArraySize];
fFinalStateFlags = (bool*) fMemoryManager->allocate
(
curArraySize * sizeof(bool)
); //new bool[curArraySize];
fTransTable = (unsigned int**) fMemoryManager->allocate
(
curArraySize * sizeof(unsigned int*)
); //new unsigned int*[curArraySize];
//
// Ok we start with the initial set as the first pos set of the head node
// (which is the seq node that holds the content model and the EOC node.)
//
Alberto Massari
committed
CMStateSet* setT = new (fMemoryManager) CMStateSet(fHeadNode->getFirstPos());
//
// Note on memory leak: Bugzilla#2707:
// ===================================
// The CMBinary, pointed to by fHeadNode, shall be released by
// deleted by itself.
//
// fLeafList[] maintains its **OWN** copy of CMLeaf to avoid double deletion
// of CMLeaf.
//
delete fHeadNode;
//
// Init our two state flags. Basically the unmarked state counter is
// always chasing the current state counter. When it catches up, that
// means we made a pass through that did not add any new states to the
// lists, at which time we are done. We could have used a expanding array
// of flags which we used to mark off states as we complete them, but
// this is easier though less readable maybe.
//
unsigned int unmarkedState = 0;
unsigned int curState = 0;
//
// Init the first transition table entry, and put the initial state
// into the states to do list, then bump the current state.
//
fTransTable[curState] = makeDefStateList();
statesToDo[curState] = setT;
curState++;
//
// the stateTable is an auxiliary means to fast
// identification of new state created (instead
Alberto Massari
committed
// of sequential loop statesToDo to find out),
// while the role that statesToDo plays remain unchanged.
//
RefHashTableOf<XMLInteger, CMStateSetHasher> *stateTable =
new (fMemoryManager) RefHashTableOf<XMLInteger, CMStateSetHasher>
(
curArraySize
, true
, fMemoryManager
);
//stateTable->put((CMStateSet*)setT, new (fMemoryManager) XMLInteger(0));
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
//
// Ok, almost done with the algorithm from hell... We now enter the
// loop where we go until the states done counter catches up with
// the states to do counter.
//
CMStateSet* newSet = 0;
while (unmarkedState < curState)
{
//
// Get the next unmarked state out of the list of states to do.
// And get the associated transition table entry.
//
setT = statesToDo[unmarkedState];
unsigned int* transEntry = fTransTable[unmarkedState];
// Mark this one final if it contains the EOC state
fFinalStateFlags[unmarkedState] = setT->getBit(fEOCPos);
// Bump up the unmarked state count, marking this state done
unmarkedState++;
// Optimization(Jan, 2001)
unsigned int sorterIndex = 0;
// Optimization(Jan, 2001)
// Loop through each possible input symbol in the element map
for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
{
//
// Build up a set of states which is the union of all of the
// follow sets of DFA positions that are in the current state. If
// we gave away the new set last time through then create a new
// one. Otherwise, zero out the existing one.
//
if (!newSet)
newSet = new (fMemoryManager) CMStateSet
(
fLeafCount
, fMemoryManager
);
else
newSet->zeroBits();
#ifdef OBSOLETED
// unoptimized code
for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
{
// If this leaf index (DFA position) is in the current set...
if (setT->getBit(leafIndex))
{
//
// If this leaf is the current input symbol, then we want
// to add its follow list to the set of states to transition
// to from the current state.
//
const QName* leaf = fLeafList[leafIndex]->getElement();
const QName* element = fElemMap[elemIndex];
if (fDTD) {
if (XMLString::equals(leaf->getRawName(), element->getRawName())) {