Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
{
emitError(XMLErrs::PEPropogated);
// Ask the reader manager to pop back down to the main level
fReaderMgr.cleanStackBackTo(1);
}
fReaderMgr.skipPastSpaces();
}
// And that should leave us at the closing > of the DOCTYPE line
if (!fReaderMgr.skippedChar(chCloseAngle))
{
// Do a special check for the common scenario of an extra ] char at
// the end. This is easy to recover from.
if (fReaderMgr.skippedChar(chCloseSquare)
&& fReaderMgr.skippedChar(chCloseAngle))
{
emitError(XMLErrs::ExtraCloseSquare);
}
else
{
emitError(XMLErrs::UnterminatedDOCTYPE);
fReaderMgr.skipPastChar(chCloseAngle);
}
}
// If we had an external subset, then we need to deal with that one
// next. If we are reusing the validator, then don't scan it.
if (hasExtSubset) {
if (fUseCachedGrammar)
{
InputSource* sysIdSrc = resolveSystemId(sysId);
Janitor<InputSource> janSysIdSrc(sysIdSrc);
Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId());
if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
fDTDGrammar = (DTDGrammar*) grammar;
fGrammar = fDTDGrammar;
fValidator->setGrammar(fGrammar);
Neil Graham
committed
// we *cannot* identify the root element on
// cached grammars; else we risk breaking multithreaded
// applications. - NG
/*******
rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
if (rootDecl)
((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId());
else {
rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
(
bbRootName.getRawBuffer()
, fEmptyNamespaceId
, DTDElementDecl::Any
, fGrammarPoolMemoryManager
rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
rootDecl->setExternalElemDeclaration(true);
((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
}
Neil Graham
committed
*********/
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
return;
}
}
if (fLoadExternalDTD || fValidate)
{
// And now create a reader to read this entity
InputSource* srcUsed;
XMLReader* reader = fReaderMgr.createReader
(
sysId
, pubId
, false
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, XMLReader::Source_External
, srcUsed
, fCalculateSrcOfs
);
// Put a janitor on the input source
Janitor<InputSource> janSrc(srcUsed);
// If it failed then throw an exception
if (!reader)
David Abram Cargill
committed
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId(), fMemoryManager);
if (fToCacheGrammar) {
unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
fGrammarResolver->putGrammar(fGrammar);
}
// In order to make the processing work consistently, we have to
// make this look like an external entity. So create an entity
// decl and fill it in and push it with the reader, as happens
// with an external entity. Put a janitor on it to insure it gets
// cleaned up. The reader manager does not adopt them.
const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);
declDTD->setSystemId(sysId);
Janitor<DTDEntityDecl> janDecl(declDTD);
// Mark this one as a throw at end
reader->setThrowAtEnd(true);
// And push it onto the stack, with its pseudo name
fReaderMgr.pushReader(reader, declDTD);
// Tell it its not in an include section
dtdScanner.scanExtSubsetDecl(false, true);
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
}
}
}
bool DGXMLScanner::scanStartTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// Get the QName. In this case, we are not doing namespaces, so we just
// use it as is and don't have to break it into parts.
if (!fReaderMgr.getName(fQNameBuf))
{
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// Assume it won't be an empty tag
bool isEmpty = false;
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Lets try to look up the element in the validator's element decl pool
// We can pass bogus values for the URI id and the base name. We know that
// this can only be called if we are doing a DTD style validator and that
// he will only look at the QName.
//
Neil Graham
committed
// We *do not* tell him to fault in a decl if he does not find one - NG.
Neil Graham
committed
const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
XMLElementDecl* elemDecl = fGrammar->getElemDecl
(
fEmptyNamespaceId
, 0
, qnameRawBuf
, Grammar::TOP_LEVEL_SCOPE
);
Neil Graham
committed
// look in the undeclared pool:
if(!elemDecl)
{
elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
}
if(!elemDecl)
{
wasAdded = true;
elemDecl = new (fMemoryManager) DTDElementDecl
(
qnameRawBuf
, fEmptyNamespaceId
, DTDElementDecl::Any
, fMemoryManager
);
elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
}
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
if (fValidate) {
if (wasAdded)
{
// This is to tell the reuse Validator that this element was
// faulted-in, was not an element in the validator pool originally
elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
fValidator->emitError
(
XMLValid::ElementNotDefined
, qnameRawBuf
);
}
// If its not marked declared, then emit an error
else if (!elemDecl->isDeclared())
{
fValidator->emitError
(
XMLValid::ElementNotDefined
, qnameRawBuf
);
}
fValidator->validateElement(elemDecl);
}
// Expand the element stack and add the new element
fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
// If this is the first element and we are validating, check the root
// element.
if (isRoot)
{
fRootGrammar = fGrammar;
if (fValidate)
{
// If a DocType exists, then check if it matches the root name there.
if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
fValidator->emitError(XMLValid::RootElemNotLikeDocType);
// Some validators may also want to check the root, call the
// XMLValidator::checkRootElement
if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId()))
fValidator->emitError(XMLValid::RootElemNotLikeDocType);
}
}
else if (fValidate)
{
// If the element stack is not empty, then add this element as a
// child of the previous top element. If its empty, this is the root
// elem and is not the child of anything.
fElemStack.addChild(elemDecl->getElementName(), true);
}
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// We loop until we either see a /> or >, handling attribute/value
// pairs until we get there.
unsigned int attCount = 0;
unsigned int curAttListSize = fAttrList->size();
wasAdded = false;
Alberto Massari
committed
fElemCount++;
while (true)
{
// And get the next non-space character
XMLCh nextCh = fReaderMgr.peekNextChar();
// If the next character is not a slash or closed angle bracket,
// then it must be whitespace, since whitespace is required
// between the end of the last attribute and the name of the next
// one.
if (attCount)
{
if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
{
{
// Ok, skip by them and peek another char
fReaderMgr.skipPastSpaces();
nextCh = fReaderMgr.peekNextChar();
}
else
{
// Emit the error but keep on going
emitError(XMLErrs::ExpectedWhitespace);
}
}
}
// Ok, here we first check for any of the special case characters.
// If its not one, then we do the normal case processing, which
// assumes that we've hit an attribute value, Otherwise, we do all
// the special case checks.
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
{
// Assume its going to be an attribute, so get a name from
// the input.
if (!fReaderMgr.getName(fAttNameBuf))
{
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.skipPastChar(chCloseAngle);
return false;
}
// And next must be an equal sign
if (!scanEq())
{
static const XMLCh tmpList[] =
{
chSingleQuote, chDoubleQuote, chCloseAngle
, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedEqSign);
// Try to sync back up by skipping forward until we either
// hit something meaningful.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
{
// Jump back to top for normal processing of these
continue;
}
else if ((chFound == chSingleQuote)
|| (chFound == chDoubleQuote)
{
// Just fall through assuming that the value is to follow
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
return false;
}
else
{
// Something went really wrong
return false;
}
}
// See if this attribute is declared for this element. If we are
// not validating of course it will not be at first, but we will
// fault it into the pool (to avoid lots of redundant errors.)
XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( fAttNameBuf.getRawBuffer());
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
XMLCh * namePtr = fAttNameBuf.getRawBuffer();
// Add this attribute to the attribute list that we use to
// pass them to the handler. We reuse its existing elements
// but expand it as required.
// Note that we want to this first since this will
// make a copy of the namePtr; we can then make use of
// that copy in the hashtable lookup that checks
// for duplicates. This will mean we may have to update
// the type of the XMLAttr later.
XMLAttr* curAtt;
if (attCount >= curAttListSize)
{
if (fDoNamespaces) {
curAtt = new (fMemoryManager) XMLAttr
(
fEmptyNamespaceId
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
, true
, fMemoryManager
);
}
else
{
curAtt = new (fMemoryManager) XMLAttr
(
-1
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
, true
, fMemoryManager
);
}
fAttrList->addElement(curAtt);
}
else
{
curAtt = fAttrList->elementAt(attCount);
if (fDoNamespaces)
{
curAtt->set
(
fEmptyNamespaceId
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
);
}
else
{
curAtt->set
(
-1
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
);
}
curAtt->setSpecified(true);
}
// reset namePtr so it refers to newly-allocated memory
namePtr = (XMLCh *)curAtt->getName();
// now need to prepare for duplicate detection
if(attDef)
unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
if(!curCountPtr)
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
curCountPtr = getNewUIntPtr();
*curCountPtr = fElemCount;
fAttDefRegistry->put(attDef, curCountPtr);
}
else if(*curCountPtr < fElemCount)
*curCountPtr = fElemCount;
else
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, attDef->getFullName()
, elemDecl->getFullName()
);
}
}
else
{
unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr);
if(!curCountPtr)
{
curCountPtr = getNewUIntPtr();
*curCountPtr = fElemCount;
fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr);
}
else if(*curCountPtr < fElemCount)
*curCountPtr = fElemCount;
else
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, namePtr
, elemDecl->getFullName()
}
if (fValidate)
{
if (!attDef)
fValidator->emitError
(
XMLValid::AttNotDefinedForElement
, fAttNameBuf.getRawBuffer()
, qnameRawBuf
);
}
}
// Skip any whitespace before the value and then scan the att
// value. This will come back normalized with entity refs and
// char refs expanded.
fReaderMgr.skipPastSpaces();
if (!scanAttValue(attDef, fAttNameBuf.getRawBuffer(), fAttValueBuf))
{
static const XMLCh tmpList[] =
{
chCloseAngle, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedAttrValue);
// It failed, so lets try to get synced back up. We skip
// forward until we find some whitespace or one of the
// chars in our list.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle)
|| (chFound == chForwardSlash)
{
// Just fall through and process this attribute, though
// the value will be "".
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
return false;
}
else
{
// Something went really wrong
return false;
}
}
// must set the newly-minted value on the XMLAttr:
curAtt->setValue(fAttValueBuf.getRawBuffer());
// Now that its all stretched out, lets look at its type and
// determine if it has a valid value. It will output any needed
// errors, but we just keep going. We only need to do this if
// we are validating.
if (attDef)
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
{
// Let the validator pass judgement on the attribute value
if (fValidate)
{
fValidator->validateAttrValue
(
attDef
, fAttValueBuf.getRawBuffer()
, false
, elemDecl
);
}
}
if (fDoNamespaces)
{
// Make sure that the name is basically well formed for namespace
// enabled rules. It either has no colons, or it has one which
// is neither the first or last char.
const int colonFirst = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon);
if (colonFirst != -1)
{
const int colonLast = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon);
if (colonFirst != colonLast)
{
emitError(XMLErrs::TooManyColonsInName);
continue;
}
else if ((colonFirst == 0)
|| (colonLast == (int)fAttNameBuf.getLen() - 1))
{
emitError(XMLErrs::InvalidColonPos);
continue;
}
}
}
attCount++;
// And jump back to the top of the loop
continue;
}
// It was some special case character so do all of the checks and
// deal with it.
if (!nextCh)
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
if (nextCh == chForwardSlash)
{
fReaderMgr.getNextChar();
isEmpty = true;
if (!fReaderMgr.skippedChar(chCloseAngle))
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if (nextCh == chCloseAngle)
{
fReaderMgr.getNextChar();
break;
}
else if (nextCh == chOpenAngle)
{
// Check for this one specially, since its going to be common
// and it is kind of auto-recovering since we've already hit the
// next open bracket, which is what we would have seeked to (and
// skipped this whole tag.)
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
{
// Check for this one specially, which is probably a missing
// attribute name, e.g. ="value". Just issue expected name
// error and eat the quoted string, then jump back to the
// top again.
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.getNextChar();
fReaderMgr.skipQuotedString(nextCh);
fReaderMgr.skipPastSpaces();
continue;
}
}
// Make an initial pass through the list and find any xmlns attributes.
if (fDoNamespaces && attCount)
Gareth Reakes
committed
scanAttrListforNameSpaces(fAttrList, attCount, elemDecl);
if(attCount)
{
// clean up after ourselves:
// clear the map used to detect duplicate attributes
fUndeclaredAttrRegistry->removeAll();
}
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
// Now lets get the fAttrList filled in. This involves faulting in any
// defaulted and fixed attributes and normalizing the values of any that
// we got explicitly.
//
// We update the attCount value with the total number of attributes, but
// it goes in with the number of values we got during the raw scan of
// explictly provided attrs above.
attCount = buildAttList(attCount, elemDecl, *fAttrList);
// If we have a document handler, then tell it about this start tag. We
// don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
// any prefix since its just one big name if we are not doing namespaces.
unsigned int uriId = fEmptyNamespaceId;
if (fDocHandler)
{
if (fDoNamespaces)
{
uriId = resolvePrefix
(
elemDecl->getElementName()->getPrefix()
, ElemStack::Mode_Element
);
}
fDocHandler->startElement
(
*elemDecl
, uriId
, (fDoNamespaces) ? elemDecl->getElementName()->getPrefix() : 0
, *fAttrList
, attCount
, false
, isRoot
);
}
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// If validating, then insure that its legal to have no content
if (fValidate)
{
const int res = fValidator->checkContent(elemDecl, 0, 0);
if (res >= 0)
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, qnameRawBuf
, elemDecl->getFormattedContentModel()
);
}
}
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
fDocHandler->endElement
(
*elemDecl
, uriId
, isRoot
, (fDoNamespaces) ? elemDecl->getElementName()->getPrefix()
: XMLUni::fgZeroLenString
);
// pass back type name information
Alberto Massari
committed
fDocHandler->elementTypeInfo(0, 0);
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
}
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If the elem stack is empty, then it was an empty root
if (isRoot)
gotData = false;
}
return true;
}
unsigned int
DGXMLScanner::resolveQName(const XMLCh* const qName
, XMLBuffer& prefixBuf
, const short mode
, int& prefixColonPos)
{
// Lets split out the qName into a URI and name buffer first. The URI
// can be empty.
prefixColonPos = XMLString::indexOf(qName, chColon);
if (prefixColonPos == -1)
{
// Its all name with no prefix, so put the whole thing into the name
// buffer. Then map the empty string to a URI, since the empty string
// represents the default namespace. This will either return some
// explicit URI which the default namespace is mapped to, or the
// the default global namespace.
bool unknown = false;
prefixBuf.reset();
return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);
}
else
{
// Copy the chars up to but not including the colon into the prefix
// buffer.
prefixBuf.set(qName, prefixColonPos);
// Watch for the special namespace prefixes. We always map these to
// special URIs. 'xml' gets mapped to the official URI that its defined
// to map to by the NS spec. xmlns gets mapped to a special place holder
// URI that we define (so that it maps to something checkable.)
const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();
if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) {
// if this is an element, it is an error to have xmlns as prefix
if (mode == ElemStack::Mode_Element)
emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);
return fXMLNSNamespaceId;
}
else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) {
return fXMLNamespaceId;
}
else
{
bool unknown = false;
unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);
if (unknown)
emitError(XMLErrs::UnknownPrefix, prefixRawBuf);
return uriId;
}
}
}
// ---------------------------------------------------------------------------
// DGXMLScanner: Grammar preparsing
// ---------------------------------------------------------------------------
//  Pre-parses a grammar from the given input source.
//
//  src           The input source to read the grammar from.
//  grammarType   Only Grammar::DTDGrammarType is acted upon; any other value
//                loads nothing.
//  toCache       Forwarded to loadDTDGrammar().
//
//  Returns the grammar produced by loadDTDGrammar(), or 0 when nothing was
//  loaded (unsupported grammar type, or an error that is handled here).
//
//  The reader manager is reset on every exit path except when an
//  OutOfMemoryException is propagated, which is rethrown untouched.
Grammar* DGXMLScanner::loadGrammar(const InputSource& src
                                   , const short grammarType
                                   , const bool toCache)
{
    Grammar* loadedGrammar = 0;

    try
    {
        fGrammarResolver->cacheGrammarFromParse(false);
        fGrammarResolver->useCachedGrammarInParse(false);
        fRootGrammar = 0;

        if (fValScheme == Val_Auto) {
            fValidate = true;
        }

        // Reset some status flags
        fInException = false;
        fStandalone = false;
        fErrorCount = 0;
        fHasNoDTD = true;

        if (grammarType == Grammar::DTDGrammarType) {
            loadedGrammar = loadDTDGrammar(src, toCache);
        }

        // Reset the reader manager to close all files, sockets, etc...
        fReaderMgr.reset();
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and fall through
        fReaderMgr.reset();
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and fall through
        fReaderMgr.reset();
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            // Report the exception at the severity it carries.
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::DisplayErrorMessage
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
        }
        catch(const OutOfMemoryException&)
        {
            // Propagate as-is; no reader manager reset on this path.
            throw;
        }
        catch(...)
        {
            // Flush the reader manager and rethrow user's error
            fReaderMgr.reset();
            throw;
        }

        // If it returned, then reset the reader manager and fall through
        fReaderMgr.reset();
    }
    catch(const OutOfMemoryException&)
    {
        // Propagate as-is; no reader manager reset on this path.
        throw;
    }
    catch(...)
    {
        // Reset and rethrow
        fReaderMgr.reset();
        throw;
    }

    return loadedGrammar;
}
//  Creates a fresh DTD grammar, scans the given input source into it as an
//  external subset, and returns it.
//
//  src       The input source holding the DTD.
//  toCache   When true, the grammar is re-registered with the grammar
//            resolver under the source's system id and cacheGrammars() is
//            called once scanning is done.
//
//  Returns fDTDGrammar. Throws a RuntimeException when a reader cannot be
//  created for the source.
Grammar* DGXMLScanner::loadDTDGrammar(const InputSource& src,
                                      const bool toCache)
{
    // Reset the validators
    fDTDValidator->reset();
    if (fValidatorFromUser)
        fValidator->reset();

    // Start from a brand new grammar and make it the current one everywhere.
    fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
    fGrammarResolver->putGrammar(fDTDGrammar);
    fGrammar = fDTDGrammar;
    fValidator->setGrammar(fGrammar);

    //  And for all installed handlers, send reset events. This gives them
    //  a chance to flush any cached data.
    if (fDocHandler)
        fDocHandler->resetDocument();
    if (fEntityHandler)
        fEntityHandler->resetEntities();
    if (fErrorReporter)
        fErrorReporter->resetErrors();

    // NOTE(review): a comment reading "Clear out the id reference list" stood
    // here with no corresponding statement. Confirm against version control
    // whether a statement was lost at this point.

    if (toCache) {
        // Re-key the grammar: pull it out from under the generic DTD entity
        // key, stamp its description with the pooled system id, and put it
        // back.
        unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
        const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);

        fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
        ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
        fGrammarResolver->putGrammar(fGrammar);
    }

    //  Handle the creation of the XML reader object for this input source.
    //  This will provide us with transcoding and basic lexing services.
    XMLReader* newReader = fReaderMgr.createReader
    (
        src
        , false
        , XMLReader::RefFrom_NonLiteral
        , XMLReader::Type_General
        , XMLReader::Source_External
        , fCalculateSrcOfs
    );
    if (!newReader) {
        // Either way this throws; the source only chooses which message code.
        if (src.getIssueFatalErrorIfNotFound())
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
        else
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
    }

    //  In order to make the processing work consistently, we have to
    //  make this look like an external entity. So create an entity
    //  decl and fill it in and push it with the reader, as happens
    //  with an external entity. Put a janitor on it to insure it gets
    //  cleaned up. The reader manager does not adopt them.
    const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
    DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);
    declDTD->setSystemId(src.getSystemId());
    Janitor<DTDEntityDecl> janDecl(declDTD);

    // Mark this one as a throw at end
    newReader->setThrowAtEnd(true);

    // And push it onto the stack, with its pseudo name
    fReaderMgr.pushReader(newReader, declDTD);

    //  If we have a doc type handler and advanced callbacks are enabled,
    //  call the doctype event.
    if (fDocTypeHandler) {

        // Create a dummy root
        DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
        (
            gDTDStr
            , fEmptyNamespaceId
            , DTDElementDecl::Any
            , fGrammarPoolMemoryManager
        );  // NOTE(review): ');' restored (missing in the damaged text)
        rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
        rootDecl->setExternalElemDeclaration(true);
        Janitor<DTDElementDecl> janSrc(rootDecl);

        fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true);
    }

    // Create DTDScanner
    DTDScanner dtdScanner
    (
        (DTDGrammar*)fGrammar
        , fDocTypeHandler
        , fGrammarPoolMemoryManager
        , fMemoryManager
    );
    dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);

    // Tell it its not in an include section
    dtdScanner.scanExtSubsetDecl(false, true);

    if (fValidate) {
        //  validate the DTD scan so far
        fValidator->preContentValidation(false, true);
    }

    if (toCache)
        fGrammarResolver->cacheGrammars();

    return fDTDGrammar;
}
// ---------------------------------------------------------------------------
// DGXMLScanner: Private helper methods
// ---------------------------------------------------------------------------
// This method handles the common initialization, to avoid having to do
// it redundantly in multiple constructors.
void DGXMLScanner::commonInit()
{
// And we need one for the raw attribute scan. This just stores key/
// value string pairs (prior to any processing.)
fAttrNSList = new (fMemoryManager) ValueVectorOf<XMLAttr*>(8, fMemoryManager);
fDTDValidator = new (fMemoryManager) DTDValidator();
Neil Graham
committed
fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
(
131, false, new (fMemoryManager)HashPtr(), fMemoryManager
);
fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
(
Neil Graham
committed
7, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
}
void DGXMLScanner::cleanUp()
{
delete fAttrNSList;
delete fDTDValidator;
Neil Graham
committed
delete fDTDElemNonDeclPool;
delete fAttDefRegistry;
delete fUndeclaredAttrRegistry;
}
// This method is called from scanStartTag() to build up the list of
// XMLAttr objects that will be passed out in the start tag callout. We
// get the key/value pairs from the raw scan of explicitly provided attrs,
// which have not been normalized. And we get the element declaration from
// which we will get any defaulted or fixed attribute defs and add those
// in as well.
unsigned int
DGXMLScanner::buildAttList(const unsigned int attCount
, XMLElementDecl* elemDecl
, RefVectorOf<XMLAttr>& toFill)
{