Newer
Older
fAttrNameHashList->setElementAt(attNameHash, attCount);
}
attCount++;
// And jump back to the top of the loop
continue;
}
// It was some special case character so do all of the checks and
// deal with it.
if (!nextCh)
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
if (nextCh == chForwardSlash)
{
fReaderMgr.getNextChar();
isEmpty = true;
if (!fReaderMgr.skippedChar(chCloseAngle))
emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
break;
}
else if (nextCh == chCloseAngle)
{
fReaderMgr.getNextChar();
break;
}
else if (nextCh == chOpenAngle)
{
// Check for this one specially, since its going to be common
// and it is kind of auto-recovering since we've already hit the
// next open bracket, which is what we would have seeked to (and
// skipped this whole tag.)
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
{
// Check for this one specially, which is probably a missing
// attribute name, e.g. ="value". Just issue expected name
// error and eat the quoted string, then jump back to the
// top again.
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.getNextChar();
fReaderMgr.skipQuotedString(nextCh);
fReaderMgr.skipPastSpaces();
continue;
}
}
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If the elem stack is empty, then it was an empty root
if (isRoot)
gotData = false;
}
// If we have a document handler, then tell it about this start tag. We
// don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
// any prefix since its just one big name if we are not doing namespaces.
if (fDocHandler)
{
fDocHandler->startElement
(
, fEmptyNamespaceId
, 0
, *fAttrList
, attCount
, isEmpty
, isRoot
);
}
return true;
}
// This method is called to scan a start tag when we are processing
// namespaces. There are two different versions of this method, one for
// namespace aware processing an done for non-namespace aware processing.
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
//
// This method is called after we've scanned the < of a start tag. So we
// have to get the element name, then scan the attributes, after which
// we are either going to see >, />, or attributes followed by one of those
// sequences.
bool WFXMLScanner::scanStartTagNS(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// The current position is after the open bracket, so we need to read in
// in the element name.
if (!fReaderMgr.getName(fQNameBuf))
{
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Assume it won't be an empty tag
bool isEmpty = false;
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// Lets try to look up the element
XMLElementDecl* elemDecl = fElementLookup->get(qnameRawBuf);
if (!elemDecl) {
if (!XMLString::compareNString(qnameRawBuf, XMLUni::fgXMLNSColonString, 6))
emitError(XMLErrs::NoXMLNSAsElementPrefix, qnameRawBuf);
if (fElementIndex < fElements->size()) {
elemDecl = fElements->elementAt(fElementIndex);
}
else {
elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
fGrammarPoolMemoryManager
fElements->addElement(elemDecl);
}
elemDecl->setElementName(qnameRawBuf, fEmptyNamespaceId);
fElementLookup->put((void*)elemDecl->getFullName(), elemDecl);
fElementIndex++;
}
// Expand the element stack and add the new element
fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
// reset NS attribute list
fAttrNSList->removeAllElements();
// We loop until we either see a /> or >, handling attribute/value
// pairs until we get there.
unsigned int attCount = 0;
unsigned int curAttListSize = fAttrList->size();
while (true)
{
// And get the next non-space character
XMLCh nextCh = fReaderMgr.peekNextChar();
// If the next character is not a slash or closed angle bracket,
// then it must be whitespace, since whitespace is required
// between the end of the last attribute and the name of the next
// one.
if (attCount)
{
if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
{
{
// Ok, skip by them and peek another char
fReaderMgr.skipPastSpaces();
nextCh = fReaderMgr.peekNextChar();
}
else
{
// Emit the error but keep on going
emitError(XMLErrs::ExpectedWhitespace);
}
}
}
// Ok, here we first check for any of the special case characters.
// If its not one, then we do the normal case processing, which
// assumes that we've hit an attribute value, Otherwise, we do all
// the special case checks.
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
{
// Assume its going to be an attribute, so get a name from
// the input.
if (!fReaderMgr.getName(fAttNameBuf))
{
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.skipPastChar(chCloseAngle);
return false;
}
// And next must be an equal sign
if (!scanEq())
{
static const XMLCh tmpList[] =
{
chSingleQuote, chDoubleQuote, chCloseAngle
, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedEqSign);
// Try to sync back up by skipping forward until we either
// hit something meaningful.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
{
// Jump back to top for normal processing of these
continue;
}
else if ((chFound == chSingleQuote)
|| (chFound == chDoubleQuote)
{
// Just fall through assuming that the value is to follow
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
return false;
}
else
{
// Something went really wrong
return false;
}
}
// See if this attribute is declared more than one for this element.
David Abram Cargill
committed
unsigned int attNameHash = XMLString::hash(attNameRawBuf, 109, fMemoryManager);
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
if (attCount) {
for (unsigned int k=0; k < attCount; k++) {
if (fAttrNameHashList->elementAt(k) == attNameHash) {
if (XMLString::equals(
fAttrList->elementAt(k)->getQName()
, attNameRawBuf))
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, attNameRawBuf
, qnameRawBuf
);
break;
}
}
}
}
// Skip any whitespace before the value and then scan the att
// value. This will come back normalized with entity refs and
// char refs expanded.
fReaderMgr.skipPastSpaces();
if (!scanAttValue(attNameRawBuf, fAttValueBuf))
{
static const XMLCh tmpList[] =
{
chCloseAngle, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedAttrValue);
// It failed, so lets try to get synced back up. We skip
// forward until we find some whitespace or one of the
// chars in our list.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle)
|| (chFound == chForwardSlash)
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
{
// Just fall through and process this attribute, though
// the value will be "".
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
return false;
}
else
{
// Something went really wrong
return false;
}
}
// Add this attribute to the attribute list that we use to
// pass them to the handler. We reuse its existing elements
// but expand it as required.
const XMLCh* attValueRawBuf = fAttValueBuf.getRawBuffer();
XMLAttr* curAtt = 0;
if (attCount >= curAttListSize)
{
curAtt = new (fMemoryManager) XMLAttr
(
fEmptyNamespaceId
, attNameRawBuf
, attValueRawBuf
, XMLAttDef::CData
, true
, fMemoryManager
);
fAttrList->addElement(curAtt);
fAttrNameHashList->addElement(attNameHash);
}
else
{
curAtt = fAttrList->elementAt(attCount);
curAtt->set
(
fEmptyNamespaceId
, attNameRawBuf
, attValueRawBuf
);
fAttrNameHashList->setElementAt(attNameHash, attCount);
}
// Make sure that the name is basically well formed for namespace
// enabled rules. It either has no colons, or it has one which
// is neither the first or last char.
const int colonFirst = XMLString::indexOf(attNameRawBuf, chColon);
if (colonFirst != -1)
{
const int colonLast = XMLString::lastIndexOf(attNameRawBuf, chColon);
if (colonFirst != colonLast)
{
emitError(XMLErrs::TooManyColonsInName);
continue;
}
else if ((colonFirst == 0)
|| (colonLast == (int)fAttNameBuf.getLen() - 1))
{
emitError(XMLErrs::InvalidColonPos);
continue;
}
}
// Map prefix to namespace
const XMLCh* attPrefix = curAtt->getPrefix();
const XMLCh* attLocalName = curAtt->getName();
const XMLCh* namespaceURI = fAttValueBuf.getRawBuffer();
if (attPrefix && *attPrefix) {
if (XMLString::equals(attPrefix, XMLUni::fgXMLString)) {
curAtt->setURIId(fXMLNamespaceId);
}
else if (XMLString::equals(attPrefix, XMLUni::fgXMLNSString)) {
if (XMLString::equals(attLocalName, XMLUni::fgXMLNSString))
emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
else if (XMLString::equals(attLocalName, XMLUni::fgXMLString)) {
if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
}
Neil Graham
committed
if (!namespaceURI)
emitError(XMLErrs::NoEmptyStrNamespace, attNameRawBuf);
else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
emitError(XMLErrs::NoEmptyStrNamespace, attNameRawBuf);
, fURIStringPool->addOrFind(namespaceURI)
);
curAtt->setURIId(fXMLNSNamespaceId);
}
else {
fAttrNSList->addElement(curAtt);
}
}
else {
if (XMLString::equals(XMLUni::fgXMLNSString, attLocalName)) {
if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
emitError(XMLErrs::NoUseOfxmlnsURI);
else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
emitError(XMLErrs::XMLURINotMatchXMLPrefix);
fElemStack.addPrefix
(
XMLUni::fgZeroLenString
, fURIStringPool->addOrFind(namespaceURI)
);
}
}
// increment attribute count
attCount++;
// And jump back to the top of the loop
continue;
}
// It was some special case character so do all of the checks and
// deal with it.
if (!nextCh)
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
if (nextCh == chForwardSlash)
{
fReaderMgr.getNextChar();
isEmpty = true;
if (!fReaderMgr.skippedChar(chCloseAngle))
emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
break;
}
else if (nextCh == chCloseAngle)
{
fReaderMgr.getNextChar();
break;
}
else if (nextCh == chOpenAngle)
{
// Check for this one specially, since its going to be common
// and it is kind of auto-recovering since we've already hit the
// next open bracket, which is what we would have seeked to (and
// skipped this whole tag.)
emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
break;
}
else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
{
// Check for this one specially, which is probably a missing
// attribute name, e.g. ="value". Just issue expected name
// error and eat the quoted string, then jump back to the
// top again.
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.getNextChar();
fReaderMgr.skipQuotedString(nextCh);
fReaderMgr.skipPastSpaces();
continue;
}
}
// Handle provided attributes that we did not map their prefixes
for (unsigned int i=0; i < fAttrNSList->size(); i++) {
XMLAttr* providedAttr = fAttrNSList->elementAt(i);
resolvePrefix
(
providedAttr->getPrefix(),
ElemStack::Mode_Attribute
)
Alberto Massari
committed
if(attCount) {
//
// Decide if to use hash table to do duplicate checking
//
bool toUseHashTable = false;
setAttrDupChkRegistry(attCount, toUseHashTable);
Alberto Massari
committed
// check for duplicate namespace attributes:
// by checking for qualified names with the same local part and with prefixes
// which have been bound to namespace names that are identical.
XMLAttr* loopAttr;
XMLAttr* curAtt;
for (unsigned int attrIndex=0; attrIndex < attCount-1; attrIndex++) {
loopAttr = fAttrList->elementAt(attrIndex);
if (!toUseHashTable)
{
for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
curAtt = fAttrList->elementAt(curAttrIndex);
if (curAtt->getURIId() == loopAttr->getURIId() &&
XMLString::equals(curAtt->getName(), loopAttr->getName())) {
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
Alberto Massari
committed
, curAtt->getName()
, elemDecl->getFullName()
);
}
}
}
else
{
if (fAttrDupChkRegistry->containsKey((void*)loopAttr->getName(), loopAttr->getURIId()))
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, loopAttr->getName()
, elemDecl->getFullName()
Alberto Massari
committed
);
}
fAttrDupChkRegistry->put((void*)loopAttr->getName(), loopAttr->getURIId(), loopAttr);
Alberto Massari
committed
}
}
// Resolve the qualified name to a URI.
unsigned int uriId = resolvePrefix
(
elemDecl->getElementName()->getPrefix()
, ElemStack::Mode_Element
);
// Now we can update the element stack
fElemStack.setCurrentURI(uriId);
// Tell the document handler about this start tag
if (fDocHandler)
{
, elemDecl->getElementName()->getPrefix()
, *fAttrList
, attCount
, false
, isRoot
);
}
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
fDocHandler->endElement
(
, elemDecl->getElementName()->getPrefix()
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
);
}
// If the elem stack is empty, then it was an empty root
if (isRoot)
gotData = false;
}
return true;
}
unsigned int
WFXMLScanner::resolveQName(const XMLCh* const qName
, XMLBuffer& prefixBuf
, const short mode
, int& prefixColonPos)
{
// Lets split out the qName into a URI and name buffer first. The URI
// can be empty.
prefixColonPos = XMLString::indexOf(qName, chColon);
if (prefixColonPos == -1)
{
// Its all name with no prefix, so put the whole thing into the name
// buffer. Then map the empty string to a URI, since the empty string
// represents the default namespace. This will either return some
// explicit URI which the default namespace is mapped to, or the
// the default global namespace.
bool unknown = false;
prefixBuf.reset();
return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);
}
else
{
// Copy the chars up to but not including the colon into the prefix
// buffer.
prefixBuf.set(qName, prefixColonPos);
// Watch for the special namespace prefixes. We always map these to
// special URIs. 'xml' gets mapped to the official URI that its defined
// to map to by the NS spec. xmlns gets mapped to a special place holder
// URI that we define (so that it maps to something checkable.)
const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();
if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) {
// if this is an element, it is an error to have xmlns as prefix
if (mode == ElemStack::Mode_Element)
emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);
return fXMLNSNamespaceId;
}
else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) {
unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);
emitError(XMLErrs::UnknownPrefix, prefixRawBuf);
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
return uriId;
}
}
}
// ---------------------------------------------------------------------------
// XMLScanner: Private parsing methods
// ---------------------------------------------------------------------------
bool WFXMLScanner::scanAttValue(const XMLCh* const attrName
, XMLBuffer& toFill)
{
// Reset the target buffer
toFill.reset();
// Get the next char which must be a single or double quote
XMLCh quoteCh;
if (!fReaderMgr.skipIfQuote(quoteCh))
return false;
// We have to get the current reader because we have to ignore closing
// quotes until we hit the same reader again.
const unsigned int curReader = fReaderMgr.getCurrentReaderNum();
// Loop until we get the attribute value. Note that we use a double
// loop here to avoid the setup/teardown overhead of the exception
// handler on every round.
XMLCh nextCh;
XMLCh secondCh = 0;
bool gotLeadingSurrogate = false;
bool escaped;
while (true)
{
try
{
while(true)
{
nextCh = fReaderMgr.getNextChar();
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
// Check for our ending quote in the same entity
if (nextCh == quoteCh)
{
if (curReader == fReaderMgr.getCurrentReaderNum())
return true;
// Watch for spillover into a previous entity
if (curReader > fReaderMgr.getCurrentReaderNum())
{
emitError(XMLErrs::PartialMarkupInEntity);
return false;
}
}
// Check for an entity ref now, before we let it affect our
// whitespace normalization logic below. We ignore the empty flag
// in this one.
escaped = false;
if (nextCh == chAmpersand)
{
if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
{
gotLeadingSurrogate = false;
continue;
}
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
{
emitError(XMLErrs::Expected2ndSurrogateChar);
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate) {
emitError(XMLErrs::Expected2ndSurrogateChar);
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
}
// If its not escaped, then make sure its not a < character, which
// is not allowed in attribute values.
if (!escaped) {
if (nextCh == chOpenAngle)
nextCh = chSpace;
}
// Else add it to the buffer
toFill.append(nextCh);
if (secondCh)
Alberto Massari
committed
{
toFill.append(secondCh);
secondCh=0;
}
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
}
}
catch(const EndOfEntityException&)
{
// Just eat it and continue.
gotLeadingSurrogate = false;
escaped = false;
}
}
return true;
}
// This method scans a CDATA section. It collects the character into one
// of the temp buffers and calls the document handler, if any, with the
// characters. It assumes that the <![CDATA string has been scanned before
// this call.
void WFXMLScanner::scanCDSection()
{
static const XMLCh CDataClose[] =
{
chCloseSquare, chCloseAngle, chNull
};
// The next character should be the opening square bracket. If not
// issue an error, but then try to recover by skipping any whitespace
// and checking again.
if (!fReaderMgr.skippedChar(chOpenSquare))
{
emitError(XMLErrs::ExpectedOpenSquareBracket);
fReaderMgr.skipPastSpaces();
// If we still don't find it, then give up, else keep going
if (!fReaderMgr.skippedChar(chOpenSquare))
return;
}
// Get a buffer for this
XMLBufBid bbCData(&fBufMgr);
// We just scan forward until we hit the end of CDATA section sequence.
// CDATA is effectively a big escape mechanism so we don't treat markup
// characters specially here.
bool emittedError = false;
while (true)
{
const XMLCh nextCh = fReaderMgr.getNextChar();
// Watch for unexpected end of file
if (!nextCh)
{
emitError(XMLErrs::UnterminatedCDATASection);
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
}
// If this is a close square bracket it could be our closing
// sequence.
if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
{
// make sure we were not expecting a trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// If we have a doc handler, call it
if (fDocHandler)
{
fDocHandler->docCharacters
(
bbCData.getRawBuffer()
, bbCData.getLen()
, true
);
}
// And we are done
break;
}
// Make sure its a valid character. But if we've emitted an error
// already, don't bother with the overhead since we've already told
// them about it.
if (!emittedError)
{
// Deal with surrogate pairs
if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
emittedError = true;
}
}
gotLeadingSurrogate = false;
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
}
}
// Add it to the buffer
bbCData.append(nextCh);
}
}
void WFXMLScanner::scanCharData(XMLBuffer& toUse)
{
// We have to watch for the stupid ]]> sequence, which is illegal in
// character data. So this is a little state machine that handles that.
enum States
{
State_Waiting
, State_GotOne
, State_GotTwo
};
// Reset the buffer before we start
toUse.reset();
// Turn on the 'throw at end' flag of the reader manager
ThrowEOEJanitor jan(&fReaderMgr, true);
// In order to be more efficient we have to use kind of a deeply nested
// set of blocks here. The outer block puts on a try and catches end of
// entity exceptions. The inner loop is the per-character loop. If we
// put the try inside the inner loop, it would work but would require
// the exception handling code setup/teardown code to be invoked for
// each character.
XMLCh nextCh;
XMLCh secondCh = 0;
States curState = State_Waiting;
bool escaped = false;
bool gotLeadingSurrogate = false;
bool notDone = true;
while (notDone)
{
try
{
while (true)
{
// Eat through as many plain content characters as possible without
// needing special handling. Moving most content characters here,
// in this one call, rather than running the overall loop once
// per content character, is a speed optimization.
if (curState == State_Waiting && !gotLeadingSurrogate)
fReaderMgr.movePlainContentChars(toUse);
// Try to get another char from the source
// The code from here on down covers all contengencies,
if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
{
// If we were waiting for a trailing surrogate, its an error
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
notDone = false;
break;
}
// Watch for a reference. Note that the escapement mechanism
// is ignored in this content.
escaped = false;
if (nextCh == chAmpersand)
{
sendCharData(toUse);
// Turn off the throwing at the end of entity during this
ThrowEOEJanitor jan(&fReaderMgr, false);
if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
{
gotLeadingSurrogate = false;
continue;
}
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
{
emitError(XMLErrs::Expected2ndSurrogateChar);
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate) {
emitError(XMLErrs::Expected2ndSurrogateChar);
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);