Newer
Older
// Scans all the input from the start of the file to the root element.
// There does not have to be anything in the prolog necessarily, but usually
// there is at least an XMLDecl.
//
// On exit from here we are either at the end of the file or about to read
// the opening < of the root element.
void XMLScanner::scanProlog()
{
// Drives the prolog scan: peeks at the next character and dispatches on it
// (XML declaration, PI, comment, DOCTYPE, whitespace, or anything else).
//
// NOTE(review): this copy of the function is visibly damaged. Several brace
// lines are absent (e.g. the opening brace of the while loop and the closing
// braces of a number of branches), the condition that should guard the
// whitespace-handling block is missing (only a bare '{' remains), and a run
// of bare numbers sits just before the final closing brace. Restore the
// function from a pristine copy before relying on the control flow as it
// reads here.
// Get a buffer for whitespace processing
XMLBufBid bbCData(&fBufMgr);
// Loop through the prolog. If there is no content, this could go all
// the way to the end of the file.
try
{
while (true)
const XMLCh nextCh = fReaderMgr.peekNextChar();
if (nextCh == chOpenAngle)
{
// Ok, it could be the xml decl, a comment, the doc type line,
// or the start of the root element.
if (checkXMLDecl(true))
{
// There shall be at least --ONE-- space in between
// the tag '<?xml' and the VersionInfo.
//
// If we are not at line 1, column 7 (the value tested below;
// presumably the position just past '<?xml' and its one mandatory
// whitespace character), then the decl was not the first text,
// so it's invalid.
const XMLReader* curReader = fReaderMgr.getCurrentReader();
if ((curReader->getLineNumber() != 1)
|| (curReader->getColumnNumber() != 7))
{
emitError(XMLErrs::XMLDeclMustBeFirst);
scanXMLDecl(Decl_XML);
}
else if (fReaderMgr.skippedString(XMLUni::fgPIString))
{
scanPI();
}
else if (fReaderMgr.skippedString(XMLUni::fgCommentString))
{
scanComment();
}
else if (fReaderMgr.skippedString(XMLUni::fgDocTypeString))
{
scanDocTypeDecl();
// if reusing grammar, this has been validated already in first scan
// skip for performance
if (fValidate && !fGrammar->getValidated()) {
// validate the DTD scan so far
fValidator->preContentValidation(fUseCachedGrammar, true);
else
{
// Assume it's the start of the root element
return;
// NOTE(review): the branch condition for the block below is missing in this
// copy; from the body it is the whitespace case.
{
// If we have a document handler then gather up the
// whitespace and call back. Otherwise just skip over spaces.
if (fDocHandler)
{
fReaderMgr.getSpaces(bbCData.getBuffer());
fDocHandler->ignorableWhitespace
(
bbCData.getRawBuffer()
, bbCData.getLen()
, false
);
}
else
{
fReaderMgr.skipPastSpaces();
else
{
emitError(XMLErrs::InvalidDocumentStructure);
// Watch for end of file and break out; otherwise resynchronise
// by skipping past the next '>'.
if (!nextCh)
break;
else
fReaderMgr.skipPastChar(chCloseAngle);
}
catch(const EndOfEntityException&)
{
// We should never get an end of entity here. They should only
// occur within the doc type scanning method, and not leak out to
// here.
emitError
(
XMLErrs::UnexpectedEOE
, "in prolog"
);
}
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
}
// Scans the <?xml .... ?> line. This stuff is all sequential so we don't
// do any state machine loop here. We just bull straight through it. It ends
// past the closing bracket. If there is a document handler, then its called
// on the XMLDecl callback.
//
// On entry, the <?xml has been scanned, and we pick it up from there.
//
// NOTE: In order to provide good recovery from bad XML here, we try to be
// very flexible. No matter what order the stuff is in, we'll keep going
// though we'll issue errors.
//
// The parameter tells us which type of decl we should expect, Text or XML.
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
// [77] TextDecl::= '<?xml' VersionInfo? EncodingDecl S? '?>'
void XMLScanner::scanXMLDecl(const DeclTypes type)
{
// Get us some buffers to use
XMLBufBid bbVersion(&fBufMgr);
XMLBufBid bbEncoding(&fBufMgr);
XMLBufBid bbStand(&fBufMgr);
XMLBufBid bbDummy(&fBufMgr);
XMLBufBid bbName(&fBufMgr);
// We use this little enum and array to keep up with what we found
// and what order we found them in. This lets us get them free form
// without too much overhead, but still know that they were in the
// wrong order.
enum Strings
{
VersionString
, EncodingString
, StandaloneString
, UnknownString
, StringCount
};
int flags[StringCount] = { -1, -1, -1, -1 };
// Also set up a list of buffers in the right order so that we know
// where to put stuff.
XMLBuffer* buffers[StringCount] ;
buffers[0] = &bbVersion.getBuffer();
buffers[1] = &bbEncoding.getBuffer();
buffers[2] = &bbStand.getBuffer();
buffers[3] = &bbDummy.getBuffer();
int curCount = 0;
Strings curString;
XMLBuffer& nameBuf = bbName.getBuffer();
while (true)
{
// Skip any spaces
const unsigned int spaceCount = fReaderMgr.skipPastSpaces();
// If we are looking at a question mark, then break out
if (fReaderMgr.lookingAtChar(chQuestion))
break;
// If this is not the first string, then we require the spaces
if (!spaceCount && curCount)
emitError(XMLErrs::ExpectedWhitespace);
// Get characters up to the next whitespace or equal's sign.
if (!scanUpToWSOr(nameBuf, chEqual))
emitError(XMLErrs::ExpectedDeclString);
// See if it matches any of our expected strings
if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgVersionString))
else if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgEncodingString))
else if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgStandaloneString))
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
curString = StandaloneString;
else
curString = UnknownString;
// If its an unknown string, then give that error. Else check to
// see if this one has been done already and give that error.
if (curString == UnknownString)
emitError(XMLErrs::ExpectedDeclString, nameBuf.getRawBuffer());
else if (flags[curString] != -1)
emitError(XMLErrs::DeclStringRep, nameBuf.getRawBuffer());
else if (flags[curString] == -1)
flags[curString] = ++curCount;
// Scan for an equal's sign. If we don't find it, issue an error
// but keep trying to go on.
if (!scanEq())
emitError(XMLErrs::ExpectedEqSign);
// Get a quote string into the buffer for the string that we are
// currently working on.
if (!getQuotedString(*buffers[curString]))
{
emitError(XMLErrs::ExpectedQuotedString);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// And validate the value according which one it was
const XMLCh* rawValue = buffers[curString]->getRawBuffer();
if (curString == VersionString)
{
if (XMLString::equals(rawValue, XMLUni::fgVersion1_1)) {
if (type == Decl_XML) {
fReaderMgr.setXMLVersion(XMLReader::XMLV1_1);
}
}
else if (!XMLString::equals(rawValue, XMLUni::fgVersion1_0))
emitError(XMLErrs::UnsupportedXMLVersion, rawValue);
}
else if (curString == EncodingString)
{
if (!XMLString::isValidEncName(rawValue))
emitError(XMLErrs::BadXMLEncoding, rawValue);
}
else if (curString == StandaloneString)
{
if (XMLString::equals(rawValue, XMLUni::fgYesString))
else if (XMLString::equals(rawValue, XMLUni::fgNoString))
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
fStandalone = false;
else
{
emitError(XMLErrs::BadStandalone);
if (!XMLString::compareIString(rawValue, XMLUni::fgYesString))
fStandalone = true;
else if (!XMLString::compareIString(rawValue, XMLUni::fgNoString))
fStandalone = false;
}
}
}
// Make sure that the strings present are in order. We don't care about
// which ones are present at this point, just that any there are in the
// right order.
int curTop = 0;
for (int index = VersionString; index < StandaloneString; index++)
{
if (flags[index] != -1)
{
if (flags[index] != curTop + 1)
{
emitError(XMLErrs::DeclStringsInWrongOrder);
break;
}
curTop = flags[index];
}
}
// If its an XML decl, the version must be present.
// If its a Text decl, then encoding must be present AND standalone must not be present.
if ((type == Decl_XML) && (flags[VersionString] == -1))
emitError(XMLErrs::XMLVersionRequired);
else if (type == Decl_Text) {
if (flags[StandaloneString] != -1)
emitError(XMLErrs::StandaloneNotLegal);
if (flags[EncodingString] == -1)
emitError(XMLErrs::EncodingRequired);
}
if (!fReaderMgr.skippedChar(chQuestion))
{
emitError(XMLErrs::UnterminatedXMLDecl);
fReaderMgr.skipPastChar(chCloseAngle);
}
else if (!fReaderMgr.skippedChar(chCloseAngle))
{
emitError(XMLErrs::UnterminatedXMLDecl);
fReaderMgr.skipPastChar(chCloseAngle);
}
// Do this before we possibly update the reader with the
// actual encoding string. Otherwise, we will pass the wrong thing
// for the last parameter!
const XMLCh* actualEnc = fReaderMgr.getCurrentEncodingStr();
// Ok, we've now seen the real encoding string, if there was one, so
// lets call back on the current reader and tell it what the real
// encoding string was. If it fails, that's because it represents some
// sort of contradiction with the autosensed format, and it keeps the
// original encoding.
//
// NOTE: This can fail for a number of reasons, such as a bogus encoding
// name or because its in flagrant contradiction of the auto-sensed
// format.
if (flags[EncodingString] != -1)
{
if (!fReaderMgr.getCurrentReader()->setEncoding(bbEncoding.getRawBuffer()))
emitError(XMLErrs::ContradictoryEncoding, bbEncoding.getRawBuffer());
else
actualEnc = bbEncoding.getRawBuffer();
}
// If we have a document handler then call the XML Decl callback.
if (fDocHandler)
{
fDocHandler->XMLDecl
(
bbVersion.getRawBuffer()
, bbEncoding.getRawBuffer()
, bbStand.getRawBuffer()
, actualEnc
);
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
}
}
const XMLCh* XMLScanner::getURIText(const unsigned int uriId) const
{
    // An id the pool does not know about maps to the empty string.
    if (!fURIStringPool->exists(uriId))
        return XMLUni::fgZeroLenString;

    // Fetch the text stored for this id; a null entry is also reported
    // as the empty string so callers never receive a null pointer.
    const XMLCh* const uriText = fURIStringPool->getValueForId(uriId);
    return uriText ? uriText : XMLUni::fgZeroLenString;
}
bool XMLScanner::getURIText( const unsigned int uriId
, XMLBuffer& uriBufToFill) const
{
// NOTE(review): this overload is cut short in this copy -- the statement
// controlled by `if (!value)` and everything after it (including whatever
// is done with uriBufToFill and the return values) are missing. Restore
// the body from a pristine copy before building.
if (fURIStringPool->exists(uriId)) {
// Fetch the text stored in the URI string pool for this id
const XMLCh* value = fURIStringPool->getValueForId(uriId);
if (!value)
}
bool XMLScanner::checkXMLDecl(bool startWithAngle) {
    // Tests whether the input is positioned at the start of an XML/Text
    // declaration and, if so, consumes the introducer together with the
    // single whitespace character that must follow it.
    //
    // startWithAngle selects which family of introducer strings is tried:
    // the fgXMLDeclString* set when true, the fgXMLString* set when false.
    //
    // Returns true when an introducer was consumed (an upper-case spelling
    // is accepted too, after reporting XMLDeclMustBeLowerCase); returns
    // false, consuming nothing, otherwise.
    //
    // [23] XMLDecl     ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
    // [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
    //
    // [3]  S           ::= (#x20 | #x9 | #xD | #xA)+

    if (startWithAngle) {
        // Cheap peek first so the per-whitespace variants are only tried
        // when the common prefix is actually present.
        if (fReaderMgr.peekString(XMLUni::fgXMLDeclString)) {
            if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpace)
               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTab)
               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLF)
               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCR))
            {
                return true;
            }
            else if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpaceU)
               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTabU)
               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLFU)
               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCRU))
            {
                //  Just in case, check for upper case. If found, issue
                //  an error, but keep going.
                emitError(XMLErrs::XMLDeclMustBeLowerCase);
                return true;
            }
        }
    }
    else {
        if (fReaderMgr.peekString(XMLUni::fgXMLString)) {
            if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpace)
               || fReaderMgr.skippedString(XMLUni::fgXMLStringHTab)
               || fReaderMgr.skippedString(XMLUni::fgXMLStringLF)
               || fReaderMgr.skippedString(XMLUni::fgXMLStringCR))
            {
                return true;
            }
            else if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpaceU)
               || fReaderMgr.skippedString(XMLUni::fgXMLStringHTabU)
               || fReaderMgr.skippedString(XMLUni::fgXMLStringLFU)
               || fReaderMgr.skippedString(XMLUni::fgXMLStringCRU))
            {
                //  Just in case, check for upper case. If found, issue
                //  an error, but keep going.
                emitError(XMLErrs::XMLDeclMustBeLowerCase);
                return true;
            }
        }
    }

    return false;
}
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
// ---------------------------------------------------------------------------
// XMLScanner: Grammar preparsing
// ---------------------------------------------------------------------------
Grammar* XMLScanner::loadGrammar(const XMLCh* const systemId
                                 , const short      grammarType
                                 , const bool       toCache)
{
    // Resolves systemId to an input source and delegates to the
    // InputSource overload of loadGrammar, returning whatever it returns.
    //
    // Resolution order: the installed entity handler (if any) gets first
    // refusal; otherwise the id is tried as a URL, falling back to a local
    // file when it is relative or not a well-formed URL at all.
    InputSource* srcToUse = 0;

    if (fEntityHandler){
        srcToUse = fEntityHandler->resolveEntity(XMLUni::fgZeroLenString, systemId);
    }

    //  First we try to parse it as a URL. If that fails, we assume its
    //  a file and try it that way.
    if (!srcToUse) {
        try
        {
            //  Create a temporary URL. Since this is the primary document,
            //  it has to be fully qualified. If not, then assume we are just
            //  mistaking a file for a URL.
            XMLURL tmpURL(systemId);
            if (tmpURL.isRelative())
            {
                srcToUse = new LocalFileInputSource(systemId);
            }
            else
            {
                srcToUse = new URLInputSource(tmpURL);
            }
        }
        catch(const MalformedURLException&)
        {
            srcToUse = new LocalFileInputSource(systemId);
        }
        // Any other exception simply propagates to the caller.
    }

    // The janitor deletes the source when this function exits, whether by
    // return or by exception.
    Janitor<InputSource> janSrc(srcToUse);
    return loadGrammar(*srcToUse, grammarType, toCache);
}
Grammar* XMLScanner::loadGrammar(const char* const systemId
                                 , const short     grammarType
                                 , const bool      toCache)
{
    // Convenience overload for a system id in the local code page: transcode
    // it and forward to the XMLCh overload. The janitor releases the
    // transcoded copy on the way out.
    XMLCh* wideSystemId = XMLString::transcode(systemId);
    ArrayJanitor<XMLCh> wideSystemIdGuard(wideSystemId);

    return loadGrammar(wideSystemId, grammarType, toCache);
}
// ---------------------------------------------------------------------------
// XMLScanner: Setter methods
// ---------------------------------------------------------------------------
void XMLScanner::setURIStringPool(XMLStringPool* const stringPool)
{
    // Adopt the caller's pool as the URI string pool, then register (or look
    // up) the four well-known namespace URIs in it and cache their ids.
    fURIStringPool = stringPool;

    fEmptyNamespaceId   = fURIStringPool->addOrFind(XMLUni::fgZeroLenString);
    fUnknownNamespaceId = fURIStringPool->addOrFind(XMLUni::fgUnknownURIName);
    fXMLNamespaceId     = fURIStringPool->addOrFind(XMLUni::fgXMLURIName);
    fXMLNSNamespaceId   = fURIStringPool->addOrFind(XMLUni::fgXMLNSURIName);
}
// ---------------------------------------------------------------------------
// XMLScanner: Private helper methods
// ---------------------------------------------------------------------------
// This method is called after the content scan to ensure that all the
// ID/IDREF attributes match up (i.e. that all IDREFs refer to IDs.) This is
// an XML 1.0 rule, so we can do it here in the core.
void XMLScanner::checkIDRefs()
{
    //  Iterate the id ref list. If we find any entries here which are used
    //  but not declared, then that's an error.
    RefHashTableOfEnumerator<XMLRefInfo> refEnum(fIDRefList);
    while (refEnum.hasMoreElements())
    {
        // Get a ref to the current element
        const XMLRefInfo& curRef = refEnum.nextElement();

        // If its used but not declared, then its an error. It is only
        // reported (through the validator) when validation is turned on.
        if (!curRef.getDeclared() && curRef.getUsed() && fValidate)
            fValidator->emitError(XMLValid::IDNotDeclared, curRef.getRefName());
    }
}
// This just does a simple check that the passed progressive scan token is
// legal for this scanner.
bool XMLScanner::isLegalToken(const XMLPScanToken& toCheck)
{
    // A token is legal only when both its scanner id and its sequence id
    // match the values currently held by this scanner.
    if (fScannerId != toCheck.fScannerId)
        return false;

    return fSequenceId == toCheck.fSequenceId;
}
// This method will handle figuring out what the next top level token is
// in the input stream. It will return an enumerated value that indicates
// what it believes the next XML level token must be. It will eat as many
// chars as are required to figure out what is next.
XMLScanner::XMLTokens XMLScanner::senseNextToken(unsigned int& orgReader)
{
    //  Get the next character and use it to guesstimate what the next token
    //  is going to be. We turn on end of entity exceptions when we do this
    //  in order to catch the scenario where the current entity ended at
    //  the > of some markup. The janitor lives in its own scope so that the
    //  setting applies to this one peek only.
    XMLCh nextCh;
    {
        ThrowEOEJanitor janMgr(&fReaderMgr, true);
        nextCh = fReaderMgr.peekNextChar();
    }

    //  Check for special chars. Start with the most
    //  obvious end of file, which should be legal here at top level.
    if (!nextCh)
        return Token_EOF;

    //  If it's not a '<' we must be in content.
    //
    //  This includes entity references '&' of some sort. These must
    //  be character data because that's the only place a reference can
    //  occur in content.
    if (nextCh != chOpenAngle)
        return Token_CharData;

    //  Ok it had to have been a '<' character. So get it out of the reader
    //  and store the reader number where we saw it, passing it back to the
    //  caller. (orgReader is written only on this path.)
    fReaderMgr.getNextChar();
    orgReader = fReaderMgr.getCurrentReaderNum();

    //  Ok, so lets go through the things that it could be at this point which
    //  are all some form of markup.
    nextCh = fReaderMgr.peekNextChar();

    if (nextCh == chForwardSlash)
    {
        // "</": consume the slash as well
        fReaderMgr.getNextChar();
        return Token_EndTag;
    }
    else if (nextCh == chBang)
    {
        // "![CDATA" -- the text that follows '<' in a CDATA section opener
        static const XMLCh gCDATAStr[] =
        {
                chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A
            ,   chLatin_T, chLatin_A, chNull
        };

        if (fReaderMgr.skippedString(gCDATAStr))
            return Token_CData;

        if (fReaderMgr.skippedString(gCommentString))
            return Token_Comment;

        // '<!' followed by neither: report it and let the caller decide
        emitError(XMLErrs::ExpectedCommentOrCDATA);
        return Token_Unknown;
    }
    else if (nextCh == chQuestion)
    {
        // It must be a PI
        fReaderMgr.getNextChar();
        return Token_PI;
    }

    //  Assume its an element name, so return with a start tag token. If it
    //  turns out not to be, then it will fail when it cannot get a valid tag.
    return Token_StartTag;
}
// ---------------------------------------------------------------------------
// XMLScanner: Private parsing methods
// ---------------------------------------------------------------------------
// This guy just scans out a single or double quoted string of characters.
// It does not pass any judgement on the contents and assumes that it is
// illegal to have another quote of the same kind inside the string's
// contents.
//
// NOTE: This is for simple stuff like the strings in the XMLDecl which
// cannot have any entities inside them. So this guy does not handle any
// end of entity stuff.
bool XMLScanner::getQuotedString(XMLBuffer& toFill)
{
    // Reads a quoted literal into toFill (quotes excluded). Returns false
    // if the input does not start with a quote or ends before the matching
    // closing quote; toFill then holds whatever was read so far.

    // Reset the target buffer
    toFill.reset();

    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr.skipIfQuote(quoteCh))
        return false;

    while (true)
    {
        // Get another char
        const XMLCh nextCh = fReaderMgr.getNextChar();

        // See if it matches the starting quote char
        if (nextCh == quoteCh)
            break;

        //  We should never get an end of file null char here. If we
        //  do, just fail. It will be handled more gracefully in the higher
        //  level code that called us.
        if (!nextCh)
            return false;

        // Else add it to the buffer
        toFill.append(nextCh);
    }
    return true;
}
// This method scans a character reference and returns the character that
// was referred to. It assumes that we've already scanned the &# characters
// that prefix the numeric code.
bool XMLScanner::scanCharRef(XMLCh& toFill, XMLCh& second)
{
    // Parses the digits of a numeric character reference up to and
    // including the terminating ';'.
    //
    // On success returns true with the character in toFill; for code points
    // of 0x10000 and above toFill/second receive the leading/trailing
    // surrogate, otherwise second is set to 0.
    // Returns false (after emitting an error) when a non-digit is met
    // before the ';' or when the value is not a legal XML character.
    // Raises UnexpectedEOFException if the input ends inside the reference.
    bool gotOne = false;
    unsigned int value = 0;

    //  Set the radix. Its supposed to be a lower case x if hex. But, in
    //  order to recover well, we check for an upper and put out an error
    //  for that.
    unsigned int radix = 10;
    if (fReaderMgr.skippedChar(chLatin_x))
    {
        radix = 16;
    }
    else if (fReaderMgr.skippedChar(chLatin_X))
    {
        emitError(XMLErrs::HexRadixMustBeLowerCase);
        radix = 16;
    }

    while (true)
    {
        const XMLCh nextCh = fReaderMgr.peekNextChar();

        // Watch for EOF
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

        // Break out on the terminating semicolon
        if (nextCh == chSemiColon)
        {
            fReaderMgr.getNextChar();
            break;
        }

        //  Convert this char to a binary value, or bail out if its not
        //  one.
        unsigned int nextVal;
        if ((nextCh >= chDigit_0) && (nextCh <= chDigit_9))
            nextVal = (unsigned int)(nextCh - chDigit_0);
        else if ((nextCh >= chLatin_A) && (nextCh <= chLatin_F))
            nextVal= (unsigned int)(10 + (nextCh - chLatin_A));
        else if ((nextCh >= chLatin_a) && (nextCh <= chLatin_f))
            nextVal = (unsigned int)(10 + (nextCh - chLatin_a));
        else
        {
            //  If we got at least a digit, then do an unterminated ref error.
            //  Else, do an expected a numerical ref thing.
            if (gotOne)
                emitError(XMLErrs::UnterminatedCharRef);
            else
                emitError(XMLErrs::ExpectedNumericalCharRef);

            // Return failure
            return false;
        }

        //  Make sure its valid for the radix. If not, then just eat the
        //  digit and go on after issuing an error. Else, update the
        //  running value with this new digit.
        if (nextVal >= radix)
        {
            XMLCh tmpStr[2];
            tmpStr[0] = nextCh;
            tmpStr[1] = chNull;
            emitError(XMLErrs::BadDigitForRadix, tmpStr);
        }
        else
        {
            value = (value * radix) + nextVal;

            //  Saturate just above the largest legal code point. Without
            //  this an over-long digit string wraps around in unsigned
            //  arithmetic and can land back on a valid character; pinned
            //  here it still reaches the range check below and is rejected
            //  there, after the whole reference has been consumed.
            if (value > 0x10FFFF)
                value = 0x110000;
        }

        // Indicate that we got at least one good digit
        gotOne = true;

        // And eat the last char
        fReaderMgr.getNextChar();
    }

    // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF]        // any Unicode character, excluding the
    //              | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // surrogate blocks, FFFE, and FFFF.
    bool validChar = false;
    if (value < 0x20)
    {
        if (value == 0x09 || value == 0x0A || value == 0x0D)
        {
            validChar = true;
        }
    }
    else if (value <= 0xD7FF || (value >= 0xE000 && (value <= 0xFFFD || (value >= 0x10000 && value <= 0x10FFFF))))
    {
        validChar = true;
    }

    if (!validChar)
    {
        // Character reference was not in the valid range
        emitError(XMLErrs::InvalidCharacterRef);
        return false;
    }

    // Return the char (or chars)
    if (value >= 0x10000)
    {
        // Split into a UTF-16 surrogate pair
        value -= 0x10000;
        toFill = XMLCh((value >> 10) + 0xD800);
        second = XMLCh((value & 0x3FF) + 0xDC00);
    }
    else
    {
        toFill = XMLCh(value);
        second = 0;
    }
    return true;
}
// We get here after the '<!--' part of the comment. We scan past the
// terminating '-->'. It calls the appropriate handler with the comment
// text, if one is provided. A comment can be in either the document or
// the DTD, so the fInDocument flag is used to know which handler to send
// it to.
void XMLScanner::scanComment()
{
// Reads characters one at a time, tracking runs of dashes with a small
// state machine, and accumulates the comment text in bbComment.
//
// NOTE(review): this copy of the function is visibly damaged. The condition
// guarding the InvalidCharacter report is missing (as written, the report
// would fire for every non-leading-surrogate character), a run of bare
// numbers sits in the middle of the loop body, and the closing braces of the
// handler call and of the function itself are absent. Restore from a
// pristine copy before building.
enum States
{
InText
, OneDash
, TwoDashes
};
// Get a buffer for this
XMLBufBid bbComment(&fBufMgr);
// Accumulate the comment text in bbComment, starting in the InText state.
States curState = InText;
bool gotLeadingSurrogate = false;
while (true)
{
// Get the next character
const XMLCh nextCh = fReaderMgr.getNextChar();
// Watch for an end of file
if (!nextCh)
{
emitError(XMLErrs::UnterminatedComment);
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
}
// Check for correct surrogate pairs: 0xD800-0xDBFF is a leading
// surrogate and must be followed by a trailing one (0xDC00-0xDFFF).
if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
if (gotLeadingSurrogate)
{
if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
emitError(XMLErrs::Expected2ndSurrogateChar);
}
// It's got to at least be a valid XML character; the offending
// character is reported as hexadecimal text.
// NOTE(review): the validity test that should enclose the lines
// below is missing in this copy.
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
gotLeadingSurrogate = false;
}
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
if (curState == InText)
{
// If it's a dash, go to OneDash state. Otherwise take as text
if (nextCh == chDash)
curState = OneDash;
else
bbComment.append(nextCh);
}
else if (curState == OneDash)
{
// If it's another dash, then we change to the two dashes state.
// Otherwise, we have to put in the deficit dash and the new
// character and go back to InText.
if (nextCh == chDash)
{
curState = TwoDashes;
}
else
{
bbComment.append(chDash);
bbComment.append(nextCh);
curState = InText;
}
}
else if (curState == TwoDashes)
{
// The next character must be the closing bracket; if it is not,
// report it, skip past the next '>' and abandon the comment
// without calling the handler.
if (nextCh != chCloseAngle)
{
emitError(XMLErrs::IllegalSequenceInComment);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
break;
}
}
// If we have an available handler, call back with the comment.
if (fDocHandler)
{
fDocHandler->docComment
(
bbComment.getRawBuffer()
);
// Most equal signs can have white space around them, so this little guy
// just makes the calling code cleaner by eating whitespace.
bool XMLScanner::scanEq()
{
    // Skips optional whitespace, an '=' sign, and optional whitespace after
    // it. Returns true if the '=' was found and consumed; on false, any
    // leading whitespace has still been consumed.
    fReaderMgr.skipPastSpaces();
    if (fReaderMgr.skippedChar(chEqual))
    {
        fReaderMgr.skipPastSpaces();
        return true;
    }
    return false;
}
unsigned int
XMLScanner::scanUpToWSOr(XMLBuffer& toFill, const XMLCh chEndChar)
fReaderMgr.getUpToCharOrWS(toFill, chEndChar);
return toFill.getLen();