Newer
Older
// It must be a PI
fReaderMgr.getNextChar();
return Token_PI;
// Assume its an element name, so return with a start tag token. If it
// turns out not to be, then it will fail when it cannot get a valid tag.
return Token_StartTag;
// ---------------------------------------------------------------------------
// XMLScanner: Private parsing methods
// ---------------------------------------------------------------------------
// This guy just scans out a single or double quoted string of characters.
// It does not pass any judgement on the contents and assumes that it is
// illegal to have another quote of the same kind inside the string's
// contents.
//
// NOTE: This is for simple stuff like the strings in the XMLDecl which
// cannot have any entities inside them. So this guy does not handle any
// end of entity stuff.
bool XMLScanner::getQuotedString(XMLBuffer& toFill)
// Reset the target buffer
toFill.reset();
// Get the next char which must be a single or double quote
XMLCh quoteCh;
if (!fReaderMgr.skipIfQuote(quoteCh))
return false;
XMLCh nextCh;
// Get another char and see if it matches the starting quote char
while ((nextCh=fReaderMgr.getNextChar())!=quoteCh)
{
// We should never get either an end of file null char here. If we
// do, just fail. It will be handled more gracefully in the higher
// level code that called us.
if (!nextCh)
return false;
// Else add it to the buffer
toFill.append(nextCh);
}
return true;
}
// This method scans a character reference and returns the character that
// was refered to. It assumes that we've already scanned the &# characters
// that prefix the numeric code.
bool XMLScanner::scanCharRef(XMLCh& toFill, XMLCh& second)
{
bool gotOne = false;
unsigned int value = 0;
// Set the radix. Its supposed to be a lower case x if hex. But, in
// order to recover well, we check for an upper and put out an error
// for that.
unsigned int radix = 10;
if (fReaderMgr.skippedChar(chLatin_x))
{
radix = 16;
}
else if (fReaderMgr.skippedChar(chLatin_X))
{
emitError(XMLErrs::HexRadixMustBeLowerCase);
radix = 16;
while (true)
{
const XMLCh nextCh = fReaderMgr.peekNextChar();
// Watch for EOF
if (!nextCh)
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
// Break out on the terminating semicolon
if (nextCh == chSemiColon)
{
fReaderMgr.getNextChar();
break;
}
// Convert this char to a binary value, or bail out if its not
// one.
unsigned int nextVal;
if ((nextCh >= chDigit_0) && (nextCh <= chDigit_9))
nextVal = (unsigned int)(nextCh - chDigit_0);
else if ((nextCh >= chLatin_A) && (nextCh <= chLatin_F))
nextVal= (unsigned int)(10 + (nextCh - chLatin_A));
else if ((nextCh >= chLatin_a) && (nextCh <= chLatin_f))
nextVal = (unsigned int)(10 + (nextCh - chLatin_a));
// If we got at least a sigit, then do an unterminated ref error.
// Else, do an expected a numerical ref thing.
if (gotOne)
emitError(XMLErrs::UnterminatedCharRef);
else
emitError(XMLErrs::ExpectedNumericalCharRef);
// Return failure
return false;
}
// Make sure its valid for the radix. If not, then just eat the
// digit and go on after issueing an error. Else, update the
// running value with this new digit.
if (nextVal >= radix)
{
XMLCh tmpStr[2];
tmpStr[0] = nextCh;
tmpStr[1] = chNull;
emitError(XMLErrs::BadDigitForRadix, tmpStr);
}
else
{
value = (value * radix) + nextVal;
// Guard against overflow.
if (value > 0x10FFFF) {
// Character reference was not in the valid range
emitError(XMLErrs::InvalidCharacterRef);
return false;
}
// Indicate that we got at least one good digit
gotOne = true;
// And eat the last char
fReaderMgr.getNextChar();
}
// Return the char (or chars)
// And check if the character expanded is valid or not
if (value >= 0x10000 && value <= 0x10FFFF)
{
value -= 0x10000;
toFill = XMLCh((value >> 10) + 0xD800);
second = XMLCh((value & 0x3FF) + 0xDC00);
}
else if (value <= 0xFFFD)
{
toFill = XMLCh(value);
second = 0;
if (!fReaderMgr.getCurrentReader()->isXMLChar(toFill) && !fReaderMgr.getCurrentReader()->isControlChar(toFill)) {
// Character reference was not in the valid range
emitError(XMLErrs::InvalidCharacterRef);
return false;
}
}
else {
// Character reference was not in the valid range
emitError(XMLErrs::InvalidCharacterRef);
return false;
return true;
}
// We get here after the '<!--' part of the comment. We scan past the
// terminating '-->' It will calls the appropriate handler with the comment
// text, if one is provided. A comment can be in either the document or
// the DTD, so the fInDocument flag is used to know which handler to send
// it to.
void XMLScanner::scanComment()
{
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
enum States
{
InText
, OneDash
, TwoDashes
};
// Get a buffer for this
XMLBufBid bbComment(&fBufMgr);
// Get the comment text into a temp buffer. Be sure to use temp buffer
// two here, since its to be used for stuff that is potentially longer
// than just a name.
States curState = InText;
bool gotLeadingSurrogate = false;
while (true)
{
// Get the next character
const XMLCh nextCh = fReaderMgr.getNextChar();
// Watch for an end of file
if (!nextCh)
{
emitError(XMLErrs::UnterminatedComment);
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
}
// Check for correct surrogate pairs
if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
if (gotLeadingSurrogate)
{
if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
emitError(XMLErrs::Expected2ndSurrogateChar);
}
// Its got to at least be a valid XML character
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
gotLeadingSurrogate = false;
}
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
if (curState == InText)
{
// If its a dash, go to OneDash state. Otherwise take as text
if (nextCh == chDash)
curState = OneDash;
else
bbComment.append(nextCh);
}
else if (curState == OneDash)
{
// If its another dash, then we change to the two dashes states.
// Otherwise, we have to put in the deficit dash and the new
// character and go back to InText.
if (nextCh == chDash)
{
curState = TwoDashes;
}
else
{
bbComment.append(chDash);
bbComment.append(nextCh);
curState = InText;
}
}
else if (curState == TwoDashes)
{
// The next character must be the closing bracket
if (nextCh != chCloseAngle)
{
emitError(XMLErrs::IllegalSequenceInComment);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
break;
}
}
// If we have an available handler, call back with the comment.
if (fDocHandler)
{
fDocHandler->docComment
(
bbComment.getRawBuffer()
);
//mark comment is seen within the current element
if (! fElemStack.isEmpty())
fElemStack.setCommentOrPISeen();
// Most equal signs can have white space around them, so this little guy
// just makes the calling code cleaner by eating whitespace.
unsigned int
XMLScanner::scanUpToWSOr(XMLBuffer& toFill, const XMLCh chEndChar)
fReaderMgr.getUpToCharOrWS(toFill, chEndChar);
return toFill.getLen();
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
unsigned int *XMLScanner::getNewUIntPtr()
{
// this method hands back a new pointer initialized to 0
unsigned int *retVal;
if(fUIntPoolCol < 64)
{
retVal = fUIntPool[fUIntPoolRow]+fUIntPoolCol;
fUIntPoolCol++;
return retVal;
}
// time to grow the pool...
if(fUIntPoolRow+1 == fUIntPoolRowTotal)
{
// and time to add some space for new rows:
fUIntPoolRowTotal <<= 1;
unsigned int **newArray = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal );
memcpy(newArray, fUIntPool, (fUIntPoolRow+1) * sizeof(unsigned int *));
fMemoryManager->deallocate(fUIntPool);
fUIntPool = newArray;
// need to 0 out new elements we won't need:
for (unsigned int i=fUIntPoolRow+2; i<fUIntPoolRowTotal; i++)
fUIntPool[i] = 0;
}
// now to add a new row; we just made sure we have space
fUIntPoolRow++;
fUIntPool[fUIntPoolRow] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6);
// point to next element
fUIntPoolCol = 1;
return fUIntPool[fUIntPoolRow];
}
void XMLScanner::resetUIntPool()
{
// to reuse the unsigned int pool--and the hashtables that use it--
// simply reinitialize everything to 0's
for(unsigned int i = 0; i<= fUIntPoolRow; i++)
memset(fUIntPool[i], 0, sizeof(unsigned int) << 6);
}
void XMLScanner::recreateUIntPool()
{
// this allows a bloated unsigned int pool to be dispensed with
// first, delete old fUIntPool
for (unsigned int i=0; i<=fUIntPoolRow; i++)
{
fMemoryManager->deallocate(fUIntPool[i]);
}
fMemoryManager->deallocate(fUIntPool);
fUIntPoolRow = fUIntPoolCol = 0;
fUIntPoolRowTotal = 2;
fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal);
fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6);
fUIntPool[1] = 0;
}