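// NOTE(review): two lines of web blame-view navigation text ("Newer",
// "Older") stood here; they are not part of the source.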
// Scans a quoted string from the reader manager into toFill.
//
// NOTE: This is for simple stuff like the strings in the XMLDecl which
// cannot have any entities inside them. So this guy does not handle any
// end of entity stuff.
//
//  toFill  Reset on entry, then receives every character found between the
//          opening quote and the matching closing quote. The quotes
//          themselves are not stored.
//
// Returns false if fReaderMgr.skipIfQuote() reports no opening quote, or if
// a null character is read before the closing quote (toFill then holds
// whatever was collected so far). Returns true once the closing quote has
// been consumed.
bool XMLScanner::getQuotedString(XMLBuffer& toFill)
{
    // Reset the target buffer
    toFill.reset();

    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr.skipIfQuote(quoteCh))
        return false;

    while (true)
    {
        // Get another char
        const XMLCh nextCh = fReaderMgr.getNextChar();

        // See if it matches the starting quote char
        if (nextCh == quoteCh)
            break;

        // We should never get an end of file (null) char here. If we do,
        // just fail. It will be handled more gracefully in the higher
        // level code that called us.
        if (!nextCh)
            return false;

        // Else add it to the buffer
        toFill.append(nextCh);
    }
    return true;
}
// This method scans a character reference and returns the character that
// was referred to. It assumes that we've already scanned the &# characters
// that prefix the numeric code.
//
//  toFill  Receives the referenced character. For values in the range
//          0x10000-0x10FFFF it receives the leading surrogate.
//  second  Receives the trailing surrogate for values in the range
//          0x10000-0x10FFFF, otherwise zero.
//
// Returns true when a terminated reference with an acceptable value was
// scanned. Returns false after emitting an error when a non-digit shows up
// before the semicolon or when the value is rejected; the outputs should
// not be relied on in that case. A null character from the reader raises
// UnexpectedEOFException through ThrowXMLwithMemMgr.
//
// NOTE(review): this copy of the definition had lost a few structural lines
// (the brace closing the upper-case X branch, the `else {` in front of the
// non-digit error path, the brace closing the accumulate branch and the
// brace closing the final else). They are restored below from the visible
// control flow; confirm against the upstream file.
bool XMLScanner::scanCharRef(XMLCh& toFill, XMLCh& second)
{
    bool gotOne = false;
    unsigned int value = 0;

    // Set the radix. Its supposed to be a lower case x if hex. But, in
    // order to recover well, we check for an upper and put out an error
    // for that.
    unsigned int radix = 10;
    if (fReaderMgr.skippedChar(chLatin_x))
    {
        radix = 16;
    }
    else if (fReaderMgr.skippedChar(chLatin_X))
    {
        emitError(XMLErrs::HexRadixMustBeLowerCase);
        radix = 16;
    }

    while (true)
    {
        const XMLCh nextCh = fReaderMgr.peekNextChar();

        // Watch for EOF
        if (!nextCh)
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

        // Break out on the terminating semicolon
        if (nextCh == chSemiColon)
        {
            fReaderMgr.getNextChar();
            break;
        }

        // Convert this char to a binary value, or bail out if its not
        // one.
        unsigned int nextVal;
        if ((nextCh >= chDigit_0) && (nextCh <= chDigit_9))
            nextVal = (unsigned int)(nextCh - chDigit_0);
        else if ((nextCh >= chLatin_A) && (nextCh <= chLatin_F))
            nextVal = (unsigned int)(10 + (nextCh - chLatin_A));
        else if ((nextCh >= chLatin_a) && (nextCh <= chLatin_f))
            nextVal = (unsigned int)(10 + (nextCh - chLatin_a));
        else
        {
            // If we got at least a digit, then do an unterminated ref error.
            // Else, do an expected a numerical ref thing.
            if (gotOne)
                emitError(XMLErrs::UnterminatedCharRef);
            else
                emitError(XMLErrs::ExpectedNumericalCharRef);

            // Return failure
            return false;
        }

        // Make sure its valid for the radix. If not, then just eat the
        // digit and go on after issuing an error. Else, update the
        // running value with this new digit.
        if (nextVal >= radix)
        {
            XMLCh tmpStr[2];
            tmpStr[0] = nextCh;
            tmpStr[1] = chNull;
            emitError(XMLErrs::BadDigitForRadix, tmpStr);
        }
        else
        {
            value = (value * radix) + nextVal;

            // Guard against overflow: the value is checked after every
            // digit, so it is at most 0x10FFFF before the next multiply.
            if (value > 0x10FFFF) {
                // Character reference was not in the valid range
                emitError(XMLErrs::InvalidCharacterRef);
                return false;
            }
        }

        // Indicate that we got at least one good digit
        gotOne = true;

        // And eat the last char. This has to happen for a bad digit too,
        // otherwise the same character would be peeked forever.
        fReaderMgr.getNextChar();
    }

    // Return the char (or chars)
    // And check if the character expanded is valid or not
    if (value >= 0x10000 && value <= 0x10FFFF)
    {
        // Split into a UTF-16 surrogate pair
        value -= 0x10000;
        toFill = XMLCh((value >> 10) + 0xD800);
        second = XMLCh((value & 0x3FF) + 0xDC00);
    }
    else if (value <= 0xFFFD)
    {
        toFill = XMLCh(value);
        second = 0;
        if (!fReaderMgr.getCurrentReader()->isXMLChar(toFill) && !fReaderMgr.getCurrentReader()->isControlChar(toFill)) {
            // Character reference was not in the valid range
            emitError(XMLErrs::InvalidCharacterRef);
            return false;
        }
    }
    else {
        // Character reference was not in the valid range
        emitError(XMLErrs::InvalidCharacterRef);
        return false;
    }

    return true;
}
// We get here after the '<!--' part of the comment. We scan past the
// terminating '-->' It will calls the appropriate handler with the comment
// text, if one is provided. A comment can be in either the document or
// the DTD, so the fInDocument flag is used to know which handler to send
// it to.
void XMLScanner::scanComment()
{
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
enum States
{
InText
, OneDash
, TwoDashes
};
// Get a buffer for this
XMLBufBid bbComment(&fBufMgr);
// Get the comment text into a temp buffer. Be sure to use temp buffer
// two here, since its to be used for stuff that is potentially longer
// than just a name.
States curState = InText;
bool gotLeadingSurrogate = false;
while (true)
{
// Get the next character
const XMLCh nextCh = fReaderMgr.getNextChar();
// Watch for an end of file
if (!nextCh)
{
emitError(XMLErrs::UnterminatedComment);
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
}
// Check for correct surrogate pairs
if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
if (gotLeadingSurrogate)
{
if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
emitError(XMLErrs::Expected2ndSurrogateChar);
}
// Its got to at least be a valid XML character
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
gotLeadingSurrogate = false;
}
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
if (curState == InText)
{
// If its a dash, go to OneDash state. Otherwise take as text
if (nextCh == chDash)
curState = OneDash;
else
bbComment.append(nextCh);
}
else if (curState == OneDash)
{
// If its another dash, then we change to the two dashes states.
// Otherwise, we have to put in the deficit dash and the new
// character and go back to InText.
if (nextCh == chDash)
{
curState = TwoDashes;
}
else
{
bbComment.append(chDash);
bbComment.append(nextCh);
curState = InText;
}
}
else if (curState == TwoDashes)
{
// The next character must be the closing bracket
if (nextCh != chCloseAngle)
{
emitError(XMLErrs::IllegalSequenceInComment);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
break;
}
}
// If we have an available handler, call back with the comment.
if (fDocHandler)
{
fDocHandler->docComment
(
bbComment.getRawBuffer()
);
//mark comment is seen within the current element
if (! fElemStack.isEmpty())
fElemStack.setCommentOrPISeen();
// NOTE(review): the comment that stood here read "Most equal signs can have
// white space around them, so this little guy just makes the calling code
// cleaner by eating whitespace." That describes an equal-sign helper, not
// the function below; its definition is not present at this point in this
// copy of the file - confirm whether it was lost.
//
// Fills toFill through fReaderMgr.getUpToCharOrWS(toFill, chEndChar) and
// returns the resulting length of toFill.
unsigned int
XMLScanner::scanUpToWSOr(XMLBuffer& toFill, const XMLCh chEndChar)
{
    fReaderMgr.getUpToCharOrWS(toFill, chEndChar);
    return toFill.getLen();
}
// NOTE(review): a run of bare line numbers (2296-2354) from a web blame view
// stood here in place of source text; whatever occupied those lines is not
// present in this copy of the file.
// Hands out the next unsigned int slot from the pool.
//
// The pool is a table of rows (fUIntPool), each row holding 64 unsigned
// ints. fUIntPoolRow/fUIntPoolCol identify the next free slot and
// fUIntPoolRowTotal is the capacity of the row table. Rows are zero-filled
// when they are created here.
unsigned int *XMLScanner::getNewUIntPtr()
{
    // Common case: the current row still has a free slot
    if (fUIntPoolCol < 64)
    {
        unsigned int* const slot = &fUIntPool[fUIntPoolRow][fUIntPoolCol];
        ++fUIntPoolCol;
        return slot;
    }

    // The current row is used up. If it also occupies the last entry of
    // the row table, double the table before adding another row.
    if (fUIntPoolRow + 1 == fUIntPoolRowTotal)
    {
        fUIntPoolRowTotal <<= 1;
        unsigned int** const grownTable = (unsigned int**)fMemoryManager->allocate(fUIntPoolRowTotal * sizeof(unsigned int*));

        // Carry over the row pointers in use, then swap the tables
        memcpy(grownTable, fUIntPool, sizeof(unsigned int*) * (fUIntPoolRow + 1));
        fMemoryManager->deallocate(fUIntPool);
        fUIntPool = grownTable;

        // Null the entries beyond the row that is about to be created
        unsigned int spare = fUIntPoolRow + 2;
        while (spare < fUIntPoolRowTotal)
        {
            fUIntPool[spare] = 0;
            ++spare;
        }
    }

    // Create the next row, zero it, and hand out its first slot
    ++fUIntPoolRow;
    unsigned int* const freshRow = (unsigned int*)fMemoryManager->allocate(sizeof(unsigned int) << 6);
    fUIntPool[fUIntPoolRow] = freshRow;
    memset(freshRow, 0, sizeof(unsigned int) << 6);

    // Slot 0 is being returned, so the next free column is 1
    fUIntPoolCol = 1;
    return freshRow;
}
// Zero-fills every row of the unsigned int pool from row 0 through
// fUIntPoolRow so that the pool - and the hashtables that use it - can be
// reused. The rows stay allocated and the row/column cursors are not
// touched here.
void XMLScanner::resetUIntPool()
{
    unsigned int** const lastRow = fUIntPool + fUIntPoolRow;
    for (unsigned int** rowPtr = fUIntPool; rowPtr <= lastRow; ++rowPtr)
    {
        // Each row holds 64 unsigned ints
        memset(*rowPtr, 0, sizeof(unsigned int) << 6);
    }
}
// Throws away a bloated unsigned int pool and builds a minimal one in its
// place: a two-entry row table whose first entry is a zero-filled row of 64
// unsigned ints and whose second entry is null.
void XMLScanner::recreateUIntPool()
{
    // Release every row in use (0 through fUIntPoolRow), then the table
    unsigned int rowIdx = 0;
    while (rowIdx <= fUIntPoolRow)
    {
        fMemoryManager->deallocate(fUIntPool[rowIdx]);
        ++rowIdx;
    }
    fMemoryManager->deallocate(fUIntPool);

    // Rewind the cursors and shrink the table back to two entries
    fUIntPoolCol = 0;
    fUIntPoolRow = 0;
    fUIntPoolRowTotal = 2;
    fUIntPool = (unsigned int**)fMemoryManager->allocate(fUIntPoolRowTotal * sizeof(unsigned int*));

    // Row 0 is allocated and zeroed up front; row 1 is created on demand
    fUIntPool[0] = (unsigned int*)fMemoryManager->allocate(sizeof(unsigned int) << 6);
    memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6);
    fUIntPool[1] = 0;
}