Newer
Older
{
handleEOL(curCh, inDecl);
}
}
else
{
skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
return true;
}
}
// We've eaten up the current buffer, so lets try to reload it. If
// we don't get anything new, then break out. If we do, then we go
// back to the top to keep getting spaces.
if (!refreshCharBuffer())
break;
}
// We never hit any non-space and ate up the whole reader
skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
return false;
}
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
bool XMLReader::skippedChar(const XMLCh toSkip)
{
    //
    //  Consumes the next character only when it equals 'toSkip'.
    //
    //  Returns true (and advances both the buffer index and the current
    //  column) on a match; returns false, consuming nothing, when the next
    //  character differs or when no further characters can be loaded.
    //

    // A drained buffer gets one reload attempt before we give up
    const bool bufferDrained = (fCharIndex == fCharsAvail);
    if (bufferDrained && !refreshCharBuffer())
        return false;

    // Anything other than the requested character is left in place
    if (fCharBuf[fCharIndex] != toSkip)
        return false;

    // Eat the character and account for it in the column count
    ++fCharIndex;
    ++fCurCol;
    return true;
}
bool XMLReader::skippedSpace()
{
//
// If the buffer is empty, then try to reload it. If we still get
// nothing, then return false.
//
if (fCharIndex == fCharsAvail)
{
if (!refreshCharBuffer())
return false;
}
//
// See if the current char is a whitespace. If so, then we need to eat
// it and return true.
//
const XMLCh curCh = fCharBuf[fCharIndex];
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
//
// 'curCh' is a whitespace(x20|x9|xD|xA), so we only can have
// end-of-line combinations with a leading chCR(xD) or chLF(xA)
//
// 100000 x20
// 001001 x9
// 001010 chLF
// 001101 chCR
// -----------
// 000110 == (chCR|chLF) & ~(0x9|0x20)
//
// if the result of the logical-& operation is
// true : 'curCh' must be xA or xD
// false : 'curCh' must be x20 or x9
//
if ( ( curCh & (chCR|chLF) & ~(0x9|0x20) ) == 0 )
{
fCurCol++;
} else
{
handleEOL((XMLCh&)curCh, false);
}
return true;
}
return false;
}
//
//  Tries to consume the exact character sequence 'toSkip' from the reader.
//
//  Returns true when the upcoming characters equal 'toSkip'; in that case
//  the buffer index and the current column are both advanced by the length
//  of the string. Returns false when the characters differ or when the
//  reader cannot supply enough characters.
//
//  NOTE(review): in the long-string path (string longer than fCharsAvail)
//  the index is advanced chunk by chunk, so a mismatch found in a later
//  chunk returns false with the earlier chunks already consumed.
//
bool XMLReader::skippedString(const XMLCh* const toSkip)
{
    // Get the length of the string to skip
    const unsigned int srcLen = XMLString::stringLen(toSkip);
    unsigned int charsLeft = charsLeftInBuffer();

    if (srcLen <= fCharsAvail) {
        //
        //  See if the current reader has enough chars to test against this
        //  string. If not, then ask it to reload its buffer. If that does not
        //  get us enough, then it cannot match.
        //
        //  NOTE: This works because strings never have to cross a reader! And
        //  a string to skip will never have a new line in it, so we will never
        //  miss adjusting the current line.
        //
        while (charsLeft < srcLen)
        {
            refreshCharBuffer();
            unsigned int t = charsLeftInBuffer();
            if (t == charsLeft)   // if the refreshCharBuf() did not add anything new
                return false;     // give up and return.
            charsLeft = t;
        }

        //
        //  The buffer now holds at least srcLen unread chars, so compare
        //  straight against it. Nothing has been consumed yet, so a
        //  mismatch needs no unget.
        //
        if (memcmp(&fCharBuf[fCharIndex], toSkip, srcLen*sizeof(XMLCh)))
            return false;

        //
        //  And get the character buffer index back right by just adding the
        //  source len to it.
        //
        fCharIndex += srcLen;
    }
    else {
        // The string is longer than what the buffer currently holds, so it
        // has to be matched piecewise across buffer reloads.
        if (charsLeft == 0) {
            refreshCharBuffer();
            charsLeft = charsLeftInBuffer();
            if (charsLeft == 0)
                return false; // error situation
        }

        // First piece: everything that is still unread in the buffer
        if (memcmp(&fCharBuf[fCharIndex], toSkip, charsLeft*sizeof(XMLCh)))
            return false;
        fCharIndex += charsLeft;

        unsigned int offset = charsLeft;
        unsigned int remainingLen = srcLen - charsLeft;

        // Remaining pieces: reload, then compare as much as is available,
        // capped at what is still left of the string
        while (remainingLen > 0) {
            refreshCharBuffer();
            charsLeft = charsLeftInBuffer();
            if (charsLeft == 0)
                return false; // error situation
            if (charsLeft > remainingLen)
                charsLeft = remainingLen;
            if (memcmp(&fCharBuf[fCharIndex], toSkip+offset, charsLeft*sizeof(XMLCh)))
                return false;
            offset += charsLeft;
            remainingLen -= charsLeft;
            fCharIndex += charsLeft;
        }
    }

    // Add the source length to the current column to get it back right.
    // This applies to both paths above, each of which only moved the index.
    fCurCol += srcLen;

    return true;
}
//
//  Checks whether the upcoming characters in the buffer match the string
//  toPeek. This works like skippedString, except that fCharIndex and
//  fCurCol are left unchanged, so nothing is consumed.
//
bool XMLReader::peekString(const XMLCh* const toPeek)
{
    // Length of the string we are asked to look for
    const unsigned int srcLen = XMLString::stringLen(toPeek);

    //
    //  Make sure enough unread chars are buffered to cover the whole
    //  string, reloading as often as that keeps producing more. A reload
    //  that leaves the unread count unchanged means no more is coming, so
    //  the string cannot match.
    //
    //  NOTE: This works because strings never have to cross a reader! And
    //  a string to peek will never have a new line in it, so the current
    //  line never needs adjusting.
    //
    for (unsigned int unread = charsLeftInBuffer(); unread < srcLen; )
    {
        refreshCharBuffer();

        const unsigned int afterReload = charsLeftInBuffer();
        if (afterReload == unread)
            return false;

        unread = afterReload;
    }

    //
    //  Enough chars are available, so compare directly against the buffer.
    //  Neither the buffer index nor the column is touched, whatever the
    //  outcome.
    //
    const bool matches =
        (memcmp(&fCharBuf[fCharIndex], toPeek, srcLen * sizeof(XMLCh)) == 0);
    return matches;
}
// ---------------------------------------------------------------------------
// XMLReader: Setter methods (most are inlined)
// ---------------------------------------------------------------------------
bool XMLReader::setEncoding(const XMLCh* const newEncoding)
{
//
// If the encoding was forced, then we ignore the new value and just
// return with success. If it was forced, then we are to use that
// encoding without question. Note that, if we are forced, we created
// a transcoder up front so there is no need to do one here in that
// case.
//
if (fForcedEncoding)
return true;
//
// upperCase the newEncoding first for better performance
//
XMLCh* inputEncoding = XMLString::replicate(newEncoding, fMemoryManager);
David Abram Cargill
committed
XMLString::upperCaseASCII(inputEncoding);
XMLRecognizer::Encodings newBaseEncoding;
// Check for non-endian specific UTF-16 or UCS-4. If so, and if we
// are already in one of the endian versions of those encodings,
// then just keep it and go on. Otherwise, its not valid.
if (!XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString2)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString3)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString4)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString5)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString6)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString7))
fMemoryManager->deallocate(inputEncoding);
if ((fEncoding != XMLRecognizer::UTF_16L)
&& (fEncoding != XMLRecognizer::UTF_16B))
{
return false;
}
// Override with the original endian specific encoding
newBaseEncoding = fEncoding;
if (fEncoding == XMLRecognizer::UTF_16L) {
fMemoryManager->deallocate(fEncodingStr);
David Abram Cargill
committed
fEncodingStr = 0;
fEncodingStr = XMLString::replicate(XMLUni::fgUTF16LEncodingString, fMemoryManager);
else {
fMemoryManager->deallocate(fEncodingStr);
David Abram Cargill
committed
fEncodingStr = 0;
fEncodingStr = XMLString::replicate(XMLUni::fgUTF16BEncodingString, fMemoryManager);
}
}
else if (!XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString2)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString3)
|| !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString4))
{
fMemoryManager->deallocate(inputEncoding);
if ((fEncoding != XMLRecognizer::UCS_4L)
&& (fEncoding != XMLRecognizer::UCS_4B))
{
return false;
}
// Override with the original endian specific encoding
newBaseEncoding = fEncoding;
if (fEncoding == XMLRecognizer::UCS_4L) {
fMemoryManager->deallocate(fEncodingStr);
David Abram Cargill
committed
fEncodingStr = 0;
fEncodingStr = XMLString::replicate(XMLUni::fgUCS4LEncodingString, fMemoryManager);
}
else {
fMemoryManager->deallocate(fEncodingStr);
David Abram Cargill
committed
fEncodingStr = 0;
fEncodingStr = XMLString::replicate(XMLUni::fgUCS4BEncodingString, fMemoryManager);
}
else
{
//
// Try to map the string to one of our standard encodings. If its not
// one of them, then it has to be one of the non-intrinsic encodings,
// in which case we have to delete our intrinsic encoder and create a
// new one.
//
newBaseEncoding = XMLRecognizer::encodingForName(inputEncoding);
//
// If it does not come back as one of the auto-sensed encodings, then we
// have to possibly replace it and at least check a few things.
//
if (newBaseEncoding == XMLRecognizer::OtherEncoding)
// We already know it's none of those non-endian special cases,
// so just replicate the new name and use it directly to create the transcoder
fMemoryManager->deallocate(fEncodingStr);
fEncodingStr = inputEncoding;
XMLTransService::Codes failReason;
fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
(
fEncodingStr
, failReason
, kCharBufSize
, fMemoryManager
else
{
// Store the new encoding string since it is just an intrinsic
fMemoryManager->deallocate(fEncodingStr);
fEncodingStr = inputEncoding;
}
if (!fTranscoder) {
//
// Now we can create a transcoder using the recognized fEncoding. We
// might get back a transcoder for an intrinsically supported encoding,
// or we might get one from the underlying transcoding service.
//
XMLTransService::Codes failReason;
fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
(
newBaseEncoding
, failReason
, kCharBufSize
, fMemoryManager
David Abram Cargill
committed
ThrowXMLwithMemMgr1(TranscodingException, XMLExcepts::Trans_CantCreateCvtrFor, fEncodingStr, fMemoryManager);
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
// Update the base encoding member with the new base encoding found
fEncoding = newBaseEncoding;
// Looks ok to us
return true;
}
// ---------------------------------------------------------------------------
// XMLReader: Private helper methods
// ---------------------------------------------------------------------------
//
// This is called when the encoding flag is set and just sets the fSwapped
// flag appropriately.
//
void XMLReader::checkForSwapped()
{
// Assume not swapped
fSwapped = false;
James David Berry
committed
if (XMLPlatformUtils::fgXMLChBigEndian)
{
if ((fEncoding == XMLRecognizer::UTF_16L)
|| (fEncoding == XMLRecognizer::UCS_4L))
James David Berry
committed
}
else
{
if ((fEncoding == XMLRecognizer::UTF_16B)
|| (fEncoding == XMLRecognizer::UCS_4B))
James David Berry
committed
}
}
//
// This is called from the constructor when the encoding is not forced.
// We assume that the encoding has been auto-sensed at this point and that
// fSwapped is set correctly.
//
// In the case of UCS-4 and EBCDIC, we don't have to check for a decl.
// The fact that we got here, means that there is one, because that's the
// only way we can autosense those.
//
void XMLReader::doInitDecode()
{
switch(fEncoding)
{
case XMLRecognizer::UCS_4B :
case XMLRecognizer::UCS_4L :
{
// Remove bom if any
if (((fRawByteBuf[0] == 0x00) && (fRawByteBuf[1] == 0x00) && (fRawByteBuf[2] == 0xFE) && (fRawByteBuf[3] == 0xFF)) ||
((fRawByteBuf[0] == 0xFF) && (fRawByteBuf[1] == 0xFE) && (fRawByteBuf[2] == 0x00) && (fRawByteBuf[3] == 0x00)) )
{
for (unsigned int i = 0; i < fRawBytesAvail; i++)
fRawByteBuf[i] = fRawByteBuf[i+4];
fRawBytesAvail -=4;
}
// Look at the raw buffer as UCS4 chars
const UCS4Ch* asUCS = (const UCS4Ch*)fRawByteBuf;
while (fRawBufIndex < fRawBytesAvail)
{
// Get out the current 4 byte value and inc our raw buf index
UCS4Ch curVal = *asUCS++;
fRawBufIndex += sizeof(UCS4Ch);
// Swap if that is required for this machine
if (fSwapped)
curVal = BitOps::swapBytes(curVal);
// Make sure its at least semi legal. If not, undo and throw
if (curVal > 0xFFFF)
{
fCharsAvail = 0;
fRawBufIndex = 0;
fMemoryManager->deallocate(fPublicId);
fMemoryManager->deallocate(fEncodingStr);
ArrayJanitor<XMLCh> janValue(fSystemId, fMemoryManager);
David Abram Cargill
committed
ThrowXMLwithMemMgr1
(
TranscodingException
, XMLExcepts::Reader_CouldNotDecodeFirstLine
, fSystemId
David Abram Cargill
committed
, fMemoryManager
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
);
}
// Convert the value to an XML char and store it
fCharSizeBuf[fCharsAvail] = 4;
fCharBuf[fCharsAvail++] = XMLCh(curVal);
// Break out on the > character
if (curVal == chCloseAngle)
break;
}
break;
}
case XMLRecognizer::UTF_8 :
{
// If there's a utf-8 BOM (0xEF 0xBB 0xBF), skip past it.
// Don't move to char buf - no one wants to see it.
// Note: this causes any encoding= declaration to override
// the BOM's attempt to say that the encoding is utf-8.
// Look at the raw buffer as short chars
const char* asChars = (const char*)fRawByteBuf;
if (fRawBytesAvail > XMLRecognizer::fgUTF8BOMLen &&
XMLString::compareNString( asChars
, XMLRecognizer::fgUTF8BOM
, XMLRecognizer::fgUTF8BOMLen) == 0)
{
fRawBufIndex += XMLRecognizer::fgUTF8BOMLen;
asChars += XMLRecognizer::fgUTF8BOMLen;
}
//
// First check that there are enough bytes to even see the
// decl indentifier. If not, get out now with no action since
// there is no decl.
//
if (fRawBytesAvail < XMLRecognizer::fgASCIIPreLen)
break;
// Check for the opening sequence. If not, then no decl
if (XMLString::compareNString( asChars
, XMLRecognizer::fgASCIIPre
, XMLRecognizer::fgASCIIPreLen))
{
break;
}
while (fRawBufIndex < fRawBytesAvail)
{
const char curCh = *asChars++;
fRawBufIndex++;
// Looks ok, so store it
fCharSizeBuf[fCharsAvail] = 1;
fCharBuf[fCharsAvail++] = XMLCh(curCh);
// Break out on a > character
if (curCh == chCloseAngle)
break;
//
// A char greater than 0x7F is not allowed in this case. If
// so, undo and throw.
//
if (curCh & 0x80)
{
fCharsAvail = 0;
fRawBufIndex = 0;
fMemoryManager->deallocate(fPublicId);
fMemoryManager->deallocate(fEncodingStr);
ArrayJanitor<XMLCh> janValue(fSystemId, fMemoryManager);
David Abram Cargill
committed
ThrowXMLwithMemMgr1
(
TranscodingException
, XMLExcepts::Reader_CouldNotDecodeFirstLine
, fSystemId
David Abram Cargill
committed
, fMemoryManager
);
}
}
break;
}
case XMLRecognizer::UTF_16B :
case XMLRecognizer::UTF_16L :
{
//
// If there is a decl here, we just truncate back the characters
// as we go. No surrogate creation would be allowed here in legal
// XML, so we consider it a transoding error if we find one.
//
if (fRawBytesAvail < 2)
break;
unsigned int postBOMIndex = 0;
const UTF16Ch* asUTF16 = (const UTF16Ch*)&fRawByteBuf[fRawBufIndex];
if ((*asUTF16 == chUnicodeMarker) || (*asUTF16 == chSwappedUnicodeMarker))
{
fRawBufIndex += sizeof(UTF16Ch);
asUTF16++;
postBOMIndex = fRawBufIndex;
}
// First check that there are enough raw bytes for there to even
// be a decl indentifier. If not, then nothing to do.
//
if (fRawBytesAvail - fRawBufIndex < XMLRecognizer::fgUTF16PreLen)
{
fRawBufIndex = postBOMIndex;
break;
}
//
// See we get a match on the prefix. If not, then reset and
// break out.
//
if (fEncoding == XMLRecognizer::UTF_16B)
{
if (memcmp(asUTF16, XMLRecognizer::fgUTF16BPre, XMLRecognizer::fgUTF16PreLen))
{
fRawBufIndex = postBOMIndex;
break;
}
}
else
{
if (memcmp(asUTF16, XMLRecognizer::fgUTF16LPre, XMLRecognizer::fgUTF16PreLen))
{
fRawBufIndex = postBOMIndex;
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
break;
}
}
while (fRawBufIndex < fRawBytesAvail)
{
// Get out the current 2 byte value
UTF16Ch curVal = *asUTF16++;
fRawBufIndex += sizeof(UTF16Ch);
// Swap if that is required for this machine
if (fSwapped)
curVal = BitOps::swapBytes(curVal);
//
// Store it and bump the target index, implicitly converting
// if UTF16Ch and XMLCh are not the same size.
//
fCharSizeBuf[fCharsAvail] = 2;
fCharBuf[fCharsAvail++] = curVal;
// Break out on a > char
if (curVal == chCloseAngle)
break;
}
break;
}
case XMLRecognizer::EBCDIC :
{
//
// We use special support in the intrinsic EBCDIC-US transcoder
// to go through one char at a time.
//
const XMLByte* srcPtr = fRawByteBuf;
while (1)
{
// Transcode one char from the source
const XMLCh chCur = XMLEBCDICTranscoder::xlatThisOne(*srcPtr++);
fRawBufIndex++;
//
// And put it into the character buffer. This stuff has to
// look like it was normally transcoded.
//
fCharSizeBuf[fCharsAvail] = 1;
fCharBuf[fCharsAvail++] = chCur;
// If its a > char, then break out
if (chCur == chCloseAngle)
break;
// Watch for using up all input and get out
if (fRawBufIndex == fRawBytesAvail)
break;
}
break;
}
default :
// It should never be anything else here
fMemoryManager->deallocate(fPublicId);
fMemoryManager->deallocate(fEncodingStr);
fMemoryManager->deallocate(fSystemId);
David Abram Cargill
committed
ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Reader_BadAutoEncoding, fMemoryManager);
break;
}
//
// Ok, by the time we get here, if its a legal XML file we have eaten
// the XML/TextDecl. So, if we are a PE and are being referenced from
// outside a literal, then we need to throw in an arbitrary space that
// is required by XML.
//
if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
fCharBuf[fCharsAvail++] = chSpace;
// Calculate fCharOfsBuf buffer using the elements from fCharBufSize
if (fCalculateSrcOfs)
{
fCharOfsBuf[0] = 0;
for (unsigned int index = 1; index < fCharsAvail; ++index) {
fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1];
}
}
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
}
//
// This method is called internally when we run out of bytes in the raw
// buffer. We just read as many bytes as we can into the raw buffer again
// and store the number of bytes we got.
//
void XMLReader::refreshRawBuffer()
{
//
// If there are any bytes left, move them down to the start. There
// should only ever be (max bytes per char - 1) at the most.
//
const unsigned int bytesLeft = fRawBytesAvail - fRawBufIndex;
// Move the existing ones down
for (unsigned int index = 0; index < bytesLeft; index++)
fRawByteBuf[index] = fRawByteBuf[fRawBufIndex + index];
//
// And then read into the buffer past the existing bytes. Add back in
// that many to the bytes read, and subtract that many from the bytes
// requested.
//
fRawBytesAvail = fStream->readBytes
(
&fRawByteBuf[bytesLeft], kRawBufSize - bytesLeft
) + bytesLeft;
//
// We need to reset the buffer index back to the start in all cases,
// since any trailing data was copied down to the start.
//
fRawBufIndex = 0;
}
//
//  This method is called internally when we run out of characters in the
//  transcoded character buffer. We transcode up to another maxChars chars
//  from the raw byte buffer into the caller's buffer.
//
unsigned int
XMLReader::xcodeMoreChars(          XMLCh* const            bufToFill
                            ,       unsigned char* const    charSizes
                            , const unsigned int            maxChars)
{
    //
    //  Transcodes another batch of raw bytes into 'bufToFill'.
    //
    //  bufToFill - receives the transcoded characters
    //  charSizes - handed to the transcoder alongside bufToFill
    //  maxChars  - upper limit on the characters to produce
    //
    //  Returns the number of characters the transcoder reports as done, or
    //  zero when no raw bytes are available.
    //

    // Nothing at all in the raw buffer: nothing to transcode
    if (fRawBytesAvail == 0)
        return 0;

    //
    //  Top up the raw buffer when it is running low. The test is against a
    //  small threshold rather than zero because a multi-byte encoding may
    //  have left the leading bytes of a partial character behind, which
    //  cannot be used until the rest of that character has been read.
    //
    const unsigned int unreadBytes = fRawBytesAvail - fRawBufIndex;
    if (unreadBytes < 100)
    {
        refreshRawBuffer();

        // Still empty after the reload, so there is nothing more to do
        if (fRawBytesAvail == 0)
            return 0;
    }

    // Let the transcoder internalize the next batch of chars
    XMLSize_t consumedBytes;
    const XMLSize_t producedChars = fTranscoder->transcodeFrom
    (
        &fRawByteBuf[fRawBufIndex]
        , fRawBytesAvail - fRawBufIndex
        , bufToFill
        , maxChars
        , consumedBytes
        , charSizes
    );

    // Step the raw index past the bytes the transcoder used up
    fRawBufIndex += consumedBytes;

    return producedChars;
}
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
/***
*
* XML1.1
*
* 2.11 End-of-Line Handling
*
* XML parsed entities are often stored in computer files which, for editing
* convenience, are organized into lines. These lines are typically separated
* by some combination of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
*
* To simplify the tasks of applications, the XML processor MUST behave as if
* it normalized all line breaks in external parsed entities (including the document
* entity) on input, before parsing, by translating all of the following to a single
* #xA character:
*
* 1. the two-character sequence #xD #xA
* 2. the two-character sequence #xD #x85
* 3. the single character #x85
* 4. the single character #x2028
* 5. any #xD character that is not immediately followed by #xA or #x85.
*
*
***/
void XMLReader::handleEOL(XMLCh& curCh, bool inDecl)
{
    //
    //  Updates the line/column position for 'curCh' and, for external
    //  sources, normalizes end-of-line sequences to a single chLF (written
    //  back through 'curCh'). 'inDecl' tells whether we are inside an
    //  XML/text declaration, where NEL and LSEP are fatal for XML 1.1.
    //

    // 1. the two-character sequence #xD #xA
    // 2. the two-character sequence #xD #x85
    // 5. any #xD character that is not immediately followed by #xA or #x85.
    if (curCh == chCR)
    {
        fCurCol = 1;
        fCurLine++;

        // Already internalized input is left exactly as it is
        if (fSource != Source_External)
            return;

        // Peek at the following char, reloading the buffer if it is drained
        const bool haveNext = (fCharIndex < fCharsAvail) || refreshCharBuffer();
        if (haveNext)
        {
            // A trailing LF (or NEL, when NEL handling is on) belongs to
            // the same line break, so swallow it
            const XMLCh nextCh = fCharBuf[fCharIndex];
            if ((nextCh == chLF) || ((nextCh == chNEL) && fNEL))
                fCharIndex++;
        }

        // Whatever followed, the caller sees a plain LF
        curCh = chLF;
        return;
    }

    if (curCh == chLF)
    {
        fCurCol = 1;
        fCurLine++;
        return;
    }

    // Any char that is neither NEL nor LSEP just moves the column on
    if ((curCh != chNEL) && (curCh != chLineSeparator))
    {
        fCurCol++;
        return;
    }

    // 3. the single character #x85
    // 4. the single character #x2028
    if (inDecl && (fXMLVersion == XMLV1_1))
    {
        /***
         * XML1.1
         *
         * 2.11 End-of-Line Handling
         *  ...
         *   The characters #x85 and #x2028 cannot be reliably recognized and translated
         *   until an entity's encoding declaration (if present) has been read.
         *   Therefore, it is a fatal error to use them within the XML declaration or
         *   text declaration.
         *
         ***/
        ThrowXMLwithMemMgr1
        (
            TranscodingException
            , XMLExcepts::Reader_NelLsepinDecl
            , fSystemId
            , fMemoryManager
        );
    }

    // Treated as a line break only for external input with NEL handling on;
    // otherwise the position and the character stay untouched
    if (fNEL && (fSource == Source_External))
    {
        fCurCol = 1;
        fCurLine++;
        curCh = chLF;
    }
}