diff --git a/src/xercesc/internal/ReaderMgr.cpp b/src/xercesc/internal/ReaderMgr.cpp
index d215e4a0be0031e999d4459070ff94d44400d5a2..5632bec94eee312d0ad8955fbbc4a6c7b7687036 100644
--- a/src/xercesc/internal/ReaderMgr.cpp
+++ b/src/xercesc/internal/ReaderMgr.cpp
@@ -286,7 +286,7 @@ bool ReaderMgr::skipIfQuote(XMLCh& chGotten)
 }
 
 
-bool ReaderMgr::skipPastSpaces()
+bool ReaderMgr::skipPastSpaces(bool inDecl)
 {
     bool skippedSomething = false;
     bool tmpFlag;
@@ -297,31 +297,7 @@ bool ReaderMgr::skipPastSpaces()
         // it hit a non-space, break out. Else we have to pop another entity
         // and keep going.
         //
-        if (fCurReader->skipSpaces(tmpFlag))
-            break;
-
-        if (tmpFlag)
-            skippedSomething = true;
-
-        // Try to pop another enitity. If we can't then we are done
-        if (!popReader())
-            break;
-    }
-    return (tmpFlag || skippedSomething);
-}
-
-bool ReaderMgr::skipPastSpacesInDecl()
-{
-    bool skippedSomething = false;
-    bool tmpFlag;
-    while (true)
-    {
-        //
-        // Skip all the spaces in the current reader. If it returned because
-        // it hit a non-space, break out. Else we have to pop another entity
-        // and keep going.
-        //
-        if (fCurReader->skipSpacesInDecl(tmpFlag))
+        if (fCurReader->skipSpaces(tmpFlag, inDecl))
             break;
 
         if (tmpFlag)
diff --git a/src/xercesc/internal/ReaderMgr.hpp b/src/xercesc/internal/ReaderMgr.hpp
index db2bc4e80e5e3dd5991dc371e463a27df1d9d197..6a2acd050d626107dd9be4ae5c16001e1232a639 100644
--- a/src/xercesc/internal/ReaderMgr.hpp
+++ b/src/xercesc/internal/ReaderMgr.hpp
@@ -56,6 +56,9 @@
 
 /*
  * $Log$
+ * Revision 1.13 2004/06/14 15:18:52 peiyongz
+ * Consolidated End Of Line Handling
+ *
  * Revision 1.12 2004/06/03 15:38:27 peiyongz
  * XML1.1: The characters #x85 and #x2028 cannot be reliably recognized
  * and translated until an entity's encoding declaration (if present) has been
@@ -223,8 +226,7 @@ public :
     XMLCh peekNextChar();
     bool skipIfQuote(XMLCh& chGotten);
     void skipPastChar(const XMLCh toSkip);
-    bool skipPastSpaces();
-    bool skipPastSpacesInDecl();
+    bool skipPastSpaces(bool inDecl = false);
     void skipToChar(const XMLCh toSkipTo);
     bool skippedChar(const XMLCh toSkip);
     bool skippedSpace();
diff --git a/src/xercesc/internal/XMLReader.cpp b/src/xercesc/internal/XMLReader.cpp
index 85c95ea41543ab253ce750c87b3ecce0ccc2d90c..1b48ce2c4decd19a3be14b3cdbd90e68cfabbdb9 100644
--- a/src/xercesc/internal/XMLReader.cpp
+++ b/src/xercesc/internal/XMLReader.cpp
@@ -72,7 +72,6 @@
 #include <xercesc/util/XMLString.hpp>
 #include <xercesc/util/Janitor.hpp>
 
-
 XERCES_CPP_NAMESPACE_BEGIN
 
 // ---------------------------------------------------------------------------
@@ -695,42 +694,7 @@ bool XMLReader::getSpaces(XMLBuffer& toFill)
                 // Eat this char
                 fCharIndex++;
 
-                //
-                // Ok, we've got some whitespace here. So we have to store
-                // it. But we have to normalize it and update the line and
-                // column info along the way.
-                //
-                if (curCh == chCR)
-                {
-                    fCurCol = 1;
-                    fCurLine++;
-
-                    //
-                    // If not already internalized, then convert it to an
-                    // LF and eat any following LF.
-                    //
-                    if (fSource == Source_External)
-                    {
-                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
-                        {
-                            if (fCharBuf[fCharIndex] == chLF
-                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
-                                fCharIndex++;
-                        }
-                        curCh = chLF;
-                    }
-                }
-                else if (curCh == chLF
-                         || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
-                {
-                    curCh = chLF;
-                    fCurCol = 1;
-                    fCurLine++;
-                }
-                else
-                {
-                    fCurCol++;
-                }
+                handleEOL(curCh, false);
 
                 // Ok we can add this guy to our buffer
                 toFill.append(curCh);
@@ -773,42 +737,7 @@ bool XMLReader::getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck)
                 // Eat this char
                 fCharIndex++;
 
-                //
-                // Ok, we've got some whitespace here. So we have to store
-                // it. But we have to normalize it and update the line and
-                // column info along the way.
-                //
-                if (curCh == chCR)
-                {
-                    fCurCol = 1;
-                    fCurLine++;
-
-                    //
-                    // If not already internalized, then convert it to an
-                    // LF and eat any following LF.
-                    //
-                    if (fSource == Source_External)
-                    {
-                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
-                        {
-                            if (fCharBuf[fCharIndex] == chLF
-                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
-                                fCharIndex++;
-                        }
-                        curCh = chLF;
-                    }
-                }
-                else if (curCh == chLF
-                         || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
-                {
-                    curCh = chLF;
-                    fCurCol = 1;
-                    fCurLine++;
-                }
-                else
-                {
-                    fCurCol++;
-                }
+                handleEOL(curCh, false);
 
                 // Add it to our buffer
                 toFill.append(curCh);
@@ -853,7 +782,7 @@ bool XMLReader::skipIfQuote(XMLCh& chGotten)
 }
 
 
-bool XMLReader::skipSpaces(bool& skippedSomething)
+bool XMLReader::skipSpaces(bool& skippedSomething, bool inDecl)
 {
     // Remember the current line and column
     XMLSSize_t orgLine = fCurLine;
@@ -874,138 +803,8 @@ bool XMLReader::skipSpaces(bool& skippedSomething)
                 // Get the current char out of the buffer and eat it
                 XMLCh curCh = fCharBuf[fCharIndex++];
 
-                // Ok, we've got some whitespace here. So we have to store
-                // it. But we have to normalize it and update the line and
-                // column info along the way.
-                if (curCh == chCR)
-                {
-                    fCurCol = 1;
-                    fCurLine++;
-
-                    // If not already internalized, then convert it to an
-                    // LF and eat any following LF.
-                    if (fSource == Source_External)
-                    {
-                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
-                        {
-                            if (fCharBuf[fCharIndex] == chLF
-                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
-                                fCharIndex++;
-                        }
-                    }
-                }
-                else if (curCh == chLF
-                         || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
-                {
-                    fCurCol = 1;
-                    fCurLine++;
-                }
-                else
-                {
-                    fCurCol++;
-                }
-            }
-            else
-            {
-                skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
-                return true;
-            }
-        }
-
-        // We've eaten up the current buffer, so lets try to reload it. If
-        // we don't get anything new, then break out. If we do, then we go
-        // back to the top to keep getting spaces.
-        if (!refreshCharBuffer())
-            break;
-    }
-
-    // We never hit any non-space and ate up the whole reader
-    skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
-    return false;
-}
-
-/***
- * XML1.1
- *
- * 2.11 End-of-Line Handling
- * ...
- * The characters #x85 and #x2028 cannot be reliably recognized and translated
- * until an entity's encoding declaration (if present) has been read.
- * Therefore, it is a fatal error to use them within the XML declaration or
- * text declaration.
- *
-***/
-bool XMLReader::skipSpacesInDecl(bool& skippedSomething)
-{
-    // Remember the current line and column
-    XMLSSize_t orgLine = fCurLine;
-    XMLSSize_t orgCol = fCurCol;
-
-    // We enter a loop where we skip over spaces until we hit the end of
-    // this reader or a non-space value. The return indicates whether we
-    // hit the non-space (true) or the end (false).
-    while (true)
-    {
-        // Loop through the current chars in the buffer
-        while (fCharIndex < fCharsAvail)
-        {
-            // See if its a white space char. If so, then process it. Else
-            // we've hit a non-space and need to return.
-            if (isWhitespace(fCharBuf[fCharIndex]))
-            {
-                // Get the current char out of the buffer and eat it
-                XMLCh curCh = fCharBuf[fCharIndex++];
+                handleEOL(curCh, inDecl);
 
-                // Ok, we've got some whitespace here. So we have to store
-                // it. But we have to normalize it and update the line and
-                // column info along the way.
-                if (curCh == chCR)
-                {
-                    fCurCol = 1;
-                    fCurLine++;
-
-                    // If not already internalized, then convert it to an
-                    // LF and eat any following LF.
-                    if (fSource == Source_External)
-                    {
-                        if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
-                        {
-                            if (fCharBuf[fCharIndex] == chLF
-                                || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
-                                fCharIndex++;
-                        }
-                    }
-                }
-                else if (curCh == chLF)
-                {
-                    fCurCol = 1;
-                    fCurLine++;
-                }
-                else if (curCh == chNEL || curCh == chLineSeparator)
-                {
-                    if (fXMLVersion == XMLV1_1)
-                    {
-                        ThrowXMLwithMemMgr1
-                        (
-                            TranscodingException
-                            , XMLExcepts::Reader_NelLsepinDecl
-                            , fSystemId
-                            , fMemoryManager
-                        );
-                    }
-                    else //XMLV1_0
-                    {
-                        if (fNEL)
-                        {
-                            fCurCol = 1;
-                            fCurLine++;
-                        }
-                    }
-                }
-                else
-                {
-                    fCurCol++;
-                }
             }
             else
             {
@@ -1074,31 +873,11 @@ bool XMLReader::skippedSpace()
         // Eat the character
         fCharIndex++;
 
-        if (curCh == chCR)
-        {
-            fCurLine++;
-            fCurCol = 1;
+        // handleEOL() may rewrite its argument; give it a scratch copy so
+        // that curCh itself is never written through.
+        XMLCh eolCh = curCh;
+        handleEOL(eolCh, false);
 
-            if (fSource == Source_External)
-            {
-                if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
-                {
-                    if (fCharBuf[fCharIndex] == chLF
-                        || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
-                        fCharIndex++;
-                }
-            }
-        }
-        else if (curCh == chLF
-                 || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
-        {
-            fCurLine++;
-            fCurCol = 1;
-        }
-        else
-        {
-            fCurCol++;
-        }
         return true;
     }
     return false;
@@ -1725,4 +1504,120 @@ XMLReader::xcodeMoreChars( XMLCh* const bufToFill
     return charsDone;
 }
 
+/***
+ *
+ * XML1.1
+ *
+ * 2.11 End-of-Line Handling
+ *
+ * XML parsed entities are often stored in computer files which, for editing
+ * convenience, are organized into lines. These lines are typically separated
+ * by some combination of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
+ *
+ * To simplify the tasks of applications, the XML processor MUST behave as if
+ * it normalized all line breaks in external parsed entities (including the document
+ * entity) on input, before parsing, by translating all of the following to a single
+ * #xA character:
+ *
+ * 1. the two-character sequence #xD #xA
+ * 2. the two-character sequence #xD #x85
+ * 3. the single character #x85
+ * 4. the single character #x2028
+ * 5. any #xD character that is not immediately followed by #xA or #x85.
+ *
+ * curCh is the character the caller has just consumed; it is rewritten to #xA
+ * when it is normalized, and fCurLine/fCurCol are updated for it. inDecl is
+ * true while an XML or text declaration is being scanned.
+ *
+ * This is deliberately not inline: the inline getNextChar() and
+ * getNextCharIfNot() in XMLReader.hpp call it, so this one definition has to
+ * be linkable from every translation unit that includes that header.
+ *
+ ***/
+void XMLReader::handleEOL(XMLCh& curCh, bool inDecl)
+{
+    // 1. the two-character sequence #xD #xA
+    // 2. the two-character sequence #xD #x85
+    // 5. any #xD character that is not immediately followed by #xA or #x85.
+    if (curCh == chCR)
+    {
+        fCurCol = 1;
+        fCurLine++;
+
+        //
+        // If not already internalized, then convert it to an
+        // LF and eat any following LF.
+        //
+        if (fSource == Source_External)
+        {
+            if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
+            {
+                if ( fCharBuf[fCharIndex] == chLF ||
+                    ((fCharBuf[fCharIndex] == chNEL) && fNEL) )
+                {
+                    fCharIndex++;
+                }
+            }
+            curCh = chLF;
+        }
+    }
+    else if (curCh == chLF)
+    {
+        fCurCol = 1;
+        fCurLine++;
+    }
+    // 3. the single character #x85
+    // 4. the single character #x2028
+    else if (curCh == chNEL || curCh == chLineSeparator)
+    {
+        if (inDecl && fXMLVersion == XMLV1_1)
+        {
+
+            /***
+             * XML1.1
+             *
+             * 2.11 End-of-Line Handling
+             * ...
+             * The characters #x85 and #x2028 cannot be reliably recognized and translated
+             * until an entity's encoding declaration (if present) has been read.
+             * Therefore, it is a fatal error to use them within the XML declaration or
+             * text declaration.
+             *
+             ***/
+            ThrowXMLwithMemMgr1
+            (
+                TranscodingException
+                , XMLExcepts::Reader_NelLsepinDecl
+                , fSystemId
+                , fMemoryManager
+            );
+        }
+
+        if (fNEL && fSource == Source_External)
+        {
+            fCurCol = 1;
+            fCurLine++;
+            curCh = chLF;
+        }
+        else
+        {
+            // Not translated, so it is an ordinary character. It must still
+            // move the column: skipSpaces() derives skippedSomething from
+            // the line/column change, so a silent consume would be missed.
+            fCurCol++;
+        }
+    }
+    else if (curCh)
+    {
+        //
+        // Only do this if not a null char. Null chars are not part of the
+        // real content. They are just marker characters inserted into
+        // the stream.
+        //
+        fCurCol++;
+    }
+
+    return;
+}
+
 XERCES_CPP_NAMESPACE_END
diff --git a/src/xercesc/internal/XMLReader.hpp b/src/xercesc/internal/XMLReader.hpp
index 747b43192e7ab27dce172c0e7641410c7b1dc236..ba84a72668ac1865ba90587a28de86cd602b133e 100644
--- a/src/xercesc/internal/XMLReader.hpp
+++ b/src/xercesc/internal/XMLReader.hpp
@@ -56,6 +56,9 @@
 
 /*
  * $Log$
+ * Revision 1.17 2004/06/14 15:18:53 peiyongz
+ * Consolidated End Of Line Handling
+ *
  * Revision 1.16 2004/06/03 15:38:27 peiyongz
  * XML1.1: The characters #x85 and #x2028 cannot be reliably recognized
  * and translated until an entity's encoding declaration (if present) has been
@@ -333,8 +336,7 @@ public:
     bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
     bool peekNextChar(XMLCh& chGotten);
     bool skipIfQuote(XMLCh& chGotten);
-    bool skipSpaces(bool& skippedSomething);
-    bool skipSpacesInDecl(bool& skippedSomething);
+    bool skipSpaces(bool& skippedSomething, bool inDecl = false);
     bool skippedChar(const XMLCh toSkip);
     bool skippedSpace();
     bool skippedString(const XMLCh* const toSkip);
@@ -428,6 +430,11 @@ private:
         , const unsigned int maxChars
     );
 
+    void handleEOL
+    (
+        XMLCh& curCh
+        , bool inDecl = false
+    );
 
     // -----------------------------------------------------------------------
     // Data members
@@ -801,62 +808,8 @@ inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
     chGotten = fCharBuf[fCharIndex++];
 
     // Handle end of line normalization and line/col member maintenance.
-    if (chGotten == chCR)
-    {
-        //
-        // Do the normalization. We return chLF regardless of which was
-        // found. We also eat a chCR followed by an chLF.
-        //
-        // We only do this if the content being spooled is not already
-        // internalized.
-        //
-        if (fSource == Source_External)
-        {
-            //
-            // See if we have another char left. If not, don't bother.
-            // Else, see if its an chLF to eat. If it is, bump the
-            // index again.
-            //
-            if (fCharIndex < fCharsAvail)
-            {
-                if (fCharBuf[fCharIndex] == chLF
-                    || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
-                    fCharIndex++;
-            }
-            else
-            {
-                if (refreshCharBuffer())
-                {
-                    if (fCharBuf[fCharIndex] == chLF
-                        || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
-                        fCharIndex++;
-                }
-            }
-
-            // And return just an chLF
-            chGotten = chLF;
-        }
+    handleEOL(chGotten, false);
 
-        // And handle the line/col stuff
-        fCurCol = 1;
-        fCurLine++;
-    }
-    else if (chGotten == chLF
-             || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
-    {
-        chGotten = chLF;
-        fCurLine++;
-        fCurCol = 1;
-    }
-    else if (chGotten)
-    {
-        //
-        // Only do this is not a null char. Null chars are not part of the
-        // real content. They are just marker characters inserted into
-        // the stream.
-        //
-        fCurCol++;
-    }
     return true;
 }
 
@@ -883,53 +836,8 @@ inline bool XMLReader::getNextChar(XMLCh& chGotten)
     chGotten = fCharBuf[fCharIndex++];
 
     // Handle end of line normalization and line/col member maintenance.
-    if (chGotten == chCR)
-    {
-        //
-        // Do the normalization. We return chLF regardless of which was
-        // found. We also eat a chCR followed by an chLF.
-        //
-        // We only do this if the content being spooled is not already
-        // internalized.
-        //
-        if (fSource == Source_External)
-        {
-            //
-            // See if we have another char left. If not, don't bother.
-            // Else, see if its an chLF to eat. If it is, bump the
-            // index again.
-            //
-            if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
-            {
-                if (fCharBuf[fCharIndex] == chLF
-                    || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
-                    fCharIndex++;
-            }
-
-            // And return just an chLF
-            chGotten = chLF;
-        }
+    handleEOL(chGotten, false);
 
-        // And handle the line/col stuff
-        fCurCol = 1;
-        fCurLine++;
-    }
-    else if (chGotten == chLF
-             || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
-    {
-        chGotten = chLF;
-        fCurLine++;
-        fCurCol = 1;
-    }
-    else if (chGotten)
-    {
-        //
-        // Only do this is not a null char. Null chars are not part of the
-        // real content. They are just marker characters inserted into
-        // the stream.
-        //
-        fCurCol++;
-    }
     return true;
 }
 
diff --git a/src/xercesc/internal/XMLScanner.cpp b/src/xercesc/internal/XMLScanner.cpp
index 7640508f0ed82bc647332e67f5b3254fa93ee84c..b47060abfbb02aec6847034685e7969a1c14bc2e 100644
--- a/src/xercesc/internal/XMLScanner.cpp
+++ b/src/xercesc/internal/XMLScanner.cpp
@@ -1340,7 +1340,7 @@ void XMLScanner::scanXMLDecl(const DeclTypes type)
     while (true)
     {
         // Skip any spaces
-        const unsigned int spaceCount = fReaderMgr.skipPastSpacesInDecl();
+        const unsigned int spaceCount = fReaderMgr.skipPastSpaces(true);
 
         // If we are looking at a question mark, then break out
         if (fReaderMgr.lookingAtChar(chQuestion))
@@ -1375,7 +1375,7 @@ void XMLScanner::scanXMLDecl(const DeclTypes type)
 
         // Scan for an equal's sign. If we don't find it, issue an error
         // but keep trying to go on.
-        if (!scanEq())
+        if (!scanEq(true))
             emitError(XMLErrs::ExpectedEqSign);
 
         // Get a quote string into the buffer for the string that we are
@@ -2168,12 +2168,12 @@ void XMLScanner::scanComment()
 
 // Most equal signs can have white space around them, so this little guy
 // just makes the calling code cleaner by eating whitespace.
-bool XMLScanner::scanEq()
+bool XMLScanner::scanEq(bool inDecl)
 {
-    fReaderMgr.skipPastSpacesInDecl();
+    fReaderMgr.skipPastSpaces(inDecl);
     if (fReaderMgr.skippedChar(chEqual))
     {
-        fReaderMgr.skipPastSpacesInDecl();
+        fReaderMgr.skipPastSpaces(inDecl);
         return true;
     }
     return false;
diff --git a/src/xercesc/internal/XMLScanner.hpp b/src/xercesc/internal/XMLScanner.hpp
index b27782b69689f2ab38af6e5aae7d2fd57d0d0fd3..b2847c73c79a184e7370f412475854225d78f2bb 100644
--- a/src/xercesc/internal/XMLScanner.hpp
+++ b/src/xercesc/internal/XMLScanner.hpp
@@ -56,6 +56,9 @@
 
 /*
  * $Log$
+ * Revision 1.36 2004/06/14 15:18:53 peiyongz
+ * Consolidated End Of Line Handling
+ *
  * Revision 1.35 2004/04/13 18:57:54 peiyongz
  * Unrelavant comment removal
  *
@@ -734,7 +737,7 @@ protected:
     // -----------------------------------------------------------------------
     bool scanCharRef(XMLCh& toFill, XMLCh& second);
     void scanComment();
-    bool scanEq();
+    bool scanEq(bool inDecl = false);
     void scanMiscellaneous();
     void scanPI();
     void scanProlog();