Newer
Older
gotLeadingSurrogate = false;
continue;
}
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Deal with surrogate pairs
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// Make sure the returned char is a valid XML char
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
gotLeadingSurrogate = false;
}
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
// Keep the state machine up to date
if (!escaped)
{
if (nextCh == chCloseSquare)
{
if (curState == State_Waiting)
curState = State_GotOne;
else if (curState == State_GotOne)
curState = State_GotTwo;
}
else if (nextCh == chCloseAngle)
{
if (curState == State_GotTwo)
emitError(XMLErrs::BadSequenceInCharData);
curState = State_Waiting;
}
else
{
curState = State_Waiting;
}
}
else
{
curState = State_Waiting;
}
// Add this char to the buffer
toUse.append(nextCh);
if (secondCh)
toUse.append(secondCh);
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
}
}
catch(const EndOfEntityException& toCatch)
{
// Some entity ended, so we have to send any accumulated
// chars and send an end of entity event.
sendCharData(toUse);
gotLeadingSurrogate = false;
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
}
}
// Check the validity constraints as per XML 1.0 Section 2.9
if (fValidate && fStandalone)
{
// See if the text contains whitespace
// Get the raw data we need for the callback
const XMLCh* rawBuf = toUse.getRawBuffer();
const unsigned int len = toUse.getLen();
const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
if (isSpaces)
{
// And see if the current element is a 'Children' style content model
const ElemStack::StackElem* topElem = fElemStack.topElement();
if (topElem->fThisElement->isExternal()) {
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();
if (charOpts == XMLElementDecl::SpacesOk) // => Element Content
{
// Error - standalone should have a value of "no" as whitespace detected in an
// element type with element content whose element declaration was external
//
fValidator->emitError(XMLValid::NoWSForStandalone);
}
}
}
}
// Send any char data that we accumulated into the buffer
sendCharData(toUse);
}
//  This method will scan a general/character entity ref. It will either
//  expand a char ref and return it directly, or push a reader for a general
//  entity.
//
//  Parameters:
//      inAttVal    True when the reference occurs inside an attribute value.
//                  External references are reported as errors there, and
//                  start-entity-reference events are suppressed.
//      firstCh     Receives the expanded character when the result is
//                  EntityExp_Returned.
//      secondCh    Set to zero on entry; filled in by scanCharRef() on the
//                  character reference path (presumably the second half of
//                  a surrogate pair -- confirm against scanCharRef()).
//      escaped     Set to true when the returned character came from a
//                  character reference or one of the special char entities,
//                  which escapes the character in some cases. It only makes
//                  any difference when the value was returned directly.
//
//  Returns:
//      EntityExp_Returned  The value is held in the char parameters.
//      EntityExp_Pushed    A reader over the entity value was pushed (or, for
//                          a recursive internal entity, an error was emitted
//                          and scanning just keeps going).
//      EntityExp_Failed    The reference could not be expanded.
//
//  A RuntimeException is thrown if the reader for an external entity cannot
//  be created.
DGXMLScanner::EntityExpRes
DGXMLScanner::scanEntityRef(  const   bool    inAttVal
                            ,       XMLCh&  firstCh
                            ,       XMLCh&  secondCh
                            ,       bool&   escaped)
{
    // Assume no escape
    secondCh = 0;
    escaped = false;

    // We have to ensure that it's all in one entity
    const unsigned int curReader = fReaderMgr.getCurrentReaderNum();

    //  If the next char is a pound, then it's a character reference and we
    //  need to expand it always.
    if (fReaderMgr.skippedChar(chPound))
    {
        //  It's a character reference, so scan it and get back the numeric
        //  value it represents.
        if (!scanCharRef(firstCh, secondCh))
            return EntityExp_Failed;

        escaped = true;

        if (curReader != fReaderMgr.getCurrentReaderNum())
            emitError(XMLErrs::PartialMarkupInEntity);

        return EntityExp_Returned;
    }

    // Expand it since it's a normal entity ref
    XMLBufBid bbName(&fBufMgr);
    if (!fReaderMgr.getName(bbName.getBuffer()))
    {
        emitError(XMLErrs::ExpectedEntityRefName);
        return EntityExp_Failed;
    }

    //  Next char must be a semi-colon. But if it's not, just emit
    //  an error and try to continue.
    if (!fReaderMgr.skippedChar(chSemiColon))
        emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());

    // Make sure we ended up on the same entity reader as the & char
    if (curReader != fReaderMgr.getCurrentReaderNum())
        emitError(XMLErrs::PartialMarkupInEntity);

    // Look up the name in the general entity pool
    XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(bbName.getRawBuffer());

    // If it does not exist, then obviously an error
    if (!decl)
    {
        // XML 1.0 Section 4.1
        // Well-formedness Constraint for entity not found:
        //   In a document without any DTD, a document with only an internal
        //   DTD subset which contains no parameter entity references, or a
        //   document with "standalone='yes'", for an entity reference that
        //   does not occur within the external subset or a parameter entity
        //
        // Else it's Validity Constraint
        if (fStandalone || fHasNoDTD)
            emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
        else {
            if (fValidate)
                fValidator->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
        }

        return EntityExp_Failed;
    }

    // XML 1.0 Section 4.1
    //  If we are a standalone document, then it has to have been declared
    //  in the internal subset.
    if (fStandalone && !decl->getDeclaredInIntSubset())
        emitError(XMLErrs::IllegalRefInStandalone, bbName.getRawBuffer());

    if (decl->isExternal())
    {
        // If it's unparsed, then it's not valid here
        if (decl->isUnparsed())
        {
            emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer());
            return EntityExp_Failed;
        }

        // If we are in an attribute value, then not valid but keep going
        if (inAttVal)
            emitError(XMLErrs::NoExtRefsInAttValue);

        //  And now create a reader to read this entity. The source pointer
        //  is zeroed up front so that the janitor and the failure path below
        //  never see an indeterminate value if createReader() does not fill
        //  it in.
        InputSource* srcUsed = 0;
        XMLReader* reader = fReaderMgr.createReader
        (
            decl->getBaseURI()
            , decl->getSystemId()
            , decl->getPublicId()
            , false
            , XMLReader::RefFrom_NonLiteral
            , XMLReader::Type_General
            , XMLReader::Source_External
            , srcUsed
            , fCalculateSrcOfs
        );

        // Put a janitor on the source so it gets cleaned up on exit
        Janitor<InputSource> janSrc(srcUsed);

        //  If the creation failed then throw. Report the system id of the
        //  source when one was produced, otherwise fall back to the system
        //  id from the entity declaration rather than dereferencing null.
        if (!reader)
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed ? srcUsed->getSystemId() : decl->getSystemId(), fMemoryManager);

        //  Push the reader. If it's a recursive expansion, then emit an error
        //  and return a failure.
        if (!fReaderMgr.pushReader(reader, decl))
        {
            emitError(XMLErrs::RecursiveEntity, decl->getName());
            return EntityExp_Failed;
        }

        // here's where we need to check if there's a SecurityManager,
        // how many entity references we've had
        if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
            XMLCh expLimStr[16];
            XMLString::binToText(fEntityExpansionLimit, expLimStr, 15, 10, fMemoryManager);
            emitError
            (
                XMLErrs::EntityExpansionLimitExceeded
                , expLimStr
            );
            // there seems nothing better to do than reset the entity expansion counter
            fEntityExpansionCount = 0;
        }

        //  Do a start entity reference event.
        //
        //  <TBD> For now, we suppress them in att values. Later, when
        //  the stuff is in place to correctly allow DOM to handle them
        //  we'll turn this back on.
        if (fDocHandler && !inAttVal)
            fDocHandler->startEntityReference(*decl);

        // If it starts with the XML string, then parse a text decl
        if (checkXMLDecl(true))
            scanXMLDecl(Decl_Text);
    }
    else
    {
        //  If it's one of the special char references, then we can return
        //  it as a character, and it's considered escaped.
        if (decl->getIsSpecialChar())
        {
            firstCh = decl->getValue()[0];
            escaped = true;
            return EntityExp_Returned;
        }

        //  Create a reader over a memory stream over the entity value
        //  We force it to assume UTF-16 by passing in an encoding
        //  string. This way it won't bother trying to predecode the
        //  first line, looking for an XML/TextDecl.
        XMLReader* valueReader = fReaderMgr.createIntEntReader
        (
            decl->getName()
            , XMLReader::RefFrom_NonLiteral
            , XMLReader::Type_General
            , decl->getValue()
            , decl->getValueLen()
            , false
        );

        //  Try to push the entity reader onto the reader manager stack,
        //  where it will become the subsequent input. If it fails, that
        //  means the entity is recursive, so issue an error. The reader
        //  will have just been discarded, but we just keep going.
        if (!fReaderMgr.pushReader(valueReader, decl))
            emitError(XMLErrs::RecursiveEntity, decl->getName());

        // here's where we need to check if there's a SecurityManager,
        // how many entity references we've had
        //
        // NOTE(review): unlike the external entity branch, the expansion
        // counter is not reset here after the limit error is emitted --
        // confirm whether that difference is intentional.
        if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
            XMLCh expLimStr[16];
            XMLString::binToText(fEntityExpansionLimit, expLimStr, 15, 10, fMemoryManager);
            emitError
            (
                XMLErrs::EntityExpansionLimitExceeded
                , expLimStr
            );
        }

        //  Do a start entity reference event.
        //
        //  <TBD> For now, we suppress them in att values. Later, when
        //  the stuff is in place to correctly allow DOM to handle them
        //  we'll turn this back on.
        if (fDocHandler && !inAttVal)
            fDocHandler->startEntityReference(*decl);

        // If it starts with the XML string, then it's an error
        if (checkXMLDecl(true)) {
            emitError(XMLErrs::TextDeclNotLegalHere);
            fReaderMgr.skipPastChar(chCloseAngle);
        }
    }

    return EntityExp_Pushed;
}
XERCES_CPP_NAMESPACE_END