Newer
Older
catch(const MalformedURLException& e)
// Its not a URL, so lets assume its a local file name if non-standard uri is allowed
if (!fStandardUriConformant)
srcToFill = new LocalFileInputSource
(
lastInfo.systemId
, expSysId.getRawBuffer()
);
else
throw e;
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
}
}
// Put a janitor on the input source
Janitor<InputSource> janSrc(srcToFill);
// Should just issue warning if the schema is not found
const bool flag = srcToFill->getIssueFatalErrorIfNotFound();
srcToFill->setIssueFatalErrorIfNotFound(false);
parser.parse(*srcToFill);
// Reset the InputSource
srcToFill->setIssueFatalErrorIfNotFound(flag);
if (parser.getSawFatal() && fExitOnFirstFatal)
emitError(XMLErrs::SchemaScanFatalError);
DOMDocument* document = parser.getDocument(); //Our Grammar
if (document != 0) {
DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
if (root != 0)
{
const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
if (!XMLString::equals(newUri, uri)) {
if (fValidate)
fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
grammar = fGrammarResolver->getGrammar(newUri);
}
if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType) {
// Since we have seen a grammar, set our validation flag
// at this point if the validation scheme is auto
if (fValScheme == Val_Auto && !fValidate) {
fValidate = true;
fElemStack.setValidationFlag(fValidate);
}
grammar = new SchemaGrammar();
TraverseSchema traverseSchema(root, fURIStringPool, (SchemaGrammar*) grammar, fGrammarResolver, this, srcToFill->getSystemId(), fEntityHandler, fErrorReporter);
if (fGrammarType == Grammar::DTDGrammarType) {
fGrammar = grammar;
fGrammarType = Grammar::SchemaGrammarType;
fValidator->setGrammar(fGrammar);
}
if (fValidate) {
// validate the Schema scan so far
fValidator->preContentValidation(false);
}
}
}
}
}
else {
// Since we have seen a grammar, set our validation flag
// at this point if the validation scheme is auto
if (fValScheme == Val_Auto && !fValidate) {
fValidate = true;
fElemStack.setValidationFlag(fValidate);
}
// we have seen a schema, so set up the fValidator as fSchemaValidator
if (fGrammarType == Grammar::DTDGrammarType) {
fGrammar = grammar;
fGrammarType = Grammar::SchemaGrammarType;
fValidator->setGrammar(fGrammar);
}
}
}
InputSource* SGXMLScanner::resolveSystemId(const XMLCh* const sysId)
{
// Create a buffer for expanding the system id
XMLBufBid bbSys(&fBufMgr);
XMLBuffer& expSysId = bbSys.getBuffer();
// Allow the entity handler to expand the system id if they choose
// to do so.
InputSource* srcToFill = 0;
if (fEntityHandler)
{
if (!fEntityHandler->expandSystemId(sysId, expSysId))
expSysId.set(sysId);
srcToFill = fEntityHandler->resolveEntity( XMLUni::fgZeroLenString
, expSysId.getRawBuffer());
}
else
{
expSysId.set(sysId);
}
// If they didn't create a source via the entity handler, then we
// have to create one on our own.
if (!srcToFill)
{
ReaderMgr::LastExtEntityInfo lastInfo;
fReaderMgr.getLastExtEntityInfo(lastInfo);
try
{
XMLURL urlTmp(lastInfo.systemId, expSysId.getRawBuffer());
if (urlTmp.isRelative())
{
ThrowXML
(
MalformedURLException
, XMLExcepts::URL_NoProtocolPresent
);
}
else {
if (fStandardUriConformant && urlTmp.hasInvalidChar())
ThrowXML(MalformedURLException, XMLExcepts::URL_MalformedURL);
srcToFill = new URLInputSource(urlTmp);
}
catch(const MalformedURLException& e)
// Its not a URL, so lets assume its a local file name if non-standard uri is allowed
if (!fStandardUriConformant)
srcToFill = new LocalFileInputSource
(
lastInfo.systemId
, expSysId.getRawBuffer()
);
else
throw e;
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
}
}
return srcToFill;
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Private grammar preparsing methods
// ---------------------------------------------------------------------------
Grammar* SGXMLScanner::loadXMLSchemaGrammar(const InputSource& src,
const bool toCache)
{
// Reset the validators
fSchemaValidator->reset();
fSchemaValidator->setErrorReporter(fErrorReporter);
fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
if (fValidatorFromUser)
fValidator->reset();
XSDDOMParser parser;
parser.setValidationScheme(XercesDOMParser::Val_Never);
parser.setDoNamespaces(true);
parser.setUserEntityHandler(fEntityHandler);
parser.setUserErrorReporter(fErrorReporter);
// Should just issue warning if the schema is not found
const bool flag = src.getIssueFatalErrorIfNotFound();
((InputSource&) src).setIssueFatalErrorIfNotFound(false);
parser.parse(src);
// Reset the InputSource
((InputSource&) src).setIssueFatalErrorIfNotFound(flag);
if (parser.getSawFatal() && fExitOnFirstFatal)
emitError(XMLErrs::SchemaScanFatalError);
DOMDocument* document = parser.getDocument(); //Our Grammar
if (document != 0) {
DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
if (root != 0)
{
SchemaGrammar* grammar = new SchemaGrammar();
TraverseSchema traverseSchema(root, fURIStringPool, (SchemaGrammar*) grammar, fGrammarResolver, this, src.getSystemId(), fEntityHandler, fErrorReporter);
if (fValidate) {
// validate the Schema scan so far
fValidator->setGrammar(grammar);
fValidator->preContentValidation(false, true);
}
if (toCache) {
fGrammarResolver->cacheGrammars();
}
return grammar;
}
}
return 0;
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Private parsing methods
// ---------------------------------------------------------------------------
// This method is called to do a raw scan of an attribute value. It does not
// do normalization (since we don't know their types yet.) It just scans the
// value and does entity expansion.
//
// End of entity's must be dealt with here. During DTD scan, they can come
// from external entities. During content, they can come from any entity.
// We just eat the end of entity and continue with our scan until we come
// to the closing quote. If an unterminated value causes us to go through
// subsequent entities, that will cause errors back in the calling code,
// but there's little we can do about it here.
bool SGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
{
// Reset the target buffer
toFill.reset();
// Get the next char which must be a single or double quote
XMLCh quoteCh;
if (!fReaderMgr.skipIfQuote(quoteCh))
return false;
// We have to get the current reader because we have to ignore closing
// quotes until we hit the same reader again.
const unsigned int curReader = fReaderMgr.getCurrentReaderNum();
// Loop until we get the attribute value. Note that we use a double
// loop here to avoid the setup/teardown overhead of the exception
// handler on every round.
XMLCh nextCh;
XMLCh secondCh = 0;
bool gotLeadingSurrogate = false;
bool escaped;
while (true)
{
try
{
while(true)
{
nextCh = fReaderMgr.getNextChar();
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
// Check for our ending quote. It has to be in the same entity
// as where we started. Quotes in nested entities are ignored.
if (nextCh == quoteCh)
{
if (curReader == fReaderMgr.getCurrentReaderNum())
return true;
// Watch for spillover into a previous entity
if (curReader > fReaderMgr.getCurrentReaderNum())
{
emitError(XMLErrs::PartialMarkupInEntity);
return false;
}
}
// Check for an entity ref . We ignore the empty flag in
// this one.
escaped = false;
if (nextCh == chAmpersand)
{
// If it was not returned directly, then jump back up
if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
{
gotLeadingSurrogate = false;
continue;
}
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Deal with surrogate pairs
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
{
emitError(XMLErrs::Expected2ndSurrogateChar);
}
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate) {
emitError(XMLErrs::Expected2ndSurrogateChar);
}
// Its got to at least be a valid XML character
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
);
emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
}
}
gotLeadingSurrogate = false;
}
// If it was escaped, then put in a 0xFFFF value. This will
// be used later during validation and normalization of the
// value to know that the following character was via an
// escape char.
if (escaped)
toFill.append(0xFFFF);
// Else add it to the buffer
toFill.append(nextCh);
if (secondCh)
toFill.append(secondCh);
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
}
}
catch(const EndOfEntityException&)
{
// Just eat it and continue.
gotLeadingSurrogate = false;
escaped = false;
}
}
return true;
}
// This method scans a CDATA section. It collects the character into one
// of the temp buffers and calls the document handler, if any, with the
// characters. It assumes that the <![CDATA string has been scanned before
// this call.
void SGXMLScanner::scanCDSection()
{
// This is the CDATA section opening sequence, minus the '<' character.
// We use this to watch for nested CDATA sections, which are illegal.
static const XMLCh CDataPrefix[] =
{
chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A
, chLatin_T, chLatin_A, chOpenSquare, chNull
};
static const XMLCh CDataClose[] =
{
chCloseSquare, chCloseAngle, chNull
};
// The next character should be the opening square bracket. If not
// issue an error, but then try to recover by skipping any whitespace
// and checking again.
if (!fReaderMgr.skippedChar(chOpenSquare))
{
emitError(XMLErrs::ExpectedOpenSquareBracket);
fReaderMgr.skipPastSpaces();
// If we still don't find it, then give up, else keep going
if (!fReaderMgr.skippedChar(chOpenSquare))
return;
}
// Get a buffer for this
XMLBufBid bbCData(&fBufMgr);
// We just scan forward until we hit the end of CDATA section sequence.
// CDATA is effectively a big escape mechanism so we don't treat markup
// characters specially here.
bool emittedError = false;
while (true)
{
const XMLCh nextCh = fReaderMgr.getNextChar();
// Watch for unexpected end of file
if (!nextCh)
{
emitError(XMLErrs::UnterminatedCDATASection);
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
}
if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
{
// This document is standalone; this ignorable CDATA whitespace is forbidden.
// XML 1.0, Section 2.9
// And see if the current element is a 'Children' style content model
const ElemStack::StackElem* topElem = fElemStack.topElement();
if (topElem->fThisElement->isExternal()) {
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();
if (charOpts == XMLElementDecl::SpacesOk) // Element Content
{
// Error - standalone should have a value of "no" as whitespace detected in an
// element type with element content whose element declaration was external
fValidator->emitError(XMLValid::NoWSForStandalone);
}
}
}
// If this is a close square bracket it could be our closing
// sequence.
if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
{
// make sure we were not expecting a trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
// call all active identity constraints
unsigned int count = fMatcherStack->getMatcherCount();
for (unsigned int i = 0; i < count; i++) {
fMatcherStack->getMatcherAt(i)->docCharacters(bbCData.getRawBuffer(), bbCData.getLen());
}
// If we have a doc handler, call it
if (fDocHandler)
{
fDocHandler->docCharacters
(
bbCData.getRawBuffer()
, bbCData.getLen()
, true
);
}
// And we are done
break;
}
// Make sure its a valid character. But if we've emitted an error
// already, don't bother with the overhead since we've already told
// them about it.
if (!emittedError)
{
// Deal with surrogate pairs
if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
emittedError = true;
}
}
gotLeadingSurrogate = false;
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
}
}
if (fValidate) {
// And see if the current element is a 'Children' style content model
const ElemStack::StackElem* topElem = fElemStack.topElement();
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();
if (charOpts != XMLElementDecl::AllCharData)
{
// They definitely cannot handle any type of char data
fValidator->emitError(XMLValid::NoCharDataInCM);
}
}
// Add it to the buffer
bbCData.append(nextCh);
}
}
void SGXMLScanner::scanCharData(XMLBuffer& toUse)
{
// We have to watch for the stupid ]]> sequence, which is illegal in
// character data. So this is a little state machine that handles that.
enum States
{
State_Waiting
, State_GotOne
, State_GotTwo
};
// Reset the buffer before we start
toUse.reset();
// Turn on the 'throw at end' flag of the reader manager
ThrowEOEJanitor jan(&fReaderMgr, true);
// In order to be more efficient we have to use kind of a deeply nested
// set of blocks here. The outer block puts on a try and catches end of
// entity exceptions. The inner loop is the per-character loop. If we
// put the try inside the inner loop, it would work but would require
// the exception handling code setup/teardown code to be invoked for
// each character.
XMLCh nextCh;
XMLCh secondCh = 0;
States curState = State_Waiting;
bool escaped = false;
bool gotLeadingSurrogate = false;
bool notDone = true;
while (notDone)
{
try
{
while (true)
{
// Eat through as many plain content characters as possible without
// needing special handling. Moving most content characters here,
// in this one call, rather than running the overall loop once
// per content character, is a speed optimization.
if (curState == State_Waiting && !gotLeadingSurrogate)
fReaderMgr.movePlainContentChars(toUse);
// Try to get another char from the source
// The code from here on down covers all contengencies,
if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
{
// If we were waiting for a trailing surrogate, its an error
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
notDone = false;
break;
}
// Watch for a reference. Note that the escapement mechanism
// is ignored in this content.
escaped = false;
if (nextCh == chAmpersand)
{
sendCharData(toUse);
// Turn off the throwing at the end of entity during this
ThrowEOEJanitor jan(&fReaderMgr, false);
if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
{
gotLeadingSurrogate = false;
continue;
}
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Deal with surrogate pairs
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// Make sure the returned char is a valid XML char
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
gotLeadingSurrogate = false;
}
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
// Keep the state machine up to date
if (!escaped)
{
if (nextCh == chCloseSquare)
{
if (curState == State_Waiting)
curState = State_GotOne;
else if (curState == State_GotOne)
curState = State_GotTwo;
}
else if (nextCh == chCloseAngle)
{
if (curState == State_GotTwo)
emitError(XMLErrs::BadSequenceInCharData);
curState = State_Waiting;
}
else
{
curState = State_Waiting;
}
}
else
{
curState = State_Waiting;
}
// Add this char to the buffer
toUse.append(nextCh);
if (secondCh)
toUse.append(secondCh);
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
}
}
catch(const EndOfEntityException& toCatch)
{
// Some entity ended, so we have to send any accumulated
// chars and send an end of entity event.
sendCharData(toUse);
gotLeadingSurrogate = false;
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
}
}
// Check the validity constraints as per XML 1.0 Section 2.9
if (fValidate && fStandalone)
{
// See if the text contains whitespace
// Get the raw data we need for the callback
const XMLCh* rawBuf = toUse.getRawBuffer();
const unsigned int len = toUse.getLen();
const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
if (isSpaces)
{
// And see if the current element is a 'Children' style content model
const ElemStack::StackElem* topElem = fElemStack.topElement();
if (topElem->fThisElement->isExternal()) {
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();
if (charOpts == XMLElementDecl::SpacesOk) // => Element Content
{
// Error - standalone should have a value of "no" as whitespace detected in an
// element type with element content whose element declaration was external
//
fValidator->emitError(XMLValid::NoWSForStandalone);
}
}
}
}
// Send any char data that we accumulated into the buffer
sendCharData(toUse);
}
// This method will scan a general/character entity ref. It will either
// expand a char ref and return it directly, or push a reader for a general
// entity.
//
// The return value indicates whether the char parameters hold the value
// or whether the value was pushed as a reader, or that it failed.
//
// The escaped flag tells the caller whether the returned parameter resulted
// from a character reference, which escapes the character in some cases. It
// only makes any difference if the return value indicates the value was
// returned directly.
SGXMLScanner::EntityExpRes
SGXMLScanner::scanEntityRef( const bool inAttVal
, XMLCh& firstCh
, XMLCh& secondCh
, bool& escaped)
{
// Assume no escape
secondCh = 0;
escaped = false;
// We have to insure that its all in one entity
const unsigned int curReader = fReaderMgr.getCurrentReaderNum();
// If the next char is a pound, then its a character reference and we
// need to expand it always.
if (fReaderMgr.skippedChar(chPound))
{
// Its a character reference, so scan it and get back the numeric
// value it represents.
if (!scanCharRef(firstCh, secondCh))
return EntityExp_Failed;
escaped = true;
if (curReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
return EntityExp_Returned;
}
// Expand it since its a normal entity ref
XMLBufBid bbName(&fBufMgr);
if (!fReaderMgr.getName(bbName.getBuffer()))
{
emitError(XMLErrs::ExpectedEntityRefName);
return EntityExp_Failed;
}
// Next char must be a semi-colon. But if its not, just emit
// an error and try to continue.
if (!fReaderMgr.skippedChar(chSemiColon))
emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
// Make sure we ended up on the same entity reader as the & char
if (curReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
// Look up the name in the general entity pool
// If it does not exist, then obviously an error
if (!fEntityTable->containsKey(bbName.getRawBuffer()))
{
// XML 1.0 Section 4.1
// Well-formedness Constraint for entity not found:
// In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
// or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
// or a parameter entity
if (fStandalone || fHasNoDTD)
emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
return EntityExp_Failed;
}
firstCh = fEntityTable->get(bbName.getRawBuffer());
escaped = true;
return EntityExp_Returned;
}
bool SGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace)
{
Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace);
if (!tempGrammar) {
tempGrammar = fSchemaGrammar;
}
if (!tempGrammar)
return false;
else {
fGrammar = tempGrammar;
fGrammarType = fGrammar->getGrammarType();
if (fGrammarType == Grammar::DTDGrammarType) {
ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
}
fValidator->setGrammar(fGrammar);
return true;
}
}
// check if we should skip or lax the validation of the element
// if skip - no validation
// if lax - validate only if the element if found
bool SGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
const XMLContentModel* const cm,
const unsigned int parentElemDepth)
{
bool skipThisOne = false;
bool laxThisOne = false;
unsigned int elementURI = element->getURI();
unsigned int currState = fElemState[parentElemDepth];
if (currState == XMLContentModel::gInvalidTrans) {
return laxThisOne;
}
SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool);
if (cv) {
unsigned int i = 0;
unsigned int leafCount = cv->getLeafCount();
for (; i < leafCount; i++) {
QName* fElemMap = cv->getLeafNameAt(i);
unsigned int uri = fElemMap->getURI();
unsigned int nextState;
bool anyEncountered = false;
ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
if (type == ContentSpecNode::Leaf) {
if (((uri == elementURI)
&& XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart()))
|| comparator.isEquivalentTo(element, fElemMap)) {
nextState = cm->getNextState(currState, i);
if (nextState != XMLContentModel::gInvalidTrans) {
fElemState[parentElemDepth] = nextState;
break;
}
}
} else if ((type & 0x0f) == ContentSpecNode::Any) {
anyEncountered = true;
}
else if ((type & 0x0f) == ContentSpecNode::Any_Other) {
if (uri != elementURI) {
anyEncountered = true;
}
}
else if ((type & 0x0f) == ContentSpecNode::Any_NS) {
if (uri == elementURI) {
anyEncountered = true;
}
}
if (anyEncountered) {
nextState = cm->getNextState(currState, i);
if (nextState != XMLContentModel::gInvalidTrans) {
fElemState[parentElemDepth] = nextState;
if (type == ContentSpecNode::Any_Skip ||
type == ContentSpecNode::Any_NS_Skip ||
type == ContentSpecNode::Any_Other_Skip) {
skipThisOne = true;
}
else if (type == ContentSpecNode::Any_Lax ||
type == ContentSpecNode::Any_NS_Lax ||
type == ContentSpecNode::Any_Other_Lax) {
laxThisOne = true;
}
break;
}
}
} // for
if (i == leafCount) { // no match
fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
return laxThisOne;
}
} // if
if (skipThisOne) {
fValidate = false;
fElemStack.setValidationFlag(fValidate);
}
return laxThisOne;
}
// check if there is an AnyAttribute, and if so, see if we should lax or skip
// if skip - no validation
// if lax - validate only if the attribute if found
bool SGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne)
{
XMLAttDef::AttTypes wildCardType = attWildCard->getType();
bool anyEncountered = false;
skipThisOne = false;
laxThisOne = false;
if (wildCardType == XMLAttDef::Any_Any)
anyEncountered = true;
else if (wildCardType == XMLAttDef::Any_Other) {
if (attWildCard->getAttName()->getURI() != uriId)
anyEncountered = true;
}
else if (wildCardType == XMLAttDef::Any_List) {
ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList();
unsigned int listSize = (nameURIList) ? nameURIList->size() : 0;
if (listSize) {
for (unsigned int i=0; i < listSize; i++) {
if (nameURIList->elementAt(i) == uriId)
anyEncountered = true;
}
}
}
if (anyEncountered) {
XMLAttDef::DefAttTypes defType = attWildCard->getDefaultType();
if (defType == XMLAttDef::ProcessContents_Skip) {
// attribute should just be bypassed,
skipThisOne = true;
}
else if (defType == XMLAttDef::ProcessContents_Lax) {
laxThisOne = true;
}
}
return anyEncountered;
}
void SGXMLScanner::normalizeURI(const XMLCh* const systemURI,
XMLBuffer& normalizedURI)
{
const XMLCh* pszSrc = systemURI;
normalizedURI.reset();
while (*pszSrc) {
if ((*(pszSrc) == chPercent)
&& (*(pszSrc+1) == chDigit_2)
&& (*(pszSrc+2) == chDigit_0))
{
pszSrc += 3;
normalizedURI.append(chSpace);
}
else if (*pszSrc == 0xFFFF) { //escaped character
pszSrc++;
}
else {
normalizedURI.append(*pszSrc);
pszSrc++;
}
}
}
XERCES_CPP_NAMESPACE_END