Newer
Older
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
fDoSchema = true;
// Reset the validators
fSchemaValidator->reset();
fSchemaValidator->setErrorReporter(fErrorReporter);
fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
fSchemaValidator->setGrammarResolver(fGrammarResolver);
if (fValidatorFromUser)
fValidator->reset();
// Handle the creation of the XML reader object for this input source.
// This will provide us with transcoding and basic lexing services.
XMLReader* newReader = fReaderMgr.createReader
(
src
, true
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, XMLReader::Source_External
, fCalculateSrcOfs
);
if (!newReader) {
if (src.getIssueFatalErrorIfNotFound())
ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId());
else
ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId());
}
// Push this read onto the reader manager
fReaderMgr.pushReader(newReader, 0);
// and reset security-related things if necessary:
if(fSecurityManager != 0)
{
fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
fEntityExpansionCount = 0;
}
fElemCount = 0;
if(fUIntPoolRowTotal >= 32)
{ // 8 KB tied up with validating attributes...
fAttDefRegistry->removeAll();
fUndeclaredAttrRegistryNS->removeAll();
recreateUIntPool();
}
else
{
// note that this will implicitly reset the values of the hashtables,
// though their buckets will still be tied up
resetUIntPool();
}
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
}
// This method is called between markup in content. It scans for character
// data that is sent to the document handler. It watches for any markup
// characters that would indicate that the character data has ended. It also
// handles expansion of general and character entities.
//
// sendData() is a local static helper for this method which handles some
// code that must be done in three different places here.
void SGXMLScanner::sendCharData(XMLBuffer& toSend)
{
// If no data in the buffer, then nothing to do
if (toSend.isEmpty())
return;
// We do different things according to whether we are validating or
// not. If not, its always just characters; else, it depends on the
// current element's content model.
if (fValidate)
{
// Get the raw data we need for the callback
const XMLCh* const rawBuf = toSend.getRawBuffer();
const unsigned int len = toSend.getLen();
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
// And see if the current element is a 'Children' style content model
ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
if(currType)
{
SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
if(modelType == SchemaElementDecl::Children)
charOpts = XMLElementDecl::SpacesOk;
else if(modelType == SchemaElementDecl::Empty)
charOpts = XMLElementDecl::NoCharData;
}
// should not be necessary once PSVI method on element decls
// are removed
const ElemStack::StackElem *topElem = fElemStack.topElement();
if (charOpts == XMLElementDecl::NoCharData)
{
// They definitely cannot handle any type of char data
fValidator->emitError(XMLValid::NoCharDataInCM);
Gareth Reakes
committed
((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
// REVISIT:
// PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
}
{
// Its all spaces. So, if they can take spaces, then send it
// as ignorable whitespace. If they can handle any char data
// send it as characters.
if (charOpts == XMLElementDecl::SpacesOk) {
if (fDocHandler)
fDocHandler->ignorableWhitespace(rawBuf, len, false);
}
else if (charOpts == XMLElementDecl::AllCharData)
{
// The normalized data can only be as large as the
// original size, so this will avoid allocating way
// too much or too little memory.
Khaled Noaman
committed
XMLBuffer toFill(len+1, fMemoryManager);
if (fNormalizeData)
{
DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
{
// normalize the character according to schema whitespace facet
XMLBufBid bbtemp(&fBufMgr);
XMLBuffer& tempBuf = bbtemp.getBuffer();
((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(), tempBuf);
toFill.set(tempBuf.getRawBuffer());
}
}
// tell the schema validation about the character data for checkContent later
((SchemaValidator*)fValidator)->setDatatypeBuffer(toFill.getRawBuffer());
if (fMatcherStack->getMatcherCount())
fContent.append(toFill.getRawBuffer(), toFill.getLen());
if (fDocHandler)
fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false);
}
}
else
{
// If they can take any char data, then send it. Otherwise, they
// can only handle whitespace and can't handle this stuff so
// issue an error.
if (charOpts == XMLElementDecl::AllCharData)
{
// The normalized data can only be as large as the
// original size, so this will avoid allocating way
// too much or too little memory.
Khaled Noaman
committed
XMLBuffer toFill(len+1, fMemoryManager);
if (fNormalizeData)
{
DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
{
// normalize the character according to schema whitespace facet
XMLBufBid bbtemp(&fBufMgr);
XMLBuffer& tempBuf = bbtemp.getBuffer();
((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(), tempBuf);
toFill.set(tempBuf.getRawBuffer());
}
}
// tell the schema validation about the character data for checkContent later
((SchemaValidator*)fValidator)->setDatatypeBuffer(toFill.getRawBuffer());
if (fMatcherStack->getMatcherCount())
fContent.append(toFill.getRawBuffer(), toFill.getLen());
if (fDocHandler)
fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false);
}
else
{
fValidator->emitError(XMLValid::NoCharDataInCM);
Gareth Reakes
committed
((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
// REVISIT:
// PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
}
}
}
}
else
{
// call all active identity constraints
if (fMatcherStack->getMatcherCount())
fContent.append(toSend.getRawBuffer(), toSend.getLen());
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
// Always assume its just char data if not validating
if (fDocHandler)
fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
}
// Reset buffer
toSend.reset();
}
// This method is called with a key/value string pair that represents an
// xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
// current top of the element stack based on this data. We know that when
// we get here, that it is one of these forms, so we don't bother confirming
// it.
//
// But we have to ensure
// 1. xxx is not xmlns
// 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
// 3. yyy is not XMLUni::fgXMLNSURIName
// 4. if xxx is not null, then yyy cannot be an empty string.
void SGXMLScanner::updateNSMap(const XMLCh* const attrName
, const XMLCh* const attrValue)
{
// We need a buffer to normalize the attribute value into
XMLBufBid bbNormal(&fBufMgr);
XMLBuffer& normalBuf = bbNormal.getBuffer();
// Normalize the value into the passed buffer. In this case, we don't
// care about the return value. An error was issued for the error, which
// is all we care about here.
normalizeAttRawValue(attrName, attrValue, normalBuf);
XMLCh* namespaceURI = normalBuf.getRawBuffer();
// We either have the default prefix (""), or we point it into the attr
// name parameter. Note that the xmlns is not the prefix we care about
// here. To us, the 'prefix' is really the local part of the attrName
// parameter.
//
// Check 1. xxx is not xmlns
// 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
// 3. yyy is not XMLUni::fgXMLNSURIName
// 4. if xxx is not null, then yyy cannot be an empty string.
const XMLCh* prefPtr = XMLUni::fgZeroLenString;
const int colonOfs = XMLString::indexOf(attrName, chColon);
if (colonOfs != -1) {
prefPtr = &attrName[colonOfs + 1];
if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString))
emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) {
if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
}
Neil Graham
committed
if (!namespaceURI)
emitError(XMLErrs::NoEmptyStrNamespace, attrName);
else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
emitError(XMLErrs::NoEmptyStrNamespace, attrName);
}
if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
emitError(XMLErrs::NoUseOfxmlnsURI);
else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) {
if (!XMLString::equals(prefPtr, XMLUni::fgXMLString))
emitError(XMLErrs::XMLURINotMatchXMLPrefix);
}
// Ok, we have to get the unique id for the attribute value, which is the
// URI that this value should be mapped to. The validator has the
// namespace string pool, so we ask him to find or add this new one. Then
// we ask the element stack to add this prefix to URI Id mapping.
fElemStack.addPrefix
(
prefPtr
, fURIStringPool->addOrFind(namespaceURI)
);
}
void SGXMLScanner::scanRawAttrListforNameSpaces(const RefVectorOf<KVStringPair>* theRawAttrList, int attCount)
{
// Make an initial pass through the list and find any xmlns attributes or
// schema attributes.
// When we find one, send it off to be used to update the element stack's
// namespace mappings.
int index = 0;
for (index = 0; index < attCount; index++)
{
// each attribute has the prefix:suffix="value"
const KVStringPair* curPair = fRawAttrList->elementAt(index);
const XMLCh* rawPtr = curPair->getKey();
// If either the key begins with "xmlns:" or its just plain
// "xmlns", then use it to update the map.
if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
|| XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
{
const XMLCh* valuePtr = curPair->getValue();
updateNSMap(rawPtr, valuePtr);
// if the schema URI is seen in the the valuePtr, set the boolean seeXsi
if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) {
fSeeXsi = true;
}
}
}
// walk through the list again to deal with "xsi:...."
if (fSeeXsi)
{
// Schema Xsi Type yyyy (e.g. xsi:type="yyyyy")
XMLBufBid bbXsi(&fBufMgr);
XMLBuffer& fXsiType = bbXsi.getBuffer();
QName attName(fMemoryManager);
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
for (index = 0; index < attCount; index++)
{
// each attribute has the prefix:suffix="value"
const KVStringPair* curPair = fRawAttrList->elementAt(index);
const XMLCh* rawPtr = curPair->getKey();
attName.setName(rawPtr, fEmptyNamespaceId);
const XMLCh* prefPtr = attName.getPrefix();
// if schema URI has been seen, scan for the schema location and uri
// and resolve the schema grammar; or scan for schema type
if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {
const XMLCh* valuePtr = curPair->getValue();
const XMLCh* suffPtr = attName.getLocalPart();
if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCACTION))
parseSchemaLocation(valuePtr);
else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCACTION))
resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString);
if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE)) {
fXsiType.set(valuePtr);
}
else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL)
&& fValidator && fValidator->handlesSchema()
&& XMLString::equals(valuePtr, SchemaSymbols::fgATTVAL_TRUE)) {
((SchemaValidator*)fValidator)->setNillable(true);
}
}
}
if (fValidator && fValidator->handlesSchema()) {
if (!fXsiType.isEmpty()) {
int colonPos = -1;
unsigned int uriId = resolveQName (
fXsiType.getRawBuffer()
, fPrefixBuf
, ElemStack::Mode_Element
, colonPos
);
((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId);
}
}
}
}
void SGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr)
{
Gareth Reakes
committed
BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr);
unsigned int size = schemaLocation->size();
if (size % 2 != 0 ) {
emitError(XMLErrs::BadSchemaLocation);
} else {
for(unsigned int i=0; i<size; i=i+2) {
resolveSchemaGrammar(schemaLocation->elementAt(i+1), schemaLocation->elementAt(i));
}
}
delete schemaLocation;
}
void SGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri) {
Grammar* grammar = fGrammarResolver->getGrammar(uri);
if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType) {
XSDDOMParser parser(0, fMemoryManager, 0);
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
parser.setValidationScheme(XercesDOMParser::Val_Never);
parser.setDoNamespaces(true);
parser.setUserEntityHandler(fEntityHandler);
parser.setUserErrorReporter(fErrorReporter);
// Create a buffer for expanding the system id
XMLBufBid bbSys(&fBufMgr);
XMLBuffer& expSysId = bbSys.getBuffer();
XMLBuffer& normalizedSysId = bbSys.getBuffer();
normalizeURI(loc, normalizedSysId);
// Allow the entity handler to expand the system id if they choose
// to do so.
InputSource* srcToFill = 0;
const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
if (fEntityHandler)
{
if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
expSysId.set(normalizedURI);
ReaderMgr::LastExtEntityInfo lastInfo;
fReaderMgr.getLastExtEntityInfo(lastInfo);
XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId);
srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
}
else
{
expSysId.set(normalizedURI);
}
// If they didn't create a source via the entity handler, then we
// have to create one on our own.
if (!srcToFill)
{
ReaderMgr::LastExtEntityInfo lastInfo;
fReaderMgr.getLastExtEntityInfo(lastInfo);
try
{
XMLURL urlTmp(lastInfo.systemId, expSysId.getRawBuffer());
if (urlTmp.isRelative())
{
ThrowXML
(
MalformedURLException
, XMLExcepts::URL_NoProtocolPresent
);
}
else {
if (fStandardUriConformant && urlTmp.hasInvalidChar())
ThrowXML(MalformedURLException, XMLExcepts::URL_MalformedURL);
Khaled Noaman
committed
srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
}
catch(const MalformedURLException& e)
// Its not a URL, so lets assume its a local file name if non-standard uri is allowed
if (!fStandardUriConformant)
srcToFill = new (fMemoryManager) LocalFileInputSource
(
lastInfo.systemId
, expSysId.getRawBuffer()
Khaled Noaman
committed
, fMemoryManager
);
else
throw e;
}
}
// Put a janitor on the input source
Janitor<InputSource> janSrc(srcToFill);
// Should just issue warning if the schema is not found
bool flag = srcToFill->getIssueFatalErrorIfNotFound();
srcToFill->setIssueFatalErrorIfNotFound(false);
parser.parse(*srcToFill);
// Reset the InputSource
srcToFill->setIssueFatalErrorIfNotFound(flag);
if (parser.getSawFatal() && fExitOnFirstFatal)
emitError(XMLErrs::SchemaScanFatalError);
DOMDocument* document = parser.getDocument(); //Our Grammar
if (document != 0) {
DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
if (root != 0)
{
const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
if (!XMLString::equals(newUri, uri)) {
if (fValidate || fValScheme == Val_Auto) {
fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
Gareth Reakes
committed
}
grammar = fGrammarResolver->getGrammar(newUri);
}
if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType) {
// Since we have seen a grammar, set our validation flag
// at this point if the validation scheme is auto
if (fValScheme == Val_Auto && !fValidate) {
fValidate = true;
fElemStack.setValidationFlag(fValidate);
}
grammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) grammar->getGrammarDescription();
gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
gramDesc->setLocationHints(srcToFill->getSystemId());
TraverseSchema traverseSchema
(
root
, fURIStringPool
, (SchemaGrammar*) grammar
, fGrammarResolver
, this
, srcToFill->getSystemId()
, fEntityHandler
, fErrorReporter
, fMemoryManager
);
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
if (fGrammarType == Grammar::DTDGrammarType) {
fGrammar = grammar;
fGrammarType = Grammar::SchemaGrammarType;
fValidator->setGrammar(fGrammar);
}
if (fValidate) {
// validate the Schema scan so far
fValidator->preContentValidation(false);
}
}
}
}
}
else {
// Since we have seen a grammar, set our validation flag
// at this point if the validation scheme is auto
if (fValScheme == Val_Auto && !fValidate) {
fValidate = true;
fElemStack.setValidationFlag(fValidate);
}
// we have seen a schema, so set up the fValidator as fSchemaValidator
if (fGrammarType == Grammar::DTDGrammarType) {
fGrammar = grammar;
fGrammarType = Grammar::SchemaGrammarType;
fValidator->setGrammar(fGrammar);
}
}
}
InputSource* SGXMLScanner::resolveSystemId(const XMLCh* const sysId)
{
// Create a buffer for expanding the system id
XMLBufBid bbSys(&fBufMgr);
XMLBuffer& expSysId = bbSys.getBuffer();
// Allow the entity handler to expand the system id if they choose
// to do so.
InputSource* srcToFill = 0;
if (fEntityHandler)
{
if (!fEntityHandler->expandSystemId(sysId, expSysId))
expSysId.set(sysId);
ReaderMgr::LastExtEntityInfo lastInfo;
fReaderMgr.getLastExtEntityInfo(lastInfo);
XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
expSysId.getRawBuffer(), 0, XMLUni::fgZeroLenString, lastInfo.systemId);
srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
}
else
{
expSysId.set(sysId);
}
// If they didn't create a source via the entity handler, then we
// have to create one on our own.
if (!srcToFill)
{
ReaderMgr::LastExtEntityInfo lastInfo;
fReaderMgr.getLastExtEntityInfo(lastInfo);
try
{
XMLURL urlTmp(lastInfo.systemId, expSysId.getRawBuffer());
if (urlTmp.isRelative())
{
ThrowXML
(
MalformedURLException
, XMLExcepts::URL_NoProtocolPresent
);
}
else {
if (fStandardUriConformant && urlTmp.hasInvalidChar())
ThrowXML(MalformedURLException, XMLExcepts::URL_MalformedURL);
Khaled Noaman
committed
srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
}
catch(const MalformedURLException& e)
// Its not a URL, so lets assume its a local file name if non-standard uri is allowed
if (!fStandardUriConformant)
srcToFill = new (fMemoryManager) LocalFileInputSource
(
lastInfo.systemId
, expSysId.getRawBuffer()
Khaled Noaman
committed
, fMemoryManager
);
else
throw e;
}
}
return srcToFill;
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Private grammar preparsing methods
// ---------------------------------------------------------------------------
Grammar* SGXMLScanner::loadXMLSchemaGrammar(const InputSource& src,
const bool toCache)
{
// Reset the validators
fSchemaValidator->reset();
fSchemaValidator->setErrorReporter(fErrorReporter);
fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
fSchemaValidator->setGrammarResolver(fGrammarResolver);
if (fValidatorFromUser)
fValidator->reset();
XSDDOMParser parser(0, fMemoryManager, 0);
parser.setValidationScheme(XercesDOMParser::Val_Never);
parser.setDoNamespaces(true);
parser.setUserEntityHandler(fEntityHandler);
parser.setUserErrorReporter(fErrorReporter);
// Should just issue warning if the schema is not found
bool flag = src.getIssueFatalErrorIfNotFound();
((InputSource&) src).setIssueFatalErrorIfNotFound(false);
parser.parse(src);
// Reset the InputSource
((InputSource&) src).setIssueFatalErrorIfNotFound(flag);
if (parser.getSawFatal() && fExitOnFirstFatal)
emitError(XMLErrs::SchemaScanFatalError);
DOMDocument* document = parser.getDocument(); //Our Grammar
if (document != 0) {
DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
if (root != 0)
{
SchemaGrammar* grammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) grammar->getGrammarDescription();
gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
gramDesc->setLocationHints(src.getSystemId());
TraverseSchema traverseSchema
(
root
, fURIStringPool
, (SchemaGrammar*) grammar
, fGrammarResolver
, this
, src.getSystemId()
, fEntityHandler
, fErrorReporter
, fMemoryManager
);
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
if (fValidate) {
// validate the Schema scan so far
fValidator->setGrammar(grammar);
fValidator->preContentValidation(false, true);
}
if (toCache) {
fGrammarResolver->cacheGrammars();
}
return grammar;
}
}
return 0;
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Private parsing methods
// ---------------------------------------------------------------------------
// This method is called to do a raw scan of an attribute value. It does not
// do normalization (since we don't know their types yet.) It just scans the
// value and does entity expansion.
//
// End of entity's must be dealt with here. During DTD scan, they can come
// from external entities. During content, they can come from any entity.
// We just eat the end of entity and continue with our scan until we come
// to the closing quote. If an unterminated value causes us to go through
// subsequent entities, that will cause errors back in the calling code,
// but there's little we can do about it here.
bool SGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
{
// Reset the target buffer
toFill.reset();
// Get the next char which must be a single or double quote
XMLCh quoteCh;
if (!fReaderMgr.skipIfQuote(quoteCh))
return false;
// We have to get the current reader because we have to ignore closing
// quotes until we hit the same reader again.
const unsigned int curReader = fReaderMgr.getCurrentReaderNum();
// Loop until we get the attribute value. Note that we use a double
// loop here to avoid the setup/teardown overhead of the exception
// handler on every round.
XMLCh nextCh;
XMLCh secondCh = 0;
bool gotLeadingSurrogate = false;
bool escaped;
while (true)
{
try
{
while(true)
{
nextCh = fReaderMgr.getNextChar();
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
// Check for our ending quote. It has to be in the same entity
// as where we started. Quotes in nested entities are ignored.
if (nextCh == quoteCh)
{
if (curReader == fReaderMgr.getCurrentReaderNum())
return true;
// Watch for spillover into a previous entity
if (curReader > fReaderMgr.getCurrentReaderNum())
{
emitError(XMLErrs::PartialMarkupInEntity);
return false;
}
}
// Check for an entity ref . We ignore the empty flag in
// this one.
escaped = false;
if (nextCh == chAmpersand)
{
// If it was not returned directly, then jump back up
if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
{
gotLeadingSurrogate = false;
continue;
}
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Deal with surrogate pairs
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
{
emitError(XMLErrs::Expected2ndSurrogateChar);
}
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate) {
emitError(XMLErrs::Expected2ndSurrogateChar);
}
// Its got to at least be a valid XML character
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
);
emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
}
}
gotLeadingSurrogate = false;
}
// If it was escaped, then put in a 0xFFFF value. This will
// be used later during validation and normalization of the
// value to know that the following character was via an
// escape char.
if (escaped)
toFill.append(0xFFFF);
// Else add it to the buffer
toFill.append(nextCh);
if (secondCh)
toFill.append(secondCh);
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
}
}
catch(const EndOfEntityException&)
{
// Just eat it and continue.
gotLeadingSurrogate = false;
escaped = false;
}
}
return true;
}
// This method scans a CDATA section. It collects the character into one
// of the temp buffers and calls the document handler, if any, with the
// characters. It assumes that the <![CDATA string has been scanned before
// this call.
void SGXMLScanner::scanCDSection()
{
static const XMLCh CDataClose[] =
{
chCloseSquare, chCloseAngle, chNull
};
// The next character should be the opening square bracket. If not
// issue an error, but then try to recover by skipping any whitespace
// and checking again.
if (!fReaderMgr.skippedChar(chOpenSquare))
{
emitError(XMLErrs::ExpectedOpenSquareBracket);
fReaderMgr.skipPastSpaces();
// If we still don't find it, then give up, else keep going
if (!fReaderMgr.skippedChar(chOpenSquare))
return;
}
// Get a buffer for this
XMLBufBid bbCData(&fBufMgr);
// We just scan forward until we hit the end of CDATA section sequence.
// CDATA is effectively a big escape mechanism so we don't treat markup
// characters specially here.
bool emittedError = false;
Tinny Ng
committed
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
// And see if the current element is a 'Children' style content model
ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
if(currType)
{
SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
if(modelType == SchemaElementDecl::Children)
charOpts = XMLElementDecl::SpacesOk;
else if(modelType == SchemaElementDecl::Empty)
charOpts = XMLElementDecl::NoCharData;
}
// should not be necessary when PSVI on element decl removed
Tinny Ng
committed
const ElemStack::StackElem* topElem = fElemStack.topElement();
while (true)
{
const XMLCh nextCh = fReaderMgr.getNextChar();
// Watch for unexpected end of file
if (!nextCh)
{
emitError(XMLErrs::UnterminatedCDATASection);
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
}
if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
{
// This document is standalone; this ignorable CDATA whitespace is forbidden.
// XML 1.0, Section 2.9
// And see if the current element is a 'Children' style content model
if (topElem->fThisElement->isExternal()) {
if (charOpts == XMLElementDecl::SpacesOk) // Element Content
{
// Error - standalone should have a value of "no" as whitespace detected in an
// element type with element content whose element declaration was external
fValidator->emitError(XMLValid::NoWSForStandalone);
Gareth Reakes
committed
((SchemaElementDecl *)(topElem->fThisElement))->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
// REVISIT:
// PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
}
}
}
}
// If this is a close square bracket it could be our closing
// sequence.
if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
{
// make sure we were not expecting a trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
Tinny Ng
committed
if (fValidate) {
if (fNormalizeData)
{
DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
{
// normalize the character according to schema whitespace facet
XMLBufBid bbtemp(&fBufMgr);
XMLBuffer& tempBuf = bbtemp.getBuffer();
((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, bbCData.getRawBuffer(), tempBuf);
bbCData.set(tempBuf.getRawBuffer());
}
Tinny Ng
committed
}
// tell the schema validation about the character data for checkContent later
((SchemaValidator*)fValidator)->setDatatypeBuffer(bbCData.getRawBuffer());
Tinny Ng
committed
if (charOpts != XMLElementDecl::AllCharData)
{
// They definitely cannot handle any type of char data
fValidator->emitError(XMLValid::NoCharDataInCM);
((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
// REVISIT:
// PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
}
Tinny Ng
committed
}
}
if (fMatcherStack->getMatcherCount())
fContent.append(bbCData.getRawBuffer(), bbCData.getLen());
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
// If we have a doc handler, call it
if (fDocHandler)
{
fDocHandler->docCharacters
(
bbCData.getRawBuffer()
, bbCData.getLen()
, true
);
}
// And we are done
break;
}
// Make sure its a valid character. But if we've emitted an error
// already, don't bother with the overhead since we've already told
// them about it.
if (!emittedError)
{
// Deal with surrogate pairs
if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else