Newer
Older
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
//
int curTop = 0;
for (int index = VersionString; index < StandaloneString; index++)
{
if (flags[index] != -1)
{
if (flags[index] != curTop + 1)
{
emitError(XMLErrs::DeclStringsInWrongOrder);
break;
}
curTop = flags[index];
}
}
//
// If its an XML decl, the version must be present.
// If its a Text decl, then encoding must be present AND standalone must not be present.
//
if ((type == Decl_XML) && (flags[VersionString] == -1))
emitError(XMLErrs::XMLVersionRequired);
else if (type == Decl_Text) {
if (flags[StandaloneString] != -1)
emitError(XMLErrs::StandaloneNotLegal);
if (flags[EncodingString] == -1)
emitError(XMLErrs::EncodingRequired);
}
if (!fReaderMgr.skippedChar(chQuestion))
{
emitError(XMLErrs::UnterminatedXMLDecl);
fReaderMgr.skipPastChar(chCloseAngle);
}
else if (!fReaderMgr.skippedChar(chCloseAngle))
{
emitError(XMLErrs::UnterminatedXMLDecl);
fReaderMgr.skipPastChar(chCloseAngle);
}
//
// If we have a document handler then call the XML Decl callback.
//
// !NOTE! Do this before we possibly update the reader with the
// actual encoding string. Otherwise, we will pass the wrong thing
// for the last parameter!
//
if (fDocHandler)
{
fDocHandler->XMLDecl
(
bbVersion.getRawBuffer()
, bbEncoding.getRawBuffer()
, bbStand.getRawBuffer()
, fReaderMgr.getCurrentEncodingStr()
);
}
//
// Ok, we've now seen the real encoding string, if there was one, so
// lets call back on the current reader and tell it what the real
// encoding string was. If it fails, that's because it represents some
// sort of contradiction with the autosensed format, and it keeps the
// original encoding.
//
// NOTE: This can fail for a number of reasons, such as a bogus encoding
// name or because its in flagrant contradiction of the auto-sensed
// format.
//
if (flags[EncodingString] != -1)
{
if (!fReaderMgr.getCurrentReader()->setEncoding(bbEncoding.getRawBuffer()))
emitError(XMLErrs::ContradictoryEncoding, bbEncoding.getRawBuffer());
}
}
//
//  Looks up the text stored in the URI string pool for the given id.
//  Returns XMLUni::fgZeroLenString when the pool has no entry for the id
//  or when the stored value is null.
//
const XMLCh* XMLScanner::getURIText(const unsigned int uriId) const
{
    // Unknown id: nothing to look up
    if (!fURIStringPool->exists(uriId))
        return XMLUni::fgZeroLenString;

    // Map the id back to its text, substituting the empty string for null
    const XMLCh* const uriText = fURIStringPool->getValueForId(uriId);
    if (uriText)
        return uriText;

    return XMLUni::fgZeroLenString;
}
bool XMLScanner::getURIText( const unsigned int uriId
, XMLBuffer& uriBufToFill) const
{
if (fURIStringPool->exists(uriId)) {
// Look up the URI in the string pool and return its id
const XMLCh* value = fURIStringPool->getValueForId(uriId);
if (!value)
return false;
uriBufToFill.set(value);
return true;
}
else
return false;
}
//
//  Splits a qualified name at its first colon into prefix and local name,
//  storing them in prefixBuf and nameBuf, and returns the id of the URI the
//  prefix maps to.
//
//  - No colon: the whole name goes to nameBuf, prefixBuf stays empty, and
//    the empty prefix is mapped through the element stack.
//  - Prefix equal to XMLUni::fgXMLNSString: returns fXMLNSNamespaceId.
//  - Prefix equal to XMLUni::fgXMLString: returns fXMLNamespaceId.
//  - Any other prefix: mapped through the element stack using 'mode'; an
//    UnknownPrefix error is emitted if the stack reports it as unknown.
//
unsigned int
XMLScanner::resolveQName(   const   XMLCh* const        qName
                            ,       XMLBuffer&          nameBuf
                            ,       XMLBuffer&          prefixBuf
                            , const ElemStack::MapModes mode)
{
    // Both outputs start out empty in case one of them receives nothing
    nameBuf.reset();
    prefixBuf.reset();

    const int sepIndex = XMLString::indexOf(qName, chColon);

    if (sepIndex == -1)
    {
        //
        //  Unprefixed name: everything is the local part. The (empty)
        //  prefix buffer is mapped, which selects whatever the element
        //  stack associates with the empty prefix.
        //
        nameBuf.append(qName);

        bool prefixUnknown;
        const unsigned int defaultId = fElemStack.mapPrefixToURI
        (
            prefixBuf.getRawBuffer()
            , mode
            , prefixUnknown
        );

        #if defined(XERCES_DEBUG)
        if (prefixUnknown)
        {
            // <TBD> This one should never be unknown
        }
        #endif

        return defaultId;
    }

    // Chars before the colon are the prefix, chars after it the local name
    prefixBuf.append(qName, sepIndex);
    nameBuf.append(&qName[sepIndex + 1]);

    const XMLCh* const prefixText = prefixBuf.getRawBuffer();

    //
    //  The two reserved prefixes are never looked up on the element stack;
    //  they always resolve to their dedicated namespace ids.
    //
    if (!XMLString::compareString(prefixText, XMLUni::fgXMLNSString))
        return fXMLNSNamespaceId;

    if (!XMLString::compareString(prefixText, XMLUni::fgXMLString))
        return fXMLNamespaceId;

    // Ordinary prefix: ask the element stack and complain if it is unbound
    bool prefixUnknown;
    const unsigned int mappedId = fElemStack.mapPrefixToURI(prefixText, mode, prefixUnknown);
    if (prefixUnknown)
        emitError(XMLErrs::UnknownPrefix, prefixText);

    return mappedId;
}
bool XMLScanner::checkXMLDecl(bool startWithAngle) {
//
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
//
// [3] S ::= (#x20 | #x9 | #xD | #xA)+
//
if (startWithAngle) {
if (fReaderMgr.peekString(XMLUni::fgXMLDeclString)) {
if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpace)
|| fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTab)
|| fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLF)
|| fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCR))
{
return true;
}
else if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpaceU)
|| fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTabU)
|| fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLFU)
|| fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCRU))
{
//
// Just in case, check for upper case. If found, issue
// an error, but keep going.
//
emitError(XMLErrs::XMLDeclMustBeLowerCase);
return true;
}
}
}
else {
if (fReaderMgr.peekString(XMLUni::fgXMLString)) {
if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpace)
|| fReaderMgr.skippedString(XMLUni::fgXMLStringHTab)
|| fReaderMgr.skippedString(XMLUni::fgXMLStringLF)
|| fReaderMgr.skippedString(XMLUni::fgXMLStringCR))
{
return true;
}
else if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpaceU)
|| fReaderMgr.skippedString(XMLUni::fgXMLStringHTabU)
|| fReaderMgr.skippedString(XMLUni::fgXMLStringLFU)
|| fReaderMgr.skippedString(XMLUni::fgXMLStringCRU))
{
//
// Just in case, check for upper case. If found, issue
// an error, but keep going.
//
emitError(XMLErrs::XMLDeclMustBeLowerCase);
return true;
}
}
}
return false;
}
// ---------------------------------------------------------------------------
// XMLScanner: Helper methods
// ---------------------------------------------------------------------------
void XMLScanner::resizeElemState() {
unsigned int newSize = fElemStateSize * 2;
unsigned int* newElemState = new unsigned int[newSize];
// Copy the existing values
unsigned int index = 0;
for (; index < fElemStateSize; index++)
newElemState[index] = fElemState[index];
for (; index < newSize; index++)
newElemState[index] = 0;
// Delete the old array and udpate our members
delete [] fElemState;
fElemState = newElemState;
fElemStateSize = newSize;
}
// ---------------------------------------------------------------------------
// XMLScanner: IC activation methods
// ---------------------------------------------------------------------------
//
//  Activates the selector of the given identity constraint: a matcher is
//  created from the selector, added to the matcher stack, and told to start
//  a document fragment. Does nothing when the constraint has no selector.
//
void XMLScanner::activateSelectorFor(IdentityConstraint* const ic)
{
    IC_Selector* const icSelector = ic->getSelector();

    if (icSelector)
    {
        XPathMatcher* const selMatcher = icSelector->createMatcher(fFieldActivator);

        fMatcherStack->addMatcher(selMatcher);
        selMatcher->startDocumentFragment();
    }
}
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
// ---------------------------------------------------------------------------
// XMLScanner: Grammar preparsing
// ---------------------------------------------------------------------------
//
//  Loads a grammar identified by a system id.
//
//  The installed entity handler, if any, gets the first chance to supply
//  the input source. If it supplies none, the system id is parsed as a URL:
//  a relative URL, or one that fails to parse (MalformedURLException), is
//  treated as a local file path, anything else is opened as a URL. The
//  resulting source is handed to the InputSource overload and destroyed
//  when this method exits.
//
Grammar* XMLScanner::loadGrammar(const   XMLCh* const   systemId
                                 , const short          grammarType
                                 , const bool           toCache)
{
    InputSource* grammarSrc = 0;

    // Let the entity handler resolve the system id if there is one
    if (fEntityHandler)
        grammarSrc = fEntityHandler->resolveEntity(XMLUni::fgZeroLenString, systemId);

    if (!grammarSrc)
    {
        //
        //  Try it as a URL first and fall back to a file. Exceptions other
        //  than MalformedURLException simply propagate to the caller.
        //
        try
        {
            XMLURL parsedId(systemId);

            if (parsedId.isRelative())
                grammarSrc = new LocalFileInputSource(systemId);
            else
                grammarSrc = new URLInputSource(parsedId);
        }
        catch(const MalformedURLException&)
        {
            grammarSrc = new LocalFileInputSource(systemId);
        }
    }

    // Make sure the source is cleaned up however we leave
    Janitor<InputSource> srcCleanup(grammarSrc);
    return loadGrammar(*grammarSrc, grammarType, toCache);
}
//
//  Convenience overload taking the system id as a local code page string.
//  The id is transcoded to XMLCh and passed on to the XMLCh overload; the
//  transcoded copy is released when this method exits.
//
Grammar* XMLScanner::loadGrammar(const   char* const    systemId
                                 , const short          grammarType
                                 , const bool           toCache)
{
    XMLCh* const wideSystemId = XMLString::transcode(systemId);
    ArrayJanitor<XMLCh> wideIdCleanup(wideSystemId);

    return loadGrammar(wideSystemId, grammarType, toCache);
}
//
//  Loads (preparses) a grammar from the given input source.
//
//  src         - the input source to read the grammar from
//  grammarType - Grammar::SchemaGrammarType or Grammar::DTDGrammarType;
//                any other value loads nothing
//  toCache     - passed through to the type specific loader
//
//  Returns the grammar produced by loadXMLSchemaGrammar() or
//  loadDTDGrammar(), or 0 when the grammar type is not one of the two
//  above or when loading was abandoned because of an error handled here.
//
//  XMLErrs::Codes and XMLValid::Codes exits are absorbed. An XMLException
//  is reported through emitError() and absorbed, unless emitError() itself
//  throws, in which case that exception propagates. Any other exception is
//  rethrown. The reader manager is reset on every path out of this method,
//  including the successful one.
//
Grammar* XMLScanner::loadGrammar(const   InputSource&   src
                                 , const short          grammarType
                                 , const bool           toCache)
{
    //
    //  The result is kept in a local rather than returned from inside the
    //  try block, so that the reader manager reset at the end of the try
    //  block is reached on success as well.
    //
    Grammar* loadedGrammar = 0;

    try
    {
        fGrammarResolver->cacheGrammarFromParse(false);
        fGrammarResolver->useCachedGrammarInParse(false);
        fRootGrammar = 0;

        if (fValScheme == Val_Auto) {
            fValidate = true;
        }

        // Reset some status flags
        fInException = false;
        fStandalone = false;
        fErrorCount = 0;
        fHasNoDTD = true;
        fSeeXsi = false;

        if (grammarType == Grammar::SchemaGrammarType) {
            loadedGrammar = loadXMLSchemaGrammar(src, toCache);
        }
        else if (grammarType == Grammar::DTDGrammarType) {
            loadedGrammar = loadDTDGrammar(src, toCache);
        }

        // Reset the reader manager to close all files, sockets, etc...
        fReaderMgr.reset();
    }

    //
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    //
    catch(const XMLErrs::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and fall through
        fReaderMgr.reset();
    }

    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so reset and fall through
        fReaderMgr.reset();
    }

    catch(const XMLException& excToCatch)
    {
        //
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        //
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::DisplayErrorMessage
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getType()
                    , excToCatch.getMessage()
                );
        }

        catch(...)
        {
            // Flush the reader manager and rethrow user's error
            fReaderMgr.reset();
            throw;
        }

        // If it returned, then reset the reader manager and fall through
        fReaderMgr.reset();
    }

    catch(...)
    {
        // Reset and rethrow
        fReaderMgr.reset();
        throw;
    }

    return loadedGrammar;
}
//
//  Loads a standalone DTD from the given input source into a newly created
//  DTDGrammar and returns that grammar.
//
//  src     - the input source holding the DTD
//  toCache - when true, the grammar is registered under the source's system
//            id instead of XMLUni::fgDTDEntityString, and the grammar
//            resolver is asked to cache its grammars once scanning is done
//
//  Throws a RuntimeException when a user supplied validator cannot handle
//  DTDs while validation is on, or when no reader can be created for the
//  source.
//
Grammar* XMLScanner::loadDTDGrammar(const   InputSource&    src
                                    , const bool            toCache)
{
    // Bring the validators back to a clean state
    fDTDValidator->reset();
    if (fValidatorFromUser)
        fValidator->reset();

    //
    //  If the current validator cannot deal with DTDs, switch to the DTD
    //  validator, except when it was installed by the user and we are
    //  validating, which is an error.
    //
    if (!fValidator->handlesDTD())
    {
        if (fValidatorFromUser && fValidate)
        {
            ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
        }
        else
        {
            fValidator = fDTDValidator;
        }
    }

    // Create the grammar to fill in and make it the current one
    fDTDGrammar = new DTDGrammar();
    fGrammarResolver->putGrammar(XMLUni::fgDTDEntityString, fDTDGrammar);
    fGrammar = fDTDGrammar;
    fGrammarType = fGrammar->getGrammarType();
    fValidator->setGrammar(fGrammar);

    //
    //  Send reset events to all installed handlers, so that they can flush
    //  any cached data.
    //
    if (fDocHandler)
        fDocHandler->resetDocument();
    if (fEntityHandler)
        fEntityHandler->resetEntities();
    if (fErrorReporter)
        fErrorReporter->resetErrors();

    // Clear out the id reference list
    fIDRefList->removeAll();

    //
    //  When caching, re-register the grammar under the system id of the
    //  source rather than under the generic DTD entity key.
    //
    if (toCache)
    {
        unsigned int poolId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
        const XMLCh* pooledSysId = fGrammarResolver->getStringPool()->getValueForId(poolId);

        fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
        fGrammarResolver->putGrammar(pooledSysId, fGrammar);
    }

    //
    //  Create the XML reader for this input source. It provides us with
    //  transcoding and basic lexing services.
    //
    XMLReader* dtdReader = fReaderMgr.createReader
    (
        src
        , false
        , XMLReader::RefFrom_NonLiteral
        , XMLReader::Type_General
        , XMLReader::Source_External
    );

    if (dtdReader == 0)
    {
        // Which exception code is used depends on the source's own setting
        if (src.getIssueFatalErrorIfNotFound())
        {
            ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId());
        }
        else
        {
            ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId());
        }
    }

    //
    //  To keep processing consistent this has to look like an external
    //  entity. So an entity decl is created, filled in, and pushed together
    //  with the reader. The reader manager does not adopt the decl, hence
    //  the janitor.
    //
    const XMLCh dtdName[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
    DTDEntityDecl* pseudoEntity = new DTDEntityDecl(dtdName);
    pseudoEntity->setSystemId(src.getSystemId());
    Janitor<DTDEntityDecl> entityCleanup(pseudoEntity);

    // Mark this one as a throw at end
    dtdReader->setThrowAtEnd(true);

    // And push it onto the stack, with its pseudo name
    fReaderMgr.pushReader(dtdReader, pseudoEntity);

    //
    //  With a doc type handler installed, issue the doctype event using a
    //  dummy root element decl.
    //
    if (fDocTypeHandler)
    {
        DTDElementDecl* dummyRoot = new DTDElementDecl(dtdName, fEmptyNamespaceId);
        dummyRoot->setCreateReason(DTDElementDecl::AsRootElem);
        dummyRoot->setExternalElemDeclaration(true);
        Janitor<DTDElementDecl> rootCleanup(dummyRoot);

        fDocTypeHandler->doctypeDecl(*dummyRoot, src.getPublicId(), src.getSystemId(), false);
    }

    // Set up the DTD scanner on the current grammar
    DTDScanner dtdScanner((DTDGrammar*)fGrammar, fDocTypeHandler);
    dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);

    // Tell it its not in an include section
    dtdScanner.scanExtSubsetDecl(false);

    // Validate the DTD scan so far
    if (fValidate)
    {
        fValidator->preContentValidation(false, true);
    }

    if (toCache)
    {
        fGrammarResolver->cacheGrammars();
    }

    return fDTDGrammar;
}
// ---------------------------------------------------------------------------
// XMLScanner: Reset pool of cached grammars
// ---------------------------------------------------------------------------
//
//  Resets the pool of cached grammars by forwarding the request to the
//  grammar resolver's resetCachedGrammar().
//
void XMLScanner::resetCachedGrammarPool()
{
fGrammarResolver->resetCachedGrammar();
}