Newer
Older
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id$
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/XSAXMLScanner.hpp>
#include <xercesc/sax/InputSource.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/psvi/XSAnnotation.hpp>
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#include <xercesc/validators/schema/SchemaValidator.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// XSAXMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------
XSAXMLScanner::XSAXMLScanner( GrammarResolver* const grammarResolver
, XMLStringPool* const uriStringPool
, SchemaGrammar* const xsaGrammar
, MemoryManager* const manager) :
SGXMLScanner(0, grammarResolver, manager)
{
fSchemaGrammar = xsaGrammar;
setURIStringPool(uriStringPool);
}
XSAXMLScanner::~XSAXMLScanner()
{
}
// ---------------------------------------------------------------------------
// XSAXMLScanner: SGXMLScanner virtual methods
// ---------------------------------------------------------------------------
// This method will kick off the scanning of the primary content of the
void XSAXMLScanner::scanEndTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the end of the root element.
gotData = true;
// Check if the element stack is empty. If so, then this is an unbalanced
// element (i.e. more ends than starts, perhaps because of bad text
// causing one to be skipped.)
if (fElemStack.isEmpty())
{
emitError(XMLErrs::MoreEndThanStartTags);
fReaderMgr.skipPastChar(chCloseAngle);
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
}
// Pop the stack of the element we are supposed to be ending. Remember
// that we don't own this. The stack just keeps them and reuses them.
unsigned int uriId = fElemStack.getCurrentURI();
// Make sure that its the end of the element that we expect
const XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
const ElemStack::StackElem* topElem = fElemStack.popTop();
if (!fReaderMgr.skippedString(elemName))
{
emitError
(
XMLErrs::ExpectedEndOfTagX, elemName
);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// See if it was the root element, to avoid multiple calls below
const bool isRoot = fElemStack.isEmpty();
// Make sure we are back on the same reader as where we started
if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialTagMarkupError);
// Skip optional whitespace
fReaderMgr.skipPastSpaces();
// Make sure we find the closing bracket
if (!fReaderMgr.skippedChar(chCloseAngle))
{
emitError
(
XMLErrs::UnterminatedEndTag, topElem->fThisElement->getFullName()
);
}
// If validation is enabled, then lets pass him the list of children and
// this element and let him validate it.
if (fValidate)
{
Alberto Massari
committed
bool res = fValidator->checkContent
Alberto Massari
committed
topElem->fThisElement
, topElem->fChildren
, topElem->fChildCount
, &failure
);
Alberto Massari
committed
if (!res)
{
// One of the elements is not valid for the content. NOTE that
// if no children were provided but the content model requires
// them, it comes back with a zero value. But we cannot use that
// to index the child array in this case, and have to put out a
// special message.
if (!topElem->fChildCount)
{
fValidator->emitError
(
XMLValid::EmptyNotValidForContent
, topElem->fThisElement->getFormattedContentModel()
);
}
Alberto Massari
committed
else if (failure >= topElem->fChildCount)
{
fValidator->emitError
(
XMLValid::NotEnoughElemsForCM
, topElem->fThisElement->getFormattedContentModel()
);
}
else
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
Alberto Massari
committed
, topElem->fChildren[failure]->getRawName()
, topElem->fThisElement->getFormattedContentModel()
);
}
}
}
// now we can reset the datatype buffer, since the
// application has had a chance to copy the characters somewhere else
((SchemaValidator *)fValidator)->clearDatatypeBuffer();
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
if (topElem->fPrefixColonPos != -1)
fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
else
fPrefixBuf.reset();
fDocHandler->endElement
(
*topElem->fThisElement
, uriId
, isRoot
, fPrefixBuf.getRawBuffer()
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
);
}
// If this was the root, then done with content
gotData = !isRoot;
if (gotData) {
// Restore the grammar
fGrammar = fElemStack.getCurrentGrammar();
fGrammarType = fGrammar->getGrammarType();
fValidator->setGrammar(fGrammar);
// Restore the validation flag
fValidate = fElemStack.getValidationFlag();
}
}
bool XSAXMLScanner::scanStartTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// Reset element content
fContent.reset();
// The current position is after the open bracket, so we need to read in
// in the element name.
David Abram Cargill
committed
int prefixColonPos;
if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
David Abram Cargill
committed
{
if (fQNameBuf.isEmpty())
emitError(XMLErrs::ExpectedElementName);
else
emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// First we have to do the rawest attribute scan. We don't do any
// normalization of them at all, since we don't know yet what type they
// might be (since we need the element decl in order to do that.)
const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
bool isEmpty;
XMLSize_t attCount = rawAttrScan(qnameRawBuf, *fRawAttrList, isEmpty);
// save the contentleafname and currentscope before addlevel, for later use
ContentLeafNameTypeVector* cv = 0;
XMLContentModel* cm = 0;
unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
bool laxThisOne = false;
if (!isRoot)
{
// schema validator will have correct type if validating
SchemaElementDecl* tempElement = (SchemaElementDecl*)
fElemStack.topElement()->fThisElement;
SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
ComplexTypeInfo *currType = 0;
if (fValidate)
{
currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
if (currType)
modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
else // something must have gone wrong
modelType = SchemaElementDecl::Any;
}
else {
currType = tempElement->getComplexTypeInfo();
}
if ((modelType == SchemaElementDecl::Mixed_Simple)
|| (modelType == SchemaElementDecl::Mixed_Complex)
|| (modelType == SchemaElementDecl::Children))
{
cm = currType->getContentModel();
cv = cm->getContentLeafNameTypeVector();
currentScope = fElemStack.getCurrentScope();
}
else if (modelType == SchemaElementDecl::Any) {
laxThisOne = true;
}
}
// Now, since we might have to update the namespace map for this element,
// but we don't have the element decl yet, we just tell the element stack
// to expand up to get ready.
unsigned int elemDepth = fElemStack.addLevel();
fElemStack.setValidationFlag(fValidate);
fElemStack.setPrefixColonPos(prefixColonPos);
// Make an initial pass through the list and find any xmlns attributes or
// schema attributes.
if (attCount)
scanRawAttrListforNameSpaces(attCount);
// Resolve the qualified name to a URI and name so that we can look up
// the element decl for this element. We have now update the prefix to
David Abram Cargill
committed
// namespace map so we should get the correct element now.
unsigned int uriId = resolveQNameWithColon
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
(
qnameRawBuf, fPrefixBuf, ElemStack::Mode_Element, prefixColonPos
);
//if schema, check if we should lax or skip the validation of this element
bool parentValidation = fValidate;
if (cv) {
QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
// elementDepth will be > 0, as cv is only constructed if element is not
// root.
laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
}
// Look up the element now in the grammar. This will get us back a
// generic element decl object. We tell him to fault one in if he does
// not find it.
bool wasAdded = false;
const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
XMLElementDecl* elemDecl = fGrammar->getElemDecl
(
uriId, nameRawBuf, qnameRawBuf, currentScope
);
if (!elemDecl)
{
// URI is different, so we try to switch grammar
if (uriId != fURIStringPool->getId(fGrammar->getTargetNamespace())) {
switchGrammar(getURIText(uriId), laxThisOne);
}
// look for a global element declaration
elemDecl = fGrammar->getElemDecl(
uriId, nameRawBuf, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
);
if (!elemDecl)
{
// if still not found, look in list of undeclared elements
elemDecl = fElemNonDeclPool->getByKey(
nameRawBuf, uriId, Grammar::TOP_LEVEL_SCOPE);
if (!elemDecl)
{
elemDecl = new (fMemoryManager) SchemaElementDecl
(
fPrefixBuf.getRawBuffer(), nameRawBuf, uriId
, SchemaElementDecl::Any, Grammar::TOP_LEVEL_SCOPE
, fMemoryManager
);
elemDecl->setId
(
fElemNonDeclPool->put
(
(void*)elemDecl->getBaseName(), uriId
, Grammar::TOP_LEVEL_SCOPE, (SchemaElementDecl*)elemDecl
)
);
wasAdded = true;
}
}
}
// We do something different here according to whether we found the
// element or not.
if (wasAdded || !elemDecl->isDeclared())
{
if (laxThisOne) {
fValidate = false;
fElemStack.setValidationFlag(fValidate);
}
// If validating then emit an error
if (fValidate)
{
// This is to tell the reuse Validator that this element was
// faulted-in, was not an element in the grammar pool originally
elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
fValidator->emitError
(
XMLValid::ElementNotDefined, elemDecl->getFullName()
);
}
}
// Now we can update the element stack to set the current element
// decl. We expanded the stack above, but couldn't store the element
// decl because we didn't know it yet.
fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
fElemStack.setCurrentURI(uriId);
if (isRoot) {
fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
}
// Validate the element
if (fValidate) {
fValidator->validateElement(elemDecl);
}
// squirrel away the element's QName, so that we can do an efficient
// end-tag match
fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
ComplexTypeInfo* typeinfo = (fValidate)
? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
: ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
if (typeinfo)
{
currentScope = typeinfo->getScopeDefined();
// switch grammar if the typeinfo has a different grammar
XMLCh* typeName = typeinfo->getTypeName();
int comma = XMLString::indexOf(typeName, chComma);
if (comma > 0)
{
XMLBufBid bbPrefix(&fBufMgr);
XMLBuffer& prefixBuf = bbPrefix.getBuffer();
prefixBuf.append(typeName, comma);
switchGrammar(prefixBuf.getRawBuffer(), laxThisOne);
}
}
fElemStack.setCurrentScope(currentScope);
// Set element next state
if (elemDepth >= fElemStateSize) {
resizeElemState();
}
fElemState[elemDepth] = 0;
fElemStack.setCurrentGrammar(fGrammar);
// If this is the first element and we are validating, check the root
// element.
if (!isRoot && parentValidation) {
fElemStack.addChild(elemDecl->getElementName(), true);
}
// Now lets get the fAttrList filled in. This involves faulting in any
// defaulted and fixed attributes and normalizing the values of any that
// we got explicitly.
//
// We update the attCount value with the total number of attributes, but
// it goes in with the number of values we got during the raw scan of
// explictly provided attrs above.
attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
if(attCount)
{
// clean up after ourselves:
// clear the map used to detect duplicate attributes
fUndeclaredAttrRegistryNS->removeAll();
}
// Since the element may have default values, call start tag now regardless if it is empty or not
// If we have a document handler, then tell it about this start tag
if (fDocHandler)
{
fDocHandler->startElement
(
*elemDecl, uriId, fPrefixBuf.getRawBuffer(), *fAttrList
, attCount, false, isRoot
);
} // may be where we output something...
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If validating, then insure that its legal to have no content
if (fValidate)
{
Alberto Massari
committed
bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
if (!res)
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
{
// REVISIT: in the case of xsi:type, this may
// return the wrong string...
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, elemDecl->getFullName()
, elemDecl->getFormattedContentModel()
);
}
}
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
fDocHandler->endElement
(
*elemDecl, uriId, isRoot, fPrefixBuf.getRawBuffer()
);
}
// If the elem stack is empty, then it was an empty root
if (isRoot) {
gotData = false;
}
else
{
// Restore the grammar
fGrammar = fElemStack.getCurrentGrammar();
fGrammarType = fGrammar->getGrammarType();
fValidator->setGrammar(fGrammar);
// Restore the validation flag
fValidate = fElemStack.getValidationFlag();
}
}
return true;
}
// ---------------------------------------------------------------------------
// XSAXMLScanner: XMLScanner virtual methods
// ---------------------------------------------------------------------------
// This method will reset the scanner data structures, and related plugged
// in stuff, for a new scan session. We get the input source for the primary
// XML entity, create the reader for it, and push it on the stack so that
// upon successful return from here we are ready to go.
void XSAXMLScanner::scanReset(const InputSource& src)
{
fGrammar = fSchemaGrammar;
fGrammarType = Grammar::SchemaGrammarType;
fRootGrammar = fSchemaGrammar;
fValidator->setGrammar(fGrammar);
// Reset validation
fValidate = true;
// And for all installed handlers, send reset events. This gives them
// a chance to flush any cached data.
if (fDocHandler)
fDocHandler->resetDocument();
if (fEntityHandler)
fEntityHandler->resetEntities();
if (fErrorReporter)
fErrorReporter->resetErrors();
// Clear out the id reference list
resetValidationContext();
// Reset the Root Element Name
if (fRootElemName) {
fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
}
fRootElemName = 0;
// Reset the element stack, and give it the latest ids for the special
// URIs it has to know about.
fElemStack.reset
(
fEmptyNamespaceId, fUnknownNamespaceId, fXMLNamespaceId, fXMLNSNamespaceId
);
if (!fSchemaNamespaceId)
fSchemaNamespaceId = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI);
// Reset some status flags
fInException = false;
fStandalone = false;
fErrorCount = 0;
fHasNoDTD = true;
fSeeXsi = false;
fDoNamespaces = true;
fDoSchema = true;
// Reset the validators
fSchemaValidator->reset();
fSchemaValidator->setErrorReporter(fErrorReporter);
fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
fSchemaValidator->setGrammarResolver(fGrammarResolver);
// Handle the creation of the XML reader object for this input source.
// This will provide us with transcoding and basic lexing services.
XMLReader* newReader = fReaderMgr.createReader
(
src
, true
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, XMLReader::Source_External
, fCalculateSrcOfs
);
if (!newReader) {
if (src.getIssueFatalErrorIfNotFound())
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
else
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
}
// Push this read onto the reader manager
fReaderMgr.pushReader(newReader, 0);
// and reset security-related things if necessary:
if(fSecurityManager != 0)
{
fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
fEntityExpansionCount = 0;
}
fElemCount = 0;
if (fUIntPoolRowTotal >= 32)
{ // 8 KB tied up with validating attributes...
fAttDefRegistry->removeAll();
recreateUIntPool();
}
else
{
// note that this will implicitly reset the values of the hashtables,
// though their buckets will still be tied up
resetUIntPool();
}
Alberto Massari
committed
fUndeclaredAttrRegistryNS->removeAll();
}
void XSAXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount)
{
// Make an initial pass through the list and find any xmlns attributes or
// schema attributes.
// When we find one, send it off to be used to update the element stack's
// namespace mappings.
for (index = 0; index < attCount; index++)
{
// each attribute has the prefix:suffix="value"
const KVStringPair* curPair = fRawAttrList->elementAt(index);
const XMLCh* rawPtr = curPair->getKey();
// If either the key begins with "xmlns:" or its just plain
// "xmlns", then use it to update the map.
if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
|| XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
{
const XMLCh* valuePtr = curPair->getValue();
David Abram Cargill
committed
updateNSMap(rawPtr, valuePtr, fRawAttrColonList[index]);
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
// if the schema URI is seen in the the valuePtr, set the boolean seeXsi
if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) {
fSeeXsi = true;
}
}
}
// walk through the list again to deal with "xsi:...."
if (fSeeXsi)
{
// Schema Xsi Type yyyy (e.g. xsi:type="yyyyy")
XMLBufBid bbXsi(&fBufMgr);
XMLBuffer& fXsiType = bbXsi.getBuffer();
QName attName(fMemoryManager);
for (index = 0; index < attCount; index++)
{
// each attribute has the prefix:suffix="value"
const KVStringPair* curPair = fRawAttrList->elementAt(index);
const XMLCh* rawPtr = curPair->getKey();
attName.setName(rawPtr, fEmptyNamespaceId);
const XMLCh* prefPtr = attName.getPrefix();
// if schema URI has been seen, scan for the schema location and uri
// and resolve the schema grammar; or scan for schema type
if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {
const XMLCh* valuePtr = curPair->getValue();
const XMLCh* suffPtr = attName.getLocalPart();
if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE)) {
fXsiType.set(valuePtr);
}
else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL)
&& XMLString::equals(valuePtr, SchemaSymbols::fgATTVAL_TRUE)) {
((SchemaValidator*)fValidator)->setNillable(true);
}
}
}
if (!fXsiType.isEmpty())
{
int colonPos = -1;
unsigned int uriId = resolveQName
(
fXsiType.getRawBuffer(), fPrefixBuf, ElemStack::Mode_Element, colonPos
);
((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId);
}
}
}
void XSAXMLScanner::switchGrammar( const XMLCh* const uriStr
, bool laxValidate)
{
Grammar* tempGrammar = 0;
if (XMLString::equals(uriStr, SchemaSymbols::fgURI_SCHEMAFORSCHEMA)) {
tempGrammar = fSchemaGrammar;
}
else {
tempGrammar = fGrammarResolver->getGrammar(uriStr);
}
if (tempGrammar && tempGrammar->getGrammarType() == Grammar::SchemaGrammarType)
{
fGrammar = tempGrammar;
fGrammarType = Grammar::SchemaGrammarType;
fValidator->setGrammar(fGrammar);
}
else if(!laxValidate) {
fValidator->emitError(XMLValid::GrammarNotFound, uriStr);
}
}
XERCES_CPP_NAMESPACE_END