Newer
Older
* Copyright (c) 2002, 2003 The Apache Software Foundation. All rights
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Id$
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/DGXMLScanner.hpp>
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/framework/XMLGrammarPool.hpp>
#include <xercesc/framework/XMLDTDDescription.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/validators/common/GrammarResolver.hpp>
#include <xercesc/validators/DTD/DocTypeHandler.hpp>
#include <xercesc/validators/DTD/DTDScanner.hpp>
#include <xercesc/validators/DTD/DTDValidator.hpp>
Neil Graham
committed
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/util/XMLResourceIdentifier.hpp>
#include <xercesc/util/HashPtr.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// DGXMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------
DGXMLScanner::DGXMLScanner(XMLValidator* const valToAdopt
, GrammarResolver* const grammarResolver
, MemoryManager* const manager) :
XMLScanner(valToAdopt, grammarResolver, manager)
, fAttrNSList(0)
, fDTDValidator(0)
, fDTDGrammar(0)
Neil Graham
committed
, fDTDElemNonDeclPool(0)
, fElemCount(0)
, fAttDefRegistry(0)
, fUndeclaredAttrRegistry(0)
{
try
{
commonInit();
if (valToAdopt)
{
if (!valToAdopt->handlesDTD())
David Abram Cargill
committed
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
}
else
{
fValidator = fDTDValidator;
}
}
Neil Graham
committed
catch(const OutOfMemoryException&)
{
throw;
}
catch(...)
{
cleanUp();
throw;
}
}
DGXMLScanner::DGXMLScanner( XMLDocumentHandler* const docHandler
, DocTypeHandler* const docTypeHandler
, XMLEntityHandler* const entityHandler
, XMLErrorReporter* const errHandler
, XMLValidator* const valToAdopt
, GrammarResolver* const grammarResolver
, MemoryManager* const manager) :
XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
, fAttrNSList(0)
, fDTDValidator(0)
, fDTDGrammar(0)
Neil Graham
committed
, fDTDElemNonDeclPool(0)
, fElemCount(0)
, fAttDefRegistry(0)
, fUndeclaredAttrRegistry(0)
{
try
{
commonInit();
if (valToAdopt)
{
if (!valToAdopt->handlesDTD())
David Abram Cargill
committed
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
}
else
{
fValidator = fDTDValidator;
}
}
Neil Graham
committed
catch(const OutOfMemoryException&)
{
throw;
}
catch(...)
{
cleanUp();
throw;
}
}
DGXMLScanner::~DGXMLScanner()
{
cleanUp();
}
// ---------------------------------------------------------------------------
// XMLScanner: Getter methods
// ---------------------------------------------------------------------------
NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool()
{
Alberto Massari
committed
if(!fGrammar)
return 0;
return ((DTDGrammar*)fGrammar)->getEntityDeclPool();
}
const NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool() const
{
Alberto Massari
committed
if(!fGrammar)
return 0;
return ((DTDGrammar*)fGrammar)->getEntityDeclPool();
}
// ---------------------------------------------------------------------------
// DGXMLScanner: Main entry point to scan a document
// ---------------------------------------------------------------------------
void DGXMLScanner::scanDocument(const InputSource& src)
{
// Bump up the sequence id for this parser instance. This will invalidate
// any previous progressive scan tokens.
fSequenceId++;
try
{
// Reset the scanner and its plugged in stuff for a new run. This
// resets all the data structures, creates the initial reader and
// pushes it on the stack, and sets up the base document path.
scanReset(src);
// If we have a document handler, then call the start document
if (fDocHandler)
fDocHandler->startDocument();
// Scan the prolog part, which is everything before the root element
// including the DTD subsets.
scanProlog();
// If we got to the end of input, then its not a valid XML file.
// Else, go on to scan the content.
if (fReaderMgr.atEOF())
{
emitError(XMLErrs::EmptyMainEntity);
}
else
{
// Scan content, and tell it its not an external entity
David Abram Cargill
committed
if (scanContent())
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
{
// Do post-parse validation if required
if (fValidate)
{
// We handle ID reference semantics at this level since
// its required by XML 1.0.
checkIDRefs();
// Then allow the validator to do any extra stuff it wants
// fValidator->postParseValidation();
}
// That went ok, so scan for any miscellaneous stuff
if (!fReaderMgr.atEOF())
scanMiscellaneous();
}
}
// If we have a document handler, then call the end document
if (fDocHandler)
fDocHandler->endDocument();
// Reset the reader manager to close all files, sockets, etc...
fReaderMgr.reset();
}
// NOTE:
//
// In all of the error processing below, the emitError() call MUST come
// before the flush of the reader mgr, or it will fail because it tries
// to find out the position in the XML source of the error.
catch(const XMLErrs::Codes)
{
// This is a 'first fatal error' type exit, so reset and fall through
fReaderMgr.reset();
}
catch(const XMLValid::Codes)
{
// This is a 'first fatal error' type exit, so reset and fall through
fReaderMgr.reset();
}
catch(const XMLException& excToCatch)
{
// Emit the error and catch any user exception thrown from here. Make
// sure in all cases we flush the reader manager.
fInException = true;
try
{
if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
emitError
(
XMLErrs::XMLException_Warning
, excToCatch.getType()
, excToCatch.getMessage()
);
else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
emitError
(
XMLErrs::XMLException_Fatal
, excToCatch.getType()
, excToCatch.getMessage()
);
else
emitError
(
XMLErrs::XMLException_Error
, excToCatch.getType()
, excToCatch.getMessage()
);
}
Neil Graham
committed
catch(const OutOfMemoryException&)
{
throw;
}
catch(...)
{
// Flush the reader manager and rethrow user's error
fReaderMgr.reset();
throw;
}
// If it returned, then reset the reader manager and fall through
fReaderMgr.reset();
}
Neil Graham
committed
catch(const OutOfMemoryException&)
{
throw;
}
catch(...)
{
// Reset and rethrow
fReaderMgr.reset();
throw;
}
}
bool DGXMLScanner::scanNext(XMLPScanToken& token)
{
// Make sure this token is still legal
if (!isLegalToken(token))
David Abram Cargill
committed
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
// Find the next token and remember the reader id
unsigned int orgReader;
XMLTokens curToken;
bool retVal = true;
try
{
while (true)
{
// We have to handle any end of entity exceptions that happen here.
// We could be at the end of X nested entities, each of which will
// generate an end of entity exception as we try to move forward.
try
{
curToken = senseNextToken(orgReader);
break;
}
catch(const EndOfEntityException& toCatch)
{
// Send an end of entity reference event
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
}
}
if (curToken == Token_CharData)
{
scanCharData(fCDataBuf);
}
else if (curToken == Token_EOF)
{
if (!fElemStack.isEmpty())
{
const ElemStack::StackElem* topElem = fElemStack.popTop();
emitError
(
XMLErrs::EndedWithTagsOnStack
, topElem->fThisElement->getFullName()
);
}
retVal = false;
}
else
{
// Its some sort of markup
bool gotData = true;
switch(curToken)
{
case Token_CData :
// Make sure we are within content
if (fElemStack.isEmpty())
emitError(XMLErrs::CDATAOutsideOfContent);
scanCDSection();
break;
case Token_Comment :
scanComment();
break;
case Token_EndTag :
scanEndTag(gotData);
break;
case Token_PI :
scanPI();
break;
case Token_StartTag :
scanStartTag(gotData);
break;
default :
fReaderMgr.skipToChar(chOpenAngle);
break;
}
if (orgReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
// If we hit the end, then do the miscellaneous part
if (!gotData)
{
// Do post-parse validation if required
if (fValidate)
{
// We handle ID reference semantics at this level since
// its required by XML 1.0.
checkIDRefs();
// Then allow the validator to do any extra stuff it wants
// fValidator->postParseValidation();
}
// That went ok, so scan for any miscellaneous stuff
scanMiscellaneous();
if (fDocHandler)
fDocHandler->endDocument();
}
}
}
// NOTE:
//
// In all of the error processing below, the emitError() call MUST come
// before the flush of the reader mgr, or it will fail because it tries
// to find out the position in the XML source of the error.
catch(const XMLErrs::Codes)
{
// This is a 'first failure' exception, so reset and return failure
fReaderMgr.reset();
return false;
}
catch(const XMLValid::Codes)
{
// This is a 'first fatal error' type exit, so reset and reuturn failure
fReaderMgr.reset();
return false;
}
catch(const XMLException& excToCatch)
{
// Emit the error and catch any user exception thrown from here. Make
// sure in all cases we flush the reader manager.
fInException = true;
try
{
if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
emitError
(
XMLErrs::XMLException_Warning
, excToCatch.getType()
, excToCatch.getMessage()
);
else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
emitError
(
XMLErrs::XMLException_Fatal
, excToCatch.getType()
, excToCatch.getMessage()
);
else
emitError
(
XMLErrs::XMLException_Error
, excToCatch.getType()
, excToCatch.getMessage()
);
}
Neil Graham
committed
catch(const OutOfMemoryException&)
{
throw;
}
catch(...)
{
// Reset and rethrow user error
fReaderMgr.reset();
throw;
}
// Reset and return failure
fReaderMgr.reset();
return false;
}
Neil Graham
committed
catch(const OutOfMemoryException&)
{
throw;
}
catch(...)
{
// Reset and rethrow original error
fReaderMgr.reset();
throw;
}
// If we hit the end, then flush the reader manager
if (!retVal)
fReaderMgr.reset();
return retVal;
}
// ---------------------------------------------------------------------------
// DGXMLScanner: Private scanning methods
// ---------------------------------------------------------------------------
// This method will kick off the scanning of the primary content of the
// document, i.e. the elements.
David Abram Cargill
committed
bool DGXMLScanner::scanContent()
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
{
// Go into a loop until we hit the end of the root element, or we fall
// out because there is no root element.
//
// We have to do kind of a deeply nested double loop here in order to
// avoid doing the setup/teardown of the exception handler on each
// round. Doing it this way we only do it when an exception actually
// occurs.
bool gotData = true;
bool inMarkup = false;
while (gotData)
{
try
{
while (gotData)
{
// Sense what the next top level token is. According to what
// this tells us, we will call something to handle that kind
// of thing.
unsigned int orgReader;
const XMLTokens curToken = senseNextToken(orgReader);
// Handle character data and end of file specially. Char data
// is not markup so we don't want to handle it in the loop
// below.
if (curToken == Token_CharData)
{
// Scan the character data and call appropriate events. Let
// him use our local character data buffer for efficiency.
scanCharData(fCDataBuf);
continue;
}
else if (curToken == Token_EOF)
{
// The element stack better be empty at this point or we
// ended prematurely before all elements were closed.
if (!fElemStack.isEmpty())
{
const ElemStack::StackElem* topElem = fElemStack.popTop();
emitError
(
XMLErrs::EndedWithTagsOnStack
, topElem->fThisElement->getFullName()
);
}
// Its the end of file, so clear the got data flag
gotData = false;
continue;
}
// We are in some sort of markup now
inMarkup = true;
// According to the token we got, call the appropriate
// scanning method.
switch(curToken)
{
case Token_CData :
// Make sure we are within content
if (fElemStack.isEmpty())
emitError(XMLErrs::CDATAOutsideOfContent);
scanCDSection();
break;
case Token_Comment :
scanComment();
break;
case Token_EndTag :
scanEndTag(gotData);
break;
case Token_PI :
scanPI();
break;
case Token_StartTag :
scanStartTag(gotData);
break;
default :
fReaderMgr.skipToChar(chOpenAngle);
break;
}
if (orgReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
// And we are back out of markup again
inMarkup = false;
}
}
catch(const EndOfEntityException& toCatch)
{
// If we were in some markup when this happened, then its a
// partial markup error.
if (inMarkup)
emitError(XMLErrs::PartialMarkupInEntity);
// Send an end of entity reference event
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
inMarkup = false;
}
}
// It went ok, so return success
return true;
}
void DGXMLScanner::scanEndTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the end of the root element.
gotData = true;
// Check if the element stack is empty. If so, then this is an unbalanced
// element (i.e. more ends than starts, perhaps because of bad text
// causing one to be skipped.)
if (fElemStack.isEmpty())
{
emitError(XMLErrs::MoreEndThanStartTags);
fReaderMgr.skipPastChar(chCloseAngle);
David Abram Cargill
committed
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
}
// After the </ is the element QName, so get a name from the input
if (!fReaderMgr.getName(fQNameBuf))
{
// It failed so we can't really do anything with it
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Resolve element name uri if needed
unsigned int uriId = fEmptyNamespaceId;
const ElemStack::StackElem* topElem = fElemStack.topElement();
if (fDoNamespaces)
{
uriId = resolvePrefix
(
topElem->fThisElement->getElementName()->getPrefix()
, ElemStack::Mode_Element
);
}
// Pop the stack of the element we are supposed to be ending. Remember
// that we don't own this. The stack just keeps them and reuses them.
fElemStack.popTop();
// See if it was the root element, to avoid multiple calls below
const bool isRoot = fElemStack.isEmpty();
// Make sure that its the end of the element that we expect
if (!XMLString::equals(topElem->fThisElement->getFullName(), fQNameBuf.getRawBuffer()))
{
emitError
(
XMLErrs::ExpectedEndOfTagX
, topElem->fThisElement->getFullName()
);
}
// Make sure we are back on the same reader as where we started
if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialTagMarkupError);
// Skip optional whitespace
fReaderMgr.skipPastSpaces();
// Make sure we find the closing bracket
if (!fReaderMgr.skippedChar(chCloseAngle))
{
emitError
(
XMLErrs::UnterminatedEndTag
, topElem->fThisElement->getFullName()
);
}
// If validation is enabled, then lets pass him the list of children and
// this element and let him validate it.
if (fValidate)
{
PeiYong Zhang
committed
//
// XML1.0-3rd
// Validity Constraint:
// The declaration matches EMPTY and the element has no content (not even
// entity references, comments, PIs or white space).
//
if ( (topElem->fCommentOrPISeen) &&
(((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty))
{
fValidator->emitError
(
XMLValid::EmptyElemHasContent
, topElem->fThisElement->getFullName()
);
}
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
//
// XML1.0-3rd
// Validity Constraint:
//
// The declaration matches children and the sequence of child elements
// belongs to the language generated by the regular expression in the
// content model, with optional white space, comments and PIs
// (i.e. markup matching production [27] Misc) between the start-tag and
// the first child element, between child elements, or between the last
// child element and the end-tag.
//
// Note that
// a CDATA section containing only white space or
// a reference to an entity whose replacement text is character references
// expanding to white space do not match the nonterminal S, and hence
// cannot appear in these positions; however,
// a reference to an internal entity with a literal value consisting
// of character references expanding to white space does match S,
// since its replacement text is the white space resulting from expansion
// of the character references.
//
if ( (topElem->fReferenceEscaped) &&
(((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children))
{
fValidator->emitError
(
XMLValid::ElemChildrenHasInvalidWS
, topElem->fThisElement->getFullName()
);
}
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
int res = fValidator->checkContent
(
topElem->fThisElement
, topElem->fChildren
, topElem->fChildCount
);
if (res >= 0)
{
// One of the elements is not valid for the content. NOTE that
// if no children were provided but the content model requires
// them, it comes back with a zero value. But we cannot use that
// to index the child array in this case, and have to put out a
// special message.
if (!topElem->fChildCount)
{
fValidator->emitError
(
XMLValid::EmptyNotValidForContent
, topElem->fThisElement->getFormattedContentModel()
);
}
else if ((unsigned int)res >= topElem->fChildCount)
{
fValidator->emitError
(
XMLValid::NotEnoughElemsForCM
, topElem->fThisElement->getFormattedContentModel()
);
}
else
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, topElem->fChildren[res]->getRawName()
, topElem->fThisElement->getFormattedContentModel()
);
}
}
}
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
fDocHandler->endElement
(
*topElem->fThisElement
, uriId
, isRoot
? topElem->fThisElement->getElementName()->getPrefix()
: XMLUni::fgZeroLenString
);
// pass back type name information
Alberto Massari
committed
fDocHandler->elementTypeInfo(0, 0);
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
}
// If this was the root, then done with content
gotData = !isRoot;
}
// This method handles the high level logic of scanning the DOCType
// declaration. This calls the DTDScanner and kicks off both the scanning of
// the internal subset and the scanning of the external subset, if any.
//
// When we get here the '<!DOCTYPE' part has already been scanned, which is
// what told us that we had a doc type decl to parse.
void DGXMLScanner::scanDocTypeDecl()
{
if (fDocTypeHandler)
fDocTypeHandler->resetDocType();
// There must be some space after DOCTYPE
if (!fReaderMgr.skipPastSpaces())
{
emitError(XMLErrs::ExpectedWhitespace);
// Just skip the Doctype declaration and return
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Get a buffer for the root element
XMLBufBid bbRootName(&fBufMgr);
// Get a name from the input, which should be the name of the root
// element of the upcoming content.
fReaderMgr.getName(bbRootName.getBuffer());
if (bbRootName.isEmpty())
{
emitError(XMLErrs::NoRootElemInDOCTYPE);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Store the root element name for later check
setRootElemName(bbRootName.getRawBuffer());
// This element obviously is not going to exist in the element decl
// pool yet, but we need to call docTypeDecl. So force it into
// the element decl pool, marked as being there because it was in
// the DOCTYPE. Later, when its declared, the status will be updated.
//
// Only do this if we are not reusing the validator! If we are reusing,
// then look it up instead. It has to exist!
DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
(
bbRootName.getRawBuffer()
, fEmptyNamespaceId
, DTDElementDecl::Any
, fGrammarPoolMemoryManager
Neil Graham
committed
);
rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
rootDecl->setExternalElemDeclaration(true);
Neil Graham
committed
if(!fUseCachedGrammar)
{
// this will break getRootElemId on DTDGrammar when
// cached grammars are in use, but
// why would one use this anyway???
try {
((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
}
catch(const XMLException&)
{
delete rootDecl;
throw;
}
Neil Graham
committed
} else
{
// put this in the undeclared pool so it gets deleted...
XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer());
if (elemDecl)
{
rootDecl->setId(elemDecl->getId());
}
else
{
rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
}
Neil Graham
committed
}
// Skip any spaces after the name
fReaderMgr.skipPastSpaces();
// And now if we are looking at a >, then we are done. It is not
// required to have an internal or external subset, though why you
// would not escapes me.
if (fReaderMgr.skippedChar(chCloseAngle)) {
// If we have a doc type handler and advanced callbacks are enabled,
// call the doctype event.
if (fDocTypeHandler)
fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
return;
}
// either internal/external subset
if (fValScheme == Val_Auto && !fValidate)
fValidate = true;
bool hasIntSubset = false;
bool hasExtSubset = false;
XMLCh* sysId = 0;
XMLCh* pubId = 0;
DTDScanner dtdScanner
(
(DTDGrammar*) fGrammar
, fDocTypeHandler
, fGrammarPoolMemoryManager
, fMemoryManager
);
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
// If the next character is '[' then we have no external subset cause
// there is no system id, just the opening character of the internal
// subset. Else, has to be an id.
//
// Just look at the next char, don't eat it.
if (fReaderMgr.peekNextChar() == chOpenSquare)
{
hasIntSubset = true;
}
else
{
// Indicate we have an external subset
hasExtSubset = true;
fHasNoDTD = false;
// Get buffers for the ids
XMLBufBid bbPubId(&fBufMgr);
XMLBufBid bbSysId(&fBufMgr);
// Get the external subset id
if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
{
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Get copies of the ids we got
pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
// Skip spaces and check again for the opening of an internal subset
fReaderMgr.skipPastSpaces();
// Just look at the next char, don't eat it.
if (fReaderMgr.peekNextChar() == chOpenSquare) {
hasIntSubset = true;
}
}
// Insure that the ids get cleaned up, if they got allocated
ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
// If we have a doc type handler and advanced callbacks are enabled,
// call the doctype event.
if (fDocTypeHandler)
fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
// Ok, if we had an internal subset, we are just past the [ character
// and need to parse that first.
if (hasIntSubset)
{
// Eat the opening square bracket
fReaderMgr.getNextChar();
checkInternalDTD(hasExtSubset, sysId);
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
// And try to scan the internal subset. If we fail, try to recover
// by skipping forward tot he close angle and returning.
if (!dtdScanner.scanInternalSubset())
{
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Do a sanity check that some expanded PE did not propogate out of
// the doctype. This could happen if it was terminated early by bad
// syntax.
if (fReaderMgr.getReaderDepth() > 1)
{
emitError(XMLErrs::PEPropogated);
// Ask the reader manager to pop back down to the main level
fReaderMgr.cleanStackBackTo(1);
}
fReaderMgr.skipPastSpaces();
}
// And that should leave us at the closing > of the DOCTYPE line
if (!fReaderMgr.skippedChar(chCloseAngle))
{
// Do a special check for the common scenario of an extra ] char at
// the end. This is easy to recover from.
if (fReaderMgr.skippedChar(chCloseSquare)
&& fReaderMgr.skippedChar(chCloseAngle))
{
emitError(XMLErrs::ExtraCloseSquare);
}
else
{
emitError(XMLErrs::UnterminatedDOCTYPE);
fReaderMgr.skipPastChar(chCloseAngle);
}
}
// If we had an external subset, then we need to deal with that one
// next. If we are reusing the validator, then don't scan it.
if (hasExtSubset) {
if (fUseCachedGrammar)
{
InputSource* sysIdSrc = resolveSystemId(sysId);
Janitor<InputSource> janSysIdSrc(sysIdSrc);
Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId());
if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
fDTDGrammar = (DTDGrammar*) grammar;
fGrammar = fDTDGrammar;
fValidator->setGrammar(fGrammar);
Neil Graham
committed
// we *cannot* identify the root element on
// cached grammars; else we risk breaking multithreaded
// applications. - NG
/*******
rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
if (rootDecl)
((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId());
else {
rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
(
bbRootName.getRawBuffer()
, fEmptyNamespaceId
, DTDElementDecl::Any
, fGrammarPoolMemoryManager
rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
rootDecl->setExternalElemDeclaration(true);
((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
}
Neil Graham
committed
*********/
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
return;
}
}
if (fLoadExternalDTD || fValidate)
{
// And now create a reader to read this entity
InputSource* srcUsed;
XMLReader* reader = fReaderMgr.createReader
(
sysId
, pubId
, false
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, XMLReader::Source_External
, srcUsed
, fCalculateSrcOfs
);
// Put a janitor on the input source
Janitor<InputSource> janSrc(srcUsed);
// If it failed then throw an exception
if (!reader)
David Abram Cargill
committed
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId(), fMemoryManager);
if (fToCacheGrammar) {
unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
fGrammarResolver->putGrammar(fGrammar);
}
// In order to make the processing work consistently, we have to
// make this look like an external entity. So create an entity
// decl and fill it in and push it with the reader, as happens
// with an external entity. Put a janitor on it to insure it gets
// cleaned up. The reader manager does not adopt them.
const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);
declDTD->setSystemId(sysId);
Janitor<DTDEntityDecl> janDecl(declDTD);
// Mark this one as a throw at end
reader->setThrowAtEnd(true);
// And push it onto the stack, with its pseudo name
fReaderMgr.pushReader(reader, declDTD);
// Tell it its not in an include section
dtdScanner.scanExtSubsetDecl(false, true);
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
}
}
}
bool DGXMLScanner::scanStartTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// Get the QName. In this case, we are not doing namespaces, so we just
// use it as is and don't have to break it into parts.
if (!fReaderMgr.getName(fQNameBuf))
{
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// Assume it won't be an empty tag
bool isEmpty = false;
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Lets try to look up the element in the validator's element decl pool
// We can pass bogus values for the URI id and the base name. We know that
// this can only be called if we are doing a DTD style validator and that
// he will only look at the QName.
//
Neil Graham
committed
// We *do not* tell him to fault in a decl if he does not find one - NG.
Neil Graham
committed
const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
XMLElementDecl* elemDecl = fGrammar->getElemDecl
(
fEmptyNamespaceId
, 0
, qnameRawBuf
, Grammar::TOP_LEVEL_SCOPE
);
Neil Graham
committed
// look in the undeclared pool:
if(!elemDecl)
{
elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
}
if(!elemDecl)
{
wasAdded = true;
elemDecl = new (fMemoryManager) DTDElementDecl
(
qnameRawBuf
, fEmptyNamespaceId
, DTDElementDecl::Any
, fMemoryManager
);
elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
}
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
if (fValidate) {
if (wasAdded)
{
// This is to tell the reuse Validator that this element was
// faulted-in, was not an element in the validator pool originally
elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
fValidator->emitError
(
XMLValid::ElementNotDefined
, qnameRawBuf
);
}
// If its not marked declared, then emit an error
else if (!elemDecl->isDeclared())
{
fValidator->emitError
(
XMLValid::ElementNotDefined
, qnameRawBuf
);
}
fValidator->validateElement(elemDecl);
}
// Expand the element stack and add the new element
fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
// If this is the first element and we are validating, check the root
// element.
if (isRoot)
{
fRootGrammar = fGrammar;
if (fValidate)
{
// If a DocType exists, then check if it matches the root name there.
if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
fValidator->emitError(XMLValid::RootElemNotLikeDocType);
// Some validators may also want to check the root, call the
// XMLValidator::checkRootElement
if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId()))
fValidator->emitError(XMLValid::RootElemNotLikeDocType);
}
}
else if (fValidate)
{
// If the element stack is not empty, then add this element as a
// child of the previous top element. If its empty, this is the root
// elem and is not the child of anything.
fElemStack.addChild(elemDecl->getElementName(), true);
}
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// We loop until we either see a /> or >, handling attribute/value
// pairs until we get there.
unsigned int attCount = 0;
unsigned int curAttListSize = fAttrList->size();
wasAdded = false;
Alberto Massari
committed
// clear the map used to detect duplicate attributes
fUndeclaredAttrRegistry->removeAll();
fElemCount++;
while (true)
{
// And get the next non-space character
XMLCh nextCh = fReaderMgr.peekNextChar();
// If the next character is not a slash or closed angle bracket,
// then it must be whitespace, since whitespace is required
// between the end of the last attribute and the name of the next
// one.
if (attCount)
{
if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
{
{
// Ok, skip by them and peek another char
fReaderMgr.skipPastSpaces();
nextCh = fReaderMgr.peekNextChar();
}
else
{
// Emit the error but keep on going
emitError(XMLErrs::ExpectedWhitespace);
}
}
}
// Ok, here we first check for any of the special case characters.
// If its not one, then we do the normal case processing, which
// assumes that we've hit an attribute value, Otherwise, we do all
// the special case checks.
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
{
// Assume its going to be an attribute, so get a name from
// the input.
if (!fReaderMgr.getName(fAttNameBuf))
{
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.skipPastChar(chCloseAngle);
return false;
}
// And next must be an equal sign
if (!scanEq())
{
static const XMLCh tmpList[] =
{
chSingleQuote, chDoubleQuote, chCloseAngle
, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedEqSign);
// Try to sync back up by skipping forward until we either
// hit something meaningful.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
{
// Jump back to top for normal processing of these
continue;
}
else if ((chFound == chSingleQuote)
|| (chFound == chDoubleQuote)
{
// Just fall through assuming that the value is to follow
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
return false;
}
else
{
// Something went really wrong
return false;
}
}
// See if this attribute is declared for this element. If we are
// not validating of course it will not be at first, but we will
// fault it into the pool (to avoid lots of redundant errors.)
XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( fAttNameBuf.getRawBuffer());
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
XMLCh * namePtr = fAttNameBuf.getRawBuffer();
// Add this attribute to the attribute list that we use to
// pass them to the handler. We reuse its existing elements
// but expand it as required.
// Note that we want to this first since this will
// make a copy of the namePtr; we can then make use of
// that copy in the hashtable lookup that checks
// for duplicates. This will mean we may have to update
// the type of the XMLAttr later.
XMLAttr* curAtt;
if (attCount >= curAttListSize)
{
if (fDoNamespaces) {
curAtt = new (fMemoryManager) XMLAttr
(
fEmptyNamespaceId
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
, true
, fMemoryManager
);
}
else
{
curAtt = new (fMemoryManager) XMLAttr
(
-1
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
, true
, fMemoryManager
);
}
fAttrList->addElement(curAtt);
}
else
{
curAtt = fAttrList->elementAt(attCount);
if (fDoNamespaces)
{
curAtt->set
(
fEmptyNamespaceId
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
);
}
else
{
curAtt->set
(
-1
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
);
}
curAtt->setSpecified(true);
}
// reset namePtr so it refers to newly-allocated memory
namePtr = (XMLCh *)curAtt->getName();
// now need to prepare for duplicate detection
if(attDef)
unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
if(!curCountPtr)
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
curCountPtr = getNewUIntPtr();
*curCountPtr = fElemCount;
fAttDefRegistry->put(attDef, curCountPtr);
}
else if(*curCountPtr < fElemCount)
*curCountPtr = fElemCount;
else
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, attDef->getFullName()
, elemDecl->getFullName()
);
}
}
else
{
unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr);
if(!curCountPtr)
{
curCountPtr = getNewUIntPtr();
*curCountPtr = fElemCount;
fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr);
}
else if(*curCountPtr < fElemCount)
*curCountPtr = fElemCount;
else
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, namePtr
, elemDecl->getFullName()
}
if (fValidate)
{
if (!attDef)
fValidator->emitError
(
XMLValid::AttNotDefinedForElement
, fAttNameBuf.getRawBuffer()
, qnameRawBuf
);
}
}
// Skip any whitespace before the value and then scan the att
// value. This will come back normalized with entity refs and
// char refs expanded.
fReaderMgr.skipPastSpaces();
if (!scanAttValue(attDef, fAttNameBuf.getRawBuffer(), fAttValueBuf))
{
static const XMLCh tmpList[] =
{
chCloseAngle, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedAttrValue);
// It failed, so lets try to get synced back up. We skip
// forward until we find some whitespace or one of the
// chars in our list.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle)
|| (chFound == chForwardSlash)
{
// Just fall through and process this attribute, though
// the value will be "".
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
return false;
}
else
{
// Something went really wrong
return false;
}
}
// must set the newly-minted value on the XMLAttr:
curAtt->setValue(fAttValueBuf.getRawBuffer());
// Now that its all stretched out, lets look at its type and
// determine if it has a valid value. It will output any needed
// errors, but we just keep going. We only need to do this if
// we are validating.
if (attDef)
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
{
// Let the validator pass judgement on the attribute value
if (fValidate)
{
fValidator->validateAttrValue
(
attDef
, fAttValueBuf.getRawBuffer()
, false
, elemDecl
);
}
}
if (fDoNamespaces)
{
// Make sure that the name is basically well formed for namespace
// enabled rules. It either has no colons, or it has one which
// is neither the first or last char.
const int colonFirst = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon);
if (colonFirst != -1)
{
const int colonLast = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon);
if (colonFirst != colonLast)
{
emitError(XMLErrs::TooManyColonsInName);
continue;
}
else if ((colonFirst == 0)
|| (colonLast == (int)fAttNameBuf.getLen() - 1))
{
emitError(XMLErrs::InvalidColonPos);
continue;
}
}
}
attCount++;
// And jump back to the top of the loop
continue;
}
// It was some special case character so do all of the checks and
// deal with it.
if (!nextCh)
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
if (nextCh == chForwardSlash)
{
fReaderMgr.getNextChar();
isEmpty = true;
if (!fReaderMgr.skippedChar(chCloseAngle))
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if (nextCh == chCloseAngle)
{
fReaderMgr.getNextChar();
break;
}
else if (nextCh == chOpenAngle)
{
// Check for this one specially, since its going to be common
// and it is kind of auto-recovering since we've already hit the
// next open bracket, which is what we would have seeked to (and
// skipped this whole tag.)
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
{
// Check for this one specially, which is probably a missing
// attribute name, e.g. ="value". Just issue expected name
// error and eat the quoted string, then jump back to the
// top again.
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.getNextChar();
fReaderMgr.skipQuotedString(nextCh);
fReaderMgr.skipPastSpaces();
continue;
}
}
// Make an initial pass through the list and find any xmlns attributes.
if (fDoNamespaces && attCount)
Gareth Reakes
committed
scanAttrListforNameSpaces(fAttrList, attCount, elemDecl);
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
// Now lets get the fAttrList filled in. This involves faulting in any
// defaulted and fixed attributes and normalizing the values of any that
// we got explicitly.
//
// We update the attCount value with the total number of attributes, but
// it goes in with the number of values we got during the raw scan of
// explictly provided attrs above.
attCount = buildAttList(attCount, elemDecl, *fAttrList);
// If we have a document handler, then tell it about this start tag. We
// don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
// any prefix since its just one big name if we are not doing namespaces.
unsigned int uriId = fEmptyNamespaceId;
if (fDocHandler)
{
if (fDoNamespaces)
{
uriId = resolvePrefix
(
elemDecl->getElementName()->getPrefix()
, ElemStack::Mode_Element
);
}
fDocHandler->startElement
(
*elemDecl
, uriId
, (fDoNamespaces) ? elemDecl->getElementName()->getPrefix() : 0
, *fAttrList
, attCount
, false
, isRoot
);
}
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// If validating, then insure that its legal to have no content
if (fValidate)
{
const int res = fValidator->checkContent(elemDecl, 0, 0);
if (res >= 0)
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, qnameRawBuf
, elemDecl->getFormattedContentModel()
);
}
}
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
fDocHandler->endElement
(
*elemDecl
, uriId
, isRoot
, (fDoNamespaces) ? elemDecl->getElementName()->getPrefix()
: XMLUni::fgZeroLenString
);
// pass back type name information
Alberto Massari
committed
fDocHandler->elementTypeInfo(0, 0);
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
}
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If the elem stack is empty, then it was an empty root
if (isRoot)
gotData = false;
}
return true;
}
unsigned int
DGXMLScanner::resolveQName(const XMLCh* const qName
, XMLBuffer& prefixBuf
, const short mode
, int& prefixColonPos)
{
// Lets split out the qName into a URI and name buffer first. The URI
// can be empty.
prefixColonPos = XMLString::indexOf(qName, chColon);
if (prefixColonPos == -1)
{
// Its all name with no prefix, so put the whole thing into the name
// buffer. Then map the empty string to a URI, since the empty string
// represents the default namespace. This will either return some
// explicit URI which the default namespace is mapped to, or the
// the default global namespace.
bool unknown = false;
prefixBuf.reset();
return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);
}
else
{
// Copy the chars up to but not including the colon into the prefix
// buffer.
prefixBuf.set(qName, prefixColonPos);
// Watch for the special namespace prefixes. We always map these to
// special URIs. 'xml' gets mapped to the official URI that its defined
// to map to by the NS spec. xmlns gets mapped to a special place holder
// URI that we define (so that it maps to something checkable.)
const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();
if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) {
// if this is an element, it is an error to have xmlns as prefix
if (mode == ElemStack::Mode_Element)
emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);
return fXMLNSNamespaceId;
}
else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) {
return fXMLNamespaceId;
}
else
{
bool unknown = false;
unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);
if (unknown)
emitError(XMLErrs::UnknownPrefix, prefixRawBuf);
return uriId;
}
}
}
// ---------------------------------------------------------------------------
// DGXMLScanner: Grammar preparsing
// ---------------------------------------------------------------------------
Grammar* DGXMLScanner::loadGrammar(const InputSource& src
, const short grammarType
, const bool toCache)
{
Grammar* loadedGrammar = 0;
try
{
fGrammarResolver->cacheGrammarFromParse(false);
fGrammarResolver->useCachedGrammarInParse(false);
fRootGrammar = 0;
if (fValScheme == Val_Auto) {
fValidate = true;
}
// Reset some status flags
fInException = false;
fStandalone = false;
fErrorCount = 0;
fHasNoDTD = true;
if (grammarType == Grammar::DTDGrammarType) {
loadedGrammar = loadDTDGrammar(src, toCache);
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
}
// Reset the reader manager to close all files, sockets, etc...
fReaderMgr.reset();
}
// NOTE:
//
// In all of the error processing below, the emitError() call MUST come
// before the flush of the reader mgr, or it will fail because it tries
// to find out the position in the XML source of the error.
catch(const XMLErrs::Codes)
{
// This is a 'first fatal error' type exit, so reset and fall through
fReaderMgr.reset();
}
catch(const XMLValid::Codes)
{
// This is a 'first fatal error' type exit, so reset and fall through
fReaderMgr.reset();
}
catch(const XMLException& excToCatch)
{
// Emit the error and catch any user exception thrown from here. Make
// sure in all cases we flush the reader manager.
fInException = true;
try
{
if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
emitError
(
XMLErrs::DisplayErrorMessage
, excToCatch.getMessage()
);
else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
emitError
(
XMLErrs::XMLException_Fatal
, excToCatch.getType()
, excToCatch.getMessage()
);
else
emitError
(
XMLErrs::XMLException_Error
, excToCatch.getType()
, excToCatch.getMessage()
);
}
Neil Graham
committed
catch(const OutOfMemoryException&)
{
throw;
}
catch(...)
{
// Flush the reader manager and rethrow user's error
fReaderMgr.reset();
throw;
}
// If it returned, then reset the reader manager and fall through
fReaderMgr.reset();
}
Neil Graham
committed
catch(const OutOfMemoryException&)
{
throw;
}
catch(...)
{
// Reset and rethrow
fReaderMgr.reset();
throw;
}
return loadedGrammar;
}
Grammar* DGXMLScanner::loadDTDGrammar(const InputSource& src,
const bool toCache)
{
// Reset the validators
fDTDValidator->reset();
if (fValidatorFromUser)
fValidator->reset();
fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
fGrammarResolver->putGrammar(fDTDGrammar);
fGrammar = fDTDGrammar;
fValidator->setGrammar(fGrammar);
// And for all installed handlers, send reset events. This gives them
// a chance to flush any cached data.
if (fDocHandler)
fDocHandler->resetDocument();
if (fEntityHandler)
fEntityHandler->resetEntities();
if (fErrorReporter)
fErrorReporter->resetErrors();
// Clear out the id reference list
if (toCache) {
unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
fGrammarResolver->putGrammar(fGrammar);
}
// Handle the creation of the XML reader object for this input source.
// This will provide us with transcoding and basic lexing services.
XMLReader* newReader = fReaderMgr.createReader
(
src
, false
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, XMLReader::Source_External
, fCalculateSrcOfs
);
if (!newReader) {
if (src.getIssueFatalErrorIfNotFound())
David Abram Cargill
committed
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
David Abram Cargill
committed
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
}
// In order to make the processing work consistently, we have to
// make this look like an external entity. So create an entity
// decl and fill it in and push it with the reader, as happens
// with an external entity. Put a janitor on it to insure it gets
// cleaned up. The reader manager does not adopt them.
const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);
declDTD->setSystemId(src.getSystemId());
Janitor<DTDEntityDecl> janDecl(declDTD);
// Mark this one as a throw at end
newReader->setThrowAtEnd(true);
// And push it onto the stack, with its pseudo name
fReaderMgr.pushReader(newReader, declDTD);
// If we have a doc type handler and advanced callbacks are enabled,
// call the doctype event.
if (fDocTypeHandler) {
// Create a dummy root
DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
(
gDTDStr
, fEmptyNamespaceId
, DTDElementDecl::Any
, fGrammarPoolMemoryManager
rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
rootDecl->setExternalElemDeclaration(true);
Janitor<DTDElementDecl> janSrc(rootDecl);
fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true);
}
// Create DTDScanner
DTDScanner dtdScanner
(
(DTDGrammar*)fGrammar
, fDocTypeHandler
, fGrammarPoolMemoryManager
, fMemoryManager
);
dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
// Tell it its not in an include section
dtdScanner.scanExtSubsetDecl(false, true);
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
if (fValidate) {
// validate the DTD scan so far
fValidator->preContentValidation(false, true);
}
if (toCache)
fGrammarResolver->cacheGrammars();
return fDTDGrammar;
}
// ---------------------------------------------------------------------------
// DGXMLScanner: Private helper methods
// ---------------------------------------------------------------------------
// This method handles the common initialization, to avoid having to do
// it redundantly in multiple constructors.
void DGXMLScanner::commonInit()
{
// And we need one for the raw attribute scan. This just stores key/
// value string pairs (prior to any processing.)
fAttrNSList = new (fMemoryManager) ValueVectorOf<XMLAttr*>(8, fMemoryManager);
fDTDValidator = new (fMemoryManager) DTDValidator();
Neil Graham
committed
fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
(
131, false, new (fMemoryManager)HashPtr(), fMemoryManager
);
fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
(
Neil Graham
committed
7, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
}
void DGXMLScanner::cleanUp()
{
delete fAttrNSList;
delete fDTDValidator;
Neil Graham
committed
delete fDTDElemNonDeclPool;
delete fAttDefRegistry;
delete fUndeclaredAttrRegistry;
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
}
// This method is called from scanStartTagNS() to build up the list of
// XMLAttr objects that will be passed out in the start tag callout. We
// get the key/value pairs from the raw scan of explicitly provided attrs,
// which have not been normalized. And we get the element declaration from
// which we will get any defaulted or fixed attribute defs and add those
// in as well.
unsigned int
DGXMLScanner::buildAttList(const unsigned int attCount
, XMLElementDecl* elemDecl
, RefVectorOf<XMLAttr>& toFill)
{
// Ask the element to clear the 'provided' flag on all of the att defs
// that it owns, and to return us a boolean indicating whether it has
// any defs.
const bool hasDefs = elemDecl->hasAttDefs();
// If there are no expliclitily provided attributes and there are no
// defined attributes for the element, the we don't have anything to do.
// So just return zero in this case.
if (!hasDefs && !attCount)
return 0;
// Keep up with how many attrs we end up with total
unsigned int retCount = attCount;
// And get the current size of the output vector. This lets us use
// existing elements until we fill it, then start adding new ones.
const unsigned int curAttListSize = toFill.size();
// Ok, so lets get an enumerator for the attributes of this element
// and run through them for well formedness and validity checks. But
// make sure that we had any attributes before we do it, since the list
// would have have gotten faulted in anyway.
if (hasDefs)
{
XMLAttDefList& attDefList = elemDecl->getAttDefList();
Neil Graham
committed
for(unsigned int i=0; i<attDefList.getAttDefCount(); i++)
{
// Get the current att def, for convenience and its def type
Neil Graham
committed
XMLAttDef& curDef = attDefList.getAttDef(i);
unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
if (!attCountPtr || *attCountPtr < fElemCount)
{ // did not occur
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
if (fValidate)
{
// If we are validating and its required, then an error
if (defType == XMLAttDef::Required)
{
fValidator->emitError
(
XMLValid::RequiredAttrNotProvided
, curDef.getFullName()
);
}
else if ((defType == XMLAttDef::Default) ||
(defType == XMLAttDef::Fixed) )
{
if (fStandalone && curDef.isExternal())
{
// XML 1.0 Section 2.9
// Document is standalone, so attributes must not be defaulted.
fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
}
}
}
// Fault in the value if needed, and bump the att count
if ((defType == XMLAttDef::Default)
|| (defType == XMLAttDef::Fixed))
{
// Let the validator pass judgement on the attribute value
if (fValidate)
{
fValidator->validateAttrValue
(
&curDef
, curDef.getValue()
, false
, elemDecl
);
}
XMLAttr* curAtt;
if (retCount >= curAttListSize)
{
if (fDoNamespaces)
{
curAtt = new (fMemoryManager) XMLAttr
(
-1
, curDef.getFullName()
, XMLUni::fgZeroLenString
, curDef.getValue()
, curDef.getType()
, false
, fMemoryManager
curAtt = new (fMemoryManager) XMLAttr
(
-1
, curDef.getFullName()
, XMLUni::fgZeroLenString
, curDef.getValue()
, curDef.getType()
, false
, fMemoryManager
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
);
}
fAttrList->addElement(curAtt);
}
else
{
curAtt = fAttrList->elementAt(retCount);
if (fDoNamespaces)
{
curAtt->set
(
fEmptyNamespaceId
, curDef.getFullName()
, curDef.getValue()
, curDef.getType()
);
}
else
{
curAtt->set
(
-1
, curDef.getFullName()
, XMLUni::fgZeroLenString
, curDef.getValue()
, curDef.getType()
);
}
curAtt->setSpecified(false);
}
if (fDoNamespaces)
{
// Map the new attribute's prefix to a URI id and store
// that in the attribute object.
const XMLCh* attPrefix = curAtt->getPrefix();
if (attPrefix && *attPrefix) {
curAtt->setURIId
(
resolvePrefix(attPrefix, ElemStack::Mode_Attribute)
);
}
}
retCount++;
}
}
}
}
for (unsigned int i=0; i < fAttrNSList->size(); i++) {
XMLAttr* providedAttr = fAttrNSList->elementAt(i);
providedAttr->setURIId
(
resolvePrefix
(
providedAttr->getPrefix(),
ElemStack::Mode_Attribute
)
);
}
fAttrNSList->removeAllElements();
return retCount;
}
unsigned int
DGXMLScanner::resolvePrefix( const XMLCh* const prefix
, const ElemStack::MapModes mode)
{
// Watch for the special namespace prefixes. We always map these to
// special URIs. 'xml' gets mapped to the official URI that its defined
// to map to by the NS spec. xmlns gets mapped to a special place holder
// URI that we define (so that it maps to something checkable.)
if (XMLString::equals(prefix, XMLUni::fgXMLNSString))
return fXMLNSNamespaceId;
else if (XMLString::equals(prefix, XMLUni::fgXMLString))
return fXMLNamespaceId;
// Ask the element stack to search up itself for a mapping for the
// passed prefix.
bool unknown;
unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown);
// If it was unknown, then the URI was faked in but we have to issue an error
if (unknown)
emitError(XMLErrs::UnknownPrefix, prefix);
return uriId;
}
// This method will reset the scanner data structures, and related plugged
// in stuff, for a new scan session. We get the input source for the primary
// XML entity, create the reader for it, and push it on the stack so that
// upon successful return from here we are ready to go.
void DGXMLScanner::scanReset(const InputSource& src)
{
// This call implicitly tells us that we are going to reuse the scanner
// if it was previously used. So tell the validator to reset itself.
//
// But, if the fUseCacheGrammar flag is set, then don't reset it.
//
// NOTE: The ReaderMgr is flushed on the way out, because that is
// required to insure that files are closed.
fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
fGrammarResolver->putGrammar(fDTDGrammar);
fGrammar = fDTDGrammar;
fRootGrammar = 0;
fValidator->setGrammar(fGrammar);
// Reset validation
fValidate = (fValScheme == Val_Always) ? true : false;
// And for all installed handlers, send reset events. This gives them
// a chance to flush any cached data.
if (fDocHandler)
fDocHandler->resetDocument();
if (fEntityHandler)
fEntityHandler->resetEntities();
if (fErrorReporter)
fErrorReporter->resetErrors();
// Clear out the id reference list
fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
fRootElemName = 0;
// Reset the element stack, and give it the latest ids for the special
// URIs it has to know about.
fElemStack.reset
(
fEmptyNamespaceId
, fUnknownNamespaceId
, fXMLNamespaceId
, fXMLNSNamespaceId
);
// Reset some status flags
fInException = false;
fStandalone = false;
fErrorCount = 0;
fHasNoDTD = true;
// Reset the validators
fDTDValidator->reset();
fDTDValidator->setErrorReporter(fErrorReporter);
if (fValidatorFromUser)
fValidator->reset();
// Handle the creation of the XML reader object for this input source.
// This will provide us with transcoding and basic lexing services.
XMLReader* newReader = fReaderMgr.createReader
(
src
, true
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, XMLReader::Source_External
, fCalculateSrcOfs
);
if (!newReader) {
if (src.getIssueFatalErrorIfNotFound())
David Abram Cargill
committed
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
David Abram Cargill
committed
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
}
// Push this read onto the reader manager
fReaderMgr.pushReader(newReader, 0);
// and reset security-related things if necessary:
if(fSecurityManager != 0)
{
fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
fEntityExpansionCount = 0;
}
if(fUIntPoolRowTotal >= 32)
{ // 8 KB tied up with validating attributes...
fAttDefRegistry->removeAll();
fUndeclaredAttrRegistry->removeAll();
recreateUIntPool();
}
else
{
// note that this will implicitly reset the values of the hashtables,
// though their buckets will still be tied up
resetUIntPool();
}
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
}
// This method is called between markup in content. It scans for character
// data that is sent to the document handler. It watches for any markup
// characters that would indicate that the character data has ended. It also
// handles expansion of general and character entities.
//
// sendData() is a local static helper for this method which handles some
// code that must be done in three different places here.
void DGXMLScanner::sendCharData(XMLBuffer& toSend)
{
// If no data in the buffer, then nothing to do
if (toSend.isEmpty())
return;
// We do different things according to whether we are validating or
// not. If not, its always just characters; else, it depends on the
// current element's content model.
if (fValidate)
{
// Get the raw data we need for the callback
const XMLCh* const rawBuf = toSend.getRawBuffer();
const unsigned int len = toSend.getLen();
// And see if the current element is a 'Children' style content model
const ElemStack::StackElem* topElem = fElemStack.topElement();
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();
if (charOpts == XMLElementDecl::NoCharData)
{
// They definitely cannot handle any type of char data
fValidator->emitError(XMLValid::NoCharDataInCM);
}
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
{
// Its all spaces. So, if they can take spaces, then send it
// as ignorable whitespace. If they can handle any char data
// send it as characters.
if (charOpts == XMLElementDecl::SpacesOk) {
if (fDocHandler)
fDocHandler->ignorableWhitespace(rawBuf, len, false);
}
else if (charOpts == XMLElementDecl::AllCharData)
{
if (fDocHandler)
fDocHandler->docCharacters(rawBuf, len, false);
}
}
else
{
// If they can take any char data, then send it. Otherwise, they
// can only handle whitespace and can't handle this stuff so
// issue an error.
if (charOpts == XMLElementDecl::AllCharData)
{
if (fDocHandler)
fDocHandler->docCharacters(rawBuf, len, false);
}
else
{
fValidator->emitError(XMLValid::NoCharDataInCM);
}
}
}
else
{
// Always assume its just char data if not validating
if (fDocHandler)
fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
}
// Reset buffer
toSend.reset();
}
// This method is called with a key/value string pair that represents an
// xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
// current top of the element stack based on this data. We know that when
// we get here, that it is one of these forms, so we don't bother confirming
// it.
//
// But we have to ensure
// 1. xxx is not xmlns
// 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
// 3. yyy is not XMLUni::fgXMLNSURIName
// 4. if xxx is not null, then yyy cannot be an empty string.
void DGXMLScanner::updateNSMap(const XMLCh* const attrPrefix
, const XMLCh* const attrLocalName
, const XMLCh* const attrValue)
{
// We either have the default prefix (""), or we point it into the attr
// name parameter. Note that the xmlns is not the prefix we care about
// here. To us, the 'prefix' is really the local part of the attrName
// parameter.
//
// Check 1. xxx is not xmlns
// 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
// 3. yyy is not XMLUni::fgXMLNSURIName
// 4. if xxx is not null, then yyy cannot be an empty string.
if (attrPrefix && *attrPrefix) {
if (XMLString::equals(attrLocalName, XMLUni::fgXMLNSString))
emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
else if (XMLString::equals(attrLocalName, XMLUni::fgXMLString)) {
if (!XMLString::equals(attrValue, XMLUni::fgXMLURIName))
emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
}
Neil Graham
committed
if (!attrValue)
emitError(XMLErrs::NoEmptyStrNamespace, attrLocalName);
else if(!*attrValue && fXMLVersion == XMLReader::XMLV1_0)
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
emitError(XMLErrs::NoEmptyStrNamespace, attrLocalName);
}
if (XMLString::equals(attrValue, XMLUni::fgXMLNSURIName))
emitError(XMLErrs::NoUseOfxmlnsURI);
else if (XMLString::equals(attrValue, XMLUni::fgXMLURIName)) {
if (!XMLString::equals(attrLocalName, XMLUni::fgXMLString))
emitError(XMLErrs::XMLURINotMatchXMLPrefix);
}
// Ok, we have to get the unique id for the attribute value, which is the
// URI that this value should be mapped to. The validator has the
// namespace string pool, so we ask him to find or add this new one. Then
// we ask the element stack to add this prefix to URI Id mapping.
fElemStack.addPrefix
(
attrLocalName
, fURIStringPool->addOrFind(attrValue)
);
}
Gareth Reakes
committed
void DGXMLScanner::scanAttrListforNameSpaces(RefVectorOf<XMLAttr>* theAttrList, int attCount,
XMLElementDecl* elemDecl)
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
{
// Make an initial pass through the list and find any xmlns attributes or
// schema attributes.
// When we find one, send it off to be used to update the element stack's
// namespace mappings.
for (int index = 0; index < attCount; index++)
{
// each attribute has the prefix:suffix="value"
XMLAttr* curAttr = theAttrList->elementAt(index);
const XMLCh* attPrefix = curAttr->getPrefix();
const XMLCh* attLocalName = curAttr->getName();
if (attPrefix && *attPrefix)
{
if (XMLString::equals(attPrefix, XMLUni::fgXMLString)) {
curAttr->setURIId(fXMLNamespaceId);
}
else if (XMLString::equals(attPrefix, XMLUni::fgXMLNSString)) {
curAttr->setURIId(fXMLNSNamespaceId);
updateNSMap(attPrefix, attLocalName, curAttr->getValue());
}
else {
fAttrNSList->addElement(curAttr);
}
}
else if (XMLString::equals(XMLUni::fgXMLNSString, attLocalName))
{
updateNSMap(attPrefix, XMLUni::fgZeroLenString, curAttr->getValue());
}
Gareth Reakes
committed
// check for duplicate namespace attributes:
// by checking for qualified names with the same local part and with prefixes
// which have been bound to namespace names that are identical.
Gareth Reakes
committed
XMLAttr* loopAttr;
for (int attrIndex=0; attrIndex < index; attrIndex++) {
loopAttr = theAttrList->elementAt(attrIndex);
if (loopAttr->getURIId() == curAttr->getURIId() &&
XMLString::equals(loopAttr->getName(), curAttr->getName())) {
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, curAttr->getName()
, elemDecl->getFullName()
);
}
}
}
InputSource* DGXMLScanner::resolveSystemId(const XMLCh* const sysId)
{
// Create a buffer for expanding the system id
XMLBufBid bbSys(&fBufMgr);
XMLBuffer& expSysId = bbSys.getBuffer();
// Allow the entity handler to expand the system id if they choose
// to do so.
InputSource* srcToFill = 0;
if (fEntityHandler)
{
if (!fEntityHandler->expandSystemId(sysId, expSysId))
expSysId.set(sysId);
XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
expSysId.getRawBuffer());
srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
}
else
{
expSysId.set(sysId);
}
// If they didn't create a source via the entity handler, then we
// have to create one on our own.
if (!srcToFill)
{
ReaderMgr::LastExtEntityInfo lastInfo;
fReaderMgr.getLastExtEntityInfo(lastInfo);
David Abram Cargill
committed
XMLURL urlTmp(fMemoryManager);
if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
(urlTmp.isRelative()))
if (!fStandardUriConformant)
srcToFill = new (fMemoryManager) LocalFileInputSource
(
lastInfo.systemId
, expSysId.getRawBuffer()
Khaled Noaman
committed
, fMemoryManager
);
else
David Abram Cargill
committed
ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
David Abram Cargill
committed
else
{
if (fStandardUriConformant && urlTmp.hasInvalidChar())
ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
}
}
return srcToFill;
}
// ---------------------------------------------------------------------------
// DGXMLScanner: Private parsing methods
// ---------------------------------------------------------------------------
bool DGXMLScanner::scanAttValue( const XMLAttDef* const attDef
, const XMLCh *const attrName
, XMLBuffer& toFill)
{
enum States
{
InWhitespace
, InContent
};
// Get the type and name
const XMLAttDef::AttTypes type = (attDef)
?attDef->getType()
:XMLAttDef::CData;
// Reset the target buffer
toFill.reset();
// Get the next char which must be a single or double quote
XMLCh quoteCh;
if (!fReaderMgr.skipIfQuote(quoteCh))
return false;
// We have to get the current reader because we have to ignore closing
// quotes until we hit the same reader again.
const unsigned int curReader = fReaderMgr.getCurrentReaderNum();
// Get attribute def - to check to see if it's declared externally or not
bool isAttExternal = (attDef)
?attDef->isExternal()
:false;
// Loop until we get the attribute value. Note that we use a double
// loop here to avoid the setup/teardown overhead of the exception
// handler on every round.
XMLCh nextCh;
XMLCh secondCh = 0;
States curState = InContent;
bool firstNonWS = false;
bool gotLeadingSurrogate = false;
bool escaped;
while (true)
{
try
{
while(true)
{
nextCh = fReaderMgr.getNextChar();
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
// Check for our ending quote in the same entity
if (nextCh == quoteCh)
{
if (curReader == fReaderMgr.getCurrentReaderNum())
return true;
// Watch for spillover into a previous entity
if (curReader > fReaderMgr.getCurrentReaderNum())
{
emitError(XMLErrs::PartialMarkupInEntity);
return false;
}
}
// Check for an entity ref now, before we let it affect our
// whitespace normalization logic below. We ignore the empty flag
// in this one.
escaped = false;
if (nextCh == chAmpersand)
{
if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
{
gotLeadingSurrogate = false;
continue;
}
else
{
if (escaped && !fElemStack.isEmpty())
fElemStack.setReferenceEscaped();
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// Its got to at least be a valid XML character
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
}
}
gotLeadingSurrogate = false;
}
// If its not escaped, then make sure its not a < character, which
// is not allowed in attribute values.
if (!escaped && (nextCh == chOpenAngle))
emitError(XMLErrs::BracketInAttrValue, attrName);
// If the attribute is a CDATA type we do simple replacement of
// tabs and new lines with spaces, if the character is not escaped
// by way of a char ref.
//
// Otherwise, we do the standard non-CDATA normalization of
// compressing whitespace to single spaces and getting rid of leading
// and trailing whitespace.
if (type == XMLAttDef::CData)
{
if (!escaped)
{
if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
{
// Check Validity Constraint for Standalone document declaration
// XML 1.0, Section 2.9
if (fStandalone && fValidate && isAttExternal)
{
// Can't have a standalone document declaration of "yes" if attribute
// values are subject to normalisation
fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
}
nextCh = chSpace;
}
}
}
else
{
if (curState == InWhitespace)
{
if ((escaped && nextCh != chSpace) || !fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
{
if (firstNonWS)
toFill.append(chSpace);
curState = InContent;
firstNonWS = true;
}
else
{
continue;
}
}
else if (curState == InContent)
{
if ((nextCh == chSpace) ||
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
{
curState = InWhitespace;
// Check Validity Constraint for Standalone document declaration
// XML 1.0, Section 2.9
if (fStandalone && fValidate && isAttExternal)
{
if (!firstNonWS || (nextCh != chSpace) || (fReaderMgr.lookingAtSpace()))
{
// Can't have a standalone document declaration of "yes" if attribute
// values are subject to normalisation
fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
}
}
continue;
}
firstNonWS = true;
}
}
// Else add it to the buffer
toFill.append(nextCh);
if (secondCh)
toFill.append(secondCh);
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
}
}
catch(const EndOfEntityException&)
{
// Just eat it and continue.
gotLeadingSurrogate = false;
escaped = false;
}
}
return true;
}
// This method scans a CDATA section. It collects the character into one
// of the temp buffers and calls the document handler, if any, with the
// characters. It assumes that the <![CDATA string has been scanned before
// this call.
void DGXMLScanner::scanCDSection()
{
static const XMLCh CDataClose[] =
{
chCloseSquare, chCloseAngle, chNull
};
// The next character should be the opening square bracket. If not
// issue an error, but then try to recover by skipping any whitespace
// and checking again.
if (!fReaderMgr.skippedChar(chOpenSquare))
{
emitError(XMLErrs::ExpectedOpenSquareBracket);
fReaderMgr.skipPastSpaces();
// If we still don't find it, then give up, else keep going
if (!fReaderMgr.skippedChar(chOpenSquare))
return;
}
// Get a buffer for this
XMLBufBid bbCData(&fBufMgr);
// We just scan forward until we hit the end of CDATA section sequence.
// CDATA is effectively a big escape mechanism so we don't treat markup
// characters specially here.
bool emittedError = false;
Tinny Ng
committed
// Get the character data opts for the current element
const ElemStack::StackElem* topElem = fElemStack.topElement();
XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();
while (true)
{
const XMLCh nextCh = fReaderMgr.getNextChar();
// Watch for unexpected end of file
if (!nextCh)
{
emitError(XMLErrs::UnterminatedCDATASection);
David Abram Cargill
committed
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
{
// This document is standalone; this ignorable CDATA whitespace is forbidden.
// XML 1.0, Section 2.9
// And see if the current element is a 'Children' style content model
if (topElem->fThisElement->isExternal()) {
if (charOpts == XMLElementDecl::SpacesOk) // Element Content
{
// Error - standalone should have a value of "no" as whitespace detected in an
// element type with element content whose element declaration was external
fValidator->emitError(XMLValid::NoWSForStandalone);
}
}
}
// If this is a close square bracket it could be our closing
// sequence.
if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
{
Tinny Ng
committed
// make sure we were not expecting a trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
if (fValidate) {
if (charOpts != XMLElementDecl::AllCharData)
{
// They definitely cannot handle any type of char data
fValidator->emitError(XMLValid::NoCharDataInCM);
}
}
// If we have a doc handler, call it
if (fDocHandler)
{
fDocHandler->docCharacters
(
bbCData.getRawBuffer()
, bbCData.getLen()
, true
);
}
// And we are done
break;
}
// Make sure its a valid character. But if we've emitted an error
// already, don't bother with the overhead since we've already told
// them about it.
if (!emittedError)
{
// Deal with surrogate pairs
if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
emittedError = true;
}
}
gotLeadingSurrogate = false;
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
}
}
// Add it to the buffer
bbCData.append(nextCh);
}
}
void DGXMLScanner::scanCharData(XMLBuffer& toUse)
{
// We have to watch for the stupid ]]> sequence, which is illegal in
// character data. So this is a little state machine that handles that.
enum States
{
State_Waiting
, State_GotOne
, State_GotTwo
};
// Reset the buffer before we start
toUse.reset();
// Turn on the 'throw at end' flag of the reader manager
ThrowEOEJanitor jan(&fReaderMgr, true);
// In order to be more efficient we have to use kind of a deeply nested
// set of blocks here. The outer block puts on a try and catches end of
// entity exceptions. The inner loop is the per-character loop. If we
// put the try inside the inner loop, it would work but would require
// the exception handling code setup/teardown code to be invoked for
// each character.
XMLCh nextCh;
XMLCh secondCh = 0;
States curState = State_Waiting;
bool escaped = false;
bool gotLeadingSurrogate = false;
bool notDone = true;
while (notDone)
{
try
{
while (true)
{
// Eat through as many plain content characters as possible without
// needing special handling. Moving most content characters here,
// in this one call, rather than running the overall loop once
// per content character, is a speed optimization.
if (curState == State_Waiting && !gotLeadingSurrogate)
fReaderMgr.movePlainContentChars(toUse);
// Try to get another char from the source
// The code from here on down covers all contengencies,
if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
{
// If we were waiting for a trailing surrogate, its an error
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
notDone = false;
break;
}
// Watch for a reference. Note that the escapement mechanism
// is ignored in this content.
escaped = false;
if (nextCh == chAmpersand)
{
sendCharData(toUse);
// Turn off the throwing at the end of entity during this
ThrowEOEJanitor jan(&fReaderMgr, false);
if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
{
gotLeadingSurrogate = false;
continue;
}
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
// Deal with surrogate pairs
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// Make sure the returned char is a valid XML char
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
David Abram Cargill
committed
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
gotLeadingSurrogate = false;
}
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
// Keep the state machine up to date
if (!escaped)
{
if (nextCh == chCloseSquare)
{
if (curState == State_Waiting)
curState = State_GotOne;
else if (curState == State_GotOne)
curState = State_GotTwo;
}
else if (nextCh == chCloseAngle)
{
if (curState == State_GotTwo)
emitError(XMLErrs::BadSequenceInCharData);
curState = State_Waiting;
}
else
{
curState = State_Waiting;
}
}
else
{
curState = State_Waiting;
}
// Add this char to the buffer
toUse.append(nextCh);
if (secondCh)
toUse.append(secondCh);
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
}
}
catch(const EndOfEntityException& toCatch)
{
// Some entity ended, so we have to send any accumulated
// chars and send an end of entity event.
sendCharData(toUse);
gotLeadingSurrogate = false;
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
}
}
// Check the validity constraints as per XML 1.0 Section 2.9
if (fValidate && fStandalone)
{
// See if the text contains whitespace
// Get the raw data we need for the callback
const XMLCh* rawBuf = toUse.getRawBuffer();
const unsigned int len = toUse.getLen();
const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
if (isSpaces)
{
// And see if the current element is a 'Children' style content model
const ElemStack::StackElem* topElem = fElemStack.topElement();
if (topElem->fThisElement->isExternal()) {
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();
if (charOpts == XMLElementDecl::SpacesOk) // => Element Content
{
// Error - standalone should have a value of "no" as whitespace detected in an
// element type with element content whose element declaration was external
//
fValidator->emitError(XMLValid::NoWSForStandalone);
}
}
}
}
// Send any char data that we accumulated into the buffer
sendCharData(toUse);
}
// This method will scan a general/character entity ref. It will either
// expand a char ref and return it directly, or push a reader for a general
// entity.
//
// The return value indicates whether the char parameters hold the value
// or whether the value was pushed as a reader, or that it failed.
//
// The escaped flag tells the caller whether the returned parameter resulted
// from a character reference, which escapes the character in some cases. It
// only makes any difference if the return value indicates the value was
// returned directly.
DGXMLScanner::EntityExpRes
DGXMLScanner::scanEntityRef( const bool inAttVal
, XMLCh& firstCh
, XMLCh& secondCh
, bool& escaped)
{
// Assume no escape
secondCh = 0;
escaped = false;
// We have to insure that its all in one entity
const unsigned int curReader = fReaderMgr.getCurrentReaderNum();
// If the next char is a pound, then its a character reference and we
// need to expand it always.
if (fReaderMgr.skippedChar(chPound))
{
// Its a character reference, so scan it and get back the numeric
// value it represents.
if (!scanCharRef(firstCh, secondCh))
return EntityExp_Failed;
escaped = true;
if (curReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
return EntityExp_Returned;
}
// Expand it since its a normal entity ref
XMLBufBid bbName(&fBufMgr);
if (!fReaderMgr.getName(bbName.getBuffer()))
{
emitError(XMLErrs::ExpectedEntityRefName);
return EntityExp_Failed;
}
// Next char must be a semi-colon. But if its not, just emit
// an error and try to continue.
if (!fReaderMgr.skippedChar(chSemiColon))
emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
// Make sure we ended up on the same entity reader as the & char
if (curReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
// Look up the name in the general entity pool
XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(bbName.getRawBuffer());
// If it does not exist, then obviously an error
if (!decl)
{
// XML 1.0 Section 4.1
// Well-formedness Constraint for entity not found:
// In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
// or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
// or a parameter entity
//
// Else it's Validity Constraint
if (fStandalone || fHasNoDTD)
emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
else {
if (fValidate)
fValidator->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
}
return EntityExp_Failed;
}
// XML 1.0 Section 4.1
// If we are a standalone document, then it has to have been declared
// in the internal subset.
if (fStandalone && !decl->getDeclaredInIntSubset())
emitError(XMLErrs::IllegalRefInStandalone, bbName.getRawBuffer());
if (decl->isExternal())
{
// If its unparsed, then its not valid here
if (decl->isUnparsed())
{
emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer());
return EntityExp_Failed;
}
// If we are in an attribute value, then not valid but keep going
if (inAttVal)
emitError(XMLErrs::NoExtRefsInAttValue);
// And now create a reader to read this entity
InputSource* srcUsed;
XMLReader* reader = fReaderMgr.createReader
(
decl->getBaseURI()
, decl->getSystemId()
, decl->getPublicId()
, false
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, XMLReader::Source_External
, srcUsed
, fCalculateSrcOfs
);
// Put a janitor on the source so it gets cleaned up on exit
Janitor<InputSource> janSrc(srcUsed);
// If the creation failed, and its not because the source was empty,
// then emit an error and return.
if (!reader)
David Abram Cargill
committed
ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed->getSystemId(), fMemoryManager);
// Push the reader. If its a recursive expansion, then emit an error
// and return an failure.
if (!fReaderMgr.pushReader(reader, decl))
{
emitError(XMLErrs::RecursiveEntity, decl->getName());
return EntityExp_Failed;
}
// here's where we need to check if there's a SecurityManager,
// how many entity references we've had
if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
XMLCh expLimStr[16];
David Abram Cargill
committed
XMLString::binToText(fEntityExpansionLimit, expLimStr, 15, 10, fMemoryManager);
emitError
(
XMLErrs::EntityExpansionLimitExceeded
, expLimStr
);
// there seems nothing better to do than reset the entity expansion counter
fEntityExpansionCount = 0;
}
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
// Do a start entity reference event.
//
// <TBD> For now, we supress them in att values. Later, when
// the stuff is in place to correctly allow DOM to handle them
// we'll turn this back on.
if (fDocHandler && !inAttVal)
fDocHandler->startEntityReference(*decl);
// If it starts with the XML string, then parse a text decl
if (checkXMLDecl(true))
scanXMLDecl(Decl_Text);
}
else
{
// If its one of the special char references, then we can return
// it as a character, and its considered escaped.
if (decl->getIsSpecialChar())
{
firstCh = decl->getValue()[0];
escaped = true;
return EntityExp_Returned;
}
// Create a reader over a memory stream over the entity value
// We force it to assume UTF-16 by passing in an encoding
// string. This way it won't both trying to predecode the
// first line, looking for an XML/TextDecl.
XMLReader* valueReader = fReaderMgr.createIntEntReader
(
decl->getName()
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, decl->getValue()
, decl->getValueLen()
, false
);
// Try to push the entity reader onto the reader manager stack,
// where it will become the subsequent input. If it fails, that
// means the entity is recursive, so issue an error. The reader
// will have just been discarded, but we just keep going.
if (!fReaderMgr.pushReader(valueReader, decl))
emitError(XMLErrs::RecursiveEntity, decl->getName());
// here's where we need to check if there's a SecurityManager,
// how many entity references we've had
if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
XMLCh expLimStr[16];
David Abram Cargill
committed
XMLString::binToText(fEntityExpansionLimit, expLimStr, 15, 10, fMemoryManager);
emitError
(
XMLErrs::EntityExpansionLimitExceeded
, expLimStr
);
}
// Do a start entity reference event.
//
// <TBD> For now, we supress them in att values. Later, when
// the stuff is in place to correctly allow DOM to handle them
// we'll turn this back on.
if (fDocHandler && !inAttVal)
fDocHandler->startEntityReference(*decl);
// If it starts with the XML string, then it's an error
if (checkXMLDecl(true)) {
emitError(XMLErrs::TextDeclNotLegalHere);
fReaderMgr.skipPastChar(chCloseAngle);
}
}
return EntityExp_Pushed;
}
XERCES_CPP_NAMESPACE_END