Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Id$
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/IGXMLScanner.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/sax/InputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/framework/MemoryManager.hpp>
#include <xercesc/framework/XMLGrammarPool.hpp>
#include <xercesc/framework/XMLDTDDescription.hpp>
#include <xercesc/validators/common/GrammarResolver.hpp>
#include <xercesc/validators/DTD/DocTypeHandler.hpp>
#include <xercesc/validators/DTD/DTDScanner.hpp>
#include <xercesc/validators/DTD/DTDValidator.hpp>
#include <xercesc/validators/schema/SchemaValidator.hpp>
#include <xercesc/validators/schema/identity/FieldActivator.hpp>
#include <xercesc/validators/schema/identity/XPathMatcherStack.hpp>
#include <xercesc/validators/schema/identity/ValueStoreCache.hpp>
#include <xercesc/validators/schema/identity/IC_Selector.hpp>
#include <xercesc/validators/schema/identity/ValueStore.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// IGXMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------
IGXMLScanner::IGXMLScanner( XMLValidator* const valToAdopt
, GrammarResolver* const grammarResolver
, MemoryManager* const manager) :
XMLScanner(valToAdopt, grammarResolver, manager)
, fSeeXsi(false)
, fElemStateSize(16)
, fElemState(0)
Khaled Noaman
committed
, fElemStack(manager)
, fContent(1023, manager)
, fRawAttrList(0)
, fDTDValidator(0)
, fSchemaValidator(0)
, fDTDGrammar(0)
, fMatcherStack(0)
, fValueStoreCache(0)
, fFieldActivator(0)
{
try
{
commonInit();
// use fDTDValidator as the default validator
if (!valToAdopt)
fValidator = fDTDValidator;
}
catch(...)
{
cleanUp();
throw;
}
}
IGXMLScanner::IGXMLScanner( XMLDocumentHandler* const docHandler
, DocTypeHandler* const docTypeHandler
, XMLEntityHandler* const entityHandler
, XMLErrorReporter* const errHandler
, XMLValidator* const valToAdopt
, GrammarResolver* const grammarResolver
, MemoryManager* const manager) :
XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
, fSeeXsi(false)
, fElemStateSize(16)
, fElemState(0)
Khaled Noaman
committed
, fElemStack(manager)
, fContent(1023, manager)
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
, fRawAttrList(0)
, fDTDValidator(0)
, fSchemaValidator(0)
, fDTDGrammar(0)
, fMatcherStack(0)
, fValueStoreCache(0)
, fFieldActivator(0)
{
try
{
commonInit();
//use fDTDValidator as the default validator
if (!valToAdopt)
fValidator = fDTDValidator;
}
catch(...)
{
cleanUp();
throw;
}
}
IGXMLScanner::~IGXMLScanner()
{
cleanUp();
}
// ---------------------------------------------------------------------------
// XMLScanner: Getter methods
// ---------------------------------------------------------------------------
NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool()
{
return fDTDGrammar->getEntityDeclPool();
}
const NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool() const
{
return fDTDGrammar->getEntityDeclPool();
}
// ---------------------------------------------------------------------------
// IGXMLScanner: Main entry point to scan a document
// ---------------------------------------------------------------------------
void IGXMLScanner::scanDocument(const InputSource& src)
{
// Bump up the sequence id for this parser instance. This will invalidate
// any previous progressive scan tokens.
fSequenceId++;
try
{
// Reset the scanner and its plugged in stuff for a new run. This
// resets all the data structures, creates the initial reader and
// pushes it on the stack, and sets up the base document path.
scanReset(src);
// If we have a document handler, then call the start document
if (fDocHandler)
fDocHandler->startDocument();
// Scan the prolog part, which is everything before the root element
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
// including the DTD subsets.
scanProlog();
// If we got to the end of input, then its not a valid XML file.
// Else, go on to scan the content.
if (fReaderMgr.atEOF())
{
emitError(XMLErrs::EmptyMainEntity);
}
else
{
// Scan content, and tell it its not an external entity
if (scanContent(false))
{
// Do post-parse validation if required
if (fValidate)
{
// We handle ID reference semantics at this level since
// its required by XML 1.0.
checkIDRefs();
// Then allow the validator to do any extra stuff it wants
// fValidator->postParseValidation();
}
// That went ok, so scan for any miscellaneous stuff
if (!fReaderMgr.atEOF())
scanMiscellaneous();
}
}
// If we have a document handler, then call the end document
if (fDocHandler)
fDocHandler->endDocument();
// Reset the reader manager to close all files, sockets, etc...
fReaderMgr.reset();
}
// NOTE:
//
// In all of the error processing below, the emitError() call MUST come
// before the flush of the reader mgr, or it will fail because it tries
// to find out the position in the XML source of the error.
catch(const XMLErrs::Codes)
{
// This is a 'first fatal error' type exit, so reset and fall through
fReaderMgr.reset();
}
catch(const XMLValid::Codes)
{
// This is a 'first fatal error' type exit, so reset and fall through
fReaderMgr.reset();
}
catch(const XMLException& excToCatch)
{
// Emit the error and catch any user exception thrown from here. Make
// sure in all cases we flush the reader manager.
fInException = true;
try
{
if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
emitError
(
XMLErrs::XMLException_Warning
, excToCatch.getType()
, excToCatch.getMessage()
);
else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
emitError
(
XMLErrs::XMLException_Fatal
, excToCatch.getType()
, excToCatch.getMessage()
);
else
emitError
(
XMLErrs::XMLException_Error
, excToCatch.getType()
, excToCatch.getMessage()
);
}
catch(...)
{
// Flush the reader manager and rethrow user's error
fReaderMgr.reset();
throw;
}
// If it returned, then reset the reader manager and fall through
fReaderMgr.reset();
}
catch(...)
{
// Reset and rethrow
fReaderMgr.reset();
throw;
}
}
bool IGXMLScanner::scanNext(XMLPScanToken& token)
{
// Make sure this token is still legal
if (!isLegalToken(token))
ThrowXML(RuntimeException, XMLExcepts::Scan_BadPScanToken);
// Find the next token and remember the reader id
unsigned int orgReader;
XMLTokens curToken;
bool retVal = true;
try
{
while (true)
{
// We have to handle any end of entity exceptions that happen here.
// We could be at the end of X nested entities, each of which will
// generate an end of entity exception as we try to move forward.
try
{
curToken = senseNextToken(orgReader);
break;
}
catch(const EndOfEntityException& toCatch)
{
// Send an end of entity reference event
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
}
}
if (curToken == Token_CharData)
{
scanCharData(fCDataBuf);
}
else if (curToken == Token_EOF)
{
if (!fElemStack.isEmpty())
{
const ElemStack::StackElem* topElem = fElemStack.popTop();
emitError
(
XMLErrs::EndedWithTagsOnStack
, topElem->fThisElement->getFullName()
);
}
retVal = false;
}
else
{
// Its some sort of markup
bool gotData = true;
switch(curToken)
{
case Token_CData :
// Make sure we are within content
if (fElemStack.isEmpty())
emitError(XMLErrs::CDATAOutsideOfContent);
scanCDSection();
break;
case Token_Comment :
scanComment();
break;
case Token_EndTag :
scanEndTag(gotData);
break;
case Token_PI :
scanPI();
break;
case Token_StartTag :
if (fDoNamespaces)
scanStartTagNS(gotData);
else
scanStartTag(gotData);
break;
default :
fReaderMgr.skipToChar(chOpenAngle);
break;
}
if (orgReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
// If we hit the end, then do the miscellaneous part
if (!gotData)
{
// Do post-parse validation if required
if (fValidate)
{
// We handle ID reference semantics at this level since
// its required by XML 1.0.
checkIDRefs();
// Then allow the validator to do any extra stuff it wants
// fValidator->postParseValidation();
}
// That went ok, so scan for any miscellaneous stuff
scanMiscellaneous();
if (fValidate)
fValueStoreCache->endDocument();
if (fDocHandler)
fDocHandler->endDocument();
}
}
}
// NOTE:
//
// In all of the error processing below, the emitError() call MUST come
// before the flush of the reader mgr, or it will fail because it tries
// to find out the position in the XML source of the error.
catch(const XMLErrs::Codes)
{
// This is a 'first failure' exception, so reset and return failure
fReaderMgr.reset();
return false;
}
catch(const XMLValid::Codes)
{
// This is a 'first fatal error' type exit, so reset and reuturn failure
fReaderMgr.reset();
return false;
}
catch(const XMLException& excToCatch)
{
// Emit the error and catch any user exception thrown from here. Make
// sure in all cases we flush the reader manager.
fInException = true;
try
{
if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
emitError
(
XMLErrs::XMLException_Warning
, excToCatch.getType()
, excToCatch.getMessage()
);
else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
emitError
(
XMLErrs::XMLException_Fatal
, excToCatch.getType()
, excToCatch.getMessage()
);
else
emitError
(
XMLErrs::XMLException_Error
, excToCatch.getType()
, excToCatch.getMessage()
);
}
catch(...)
{
// Reset and rethrow user error
fReaderMgr.reset();
throw;
}
// Reset and return failure
fReaderMgr.reset();
return false;
}
catch(...)
{
// Reset and rethrow original error
fReaderMgr.reset();
throw;
}
// If we hit the end, then flush the reader manager
if (!retVal)
fReaderMgr.reset();
return retVal;
}
// ---------------------------------------------------------------------------
// IGXMLScanner: Private helper methods. Most of these are implemented in
// IGXMLScanner2.Cpp.
// ---------------------------------------------------------------------------
// This method handles the common initialization, to avoid having to do
// it redundantly in multiple constructors.
void IGXMLScanner::commonInit()
{
// Create the element state array
fElemState = (unsigned int*) fMemoryManager->allocate
(
fElemStateSize * sizeof(unsigned int)
); //new unsigned int[fElemStateSize];
// And we need one for the raw attribute scan. This just stores key/
// value string pairs (prior to any processing.)
fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
fDTDValidator = new (fMemoryManager) DTDValidator();
fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
initValidator(fSchemaValidator);
// Create IdentityConstraint info
fMatcherStack = new (fMemoryManager) XPathMatcherStack(fMemoryManager);
fValueStoreCache = new (fMemoryManager) ValueStoreCache(fMemoryManager);
fFieldActivator = new (fMemoryManager) FieldActivator(fValueStoreCache, fMatcherStack, fMemoryManager);
fValueStoreCache->setScanner(this);
}
void IGXMLScanner::cleanUp()
{
fMemoryManager->deallocate(fElemState); //delete [] fElemState;
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
delete fRawAttrList;
delete fDTDValidator;
delete fSchemaValidator;
delete fFieldActivator;
delete fMatcherStack;
delete fValueStoreCache;
}
// ---------------------------------------------------------------------------
// IGXMLScanner: Private scanning methods
// ---------------------------------------------------------------------------
// This method is called from scanStartTag() to handle the very raw initial
// scan of the attributes. It just fills in the passed collection with
// key/value pairs for each attribute. No processing is done on them at all.
unsigned int
IGXMLScanner::rawAttrScan(const XMLCh* const elemName
, RefVectorOf<KVStringPair>& toFill
, bool& isEmpty)
{
// Keep up with how many attributes we've seen so far, and how many
// elements are available in the vector. This way we can reuse old
// elements until we run out and then expand it.
unsigned int attCount = 0;
unsigned int curVecSize = toFill.size();
// Assume it is not empty
isEmpty = false;
// We loop until we either see a /> or >, handling key/value pairs util
// we get there. We place them in the passed vector, which we will expand
// as required to hold them.
while (true)
{
// Get the next character, which should be non-space
XMLCh nextCh = fReaderMgr.peekNextChar();
// If the next character is not a slash or closed angle bracket,
// then it must be whitespace, since whitespace is required
// between the end of the last attribute and the name of the next
// one.
//
if (attCount)
{
if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
{
{
// Ok, skip by them and get another char
fReaderMgr.getNextChar();
fReaderMgr.skipPastSpaces();
nextCh = fReaderMgr.peekNextChar();
}
else
{
// Emit the error but keep on going
emitError(XMLErrs::ExpectedWhitespace);
}
}
}
// Ok, here we first check for any of the special case characters.
// If its not one, then we do the normal case processing, which
// assumes that we've hit an attribute value, Otherwise, we do all
// the special case checks.
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
{
// Assume its going to be an attribute, so get a name from
// the input.
if (!fReaderMgr.getName(fAttNameBuf))
{
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.skipPastChar(chCloseAngle);
return attCount;
}
// And next must be an equal sign
if (!scanEq())
{
static const XMLCh tmpList[] =
{
chSingleQuote, chDoubleQuote, chCloseAngle
, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedEqSign);
// Try to sync back up by skipping forward until we either
// hit something meaningful.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
{
// Jump back to top for normal processing of these
continue;
}
else if ((chFound == chSingleQuote)
|| (chFound == chDoubleQuote)
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
{
// Just fall through assuming that the value is to follow
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemName);
return attCount;
}
else
{
// Something went really wrong
return attCount;
}
}
// Next should be the quoted attribute value. We just do a simple
// and stupid scan of this value. The only thing we do here
// is to expand entity references.
if (!basicAttrValueScan(fAttNameBuf.getRawBuffer(), fAttValueBuf))
{
static const XMLCh tmpList[] =
{
chCloseAngle, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedAttrValue);
// It failed, so lets try to get synced back up. We skip
// forward until we find some whitespace or one of the
// chars in our list.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle)
|| (chFound == chForwardSlash)
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
{
// Just fall through and process this attribute, though
// the value will be "".
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemName);
return attCount;
}
else
{
// Something went really wrong
return attCount;
}
}
// Make sure that the name is basically well formed for namespace
// enabled rules. It either has no colons, or it has one which
// is neither the first or last char.
const int colonFirst = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon);
if (colonFirst != -1)
{
const int colonLast = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon);
if (colonFirst != colonLast)
{
emitError(XMLErrs::TooManyColonsInName);
continue;
}
else if ((colonFirst == 0)
|| (colonLast == (int)fAttNameBuf.getLen() - 1))
{
emitError(XMLErrs::InvalidColonPos);
continue;
}
}
// And now lets add it to the passed collection. If we have not
// filled it up yet, then we use the next element. Else we add
// a new one.
KVStringPair* curPair = 0;
if (attCount >= curVecSize)
{
curPair = new (fMemoryManager) KVStringPair
(
fAttNameBuf.getRawBuffer()
, fAttValueBuf.getRawBuffer()
, fMemoryManager
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
);
toFill.addElement(curPair);
}
else
{
curPair = toFill.elementAt(attCount);
curPair->set(fAttNameBuf.getRawBuffer(), fAttValueBuf.getRawBuffer());
}
// And bump the count of attributes we've gotten
attCount++;
// And go to the top again for another attribute
continue;
}
// It was some special case character so do all of the checks and
// deal with it.
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
if (nextCh == chForwardSlash)
{
fReaderMgr.getNextChar();
isEmpty = true;
if (!fReaderMgr.skippedChar(chCloseAngle))
emitError(XMLErrs::UnterminatedStartTag, elemName);
break;
}
else if (nextCh == chCloseAngle)
{
fReaderMgr.getNextChar();
break;
}
else if (nextCh == chOpenAngle)
{
// Check for this one specially, since its going to be common
// and it is kind of auto-recovering since we've already hit the
// next open bracket, which is what we would have seeked to (and
// skipped this whole tag.)
emitError(XMLErrs::UnterminatedStartTag, elemName);
break;
}
else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
{
// Check for this one specially, which is probably a missing
// attribute name, e.g. ="value". Just issue expected name
// error and eat the quoted string, then jump back to the
// top again.
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.getNextChar();
fReaderMgr.skipQuotedString(nextCh);
fReaderMgr.skipPastSpaces();
continue;
}
}
return attCount;
}
// This method will kick off the scanning of the primary content of the
// document, i.e. the elements.
bool IGXMLScanner::scanContent(const bool extEntity)
{
// Go into a loop until we hit the end of the root element, or we fall
// out because there is no root element.
//
// We have to do kind of a deeply nested double loop here in order to
// avoid doing the setup/teardown of the exception handler on each
// round. Doing it this way we only do it when an exception actually
// occurs.
bool gotData = true;
bool inMarkup = false;
while (gotData)
{
try
{
while (gotData)
{
// Sense what the next top level token is. According to what
// this tells us, we will call something to handle that kind
// of thing.
unsigned int orgReader;
const XMLTokens curToken = senseNextToken(orgReader);
// Handle character data and end of file specially. Char data
// is not markup so we don't want to handle it in the loop
// below.
if (curToken == Token_CharData)
{
// Scan the character data and call appropriate events. Let
// him use our local character data buffer for efficiency.
scanCharData(fCDataBuf);
continue;
}
else if (curToken == Token_EOF)
{
// The element stack better be empty at this point or we
// ended prematurely before all elements were closed.
if (!fElemStack.isEmpty())
{
const ElemStack::StackElem* topElem = fElemStack.popTop();
emitError
(
XMLErrs::EndedWithTagsOnStack
, topElem->fThisElement->getFullName()
);
}
// Its the end of file, so clear the got data flag
gotData = false;
continue;
}
// We are in some sort of markup now
inMarkup = true;
// According to the token we got, call the appropriate
// scanning method.
switch(curToken)
{
case Token_CData :
// Make sure we are within content
if (fElemStack.isEmpty())
emitError(XMLErrs::CDATAOutsideOfContent);
scanCDSection();
break;
case Token_Comment :
scanComment();
break;
case Token_EndTag :
scanEndTag(gotData);
break;
case Token_PI :
scanPI();
break;
case Token_StartTag :
if (fDoNamespaces)
scanStartTagNS(gotData);
else
scanStartTag(gotData);
break;
default :
fReaderMgr.skipToChar(chOpenAngle);
break;
}
if (orgReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
// And we are back out of markup again
inMarkup = false;
}
}
catch(const EndOfEntityException& toCatch)
{
// If we were in some markup when this happened, then its a
// partial markup error.
if (inMarkup)
emitError(XMLErrs::PartialMarkupInEntity);
// Send an end of entity reference event
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
inMarkup = false;
}
}
// It went ok, so return success
return true;
}
void IGXMLScanner::scanEndTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the end of the root element.
gotData = true;
// Check if the element stack is empty. If so, then this is an unbalanced
// element (i.e. more ends than starts, perhaps because of bad text
// causing one to be skipped.)
if (fElemStack.isEmpty())
{
emitError(XMLErrs::MoreEndThanStartTags);
fReaderMgr.skipPastChar(chCloseAngle);
ThrowXML(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd);
}
// After the </ is the element QName, so get a name from the input
if (!fReaderMgr.getName(fQNameBuf))
{
// It failed so we can't really do anything with it
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
unsigned int uriId = fEmptyNamespaceId;
int prefixColonPos = -1;
if (fDoNamespaces)
{
uriId = resolveQName
(
fQNameBuf.getRawBuffer()
, fPrefixBuf
, ElemStack::Mode_Element
, prefixColonPos
);
}
// Pop the stack of the element we are supposed to be ending. Remember
// that we don't own this. The stack just keeps them and reuses them.
//
// NOTE: We CANNOT do this until we've resolved the element name because
// the element stack top contains the prefix to URI mappings for this
// element.
unsigned int topUri = fElemStack.getCurrentURI();
const ElemStack::StackElem* topElem = fElemStack.popTop();
// See if it was the root element, to avoid multiple calls below
const bool isRoot = fElemStack.isEmpty();
// Make sure that its the end of the element that we expect
XMLElementDecl* tempElement = topElem->fThisElement;
if (fDoNamespaces && fGrammarType == Grammar::SchemaGrammarType) {
const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer();
if ((topUri != uriId) || (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1])))
{
emitError
(
XMLErrs::ExpectedEndOfTagX
, topElem->fThisElement->getFullName()
);
}
}
else {
if (!XMLString::equals(tempElement->getFullName(), fQNameBuf.getRawBuffer()))
{
emitError
(
XMLErrs::ExpectedEndOfTagX
, topElem->fThisElement->getFullName()
);
}
}
// Make sure we are back on the same reader as where we started
if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialTagMarkupError);
// Skip optional whitespace
fReaderMgr.skipPastSpaces();
// Make sure we find the closing bracket
if (!fReaderMgr.skippedChar(chCloseAngle))
{
emitError
(
XMLErrs::UnterminatedEndTag
, topElem->fThisElement->getFullName()
);
}
// If validation is enabled, then lets pass him the list of children and
// this element and let him validate it.
if (fValidate)
{
int res = fValidator->checkContent
(
topElem->fThisElement
, topElem->fChildren
, topElem->fChildCount
);
if (res >= 0)
{
// One of the elements is not valid for the content. NOTE that
// if no children were provided but the content model requires
// them, it comes back with a zero value. But we cannot use that
// to index the child array in this case, and have to put out a
// special message.
if (!topElem->fChildCount)
{
fValidator->emitError
(
XMLValid::EmptyNotValidForContent
, topElem->fThisElement->getFormattedContentModel()
);
}
else if ((unsigned int)res >= topElem->fChildCount)
{
fValidator->emitError
(
XMLValid::NotEnoughElemsForCM
, topElem->fThisElement->getFormattedContentModel()
);
}
else
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, topElem->fChildren[res]->getRawName()
, topElem->fThisElement->getFormattedContentModel()
);
}
}
Gareth Reakes
committed
if (fGrammarType == Grammar::SchemaGrammarType) {
// call matchers and de-activate context
int oldCount = fMatcherStack->getMatcherCount();
if (oldCount ||
((SchemaElementDecl*)topElem->fThisElement)->getIdentityConstraintCount()) {
for (int i = oldCount - 1; i >= 0; i--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(i);
matcher->endElement(*(topElem->fThisElement), fContent.getRawBuffer());
}
if (fMatcherStack->size() > 0) {
fMatcherStack->popContext();
}
// handle everything *but* keyref's.
int newCount = fMatcherStack->getMatcherCount();
for (int j = oldCount - 1; j >= newCount; j--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(j);
IdentityConstraint* ic = matcher->getIdentityConstraint();
if (ic && (ic->getType() != IdentityConstraint::KEYREF))
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
fValueStoreCache->transplant(ic, matcher->getInitialDepth());
}
// now handle keyref's...
for (int k = oldCount - 1; k >= newCount; k--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(k);
IdentityConstraint* ic = matcher->getIdentityConstraint();
if (ic && (ic->getType() == IdentityConstraint::KEYREF)) {
ValueStore* values = fValueStoreCache->getValueStoreFor(ic, matcher->getInitialDepth());
if (values) { // nothing to do if nothing matched!
values->endDcocumentFragment(fValueStoreCache);
}
}
}
fValueStoreCache->endElement();
}
}
}
Gareth Reakes
committed
if(!isRoot && fGrammarType == Grammar::SchemaGrammarType)
((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromElement(topElem->fThisElement, fGrammarType);
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
fDocHandler->endElement
(
*topElem->fThisElement
, uriId
, isRoot
, fPrefixBuf.getRawBuffer()
);
}
Gareth Reakes
committed
// reset xsi:type ComplexTypeInfo
if (fGrammarType == Grammar::SchemaGrammarType) {
((SchemaElementDecl*)topElem->fThisElement)->reset();
if (!isRoot)
((SchemaElementDecl*)(fElemStack.topElement()->fThisElement))->
setXsiComplexTypeInfo(((SchemaValidator*)fValidator)->getCurrentTypeInfo());
}
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
// If this was the root, then done with content
gotData = !isRoot;
if (gotData) {
if (fDoNamespaces) {
// Restore the grammar
fGrammar = fElemStack.getCurrentGrammar();
fGrammarType = fGrammar->getGrammarType();
if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
if (fValidatorFromUser)
ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
else {
fValidator = fSchemaValidator;
}
}
else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
if (fValidatorFromUser)
ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
else {
fValidator = fDTDValidator;
}
}
fValidator->setGrammar(fGrammar);
}
// Restore the validation flag
fValidate = fElemStack.getValidationFlag();
}
}
// This method handles the high level logic of scanning the DOCType
// declaration. This calls the DTDScanner and kicks off both the scanning of
// the internal subset and the scanning of the external subset, if any.
//
// When we get here the '<!DOCTYPE' part has already been scanned, which is
// what told us that we had a doc type decl to parse.
void IGXMLScanner::scanDocTypeDecl()
{
// We have a doc type. So, switch the Grammar.
switchGrammar(XMLUni::fgDTDEntityString);
if (fDocTypeHandler)
fDocTypeHandler->resetDocType();
// There must be some space after DOCTYPE
if (!fReaderMgr.skipPastSpaces())
{
emitError(XMLErrs::ExpectedWhitespace);
// Just skip the Doctype declaration and return
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Get a buffer for the root element
XMLBufBid bbRootName(&fBufMgr);
// Get a name from the input, which should be the name of the root
// element of the upcoming content.
fReaderMgr.getName(bbRootName.getBuffer());
if (bbRootName.isEmpty())
{
emitError(XMLErrs::NoRootElemInDOCTYPE);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Store the root element name for later check
setRootElemName(bbRootName.getRawBuffer());
// This element obviously is not going to exist in the element decl
// pool yet, but we need to call docTypeDecl. So force it into
// the element decl pool, marked as being there because it was in
// the DOCTYPE. Later, when its declared, the status will be updated.
//
// Only do this if we are not reusing the validator! If we are reusing,
// then look it up instead. It has to exist!
DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
(
bbRootName.getRawBuffer()
, fEmptyNamespaceId
, DTDElementDecl::Any
, fGrammarPoolMemoryManager
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
rootDecl->setExternalElemDeclaration(true);
((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
// Skip any spaces after the name
fReaderMgr.skipPastSpaces();
// And now if we are looking at a >, then we are done. It is not
// required to have an internal or external subset, though why you
// would not escapes me.
if (fReaderMgr.skippedChar(chCloseAngle)) {
// If we have a doc type handler and advanced callbacks are enabled,
// call the doctype event.
if (fDocTypeHandler)
fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
return;
}
// either internal/external subset
if (fValScheme == Val_Auto && !fValidate)
fValidate = true;
bool hasIntSubset = false;
bool hasExtSubset = false;
XMLCh* sysId = 0;
XMLCh* pubId = 0;
DTDScanner dtdScanner
(
(DTDGrammar*) fGrammar
, fDocTypeHandler
, fGrammarPoolMemoryManager
, fMemoryManager
);
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
// If the next character is '[' then we have no external subset cause
// there is no system id, just the opening character of the internal
// subset. Else, has to be an id.
//
// Just look at the next char, don't eat it.
if (fReaderMgr.peekNextChar() == chOpenSquare)
{
hasIntSubset = true;
}
else
{
// Indicate we have an external subset
hasExtSubset = true;
fHasNoDTD = false;
// Get buffers for the ids
XMLBufBid bbPubId(&fBufMgr);
XMLBufBid bbSysId(&fBufMgr);
// Get the external subset id
if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
{
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Get copies of the ids we got
pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
// Skip spaces and check again for the opening of an internal subset
fReaderMgr.skipPastSpaces();
// Just look at the next char, don't eat it.
if (fReaderMgr.peekNextChar() == chOpenSquare) {
hasIntSubset = true;
}
}
// Insure that the ids get cleaned up, if they got allocated
ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
// If we have a doc type handler and advanced callbacks are enabled,
// call the doctype event.
if (fDocTypeHandler)
fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
// Ok, if we had an internal subset, we are just past the [ character
// and need to parse that first.
if (hasIntSubset)
{
// Eat the opening square bracket
fReaderMgr.getNextChar();
// We can't have any internal subset if we are reusing the validator
if (fUseCachedGrammar || fToCacheGrammar)
ThrowXML(RuntimeException, XMLExcepts::Val_CantHaveIntSS);
// And try to scan the internal subset. If we fail, try to recover
// by skipping forward tot he close angle and returning.
if (!dtdScanner.scanInternalSubset())
{
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
// Do a sanity check that some expanded PE did not propogate out of
// the doctype. This could happen if it was terminated early by bad
// syntax.
if (fReaderMgr.getReaderDepth() > 1)
{
emitError(XMLErrs::PEPropogated);
// Ask the reader manager to pop back down to the main level
fReaderMgr.cleanStackBackTo(1);
}
fReaderMgr.skipPastSpaces();
}
// And that should leave us at the closing > of the DOCTYPE line
if (!fReaderMgr.skippedChar(chCloseAngle))
{
// Do a special check for the common scenario of an extra ] char at
// the end. This is easy to recover from.
if (fReaderMgr.skippedChar(chCloseSquare)
&& fReaderMgr.skippedChar(chCloseAngle))
{
emitError(XMLErrs::ExtraCloseSquare);
}
else
{
emitError(XMLErrs::UnterminatedDOCTYPE);
fReaderMgr.skipPastChar(chCloseAngle);
}
}
// If we had an external subset, then we need to deal with that one
// next. If we are reusing the validator, then don't scan it.
if (hasExtSubset) {
if (fUseCachedGrammar)
{
InputSource* sysIdSrc = resolveSystemId(sysId);
Janitor<InputSource> janSysIdSrc(sysIdSrc);
Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId());
if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
fDTDGrammar = (DTDGrammar*) grammar;
fGrammar = fDTDGrammar;
fValidator->setGrammar(fGrammar);
rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
if (rootDecl)
((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId());
else {
rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
(
bbRootName.getRawBuffer()
, fEmptyNamespaceId
, DTDElementDecl::Any
, fGrammarPoolMemoryManager
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
rootDecl->setExternalElemDeclaration(true);
((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
}
return;
}
}
if (fLoadExternalDTD || fValidate)
{
// And now create a reader to read this entity
InputSource* srcUsed;
XMLReader* reader = fReaderMgr.createReader
(
sysId
, pubId
, false
, XMLReader::RefFrom_NonLiteral
, XMLReader::Type_General
, XMLReader::Source_External
, srcUsed
, fCalculateSrcOfs
);
// Put a janitor on the input source
Janitor<InputSource> janSrc(srcUsed);
// If it failed then throw an exception
if (!reader)
ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId());
if (fToCacheGrammar) {
unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setRootName(sysIdStr);
fGrammarResolver->putGrammar(fGrammar);
}
// In order to make the processing work consistently, we have to
// make this look like an external entity. So create an entity
// decl and fill it in and push it with the reader, as happens
// with an external entity. Put a janitor on it to insure it gets
// cleaned up. The reader manager does not adopt them.
const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);
declDTD->setSystemId(sysId);
Janitor<DTDEntityDecl> janDecl(declDTD);
// Mark this one as a throw at end
reader->setThrowAtEnd(true);
// And push it onto the stack, with its pseudo name
fReaderMgr.pushReader(reader, declDTD);
// Tell it its not in an include section
dtdScanner.scanExtSubsetDecl(false, true);
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
}
}
}
bool IGXMLScanner::scanStartTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// Get the QName. In this case, we are not doing namespaces, so we just
// use it as is and don't have to break it into parts.
if (!fReaderMgr.getName(fQNameBuf))
{
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// Assume it won't be an empty tag
bool isEmpty = false;
// Lets try to look up the element in the validator's element decl pool
// We can pass bogus values for the URI id and the base name. We know that
// this can only be called if we are doing a DTD style validator and that
// he will only look at the QName.
//
// We tell him to fault in a decl if he does not find one.
bool wasAdded = false;
XMLElementDecl* elemDecl = fGrammar->findOrAddElemDecl
(
fEmptyNamespaceId
, 0
, 0
, fQNameBuf.getRawBuffer()
, Grammar::TOP_LEVEL_SCOPE
, wasAdded
);
// We do something different here according to whether we found the
// element or not.
if (wasAdded)
{
// If validating then emit an error
if (fValidate)
{
// This is to tell the reuse Validator that this element was
// faulted-in, was not an element in the validator pool originally
elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
fValidator->emitError
(
XMLValid::ElementNotDefined
, elemDecl->getFullName()
);
}
}
else
{
// If its not marked declared and validating, then emit an error
if (fValidate && !elemDecl->isDeclared())
{
fValidator->emitError
(
XMLValid::ElementNotDefined
, elemDecl->getFullName()
);
}
}
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Expand the element stack and add the new element
fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
fElemStack.setValidationFlag(fValidate);
// Validate the element
if (fValidate)
fValidator->validateElement(elemDecl);
// If this is the first element and we are validating, check the root
// element.
if (isRoot)
{
fRootGrammar = fGrammar;
if (fValidate)
{
// If a DocType exists, then check if it matches the root name there.
if (fRootElemName && !XMLString::equals(fQNameBuf.getRawBuffer(), fRootElemName))
fValidator->emitError(XMLValid::RootElemNotLikeDocType);
// Some validators may also want to check the root, call the
// XMLValidator::checkRootElement
if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId()))
fValidator->emitError(XMLValid::RootElemNotLikeDocType);
}
}
else
{
// If the element stack is not empty, then add this element as a
// child of the previous top element. If its empty, this is the root
// elem and is not the child of anything.
fElemStack.addChild(elemDecl->getElementName(), true);
}
// Ask the element decl to clear out the 'provided' flag on all of its
// att defs.
elemDecl->resetDefs();
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// We loop until we either see a /> or >, handling attribute/value
// pairs until we get there.
unsigned int attCount = 0;
unsigned int curAttListSize = fAttrList->size();
while (true)
{
// And get the next non-space character
XMLCh nextCh = fReaderMgr.peekNextChar();
// If the next character is not a slash or closed angle bracket,
// then it must be whitespace, since whitespace is required
// between the end of the last attribute and the name of the next
// one.
if (attCount)
{
if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
{
{
// Ok, skip by them and peek another char
fReaderMgr.skipPastSpaces();
nextCh = fReaderMgr.peekNextChar();
}
else
{
// Emit the error but keep on going
emitError(XMLErrs::ExpectedWhitespace);
}
}
}
// Ok, here we first check for any of the special case characters.
// If its not one, then we do the normal case processing, which
// assumes that we've hit an attribute value, Otherwise, we do all
// the special case checks.
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
{
// Assume its going to be an attribute, so get a name from
// the input.
if (!fReaderMgr.getName(fAttNameBuf))
{
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.skipPastChar(chCloseAngle);
return false;
}
// And next must be an equal sign
if (!scanEq())
{
static const XMLCh tmpList[] =
{
chSingleQuote, chDoubleQuote, chCloseAngle
, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedEqSign);
// Try to sync back up by skipping forward until we either
// hit something meaningful.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
{
// Jump back to top for normal processing of these
continue;
}
else if ((chFound == chSingleQuote)
|| (chFound == chDoubleQuote)
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
{
// Just fall through assuming that the value is to follow
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
return false;
}
else
{
// Something went really wrong
return false;
}
}
// See if this attribute is declared for this element. If we are
// not validating of course it will not be at first, but we will
// fault it into the pool (to avoid lots of redundant errors.)
wasAdded = false;
XMLAttDef* attDef = elemDecl->findAttr
(
fAttNameBuf.getRawBuffer()
, 0
, 0
, 0
, XMLElementDecl::AddIfNotFound
, wasAdded
);
if (wasAdded)
{
// If there is a validation handler, then we are validating
// so emit an error.
if (fValidate)
{
// This is to tell the Validator that this attribute was
// faulted-in, was not an attribute in the attdef originally
attDef->setCreateReason(XMLAttDef::JustFaultIn);
fValidator->emitError
(
XMLValid::AttNotDefinedForElement
, fAttNameBuf.getRawBuffer()
, elemDecl->getFullName()
);
}
}
else
{
// If this attribute was faulted-in and first occurence,
// then emit an error
if (fValidate && attDef->getCreateReason() == XMLAttDef::JustFaultIn
&& !attDef->getProvided())
{
fValidator->emitError
(
XMLValid::AttNotDefinedForElement
, fAttNameBuf.getRawBuffer()
, elemDecl->getFullName()
);
}
}
// If its already provided, then there are more than one of
// this attribute in this start tag, so emit an error.
if (attDef->getProvided())
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, attDef->getFullName()
, elemDecl->getFullName()
);
}
else
{
// Mark this one as already seen
attDef->setProvided(true);
}
// Skip any whitespace before the value and then scan the att
// value. This will come back normalized with entity refs and
// char refs expanded.
fReaderMgr.skipPastSpaces();
if (!scanAttValue(attDef, fAttValueBuf))
{
static const XMLCh tmpList[] =
{
chCloseAngle, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedAttrValue);
// It failed, so lets try to get synced back up. We skip
// forward until we find some whitespace or one of the
// chars in our list.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle)
|| (chFound == chForwardSlash)
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
{
// Just fall through and process this attribute, though
// the value will be "".
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
return false;
}
else
{
// Something went really wrong
return false;
}
}
// Now that its all stretched out, lets look at its type and
// determine if it has a valid value. It will output any needed
// errors, but we just keep going. We only need to do this if
// we are validating.
if (!wasAdded && attDef->getCreateReason() != XMLAttDef::JustFaultIn)
{
// Let the validator pass judgement on the attribute value
if (fValidate)
{
fValidator->validateAttrValue
(
attDef
, fAttValueBuf.getRawBuffer()
, false
, elemDecl
);
}
}
// Add this attribute to the attribute list that we use to
// pass them to the handler. We reuse its existing elements
// but expand it as required.
XMLAttr* curAtt;
if (attCount >= curAttListSize)
{
curAtt = new (fMemoryManager) XMLAttr
(
-1
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, fAttValueBuf.getRawBuffer()
, attDef->getType()
, true
, fMemoryManager
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
);
fAttrList->addElement(curAtt);
}
else
{
curAtt = fAttrList->elementAt(attCount);
curAtt->set
(
-1
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, fAttValueBuf.getRawBuffer()
, attDef->getType()
);
curAtt->setSpecified(true);
}
attCount++;
// And jump back to the top of the loop
continue;
}
// It was some special case character so do all of the checks and
// deal with it.
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
if (nextCh == chForwardSlash)
{
fReaderMgr.getNextChar();
isEmpty = true;
if (!fReaderMgr.skippedChar(chCloseAngle))
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if (nextCh == chCloseAngle)
{
fReaderMgr.getNextChar();
break;
}
else if (nextCh == chOpenAngle)
{
// Check for this one specially, since its going to be common
// and it is kind of auto-recovering since we've already hit the
// next open bracket, which is what we would have seeked to (and
// skipped this whole tag.)
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
{
// Check for this one specially, which is probably a missing
// attribute name, e.g. ="value". Just issue expected name
// error and eat the quoted string, then jump back to the
// top again.
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.getNextChar();
fReaderMgr.skipQuotedString(nextCh);
fReaderMgr.skipPastSpaces();
continue;
}
}
// Ok, so lets get an enumerator for the attributes of this element
// and run through them for well formedness and validity checks. But
// make sure that we had any attributes before we do it, since the list
// would have have gotten faulted in anyway.
if (elemDecl->hasAttDefs())
{
XMLAttDefList& attDefList = elemDecl->getAttDefList();
while (attDefList.hasMoreElements())
{
// Get the current att def, for convenience and its def type
const XMLAttDef& curDef = attDefList.nextElement();
const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
if (!curDef.getProvided())
{
if (fValidate)
{
// If we are validating and its required, then an error
if (defType == XMLAttDef::Required)
{
fValidator->emitError
(
XMLValid::RequiredAttrNotProvided
, curDef.getFullName()
);
}
else if ((defType == XMLAttDef::Default) ||
(defType == XMLAttDef::Fixed) )
{
if (fStandalone && curDef.isExternal())
{
// XML 1.0 Section 2.9
// Document is standalone, so attributes must not be defaulted.
fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
Gareth Reakes
committed
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
}
}
}
// Fault in the value if needed, and bump the att count
if ((defType == XMLAttDef::Default)
|| (defType == XMLAttDef::Fixed))
{
// Let the validator pass judgement on the attribute value
if (fValidate)
{
fValidator->validateAttrValue
(
&curDef
, curDef.getValue()
, false
, elemDecl
);
}
XMLAttr* curAtt;
if (attCount >= curAttListSize)
{
curAtt = new (fMemoryManager) XMLAttr
(
-1
, curDef.getFullName()
, XMLUni::fgZeroLenString
, curDef.getValue()
, curDef.getType()
, false
, fMemoryManager
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
);
fAttrList->addElement(curAtt);
curAttListSize++;
}
else
{
curAtt = fAttrList->elementAt(attCount);
curAtt->set
(
-1
, curDef.getFullName()
, XMLUni::fgZeroLenString
, curDef.getValue()
, curDef.getType()
);
curAtt->setSpecified(false);
}
attCount++;
}
}
}
}
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// If validating, then insure that its legal to have no content
if (fValidate)
{
const int res = fValidator->checkContent(elemDecl, 0, 0);
if (res >= 0)
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, elemDecl->getFullName()
, elemDecl->getFormattedContentModel()
);
}
}
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If the elem stack is empty, then it was an empty root
if (isRoot)
gotData = false;
else {
// Restore the validation flag
fValidate = fElemStack.getValidationFlag();
}
}
// If we have a document handler, then tell it about this start tag. We
// don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
// any prefix since its just one big name if we are not doing namespaces.
if (fDocHandler)
{
fDocHandler->startElement
(
*elemDecl
, fEmptyNamespaceId
, 0
, *fAttrList
, attCount
, isEmpty
, isRoot
);
}
return true;
}
// This method is called to scan a start tag when we are processing
// namespaces. There are two different versions of this method, one for
// namespace aware processing an done for non-namespace aware processing.
//
// This method is called after we've scanned the < of a start tag. So we
// have to get the element name, then scan the attributes, after which
// we are either going to see >, />, or attributes followed by one of those
// sequences.
bool IGXMLScanner::scanStartTagNS(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// Reset element content buffer
fContent.reset();
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
// The current position is after the open bracket, so we need to read in
// in the element name.
if (!fReaderMgr.getName(fQNameBuf))
{
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// First we have to do the rawest attribute scan. We don't do any
// normalization of them at all, since we don't know yet what type they
// might be (since we need the element decl in order to do that.)
bool isEmpty;
unsigned int attCount = rawAttrScan
(
fQNameBuf.getRawBuffer()
, *fRawAttrList
, isEmpty
);
const bool gotAttrs = (attCount != 0);
// save the contentleafname and currentscope before addlevel, for later use
ContentLeafNameTypeVector* cv = 0;
XMLContentModel* cm = 0;
int currentScope = Grammar::TOP_LEVEL_SCOPE;
bool laxThisOne = false;
if (!isRoot && fGrammarType == Grammar::SchemaGrammarType) {
SchemaElementDecl* tempElement = (SchemaElementDecl*) fElemStack.topElement()->fThisElement;
SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
if ((modelType == SchemaElementDecl::Mixed_Simple)
|| (modelType == SchemaElementDecl::Mixed_Complex)
|| (modelType == SchemaElementDecl::Children))
{
cm = tempElement->getContentModel();
cv = cm->getContentLeafNameTypeVector();
currentScope = fElemStack.getCurrentScope();
}
else if (modelType == SchemaElementDecl::Any) {
laxThisOne = true;
}
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
}
// Now, since we might have to update the namespace map for this element,
// but we don't have the element decl yet, we just tell the element stack
// to expand up to get ready.
unsigned int elemDepth = fElemStack.addLevel();
fElemStack.setValidationFlag(fValidate);
// Check if there is any external schema location specified, and if we are at root,
// go through them first before scanning those specified in the instance document
if (isRoot
&& fDoSchema
&& (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
if (fExternalSchemaLocation)
parseSchemaLocation(fExternalSchemaLocation);
if (fExternalNoNamespaceSchemaLocation)
resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString);
}
// Make an initial pass through the list and find any xmlns attributes or
// schema attributes.
if (attCount)
scanRawAttrListforNameSpaces(fRawAttrList, attCount);
// Also find any default or fixed xmlns attributes in DTD defined for
// this element.
XMLElementDecl* elemDecl = 0;
if (fGrammarType == Grammar::DTDGrammarType) {
elemDecl = fGrammar->getElemDecl
(
fEmptyNamespaceId
, 0
, fQNameBuf.getRawBuffer()
, Grammar::TOP_LEVEL_SCOPE
);
if (elemDecl) {
if (elemDecl->hasAttDefs()) {
XMLAttDefList& attDefList = elemDecl->getAttDefList();
while (attDefList.hasMoreElements())
{
// Get the current att def, for convenience and its def type
const XMLAttDef& curDef = attDefList.nextElement();
const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
// update the NSMap if there are any default/fixed xmlns attributes
if ((defType == XMLAttDef::Default)
|| (defType == XMLAttDef::Fixed))
{
const XMLCh* rawPtr = curDef.getFullName();
if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
|| XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
updateNSMap(rawPtr, curDef.getValue());
}
}
}
}
}
// Resolve the qualified name to a URI and name so that we can look up
// the element decl for this element. We have now update the prefix to
// namespace map so we should get the correct element now.
int prefixColonPos = -1;
const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
unsigned int uriId = resolveQName
(
qnameRawBuf
, fPrefixBuf
, ElemStack::Mode_Element
, prefixColonPos
);
//if schema, check if we should lax or skip the validation of this element
bool parentValidation = fValidate;
if (cv) {
QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
// elementDepth will be > 0, as cv is only constructed if element is not
// root.
laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
}
// Look up the element now in the grammar. This will get us back a
// generic element decl object. We tell him to fault one in if he does
// not find it.
bool wasAdded = false;
Gareth Reakes
committed
bool errorBeforeElementFound = false;
bool laxBeforeElementFound = false;
const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr);
if (uriId != fEmptyNamespaceId) {
// Check in current grammar before switching if necessary
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, currentScope
);
if (!elemDecl && (orgGrammarUri != uriId)) {
// not found, switch to the specified grammar
const XMLCh* uriStr = getURIText(uriId);
Gareth Reakes
committed
bool errorCondition = !switchGrammar(uriStr) && fValidate;
if (errorCondition && !laxThisOne)
{
fValidator->emitError
(
XMLValid::GrammarNotFound
,uriStr
);
Gareth Reakes
committed
errorBeforeElementFound = true;
Gareth Reakes
committed
else if(errorCondition)
laxBeforeElementFound = true;
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, currentScope
);
}
if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
// if not found, then it may be a reference, try TOP_LEVEL_SCOPE
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, Grammar::TOP_LEVEL_SCOPE
);
if(!elemDecl) {
// still not found in specified uri
// try emptyNamesapce see if element should be un-qualified.
elemDecl = fGrammar->getElemDecl
(
fEmptyNamespaceId
, nameRawBuf
, qnameRawBuf
, currentScope
);
Gareth Reakes
committed
bool errorCondition = elemDecl && elemDecl->getCreateReason() != XMLElementDecl::JustFaultIn;
if (errorCondition && fValidate) {
fValidator->emitError
(
XMLValid::ElementNotUnQualified
, elemDecl->getFullName()
);
Gareth Reakes
committed
errorBeforeElementFound = true;
else if(errorCondition)
Gareth Reakes
committed
laxBeforeElementFound = true;
}
}
if (!elemDecl) {
// still not found, fault this in and issue error later
// switch back to original grammar first
switchGrammar(original_uriStr);
elemDecl = fGrammar->putElemDecl(uriId
, nameRawBuf
, fPrefixBuf.getRawBuffer()
, qnameRawBuf
, currentScope
, true);
wasAdded = true;
}
}
else if (!elemDecl)
{
//the element has no prefix,
//thus it is either a non-qualified element defined in current targetNS
//or an element that is defined in the globalNS
//try unqualifed first
elemDecl = fGrammar->getElemDecl
Loading
Loading full blame...