154dece154c1bc00b49d9a2f0e1d2a09001118f4
[WebKit-https.git] / Source / WebCore / html / parser / HTMLTokenizer.cpp
1 /*
2  * Copyright (C) 2008-2016 Apple Inc. All Rights Reserved.
3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26  */
27
28 #include "config.h"
29 #include "HTMLTokenizer.h"
30
31 #include "HTMLEntityParser.h"
32 #include "HTMLNames.h"
33 #include "MarkupTokenizerInlines.h"
34 #include <wtf/text/StringBuilder.h>
35
36
37 namespace WebCore {
38
39 using namespace HTMLNames;
40
41 static inline LChar convertASCIIAlphaToLower(UChar character)
42 {
43     ASSERT(isASCIIAlpha(character));
44     return toASCIILowerUnchecked(character);
45 }
46
47 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const char* string)
48 {
49     unsigned size = vector.size();
50     for (unsigned i = 0; i < size; ++i) {
51         if (!string[i] || vector[i] != string[i])
52             return false;
53     }
54     return !string[size];
55 }
56
57 inline bool HTMLTokenizer::inEndTagBufferingState() const
58 {
59     switch (m_state) {
60     case RCDATAEndTagOpenState:
61     case RCDATAEndTagNameState:
62     case RAWTEXTEndTagOpenState:
63     case RAWTEXTEndTagNameState:
64     case ScriptDataEndTagOpenState:
65     case ScriptDataEndTagNameState:
66     case ScriptDataEscapedEndTagOpenState:
67     case ScriptDataEscapedEndTagNameState:
68         return true;
69     default:
70         return false;
71     }
72 }
73
74 HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
75     : m_preprocessor(*this)
76     , m_options(options)
77 {
78 }
79
80 inline void HTMLTokenizer::bufferASCIICharacter(UChar character)
81 {
82     ASSERT(character != kEndOfFileMarker);
83     ASSERT(isASCII(character));
84     LChar narrowedCharacter = character;
85     m_token.appendToCharacter(narrowedCharacter);
86 }
87
88 inline void HTMLTokenizer::bufferCharacter(UChar character)
89 {
90     ASSERT(character != kEndOfFileMarker);
91     m_token.appendToCharacter(character);
92 }
93
94 inline bool HTMLTokenizer::emitAndResumeInDataState(SegmentedString& source)
95 {
96     saveEndTagNameIfNeeded();
97     m_state = DataState;
98     source.advancePastNonNewline();
99     return true;
100 }
101
102 inline bool HTMLTokenizer::emitAndReconsumeInDataState()
103 {
104     saveEndTagNameIfNeeded();
105     m_state = DataState;
106     return true;
107 }
108
109 inline bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
110 {
111     m_state = DataState;
112     if (haveBufferedCharacterToken())
113         return true;
114     source.advance();
115     m_token.clear();
116     m_token.makeEndOfFile();
117     return true;
118 }
119
120 inline void HTMLTokenizer::saveEndTagNameIfNeeded()
121 {
122     ASSERT(m_token.type() != HTMLToken::Uninitialized);
123     if (m_token.type() == HTMLToken::StartTag)
124         m_appropriateEndTagName = m_token.name();
125 }
126
127 inline bool HTMLTokenizer::haveBufferedCharacterToken() const
128 {
129     return m_token.type() == HTMLToken::Character;
130 }
131
132 inline bool HTMLTokenizer::processEntity(SegmentedString& source)
133 {
134     bool notEnoughCharacters = false;
135     StringBuilder decodedEntity;
136     bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
137     if (notEnoughCharacters)
138         return false;
139     if (!success) {
140         ASSERT(decodedEntity.isEmpty());
141         bufferASCIICharacter('&');
142     } else {
143         for (unsigned i = 0; i < decodedEntity.length(); ++i)
144             bufferCharacter(decodedEntity[i]);
145     }
146     return true;
147 }
148
149 void HTMLTokenizer::flushBufferedEndTag()
150 {
151     m_token.beginEndTag(m_bufferedEndTagName);
152     m_bufferedEndTagName.clear();
153     m_appropriateEndTagName.clear();
154     m_temporaryBuffer.clear();
155 }
156
157 bool HTMLTokenizer::commitToPartialEndTag(SegmentedString& source, UChar character, State state)
158 {
159     ASSERT(source.currentCharacter() == character);
160     appendToTemporaryBuffer(character);
161     source.advance();
162
163     if (haveBufferedCharacterToken()) {
164         // Emit the buffered character token.
165         // The next call to processToken will flush the buffered end tag and continue parsing it.
166         m_state = state;
167         return true;
168     }
169
170     flushBufferedEndTag();
171     return false;
172 }
173
174 bool HTMLTokenizer::commitToCompleteEndTag(SegmentedString& source)
175 {
176     ASSERT(source.currentCharacter() == '>');
177     appendToTemporaryBuffer('>');
178     source.advancePastNonNewline();
179
180     m_state = DataState;
181
182     if (haveBufferedCharacterToken()) {
183         // Emit the character token we already have.
184         // The next call to processToken will flush the buffered end tag and emit it.
185         return true;
186     }
187
188     flushBufferedEndTag();
189     return true;
190 }
191
192 bool HTMLTokenizer::processToken(SegmentedString& source)
193 {
194     if (!m_bufferedEndTagName.isEmpty() && !inEndTagBufferingState()) {
195         // We are back here after emitting a character token that came just before an end tag.
196         // To continue parsing the end tag we need to move the buffered tag name into the token.
197         flushBufferedEndTag();
198
199         // If we are in the data state, the end tag is already complete and we should emit it
200         // now, otherwise, we want to resume parsing the partial end tag.
201         if (m_state == DataState)
202             return true;
203     }
204
205     if (!m_preprocessor.peek(source, isNullCharacterSkippingState(m_state)))
206         return haveBufferedCharacterToken();
207     UChar character = m_preprocessor.nextInputCharacter();
208
209     // https://html.spec.whatwg.org/#tokenization
210     switch (m_state) {
211
212     BEGIN_STATE(DataState)
213         if (character == '&')
214             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInDataState);
215         if (character == '<') {
216             if (haveBufferedCharacterToken())
217                 RETURN_IN_CURRENT_STATE(true);
218             ADVANCE_PAST_NON_NEWLINE_TO(TagOpenState);
219         }
220         if (character == kEndOfFileMarker)
221             return emitEndOfFile(source);
222         bufferCharacter(character);
223         ADVANCE_TO(DataState);
224     END_STATE()
225
226     BEGIN_STATE(CharacterReferenceInDataState)
227         if (!processEntity(source))
228             RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
229         SWITCH_TO(DataState);
230     END_STATE()
231
232     BEGIN_STATE(RCDATAState)
233         if (character == '&')
234             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInRCDATAState);
235         if (character == '<')
236             ADVANCE_PAST_NON_NEWLINE_TO(RCDATALessThanSignState);
237         if (character == kEndOfFileMarker)
238             RECONSUME_IN(DataState);
239         bufferCharacter(character);
240         ADVANCE_TO(RCDATAState);
241     END_STATE()
242
243     BEGIN_STATE(CharacterReferenceInRCDATAState)
244         if (!processEntity(source))
245             RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
246         SWITCH_TO(RCDATAState);
247     END_STATE()
248
249     BEGIN_STATE(RAWTEXTState)
250         if (character == '<')
251             ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTLessThanSignState);
252         if (character == kEndOfFileMarker)
253             RECONSUME_IN(DataState);
254         bufferCharacter(character);
255         ADVANCE_TO(RAWTEXTState);
256     END_STATE()
257
258     BEGIN_STATE(ScriptDataState)
259         if (character == '<')
260             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataLessThanSignState);
261         if (character == kEndOfFileMarker)
262             RECONSUME_IN(DataState);
263         bufferCharacter(character);
264         ADVANCE_TO(ScriptDataState);
265     END_STATE()
266
267     BEGIN_STATE(PLAINTEXTState)
268         if (character == kEndOfFileMarker)
269             RECONSUME_IN(DataState);
270         bufferCharacter(character);
271         ADVANCE_TO(PLAINTEXTState);
272     END_STATE()
273
274     BEGIN_STATE(TagOpenState)
275         if (character == '!')
276             ADVANCE_PAST_NON_NEWLINE_TO(MarkupDeclarationOpenState);
277         if (character == '/')
278             ADVANCE_PAST_NON_NEWLINE_TO(EndTagOpenState);
279         if (isASCIIAlpha(character)) {
280             m_token.beginStartTag(convertASCIIAlphaToLower(character));
281             ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
282         }
283         if (character == '?') {
284             parseError();
285             // The spec consumes the current character before switching
286             // to the bogus comment state, but it's easier to implement
287             // if we reconsume the current character.
288             RECONSUME_IN(BogusCommentState);
289         }
290         parseError();
291         bufferASCIICharacter('<');
292         RECONSUME_IN(DataState);
293     END_STATE()
294
295     BEGIN_STATE(EndTagOpenState)
296         if (isASCIIAlpha(character)) {
297             m_token.beginEndTag(convertASCIIAlphaToLower(character));
298             m_appropriateEndTagName.clear();
299             ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
300         }
301         if (character == '>') {
302             parseError();
303             ADVANCE_PAST_NON_NEWLINE_TO(DataState);
304         }
305         if (character == kEndOfFileMarker) {
306             parseError();
307             bufferASCIICharacter('<');
308             bufferASCIICharacter('/');
309             RECONSUME_IN(DataState);
310         }
311         parseError();
312         RECONSUME_IN(BogusCommentState);
313     END_STATE()
314
315     BEGIN_STATE(TagNameState)
316         if (isTokenizerWhitespace(character))
317             ADVANCE_TO(BeforeAttributeNameState);
318         if (character == '/')
319             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
320         if (character == '>')
321             return emitAndResumeInDataState(source);
322         if (m_options.usePreHTML5ParserQuirks && character == '<')
323             return emitAndReconsumeInDataState();
324         if (character == kEndOfFileMarker) {
325             parseError();
326             RECONSUME_IN(DataState);
327         }
328         m_token.appendToName(toASCIILower(character));
329         ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
330     END_STATE()
331
332     BEGIN_STATE(RCDATALessThanSignState)
333         if (character == '/') {
334             m_temporaryBuffer.clear();
335             ASSERT(m_bufferedEndTagName.isEmpty());
336             ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagOpenState);
337         }
338         bufferASCIICharacter('<');
339         RECONSUME_IN(RCDATAState);
340     END_STATE()
341
342     BEGIN_STATE(RCDATAEndTagOpenState)
343         if (isASCIIAlpha(character)) {
344             appendToTemporaryBuffer(character);
345             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
346             ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);
347         }
348         bufferASCIICharacter('<');
349         bufferASCIICharacter('/');
350         RECONSUME_IN(RCDATAState);
351     END_STATE()
352
353     BEGIN_STATE(RCDATAEndTagNameState)
354         if (isASCIIAlpha(character)) {
355             appendToTemporaryBuffer(character);
356             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
357             ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);
358         }
359         if (isTokenizerWhitespace(character)) {
360             if (isAppropriateEndTag()) {
361                 if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
362                     return true;
363                 SWITCH_TO(BeforeAttributeNameState);
364             }
365         } else if (character == '/') {
366             if (isAppropriateEndTag()) {
367                 if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
368                     return true;
369                 SWITCH_TO(SelfClosingStartTagState);
370             }
371         } else if (character == '>') {
372             if (isAppropriateEndTag())
373                 return commitToCompleteEndTag(source);
374         }
375         bufferASCIICharacter('<');
376         bufferASCIICharacter('/');
377         m_token.appendToCharacter(m_temporaryBuffer);
378         m_bufferedEndTagName.clear();
379         m_temporaryBuffer.clear();
380         RECONSUME_IN(RCDATAState);
381     END_STATE()
382
383     BEGIN_STATE(RAWTEXTLessThanSignState)
384         if (character == '/') {
385             m_temporaryBuffer.clear();
386             ASSERT(m_bufferedEndTagName.isEmpty());
387             ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagOpenState);
388         }
389         bufferASCIICharacter('<');
390         RECONSUME_IN(RAWTEXTState);
391     END_STATE()
392
393     BEGIN_STATE(RAWTEXTEndTagOpenState)
394         if (isASCIIAlpha(character)) {
395             appendToTemporaryBuffer(character);
396             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
397             ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);
398         }
399         bufferASCIICharacter('<');
400         bufferASCIICharacter('/');
401         RECONSUME_IN(RAWTEXTState);
402     END_STATE()
403
404     BEGIN_STATE(RAWTEXTEndTagNameState)
405         if (isASCIIAlpha(character)) {
406             appendToTemporaryBuffer(character);
407             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
408             ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);
409         }
410         if (isTokenizerWhitespace(character)) {
411             if (isAppropriateEndTag()) {
412                 if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
413                     return true;
414                 SWITCH_TO(BeforeAttributeNameState);
415             }
416         } else if (character == '/') {
417             if (isAppropriateEndTag()) {
418                 if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
419                     return true;
420                 SWITCH_TO(SelfClosingStartTagState);
421             }
422         } else if (character == '>') {
423             if (isAppropriateEndTag())
424                 return commitToCompleteEndTag(source);
425         }
426         bufferASCIICharacter('<');
427         bufferASCIICharacter('/');
428         m_token.appendToCharacter(m_temporaryBuffer);
429         m_bufferedEndTagName.clear();
430         m_temporaryBuffer.clear();
431         RECONSUME_IN(RAWTEXTState);
432     END_STATE()
433
434     BEGIN_STATE(ScriptDataLessThanSignState)
435         if (character == '/') {
436             m_temporaryBuffer.clear();
437             ASSERT(m_bufferedEndTagName.isEmpty());
438             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagOpenState);
439         }
440         if (character == '!') {
441             bufferASCIICharacter('<');
442             bufferASCIICharacter('!');
443             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartState);
444         }
445         bufferASCIICharacter('<');
446         RECONSUME_IN(ScriptDataState);
447     END_STATE()
448
449     BEGIN_STATE(ScriptDataEndTagOpenState)
450         if (isASCIIAlpha(character)) {
451             appendToTemporaryBuffer(character);
452             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
453             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);
454         }
455         bufferASCIICharacter('<');
456         bufferASCIICharacter('/');
457         RECONSUME_IN(ScriptDataState);
458     END_STATE()
459
460     BEGIN_STATE(ScriptDataEndTagNameState)
461         if (isASCIIAlpha(character)) {
462             appendToTemporaryBuffer(character);
463             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
464             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);
465         }
466         if (isTokenizerWhitespace(character)) {
467             if (isAppropriateEndTag()) {
468                 if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
469                     return true;
470                 SWITCH_TO(BeforeAttributeNameState);
471             }
472         } else if (character == '/') {
473             if (isAppropriateEndTag()) {
474                 if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
475                     return true;
476                 SWITCH_TO(SelfClosingStartTagState);
477             }
478         } else if (character == '>') {
479             if (isAppropriateEndTag())
480                 return commitToCompleteEndTag(source);
481         }
482         bufferASCIICharacter('<');
483         bufferASCIICharacter('/');
484         m_token.appendToCharacter(m_temporaryBuffer);
485         m_bufferedEndTagName.clear();
486         m_temporaryBuffer.clear();
487         RECONSUME_IN(ScriptDataState);
488     END_STATE()
489
490     BEGIN_STATE(ScriptDataEscapeStartState)
491         if (character == '-') {
492             bufferASCIICharacter('-');
493             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartDashState);
494         } else
495             RECONSUME_IN(ScriptDataState);
496     END_STATE()
497
498     BEGIN_STATE(ScriptDataEscapeStartDashState)
499         if (character == '-') {
500             bufferASCIICharacter('-');
501             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
502         } else
503             RECONSUME_IN(ScriptDataState);
504     END_STATE()
505
506     BEGIN_STATE(ScriptDataEscapedState)
507         if (character == '-') {
508             bufferASCIICharacter('-');
509             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashState);
510         }
511         if (character == '<')
512             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
513         if (character == kEndOfFileMarker) {
514             parseError();
515             RECONSUME_IN(DataState);
516         }
517         bufferCharacter(character);
518         ADVANCE_TO(ScriptDataEscapedState);
519     END_STATE()
520
521     BEGIN_STATE(ScriptDataEscapedDashState)
522         if (character == '-') {
523             bufferASCIICharacter('-');
524             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
525         }
526         if (character == '<')
527             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
528         if (character == kEndOfFileMarker) {
529             parseError();
530             RECONSUME_IN(DataState);
531         }
532         bufferCharacter(character);
533         ADVANCE_TO(ScriptDataEscapedState);
534     END_STATE()
535
536     BEGIN_STATE(ScriptDataEscapedDashDashState)
537         if (character == '-') {
538             bufferASCIICharacter('-');
539             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
540         }
541         if (character == '<')
542             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
543         if (character == '>') {
544             bufferASCIICharacter('>');
545             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState);
546         }
547         if (character == kEndOfFileMarker) {
548             parseError();
549             RECONSUME_IN(DataState);
550         }
551         bufferCharacter(character);
552         ADVANCE_TO(ScriptDataEscapedState);
553     END_STATE()
554
555     BEGIN_STATE(ScriptDataEscapedLessThanSignState)
556         if (character == '/') {
557             m_temporaryBuffer.clear();
558             ASSERT(m_bufferedEndTagName.isEmpty());
559             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagOpenState);
560         }
561         if (isASCIIAlpha(character)) {
562             bufferASCIICharacter('<');
563             bufferASCIICharacter(character);
564             m_temporaryBuffer.clear();
565             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
566             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);
567         }
568         bufferASCIICharacter('<');
569         RECONSUME_IN(ScriptDataEscapedState);
570     END_STATE()
571
572     BEGIN_STATE(ScriptDataEscapedEndTagOpenState)
573         if (isASCIIAlpha(character)) {
574             appendToTemporaryBuffer(character);
575             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
576             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);
577         }
578         bufferASCIICharacter('<');
579         bufferASCIICharacter('/');
580         RECONSUME_IN(ScriptDataEscapedState);
581     END_STATE()
582
583     BEGIN_STATE(ScriptDataEscapedEndTagNameState)
584         if (isASCIIAlpha(character)) {
585             appendToTemporaryBuffer(character);
586             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
587             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);
588         }
589         if (isTokenizerWhitespace(character)) {
590             if (isAppropriateEndTag()) {
591                 if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
592                     return true;
593                 SWITCH_TO(BeforeAttributeNameState);
594             }
595         } else if (character == '/') {
596             if (isAppropriateEndTag()) {
597                 if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
598                     return true;
599                 SWITCH_TO(SelfClosingStartTagState);
600             }
601         } else if (character == '>') {
602             if (isAppropriateEndTag())
603                 return commitToCompleteEndTag(source);
604         }
605         bufferASCIICharacter('<');
606         bufferASCIICharacter('/');
607         m_token.appendToCharacter(m_temporaryBuffer);
608         m_bufferedEndTagName.clear();
609         m_temporaryBuffer.clear();
610         RECONSUME_IN(ScriptDataEscapedState);
611     END_STATE()
612
613     BEGIN_STATE(ScriptDataDoubleEscapeStartState)
614         if (isTokenizerWhitespace(character) || character == '/' || character == '>') {
615             bufferASCIICharacter(character);
616             if (temporaryBufferIs("script"))
617                 ADVANCE_TO(ScriptDataDoubleEscapedState);
618             else
619                 ADVANCE_TO(ScriptDataEscapedState);
620         }
621         if (isASCIIAlpha(character)) {
622             bufferASCIICharacter(character);
623             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
624             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);
625         }
626         RECONSUME_IN(ScriptDataEscapedState);
627     END_STATE()
628
629     BEGIN_STATE(ScriptDataDoubleEscapedState)
630         if (character == '-') {
631             bufferASCIICharacter('-');
632             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashState);
633         }
634         if (character == '<') {
635             bufferASCIICharacter('<');
636             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
637         }
638         if (character == kEndOfFileMarker) {
639             parseError();
640             RECONSUME_IN(DataState);
641         }
642         bufferCharacter(character);
643         ADVANCE_TO(ScriptDataDoubleEscapedState);
644     END_STATE()
645
646     BEGIN_STATE(ScriptDataDoubleEscapedDashState)
647         if (character == '-') {
648             bufferASCIICharacter('-');
649             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);
650         }
651         if (character == '<') {
652             bufferASCIICharacter('<');
653             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
654         }
655         if (character == kEndOfFileMarker) {
656             parseError();
657             RECONSUME_IN(DataState);
658         }
659         bufferCharacter(character);
660         ADVANCE_TO(ScriptDataDoubleEscapedState);
661     END_STATE()
662
663     BEGIN_STATE(ScriptDataDoubleEscapedDashDashState)
664         if (character == '-') {
665             bufferASCIICharacter('-');
666             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);
667         }
668         if (character == '<') {
669             bufferASCIICharacter('<');
670             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
671         }
672         if (character == '>') {
673             bufferASCIICharacter('>');
674             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState);
675         }
676         if (character == kEndOfFileMarker) {
677             parseError();
678             RECONSUME_IN(DataState);
679         }
680         bufferCharacter(character);
681         ADVANCE_TO(ScriptDataDoubleEscapedState);
682     END_STATE()
683
684     BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState)
685         if (character == '/') {
686             bufferASCIICharacter('/');
687             m_temporaryBuffer.clear();
688             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);
689         }
690         RECONSUME_IN(ScriptDataDoubleEscapedState);
691     END_STATE()
692
693     BEGIN_STATE(ScriptDataDoubleEscapeEndState)
694         if (isTokenizerWhitespace(character) || character == '/' || character == '>') {
695             bufferASCIICharacter(character);
696             if (temporaryBufferIs("script"))
697                 ADVANCE_TO(ScriptDataEscapedState);
698             else
699                 ADVANCE_TO(ScriptDataDoubleEscapedState);
700         }
701         if (isASCIIAlpha(character)) {
702             bufferASCIICharacter(character);
703             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
704             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);
705         }
706         RECONSUME_IN(ScriptDataDoubleEscapedState);
707     END_STATE()
708
709     BEGIN_STATE(BeforeAttributeNameState)
710         if (isTokenizerWhitespace(character))
711             ADVANCE_TO(BeforeAttributeNameState);
712         if (character == '/')
713             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
714         if (character == '>')
715             return emitAndResumeInDataState(source);
716         if (m_options.usePreHTML5ParserQuirks && character == '<')
717             return emitAndReconsumeInDataState();
718         if (character == kEndOfFileMarker) {
719             parseError();
720             RECONSUME_IN(DataState);
721         }
722         if (character == '"' || character == '\'' || character == '<' || character == '=')
723             parseError();
724         m_token.beginAttribute(source.numberOfCharactersConsumed());
725         m_token.appendToAttributeName(toASCIILower(character));
726         ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
727     END_STATE()
728
729     BEGIN_STATE(AttributeNameState)
730         if (isTokenizerWhitespace(character))
731             ADVANCE_TO(AfterAttributeNameState);
732         if (character == '/')
733             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
734         if (character == '=')
735             ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState);
736         if (character == '>')
737             return emitAndResumeInDataState(source);
738         if (m_options.usePreHTML5ParserQuirks && character == '<')
739             return emitAndReconsumeInDataState();
740         if (character == kEndOfFileMarker) {
741             parseError();
742             RECONSUME_IN(DataState);
743         }
744         if (character == '"' || character == '\'' || character == '<' || character == '=')
745             parseError();
746         m_token.appendToAttributeName(toASCIILower(character));
747         ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
748     END_STATE()
749
750     BEGIN_STATE(AfterAttributeNameState)
751         if (isTokenizerWhitespace(character))
752             ADVANCE_TO(AfterAttributeNameState);
753         if (character == '/')
754             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
755         if (character == '=')
756             ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState);
757         if (character == '>')
758             return emitAndResumeInDataState(source);
759         if (m_options.usePreHTML5ParserQuirks && character == '<')
760             return emitAndReconsumeInDataState();
761         if (character == kEndOfFileMarker) {
762             parseError();
763             RECONSUME_IN(DataState);
764         }
765         if (character == '"' || character == '\'' || character == '<')
766             parseError();
767         m_token.beginAttribute(source.numberOfCharactersConsumed());
768         m_token.appendToAttributeName(toASCIILower(character));
769         ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
770     END_STATE()
771
772     BEGIN_STATE(BeforeAttributeValueState)
773         if (isTokenizerWhitespace(character))
774             ADVANCE_TO(BeforeAttributeValueState);
775         if (character == '"')
776             ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueDoubleQuotedState);
777         if (character == '&')
778             RECONSUME_IN(AttributeValueUnquotedState);
779         if (character == '\'')
780             ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueSingleQuotedState);
781         if (character == '>') {
782             parseError();
783             return emitAndResumeInDataState(source);
784         }
785         if (character == kEndOfFileMarker) {
786             parseError();
787             RECONSUME_IN(DataState);
788         }
789         if (character == '<' || character == '=' || character == '`')
790             parseError();
791         m_token.appendToAttributeValue(character);
792         ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);
793     END_STATE()
794
795     BEGIN_STATE(AttributeValueDoubleQuotedState)
796         if (character == '"') {
797             m_token.endAttribute(source.numberOfCharactersConsumed());
798             ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);
799         }
800         if (character == '&') {
801             m_additionalAllowedCharacter = '"';
802             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
803         }
804         if (character == kEndOfFileMarker) {
805             parseError();
806             m_token.endAttribute(source.numberOfCharactersConsumed());
807             RECONSUME_IN(DataState);
808         }
809         m_token.appendToAttributeValue(character);
810         ADVANCE_TO(AttributeValueDoubleQuotedState);
811     END_STATE()
812
813     BEGIN_STATE(AttributeValueSingleQuotedState)
814         if (character == '\'') {
815             m_token.endAttribute(source.numberOfCharactersConsumed());
816             ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);
817         }
818         if (character == '&') {
819             m_additionalAllowedCharacter = '\'';
820             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
821         }
822         if (character == kEndOfFileMarker) {
823             parseError();
824             m_token.endAttribute(source.numberOfCharactersConsumed());
825             RECONSUME_IN(DataState);
826         }
827         m_token.appendToAttributeValue(character);
828         ADVANCE_TO(AttributeValueSingleQuotedState);
829     END_STATE()
830
831     BEGIN_STATE(AttributeValueUnquotedState)
832         if (isTokenizerWhitespace(character)) {
833             m_token.endAttribute(source.numberOfCharactersConsumed());
834             ADVANCE_TO(BeforeAttributeNameState);
835         }
836         if (character == '&') {
837             m_additionalAllowedCharacter = '>';
838             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
839         }
840         if (character == '>') {
841             m_token.endAttribute(source.numberOfCharactersConsumed());
842             return emitAndResumeInDataState(source);
843         }
844         if (character == kEndOfFileMarker) {
845             parseError();
846             m_token.endAttribute(source.numberOfCharactersConsumed());
847             RECONSUME_IN(DataState);
848         }
849         if (character == '"' || character == '\'' || character == '<' || character == '=' || character == '`')
850             parseError();
851         m_token.appendToAttributeValue(character);
852         ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);
853     END_STATE()
854
855     BEGIN_STATE(CharacterReferenceInAttributeValueState)
856         bool notEnoughCharacters = false;
857         StringBuilder decodedEntity;
858         bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
859         if (notEnoughCharacters)
860             RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
861         if (!success) {
862             ASSERT(decodedEntity.isEmpty());
863             m_token.appendToAttributeValue('&');
864         } else {
865             for (unsigned i = 0; i < decodedEntity.length(); ++i)
866                 m_token.appendToAttributeValue(decodedEntity[i]);
867         }
868         // We're supposed to switch back to the attribute value state that
869         // we were in when we were switched into this state. Rather than
870         // keeping track of this explicitly, we observe that the previous
871         // state can be determined by m_additionalAllowedCharacter.
872         if (m_additionalAllowedCharacter == '"')
873             SWITCH_TO(AttributeValueDoubleQuotedState);
874         if (m_additionalAllowedCharacter == '\'')
875             SWITCH_TO(AttributeValueSingleQuotedState);
876         ASSERT(m_additionalAllowedCharacter == '>');
877         SWITCH_TO(AttributeValueUnquotedState);
878     END_STATE()
879
880     BEGIN_STATE(AfterAttributeValueQuotedState)
881         if (isTokenizerWhitespace(character))
882             ADVANCE_TO(BeforeAttributeNameState);
883         if (character == '/')
884             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
885         if (character == '>')
886             return emitAndResumeInDataState(source);
887         if (m_options.usePreHTML5ParserQuirks && character == '<')
888             return emitAndReconsumeInDataState();
889         if (character == kEndOfFileMarker) {
890             parseError();
891             RECONSUME_IN(DataState);
892         }
893         parseError();
894         RECONSUME_IN(BeforeAttributeNameState);
895     END_STATE()
896
897     BEGIN_STATE(SelfClosingStartTagState)
898         if (character == '>') {
899             m_token.setSelfClosing();
900             return emitAndResumeInDataState(source);
901         }
902         if (character == kEndOfFileMarker) {
903             parseError();
904             RECONSUME_IN(DataState);
905         }
906         parseError();
907         RECONSUME_IN(BeforeAttributeNameState);
908     END_STATE()
909
910     BEGIN_STATE(BogusCommentState)
911         m_token.beginComment();
912         RECONSUME_IN(ContinueBogusCommentState);
913     END_STATE()
914
915     BEGIN_STATE(ContinueBogusCommentState)
916         if (character == '>')
917             return emitAndResumeInDataState(source);
918         if (character == kEndOfFileMarker)
919             return emitAndReconsumeInDataState();
920         m_token.appendToComment(character);
921         ADVANCE_TO(ContinueBogusCommentState);
922     END_STATE()
923
924     BEGIN_STATE(MarkupDeclarationOpenState)
925         if (character == '-') {
926             auto result = source.advancePast("--");
927             if (result == SegmentedString::DidMatch) {
928                 m_token.beginComment();
929                 SWITCH_TO(CommentStartState);
930             }
931             if (result == SegmentedString::NotEnoughCharacters)
932                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
933         } else if (isASCIIAlphaCaselessEqual(character, 'd')) {
934             auto result = source.advancePastLettersIgnoringASCIICase("doctype");
935             if (result == SegmentedString::DidMatch)
936                 SWITCH_TO(DOCTYPEState);
937             if (result == SegmentedString::NotEnoughCharacters)
938                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
939         } else if (character == '[' && shouldAllowCDATA()) {
940             auto result = source.advancePast("[CDATA[");
941             if (result == SegmentedString::DidMatch)
942                 SWITCH_TO(CDATASectionState);
943             if (result == SegmentedString::NotEnoughCharacters)
944                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
945         }
946         parseError();
947         RECONSUME_IN(BogusCommentState);
948     END_STATE()
949
950     BEGIN_STATE(CommentStartState)
951         if (character == '-')
952             ADVANCE_PAST_NON_NEWLINE_TO(CommentStartDashState);
953         if (character == '>') {
954             parseError();
955             return emitAndResumeInDataState(source);
956         }
957         if (character == kEndOfFileMarker) {
958             parseError();
959             return emitAndReconsumeInDataState();
960         }
961         m_token.appendToComment(character);
962         ADVANCE_TO(CommentState);
963     END_STATE()
964
965     BEGIN_STATE(CommentStartDashState)
966         if (character == '-')
967             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
968         if (character == '>') {
969             parseError();
970             return emitAndResumeInDataState(source);
971         }
972         if (character == kEndOfFileMarker) {
973             parseError();
974             return emitAndReconsumeInDataState();
975         }
976         m_token.appendToComment('-');
977         m_token.appendToComment(character);
978         ADVANCE_TO(CommentState);
979     END_STATE()
980
981     BEGIN_STATE(CommentState)
982         if (character == '-')
983             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState);
984         if (character == kEndOfFileMarker) {
985             parseError();
986             return emitAndReconsumeInDataState();
987         }
988         m_token.appendToComment(character);
989         ADVANCE_TO(CommentState);
990     END_STATE()
991
992     BEGIN_STATE(CommentEndDashState)
993         if (character == '-')
994             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
995         if (character == kEndOfFileMarker) {
996             parseError();
997             return emitAndReconsumeInDataState();
998         }
999         m_token.appendToComment('-');
1000         m_token.appendToComment(character);
1001         ADVANCE_TO(CommentState);
1002     END_STATE()
1003
1004     BEGIN_STATE(CommentEndState)
1005         if (character == '>')
1006             return emitAndResumeInDataState(source);
1007         if (character == '!') {
1008             parseError();
1009             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndBangState);
1010         }
1011         if (character == '-') {
1012             parseError();
1013             m_token.appendToComment('-');
1014             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
1015         }
1016         if (character == kEndOfFileMarker) {
1017             parseError();
1018             return emitAndReconsumeInDataState();
1019         }
1020         parseError();
1021         m_token.appendToComment('-');
1022         m_token.appendToComment('-');
1023         m_token.appendToComment(character);
1024         ADVANCE_TO(CommentState);
1025     END_STATE()
1026
1027     BEGIN_STATE(CommentEndBangState)
1028         if (character == '-') {
1029             m_token.appendToComment('-');
1030             m_token.appendToComment('-');
1031             m_token.appendToComment('!');
1032             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState);
1033         }
1034         if (character == '>')
1035             return emitAndResumeInDataState(source);
1036         if (character == kEndOfFileMarker) {
1037             parseError();
1038             return emitAndReconsumeInDataState();
1039         }
1040         m_token.appendToComment('-');
1041         m_token.appendToComment('-');
1042         m_token.appendToComment('!');
1043         m_token.appendToComment(character);
1044         ADVANCE_TO(CommentState);
1045     END_STATE()
1046
1047     BEGIN_STATE(DOCTYPEState)
1048         if (isTokenizerWhitespace(character))
1049             ADVANCE_TO(BeforeDOCTYPENameState);
1050         if (character == kEndOfFileMarker) {
1051             parseError();
1052             m_token.beginDOCTYPE();
1053             m_token.setForceQuirks();
1054             return emitAndReconsumeInDataState();
1055         }
1056         parseError();
1057         RECONSUME_IN(BeforeDOCTYPENameState);
1058     END_STATE()
1059
1060     BEGIN_STATE(BeforeDOCTYPENameState)
1061         if (isTokenizerWhitespace(character))
1062             ADVANCE_TO(BeforeDOCTYPENameState);
1063         if (character == '>') {
1064             parseError();
1065             m_token.beginDOCTYPE();
1066             m_token.setForceQuirks();
1067             return emitAndResumeInDataState(source);
1068         }
1069         if (character == kEndOfFileMarker) {
1070             parseError();
1071             m_token.beginDOCTYPE();
1072             m_token.setForceQuirks();
1073             return emitAndReconsumeInDataState();
1074         }
1075         m_token.beginDOCTYPE(toASCIILower(character));
1076         ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState);
1077     END_STATE()
1078
1079     BEGIN_STATE(DOCTYPENameState)
1080         if (isTokenizerWhitespace(character))
1081             ADVANCE_TO(AfterDOCTYPENameState);
1082         if (character == '>')
1083             return emitAndResumeInDataState(source);
1084         if (character == kEndOfFileMarker) {
1085             parseError();
1086             m_token.setForceQuirks();
1087             return emitAndReconsumeInDataState();
1088         }
1089         m_token.appendToName(toASCIILower(character));
1090         ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState);
1091     END_STATE()
1092
1093     BEGIN_STATE(AfterDOCTYPENameState)
1094         if (isTokenizerWhitespace(character))
1095             ADVANCE_TO(AfterDOCTYPENameState);
1096         if (character == '>')
1097             return emitAndResumeInDataState(source);
1098         if (character == kEndOfFileMarker) {
1099             parseError();
1100             m_token.setForceQuirks();
1101             return emitAndReconsumeInDataState();
1102         }
1103         if (isASCIIAlphaCaselessEqual(character, 'p')) {
1104             auto result = source.advancePastLettersIgnoringASCIICase("public");
1105             if (result == SegmentedString::DidMatch)
1106                 SWITCH_TO(AfterDOCTYPEPublicKeywordState);
1107             if (result == SegmentedString::NotEnoughCharacters)
1108                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
1109         } else if (isASCIIAlphaCaselessEqual(character, 's')) {
1110             auto result = source.advancePastLettersIgnoringASCIICase("system");
1111             if (result == SegmentedString::DidMatch)
1112                 SWITCH_TO(AfterDOCTYPESystemKeywordState);
1113             if (result == SegmentedString::NotEnoughCharacters)
1114                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
1115         }
1116         parseError();
1117         m_token.setForceQuirks();
1118         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1119     END_STATE()
1120
1121     BEGIN_STATE(AfterDOCTYPEPublicKeywordState)
1122         if (isTokenizerWhitespace(character))
1123             ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
1124         if (character == '"') {
1125             parseError();
1126             m_token.setPublicIdentifierToEmptyString();
1127             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
1128         }
1129         if (character == '\'') {
1130             parseError();
1131             m_token.setPublicIdentifierToEmptyString();
1132             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
1133         }
1134         if (character == '>') {
1135             parseError();
1136             m_token.setForceQuirks();
1137             return emitAndResumeInDataState(source);
1138         }
1139         if (character == kEndOfFileMarker) {
1140             parseError();
1141             m_token.setForceQuirks();
1142             return emitAndReconsumeInDataState();
1143         }
1144         parseError();
1145         m_token.setForceQuirks();
1146         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1147     END_STATE()
1148
1149     BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState)
1150         if (isTokenizerWhitespace(character))
1151             ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
1152         if (character == '"') {
1153             m_token.setPublicIdentifierToEmptyString();
1154             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
1155         }
1156         if (character == '\'') {
1157             m_token.setPublicIdentifierToEmptyString();
1158             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
1159         }
1160         if (character == '>') {
1161             parseError();
1162             m_token.setForceQuirks();
1163             return emitAndResumeInDataState(source);
1164         }
1165         if (character == kEndOfFileMarker) {
1166             parseError();
1167             m_token.setForceQuirks();
1168             return emitAndReconsumeInDataState();
1169         }
1170         parseError();
1171         m_token.setForceQuirks();
1172         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1173     END_STATE()
1174
1175     BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState)
1176         if (character == '"')
1177             ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);
1178         if (character == '>') {
1179             parseError();
1180             m_token.setForceQuirks();
1181             return emitAndResumeInDataState(source);
1182         }
1183         if (character == kEndOfFileMarker) {
1184             parseError();
1185             m_token.setForceQuirks();
1186             return emitAndReconsumeInDataState();
1187         }
1188         m_token.appendToPublicIdentifier(character);
1189         ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
1190     END_STATE()
1191
1192     BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState)
1193         if (character == '\'')
1194             ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);
1195         if (character == '>') {
1196             parseError();
1197             m_token.setForceQuirks();
1198             return emitAndResumeInDataState(source);
1199         }
1200         if (character == kEndOfFileMarker) {
1201             parseError();
1202             m_token.setForceQuirks();
1203             return emitAndReconsumeInDataState();
1204         }
1205         m_token.appendToPublicIdentifier(character);
1206         ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
1207     END_STATE()
1208
1209     BEGIN_STATE(AfterDOCTYPEPublicIdentifierState)
1210         if (isTokenizerWhitespace(character))
1211             ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
1212         if (character == '>')
1213             return emitAndResumeInDataState(source);
1214         if (character == '"') {
1215             parseError();
1216             m_token.setSystemIdentifierToEmptyString();
1217             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1218         }
1219         if (character == '\'') {
1220             parseError();
1221             m_token.setSystemIdentifierToEmptyString();
1222             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1223         }
1224         if (character == kEndOfFileMarker) {
1225             parseError();
1226             m_token.setForceQuirks();
1227             return emitAndReconsumeInDataState();
1228         }
1229         parseError();
1230         m_token.setForceQuirks();
1231         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1232     END_STATE()
1233
1234     BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState)
1235         if (isTokenizerWhitespace(character))
1236             ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
1237         if (character == '>')
1238             return emitAndResumeInDataState(source);
1239         if (character == '"') {
1240             m_token.setSystemIdentifierToEmptyString();
1241             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1242         }
1243         if (character == '\'') {
1244             m_token.setSystemIdentifierToEmptyString();
1245             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1246         }
1247         if (character == kEndOfFileMarker) {
1248             parseError();
1249             m_token.setForceQuirks();
1250             return emitAndReconsumeInDataState();
1251         }
1252         parseError();
1253         m_token.setForceQuirks();
1254         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1255     END_STATE()
1256
1257     BEGIN_STATE(AfterDOCTYPESystemKeywordState)
1258         if (isTokenizerWhitespace(character))
1259             ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
1260         if (character == '"') {
1261             parseError();
1262             m_token.setSystemIdentifierToEmptyString();
1263             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1264         }
1265         if (character == '\'') {
1266             parseError();
1267             m_token.setSystemIdentifierToEmptyString();
1268             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1269         }
1270         if (character == '>') {
1271             parseError();
1272             m_token.setForceQuirks();
1273             return emitAndResumeInDataState(source);
1274         }
1275         if (character == kEndOfFileMarker) {
1276             parseError();
1277             m_token.setForceQuirks();
1278             return emitAndReconsumeInDataState();
1279         }
1280         parseError();
1281         m_token.setForceQuirks();
1282         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1283     END_STATE()
1284
1285     BEGIN_STATE(BeforeDOCTYPESystemIdentifierState)
1286         if (isTokenizerWhitespace(character))
1287             ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
1288         if (character == '"') {
1289             m_token.setSystemIdentifierToEmptyString();
1290             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1291         }
1292         if (character == '\'') {
1293             m_token.setSystemIdentifierToEmptyString();
1294             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1295         }
1296         if (character == '>') {
1297             parseError();
1298             m_token.setForceQuirks();
1299             return emitAndResumeInDataState(source);
1300         }
1301         if (character == kEndOfFileMarker) {
1302             parseError();
1303             m_token.setForceQuirks();
1304             return emitAndReconsumeInDataState();
1305         }
1306         parseError();
1307         m_token.setForceQuirks();
1308         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1309     END_STATE()
1310
1311     BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState)
1312         if (character == '"')
1313             ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);
1314         if (character == '>') {
1315             parseError();
1316             m_token.setForceQuirks();
1317             return emitAndResumeInDataState(source);
1318         }
1319         if (character == kEndOfFileMarker) {
1320             parseError();
1321             m_token.setForceQuirks();
1322             return emitAndReconsumeInDataState();
1323         }
1324         m_token.appendToSystemIdentifier(character);
1325         ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1326     END_STATE()
1327
1328     BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState)
1329         if (character == '\'')
1330             ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);
1331         if (character == '>') {
1332             parseError();
1333             m_token.setForceQuirks();
1334             return emitAndResumeInDataState(source);
1335         }
1336         if (character == kEndOfFileMarker) {
1337             parseError();
1338             m_token.setForceQuirks();
1339             return emitAndReconsumeInDataState();
1340         }
1341         m_token.appendToSystemIdentifier(character);
1342         ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1343     END_STATE()
1344
1345     BEGIN_STATE(AfterDOCTYPESystemIdentifierState)
1346         if (isTokenizerWhitespace(character))
1347             ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
1348         if (character == '>')
1349             return emitAndResumeInDataState(source);
1350         if (character == kEndOfFileMarker) {
1351             parseError();
1352             m_token.setForceQuirks();
1353             return emitAndReconsumeInDataState();
1354         }
1355         parseError();
1356         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1357     END_STATE()
1358
1359     BEGIN_STATE(BogusDOCTYPEState)
1360         if (character == '>')
1361             return emitAndResumeInDataState(source);
1362         if (character == kEndOfFileMarker)
1363             return emitAndReconsumeInDataState();
1364         ADVANCE_TO(BogusDOCTYPEState);
1365     END_STATE()
1366
1367     BEGIN_STATE(CDATASectionState)
1368         if (character == ']')
1369             ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionRightSquareBracketState);
1370         if (character == kEndOfFileMarker)
1371             RECONSUME_IN(DataState);
1372         bufferCharacter(character);
1373         ADVANCE_TO(CDATASectionState);
1374     END_STATE()
1375
1376     BEGIN_STATE(CDATASectionRightSquareBracketState)
1377         if (character == ']')
1378             ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionDoubleRightSquareBracketState);
1379         bufferASCIICharacter(']');
1380         RECONSUME_IN(CDATASectionState);
1381     END_STATE()
1382
1383     BEGIN_STATE(CDATASectionDoubleRightSquareBracketState)
1384         if (character == '>')
1385             ADVANCE_PAST_NON_NEWLINE_TO(DataState);
1386         bufferASCIICharacter(']');
1387         bufferASCIICharacter(']');
1388         RECONSUME_IN(CDATASectionState);
1389     END_STATE()
1390
1391     }
1392
1393     ASSERT_NOT_REACHED();
1394     return false;
1395 }
1396
1397 String HTMLTokenizer::bufferedCharacters() const
1398 {
1399     // FIXME: Add an assert about m_state.
1400     StringBuilder characters;
1401     characters.reserveCapacity(numberOfBufferedCharacters());
1402     characters.append('<');
1403     characters.append('/');
1404     characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size());
1405     return characters.toString();
1406 }
1407
1408 void HTMLTokenizer::updateStateFor(const AtomicString& tagName)
1409 {
1410     if (tagName == textareaTag || tagName == titleTag)
1411         m_state = RCDATAState;
1412     else if (tagName == plaintextTag)
1413         m_state = PLAINTEXTState;
1414     else if (tagName == scriptTag)
1415         m_state = ScriptDataState;
1416     else if (tagName == styleTag
1417         || tagName == iframeTag
1418         || tagName == xmpTag
1419         || (tagName == noembedTag && m_options.pluginsEnabled)
1420         || tagName == noframesTag
1421         || (tagName == noscriptTag && m_options.scriptEnabled))
1422         m_state = RAWTEXTState;
1423 }
1424
1425 inline void HTMLTokenizer::appendToTemporaryBuffer(UChar character)
1426 {
1427     ASSERT(isASCII(character));
1428     m_temporaryBuffer.append(character);
1429 }
1430
1431 inline bool HTMLTokenizer::temporaryBufferIs(const char* expectedString)
1432 {
1433     return vectorEqualsString(m_temporaryBuffer, expectedString);
1434 }
1435
1436 inline void HTMLTokenizer::appendToPossibleEndTag(UChar character)
1437 {
1438     ASSERT(isASCII(character));
1439     m_bufferedEndTagName.append(character);
1440 }
1441
1442 inline bool HTMLTokenizer::isAppropriateEndTag() const
1443 {
1444     if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
1445         return false;
1446
1447     unsigned size = m_bufferedEndTagName.size();
1448
1449     for (unsigned i = 0; i < size; i++) {
1450         if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
1451             return false;
1452     }
1453
1454     return true;
1455 }
1456
1457 inline void HTMLTokenizer::parseError()
1458 {
1459 }
1460
1461 }