Streamline and speed up tokenizer and segmented string classes
[WebKit-https.git] / Source / WebCore / html / parser / HTMLTokenizer.cpp
1 /*
2  * Copyright (C) 2008-2016 Apple Inc. All Rights Reserved.
3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26  */
27
28 #include "config.h"
29 #include "HTMLTokenizer.h"
30
31 #include "HTMLEntityParser.h"
32 #include "HTMLNames.h"
33 #include "MarkupTokenizerInlines.h"
34 #include <wtf/text/StringBuilder.h>
35
36 using namespace WTF;
37
38 namespace WebCore {
39
40 using namespace HTMLNames;
41
42 static inline LChar convertASCIIAlphaToLower(UChar character)
43 {
44     ASSERT(isASCIIAlpha(character));
45     return toASCIILowerUnchecked(character);
46 }
47
48 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const char* string)
49 {
50     unsigned size = vector.size();
51     for (unsigned i = 0; i < size; ++i) {
52         if (!string[i] || vector[i] != string[i])
53             return false;
54     }
55     return !string[size];
56 }
57
58 inline bool HTMLTokenizer::inEndTagBufferingState() const
59 {
60     switch (m_state) {
61     case RCDATAEndTagOpenState:
62     case RCDATAEndTagNameState:
63     case RAWTEXTEndTagOpenState:
64     case RAWTEXTEndTagNameState:
65     case ScriptDataEndTagOpenState:
66     case ScriptDataEndTagNameState:
67     case ScriptDataEscapedEndTagOpenState:
68     case ScriptDataEscapedEndTagNameState:
69         return true;
70     default:
71         return false;
72     }
73 }
74
75 HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
76     : m_preprocessor(*this)
77     , m_options(options)
78 {
79 }
80
81 inline void HTMLTokenizer::bufferASCIICharacter(UChar character)
82 {
83     ASSERT(character != kEndOfFileMarker);
84     ASSERT(isASCII(character));
85     LChar narrowedCharacter = character;
86     m_token.appendToCharacter(narrowedCharacter);
87 }
88
89 inline void HTMLTokenizer::bufferCharacter(UChar character)
90 {
91     ASSERT(character != kEndOfFileMarker);
92     m_token.appendToCharacter(character);
93 }
94
95 inline bool HTMLTokenizer::emitAndResumeInDataState(SegmentedString& source)
96 {
97     saveEndTagNameIfNeeded();
98     m_state = DataState;
99     source.advancePastNonNewline();
100     return true;
101 }
102
103 inline bool HTMLTokenizer::emitAndReconsumeInDataState()
104 {
105     saveEndTagNameIfNeeded();
106     m_state = DataState;
107     return true;
108 }
109
110 inline bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
111 {
112     m_state = DataState;
113     if (haveBufferedCharacterToken())
114         return true;
115     source.advance();
116     m_token.clear();
117     m_token.makeEndOfFile();
118     return true;
119 }
120
121 inline void HTMLTokenizer::saveEndTagNameIfNeeded()
122 {
123     ASSERT(m_token.type() != HTMLToken::Uninitialized);
124     if (m_token.type() == HTMLToken::StartTag)
125         m_appropriateEndTagName = m_token.name();
126 }
127
128 inline bool HTMLTokenizer::haveBufferedCharacterToken() const
129 {
130     return m_token.type() == HTMLToken::Character;
131 }
132
133 inline bool HTMLTokenizer::processEntity(SegmentedString& source)
134 {
135     bool notEnoughCharacters = false;
136     StringBuilder decodedEntity;
137     bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
138     if (notEnoughCharacters)
139         return false;
140     if (!success) {
141         ASSERT(decodedEntity.isEmpty());
142         bufferASCIICharacter('&');
143     } else {
144         for (unsigned i = 0; i < decodedEntity.length(); ++i)
145             bufferCharacter(decodedEntity[i]);
146     }
147     return true;
148 }
149
150 void HTMLTokenizer::flushBufferedEndTag()
151 {
152     m_token.beginEndTag(m_bufferedEndTagName);
153     m_bufferedEndTagName.clear();
154     m_appropriateEndTagName.clear();
155     m_temporaryBuffer.clear();
156 }
157
158 bool HTMLTokenizer::commitToPartialEndTag(SegmentedString& source, UChar character, State state)
159 {
160     ASSERT(source.currentCharacter() == character);
161     appendToTemporaryBuffer(character);
162     source.advancePastNonNewline();
163
164     if (haveBufferedCharacterToken()) {
165         // Emit the buffered character token.
166         // The next call to processToken will flush the buffered end tag and continue parsing it.
167         m_state = state;
168         return true;
169     }
170
171     flushBufferedEndTag();
172     return false;
173 }
174
175 bool HTMLTokenizer::commitToCompleteEndTag(SegmentedString& source)
176 {
177     ASSERT(source.currentCharacter() == '>');
178     appendToTemporaryBuffer('>');
179     source.advancePastNonNewline();
180
181     m_state = DataState;
182
183     if (haveBufferedCharacterToken()) {
184         // Emit the character token we already have.
185         // The next call to processToken will flush the buffered end tag and emit it.
186         return true;
187     }
188
189     flushBufferedEndTag();
190     return true;
191 }
192
193 bool HTMLTokenizer::processToken(SegmentedString& source)
194 {
195     if (!m_bufferedEndTagName.isEmpty() && !inEndTagBufferingState()) {
196         // We are back here after emitting a character token that came just before an end tag.
197         // To continue parsing the end tag we need to move the buffered tag name into the token.
198         flushBufferedEndTag();
199
200         // If we are in the data state, the end tag is already complete and we should emit it
201         // now, otherwise, we want to resume parsing the partial end tag.
202         if (m_state == DataState)
203             return true;
204     }
205
206     if (!m_preprocessor.peek(source, isNullCharacterSkippingState(m_state)))
207         return haveBufferedCharacterToken();
208     UChar character = m_preprocessor.nextInputCharacter();
209
210     // https://html.spec.whatwg.org/#tokenization
211     switch (m_state) {
212
213     BEGIN_STATE(DataState)
214         if (character == '&')
215             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInDataState);
216         if (character == '<') {
217             if (haveBufferedCharacterToken())
218                 RETURN_IN_CURRENT_STATE(true);
219             ADVANCE_PAST_NON_NEWLINE_TO(TagOpenState);
220         }
221         if (character == kEndOfFileMarker)
222             return emitEndOfFile(source);
223         bufferCharacter(character);
224         ADVANCE_TO(DataState);
225     END_STATE()
226
227     BEGIN_STATE(CharacterReferenceInDataState)
228         if (!processEntity(source))
229             RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
230         SWITCH_TO(DataState);
231     END_STATE()
232
233     BEGIN_STATE(RCDATAState)
234         if (character == '&')
235             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInRCDATAState);
236         if (character == '<')
237             ADVANCE_PAST_NON_NEWLINE_TO(RCDATALessThanSignState);
238         if (character == kEndOfFileMarker)
239             RECONSUME_IN(DataState);
240         bufferCharacter(character);
241         ADVANCE_TO(RCDATAState);
242     END_STATE()
243
244     BEGIN_STATE(CharacterReferenceInRCDATAState)
245         if (!processEntity(source))
246             RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
247         SWITCH_TO(RCDATAState);
248     END_STATE()
249
250     BEGIN_STATE(RAWTEXTState)
251         if (character == '<')
252             ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTLessThanSignState);
253         if (character == kEndOfFileMarker)
254             RECONSUME_IN(DataState);
255         bufferCharacter(character);
256         ADVANCE_TO(RAWTEXTState);
257     END_STATE()
258
259     BEGIN_STATE(ScriptDataState)
260         if (character == '<')
261             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataLessThanSignState);
262         if (character == kEndOfFileMarker)
263             RECONSUME_IN(DataState);
264         bufferCharacter(character);
265         ADVANCE_TO(ScriptDataState);
266     END_STATE()
267
268     BEGIN_STATE(PLAINTEXTState)
269         if (character == kEndOfFileMarker)
270             RECONSUME_IN(DataState);
271         bufferCharacter(character);
272         ADVANCE_TO(PLAINTEXTState);
273     END_STATE()
274
275     BEGIN_STATE(TagOpenState)
276         if (character == '!')
277             ADVANCE_PAST_NON_NEWLINE_TO(MarkupDeclarationOpenState);
278         if (character == '/')
279             ADVANCE_PAST_NON_NEWLINE_TO(EndTagOpenState);
280         if (isASCIIAlpha(character)) {
281             m_token.beginStartTag(convertASCIIAlphaToLower(character));
282             ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
283         }
284         if (character == '?') {
285             parseError();
286             // The spec consumes the current character before switching
287             // to the bogus comment state, but it's easier to implement
288             // if we reconsume the current character.
289             RECONSUME_IN(BogusCommentState);
290         }
291         parseError();
292         bufferASCIICharacter('<');
293         RECONSUME_IN(DataState);
294     END_STATE()
295
296     BEGIN_STATE(EndTagOpenState)
297         if (isASCIIAlpha(character)) {
298             m_token.beginEndTag(convertASCIIAlphaToLower(character));
299             m_appropriateEndTagName.clear();
300             ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
301         }
302         if (character == '>') {
303             parseError();
304             ADVANCE_PAST_NON_NEWLINE_TO(DataState);
305         }
306         if (character == kEndOfFileMarker) {
307             parseError();
308             bufferASCIICharacter('<');
309             bufferASCIICharacter('/');
310             RECONSUME_IN(DataState);
311         }
312         parseError();
313         RECONSUME_IN(BogusCommentState);
314     END_STATE()
315
316     BEGIN_STATE(TagNameState)
317         if (isTokenizerWhitespace(character))
318             ADVANCE_TO(BeforeAttributeNameState);
319         if (character == '/')
320             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
321         if (character == '>')
322             return emitAndResumeInDataState(source);
323         if (m_options.usePreHTML5ParserQuirks && character == '<')
324             return emitAndReconsumeInDataState();
325         if (character == kEndOfFileMarker) {
326             parseError();
327             RECONSUME_IN(DataState);
328         }
329         m_token.appendToName(toASCIILower(character));
330         ADVANCE_PAST_NON_NEWLINE_TO(TagNameState);
331     END_STATE()
332
333     BEGIN_STATE(RCDATALessThanSignState)
334         if (character == '/') {
335             m_temporaryBuffer.clear();
336             ASSERT(m_bufferedEndTagName.isEmpty());
337             ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagOpenState);
338         }
339         bufferASCIICharacter('<');
340         RECONSUME_IN(RCDATAState);
341     END_STATE()
342
343     BEGIN_STATE(RCDATAEndTagOpenState)
344         if (isASCIIAlpha(character)) {
345             appendToTemporaryBuffer(character);
346             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
347             ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);
348         }
349         bufferASCIICharacter('<');
350         bufferASCIICharacter('/');
351         RECONSUME_IN(RCDATAState);
352     END_STATE()
353
354     BEGIN_STATE(RCDATAEndTagNameState)
355         if (isASCIIAlpha(character)) {
356             appendToTemporaryBuffer(character);
357             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
358             ADVANCE_PAST_NON_NEWLINE_TO(RCDATAEndTagNameState);
359         }
360         if (isTokenizerWhitespace(character)) {
361             if (isAppropriateEndTag()) {
362                 if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
363                     return true;
364                 SWITCH_TO(BeforeAttributeNameState);
365             }
366         } else if (character == '/') {
367             if (isAppropriateEndTag()) {
368                 if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
369                     return true;
370                 SWITCH_TO(SelfClosingStartTagState);
371             }
372         } else if (character == '>') {
373             if (isAppropriateEndTag())
374                 return commitToCompleteEndTag(source);
375         }
376         bufferASCIICharacter('<');
377         bufferASCIICharacter('/');
378         m_token.appendToCharacter(m_temporaryBuffer);
379         m_bufferedEndTagName.clear();
380         m_temporaryBuffer.clear();
381         RECONSUME_IN(RCDATAState);
382     END_STATE()
383
384     BEGIN_STATE(RAWTEXTLessThanSignState)
385         if (character == '/') {
386             m_temporaryBuffer.clear();
387             ASSERT(m_bufferedEndTagName.isEmpty());
388             ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagOpenState);
389         }
390         bufferASCIICharacter('<');
391         RECONSUME_IN(RAWTEXTState);
392     END_STATE()
393
394     BEGIN_STATE(RAWTEXTEndTagOpenState)
395         if (isASCIIAlpha(character)) {
396             appendToTemporaryBuffer(character);
397             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
398             ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);
399         }
400         bufferASCIICharacter('<');
401         bufferASCIICharacter('/');
402         RECONSUME_IN(RAWTEXTState);
403     END_STATE()
404
405     BEGIN_STATE(RAWTEXTEndTagNameState)
406         if (isASCIIAlpha(character)) {
407             appendToTemporaryBuffer(character);
408             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
409             ADVANCE_PAST_NON_NEWLINE_TO(RAWTEXTEndTagNameState);
410         }
411         if (isTokenizerWhitespace(character)) {
412             if (isAppropriateEndTag()) {
413                 if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
414                     return true;
415                 SWITCH_TO(BeforeAttributeNameState);
416             }
417         } else if (character == '/') {
418             if (isAppropriateEndTag()) {
419                 if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
420                     return true;
421                 SWITCH_TO(SelfClosingStartTagState);
422             }
423         } else if (character == '>') {
424             if (isAppropriateEndTag())
425                 return commitToCompleteEndTag(source);
426         }
427         bufferASCIICharacter('<');
428         bufferASCIICharacter('/');
429         m_token.appendToCharacter(m_temporaryBuffer);
430         m_bufferedEndTagName.clear();
431         m_temporaryBuffer.clear();
432         RECONSUME_IN(RAWTEXTState);
433     END_STATE()
434
435     BEGIN_STATE(ScriptDataLessThanSignState)
436         if (character == '/') {
437             m_temporaryBuffer.clear();
438             ASSERT(m_bufferedEndTagName.isEmpty());
439             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagOpenState);
440         }
441         if (character == '!') {
442             bufferASCIICharacter('<');
443             bufferASCIICharacter('!');
444             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartState);
445         }
446         bufferASCIICharacter('<');
447         RECONSUME_IN(ScriptDataState);
448     END_STATE()
449
450     BEGIN_STATE(ScriptDataEndTagOpenState)
451         if (isASCIIAlpha(character)) {
452             appendToTemporaryBuffer(character);
453             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
454             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);
455         }
456         bufferASCIICharacter('<');
457         bufferASCIICharacter('/');
458         RECONSUME_IN(ScriptDataState);
459     END_STATE()
460
461     BEGIN_STATE(ScriptDataEndTagNameState)
462         if (isASCIIAlpha(character)) {
463             appendToTemporaryBuffer(character);
464             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
465             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEndTagNameState);
466         }
467         if (isTokenizerWhitespace(character)) {
468             if (isAppropriateEndTag()) {
469                 if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
470                     return true;
471                 SWITCH_TO(BeforeAttributeNameState);
472             }
473         } else if (character == '/') {
474             if (isAppropriateEndTag()) {
475                 if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
476                     return true;
477                 SWITCH_TO(SelfClosingStartTagState);
478             }
479         } else if (character == '>') {
480             if (isAppropriateEndTag())
481                 return commitToCompleteEndTag(source);
482         }
483         bufferASCIICharacter('<');
484         bufferASCIICharacter('/');
485         m_token.appendToCharacter(m_temporaryBuffer);
486         m_bufferedEndTagName.clear();
487         m_temporaryBuffer.clear();
488         RECONSUME_IN(ScriptDataState);
489     END_STATE()
490
491     BEGIN_STATE(ScriptDataEscapeStartState)
492         if (character == '-') {
493             bufferASCIICharacter('-');
494             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapeStartDashState);
495         } else
496             RECONSUME_IN(ScriptDataState);
497     END_STATE()
498
499     BEGIN_STATE(ScriptDataEscapeStartDashState)
500         if (character == '-') {
501             bufferASCIICharacter('-');
502             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
503         } else
504             RECONSUME_IN(ScriptDataState);
505     END_STATE()
506
507     BEGIN_STATE(ScriptDataEscapedState)
508         if (character == '-') {
509             bufferASCIICharacter('-');
510             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashState);
511         }
512         if (character == '<')
513             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
514         if (character == kEndOfFileMarker) {
515             parseError();
516             RECONSUME_IN(DataState);
517         }
518         bufferCharacter(character);
519         ADVANCE_TO(ScriptDataEscapedState);
520     END_STATE()
521
522     BEGIN_STATE(ScriptDataEscapedDashState)
523         if (character == '-') {
524             bufferASCIICharacter('-');
525             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
526         }
527         if (character == '<')
528             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
529         if (character == kEndOfFileMarker) {
530             parseError();
531             RECONSUME_IN(DataState);
532         }
533         bufferCharacter(character);
534         ADVANCE_TO(ScriptDataEscapedState);
535     END_STATE()
536
537     BEGIN_STATE(ScriptDataEscapedDashDashState)
538         if (character == '-') {
539             bufferASCIICharacter('-');
540             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedDashDashState);
541         }
542         if (character == '<')
543             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedLessThanSignState);
544         if (character == '>') {
545             bufferASCIICharacter('>');
546             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState);
547         }
548         if (character == kEndOfFileMarker) {
549             parseError();
550             RECONSUME_IN(DataState);
551         }
552         bufferCharacter(character);
553         ADVANCE_TO(ScriptDataEscapedState);
554     END_STATE()
555
556     BEGIN_STATE(ScriptDataEscapedLessThanSignState)
557         if (character == '/') {
558             m_temporaryBuffer.clear();
559             ASSERT(m_bufferedEndTagName.isEmpty());
560             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagOpenState);
561         }
562         if (isASCIIAlpha(character)) {
563             bufferASCIICharacter('<');
564             bufferASCIICharacter(character);
565             m_temporaryBuffer.clear();
566             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
567             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);
568         }
569         bufferASCIICharacter('<');
570         RECONSUME_IN(ScriptDataEscapedState);
571     END_STATE()
572
573     BEGIN_STATE(ScriptDataEscapedEndTagOpenState)
574         if (isASCIIAlpha(character)) {
575             appendToTemporaryBuffer(character);
576             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
577             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);
578         }
579         bufferASCIICharacter('<');
580         bufferASCIICharacter('/');
581         RECONSUME_IN(ScriptDataEscapedState);
582     END_STATE()
583
584     BEGIN_STATE(ScriptDataEscapedEndTagNameState)
585         if (isASCIIAlpha(character)) {
586             appendToTemporaryBuffer(character);
587             appendToPossibleEndTag(convertASCIIAlphaToLower(character));
588             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataEscapedEndTagNameState);
589         }
590         if (isTokenizerWhitespace(character)) {
591             if (isAppropriateEndTag()) {
592                 if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
593                     return true;
594                 SWITCH_TO(BeforeAttributeNameState);
595             }
596         } else if (character == '/') {
597             if (isAppropriateEndTag()) {
598                 if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
599                     return true;
600                 SWITCH_TO(SelfClosingStartTagState);
601             }
602         } else if (character == '>') {
603             if (isAppropriateEndTag())
604                 return commitToCompleteEndTag(source);
605         }
606         bufferASCIICharacter('<');
607         bufferASCIICharacter('/');
608         m_token.appendToCharacter(m_temporaryBuffer);
609         m_bufferedEndTagName.clear();
610         m_temporaryBuffer.clear();
611         RECONSUME_IN(ScriptDataEscapedState);
612     END_STATE()
613
614     BEGIN_STATE(ScriptDataDoubleEscapeStartState)
615         if (isTokenizerWhitespace(character) || character == '/' || character == '>') {
616             bufferASCIICharacter(character);
617             if (temporaryBufferIs("script"))
618                 ADVANCE_TO(ScriptDataDoubleEscapedState);
619             else
620                 ADVANCE_TO(ScriptDataEscapedState);
621         }
622         if (isASCIIAlpha(character)) {
623             bufferASCIICharacter(character);
624             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
625             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeStartState);
626         }
627         RECONSUME_IN(ScriptDataEscapedState);
628     END_STATE()
629
630     BEGIN_STATE(ScriptDataDoubleEscapedState)
631         if (character == '-') {
632             bufferASCIICharacter('-');
633             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashState);
634         }
635         if (character == '<') {
636             bufferASCIICharacter('<');
637             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
638         }
639         if (character == kEndOfFileMarker) {
640             parseError();
641             RECONSUME_IN(DataState);
642         }
643         bufferCharacter(character);
644         ADVANCE_TO(ScriptDataDoubleEscapedState);
645     END_STATE()
646
647     BEGIN_STATE(ScriptDataDoubleEscapedDashState)
648         if (character == '-') {
649             bufferASCIICharacter('-');
650             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);
651         }
652         if (character == '<') {
653             bufferASCIICharacter('<');
654             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
655         }
656         if (character == kEndOfFileMarker) {
657             parseError();
658             RECONSUME_IN(DataState);
659         }
660         bufferCharacter(character);
661         ADVANCE_TO(ScriptDataDoubleEscapedState);
662     END_STATE()
663
664     BEGIN_STATE(ScriptDataDoubleEscapedDashDashState)
665         if (character == '-') {
666             bufferASCIICharacter('-');
667             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedDashDashState);
668         }
669         if (character == '<') {
670             bufferASCIICharacter('<');
671             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapedLessThanSignState);
672         }
673         if (character == '>') {
674             bufferASCIICharacter('>');
675             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataState);
676         }
677         if (character == kEndOfFileMarker) {
678             parseError();
679             RECONSUME_IN(DataState);
680         }
681         bufferCharacter(character);
682         ADVANCE_TO(ScriptDataDoubleEscapedState);
683     END_STATE()
684
685     BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState)
686         if (character == '/') {
687             bufferASCIICharacter('/');
688             m_temporaryBuffer.clear();
689             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);
690         }
691         RECONSUME_IN(ScriptDataDoubleEscapedState);
692     END_STATE()
693
694     BEGIN_STATE(ScriptDataDoubleEscapeEndState)
695         if (isTokenizerWhitespace(character) || character == '/' || character == '>') {
696             bufferASCIICharacter(character);
697             if (temporaryBufferIs("script"))
698                 ADVANCE_TO(ScriptDataEscapedState);
699             else
700                 ADVANCE_TO(ScriptDataDoubleEscapedState);
701         }
702         if (isASCIIAlpha(character)) {
703             bufferASCIICharacter(character);
704             appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
705             ADVANCE_PAST_NON_NEWLINE_TO(ScriptDataDoubleEscapeEndState);
706         }
707         RECONSUME_IN(ScriptDataDoubleEscapedState);
708     END_STATE()
709
710     BEGIN_STATE(BeforeAttributeNameState)
711         if (isTokenizerWhitespace(character))
712             ADVANCE_TO(BeforeAttributeNameState);
713         if (character == '/')
714             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
715         if (character == '>')
716             return emitAndResumeInDataState(source);
717         if (m_options.usePreHTML5ParserQuirks && character == '<')
718             return emitAndReconsumeInDataState();
719         if (character == kEndOfFileMarker) {
720             parseError();
721             RECONSUME_IN(DataState);
722         }
723         if (character == '"' || character == '\'' || character == '<' || character == '=')
724             parseError();
725         m_token.beginAttribute(source.numberOfCharactersConsumed());
726         m_token.appendToAttributeName(toASCIILower(character));
727         ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
728     END_STATE()
729
730     BEGIN_STATE(AttributeNameState)
731         if (isTokenizerWhitespace(character))
732             ADVANCE_TO(AfterAttributeNameState);
733         if (character == '/')
734             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
735         if (character == '=')
736             ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState);
737         if (character == '>')
738             return emitAndResumeInDataState(source);
739         if (m_options.usePreHTML5ParserQuirks && character == '<')
740             return emitAndReconsumeInDataState();
741         if (character == kEndOfFileMarker) {
742             parseError();
743             RECONSUME_IN(DataState);
744         }
745         if (character == '"' || character == '\'' || character == '<' || character == '=')
746             parseError();
747         m_token.appendToAttributeName(toASCIILower(character));
748         ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
749     END_STATE()
750
751     BEGIN_STATE(AfterAttributeNameState)
752         if (isTokenizerWhitespace(character))
753             ADVANCE_TO(AfterAttributeNameState);
754         if (character == '/')
755             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
756         if (character == '=')
757             ADVANCE_PAST_NON_NEWLINE_TO(BeforeAttributeValueState);
758         if (character == '>')
759             return emitAndResumeInDataState(source);
760         if (m_options.usePreHTML5ParserQuirks && character == '<')
761             return emitAndReconsumeInDataState();
762         if (character == kEndOfFileMarker) {
763             parseError();
764             RECONSUME_IN(DataState);
765         }
766         if (character == '"' || character == '\'' || character == '<')
767             parseError();
768         m_token.beginAttribute(source.numberOfCharactersConsumed());
769         m_token.appendToAttributeName(toASCIILower(character));
770         ADVANCE_PAST_NON_NEWLINE_TO(AttributeNameState);
771     END_STATE()
772
773     BEGIN_STATE(BeforeAttributeValueState)
774         if (isTokenizerWhitespace(character))
775             ADVANCE_TO(BeforeAttributeValueState);
776         if (character == '"')
777             ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueDoubleQuotedState);
778         if (character == '&')
779             RECONSUME_IN(AttributeValueUnquotedState);
780         if (character == '\'')
781             ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueSingleQuotedState);
782         if (character == '>') {
783             parseError();
784             return emitAndResumeInDataState(source);
785         }
786         if (character == kEndOfFileMarker) {
787             parseError();
788             RECONSUME_IN(DataState);
789         }
790         if (character == '<' || character == '=' || character == '`')
791             parseError();
792         m_token.appendToAttributeValue(character);
793         ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);
794     END_STATE()
795
796     BEGIN_STATE(AttributeValueDoubleQuotedState)
797         if (character == '"') {
798             m_token.endAttribute(source.numberOfCharactersConsumed());
799             ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);
800         }
801         if (character == '&') {
802             m_additionalAllowedCharacter = '"';
803             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
804         }
805         if (character == kEndOfFileMarker) {
806             parseError();
807             m_token.endAttribute(source.numberOfCharactersConsumed());
808             RECONSUME_IN(DataState);
809         }
810         m_token.appendToAttributeValue(character);
811         ADVANCE_TO(AttributeValueDoubleQuotedState);
812     END_STATE()
813
814     BEGIN_STATE(AttributeValueSingleQuotedState)
815         if (character == '\'') {
816             m_token.endAttribute(source.numberOfCharactersConsumed());
817             ADVANCE_PAST_NON_NEWLINE_TO(AfterAttributeValueQuotedState);
818         }
819         if (character == '&') {
820             m_additionalAllowedCharacter = '\'';
821             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
822         }
823         if (character == kEndOfFileMarker) {
824             parseError();
825             m_token.endAttribute(source.numberOfCharactersConsumed());
826             RECONSUME_IN(DataState);
827         }
828         m_token.appendToAttributeValue(character);
829         ADVANCE_TO(AttributeValueSingleQuotedState);
830     END_STATE()
831
832     BEGIN_STATE(AttributeValueUnquotedState)
833         if (isTokenizerWhitespace(character)) {
834             m_token.endAttribute(source.numberOfCharactersConsumed());
835             ADVANCE_TO(BeforeAttributeNameState);
836         }
837         if (character == '&') {
838             m_additionalAllowedCharacter = '>';
839             ADVANCE_PAST_NON_NEWLINE_TO(CharacterReferenceInAttributeValueState);
840         }
841         if (character == '>') {
842             m_token.endAttribute(source.numberOfCharactersConsumed());
843             return emitAndResumeInDataState(source);
844         }
845         if (character == kEndOfFileMarker) {
846             parseError();
847             m_token.endAttribute(source.numberOfCharactersConsumed());
848             RECONSUME_IN(DataState);
849         }
850         if (character == '"' || character == '\'' || character == '<' || character == '=' || character == '`')
851             parseError();
852         m_token.appendToAttributeValue(character);
853         ADVANCE_PAST_NON_NEWLINE_TO(AttributeValueUnquotedState);
854     END_STATE()
855
856     BEGIN_STATE(CharacterReferenceInAttributeValueState)
857         bool notEnoughCharacters = false;
858         StringBuilder decodedEntity;
859         bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
860         if (notEnoughCharacters)
861             RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
862         if (!success) {
863             ASSERT(decodedEntity.isEmpty());
864             m_token.appendToAttributeValue('&');
865         } else {
866             for (unsigned i = 0; i < decodedEntity.length(); ++i)
867                 m_token.appendToAttributeValue(decodedEntity[i]);
868         }
869         // We're supposed to switch back to the attribute value state that
870         // we were in when we were switched into this state. Rather than
871         // keeping track of this explicitly, we observe that the previous
872         // state can be determined by m_additionalAllowedCharacter.
873         if (m_additionalAllowedCharacter == '"')
874             SWITCH_TO(AttributeValueDoubleQuotedState);
875         if (m_additionalAllowedCharacter == '\'')
876             SWITCH_TO(AttributeValueSingleQuotedState);
877         ASSERT(m_additionalAllowedCharacter == '>');
878         SWITCH_TO(AttributeValueUnquotedState);
879     END_STATE()
880
881     BEGIN_STATE(AfterAttributeValueQuotedState)
882         if (isTokenizerWhitespace(character))
883             ADVANCE_TO(BeforeAttributeNameState);
884         if (character == '/')
885             ADVANCE_PAST_NON_NEWLINE_TO(SelfClosingStartTagState);
886         if (character == '>')
887             return emitAndResumeInDataState(source);
888         if (m_options.usePreHTML5ParserQuirks && character == '<')
889             return emitAndReconsumeInDataState();
890         if (character == kEndOfFileMarker) {
891             parseError();
892             RECONSUME_IN(DataState);
893         }
894         parseError();
895         RECONSUME_IN(BeforeAttributeNameState);
896     END_STATE()
897
898     BEGIN_STATE(SelfClosingStartTagState)
899         if (character == '>') {
900             m_token.setSelfClosing();
901             return emitAndResumeInDataState(source);
902         }
903         if (character == kEndOfFileMarker) {
904             parseError();
905             RECONSUME_IN(DataState);
906         }
907         parseError();
908         RECONSUME_IN(BeforeAttributeNameState);
909     END_STATE()
910
911     BEGIN_STATE(BogusCommentState)
912         m_token.beginComment();
913         RECONSUME_IN(ContinueBogusCommentState);
914     END_STATE()
915
916     BEGIN_STATE(ContinueBogusCommentState)
917         if (character == '>')
918             return emitAndResumeInDataState(source);
919         if (character == kEndOfFileMarker)
920             return emitAndReconsumeInDataState();
921         m_token.appendToComment(character);
922         ADVANCE_TO(ContinueBogusCommentState);
923     END_STATE()
924
925     BEGIN_STATE(MarkupDeclarationOpenState)
926         if (character == '-') {
927             auto result = source.advancePast("--");
928             if (result == SegmentedString::DidMatch) {
929                 m_token.beginComment();
930                 SWITCH_TO(CommentStartState);
931             }
932             if (result == SegmentedString::NotEnoughCharacters)
933                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
934         } else if (isASCIIAlphaCaselessEqual(character, 'd')) {
935             auto result = source.advancePastLettersIgnoringASCIICase("doctype");
936             if (result == SegmentedString::DidMatch)
937                 SWITCH_TO(DOCTYPEState);
938             if (result == SegmentedString::NotEnoughCharacters)
939                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
940         } else if (character == '[' && shouldAllowCDATA()) {
941             auto result = source.advancePast("[CDATA[");
942             if (result == SegmentedString::DidMatch)
943                 SWITCH_TO(CDATASectionState);
944             if (result == SegmentedString::NotEnoughCharacters)
945                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
946         }
947         parseError();
948         RECONSUME_IN(BogusCommentState);
949     END_STATE()
950
951     BEGIN_STATE(CommentStartState)
952         if (character == '-')
953             ADVANCE_PAST_NON_NEWLINE_TO(CommentStartDashState);
954         if (character == '>') {
955             parseError();
956             return emitAndResumeInDataState(source);
957         }
958         if (character == kEndOfFileMarker) {
959             parseError();
960             return emitAndReconsumeInDataState();
961         }
962         m_token.appendToComment(character);
963         ADVANCE_TO(CommentState);
964     END_STATE()
965
966     BEGIN_STATE(CommentStartDashState)
967         if (character == '-')
968             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
969         if (character == '>') {
970             parseError();
971             return emitAndResumeInDataState(source);
972         }
973         if (character == kEndOfFileMarker) {
974             parseError();
975             return emitAndReconsumeInDataState();
976         }
977         m_token.appendToComment('-');
978         m_token.appendToComment(character);
979         ADVANCE_TO(CommentState);
980     END_STATE()
981
982     BEGIN_STATE(CommentState)
983         if (character == '-')
984             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState);
985         if (character == kEndOfFileMarker) {
986             parseError();
987             return emitAndReconsumeInDataState();
988         }
989         m_token.appendToComment(character);
990         ADVANCE_TO(CommentState);
991     END_STATE()
992
993     BEGIN_STATE(CommentEndDashState)
994         if (character == '-')
995             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
996         if (character == kEndOfFileMarker) {
997             parseError();
998             return emitAndReconsumeInDataState();
999         }
1000         m_token.appendToComment('-');
1001         m_token.appendToComment(character);
1002         ADVANCE_TO(CommentState);
1003     END_STATE()
1004
1005     BEGIN_STATE(CommentEndState)
1006         if (character == '>')
1007             return emitAndResumeInDataState(source);
1008         if (character == '!') {
1009             parseError();
1010             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndBangState);
1011         }
1012         if (character == '-') {
1013             parseError();
1014             m_token.appendToComment('-');
1015             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndState);
1016         }
1017         if (character == kEndOfFileMarker) {
1018             parseError();
1019             return emitAndReconsumeInDataState();
1020         }
1021         parseError();
1022         m_token.appendToComment('-');
1023         m_token.appendToComment('-');
1024         m_token.appendToComment(character);
1025         ADVANCE_TO(CommentState);
1026     END_STATE()
1027
1028     BEGIN_STATE(CommentEndBangState)
1029         if (character == '-') {
1030             m_token.appendToComment('-');
1031             m_token.appendToComment('-');
1032             m_token.appendToComment('!');
1033             ADVANCE_PAST_NON_NEWLINE_TO(CommentEndDashState);
1034         }
1035         if (character == '>')
1036             return emitAndResumeInDataState(source);
1037         if (character == kEndOfFileMarker) {
1038             parseError();
1039             return emitAndReconsumeInDataState();
1040         }
1041         m_token.appendToComment('-');
1042         m_token.appendToComment('-');
1043         m_token.appendToComment('!');
1044         m_token.appendToComment(character);
1045         ADVANCE_TO(CommentState);
1046     END_STATE()
1047
1048     BEGIN_STATE(DOCTYPEState)
1049         if (isTokenizerWhitespace(character))
1050             ADVANCE_TO(BeforeDOCTYPENameState);
1051         if (character == kEndOfFileMarker) {
1052             parseError();
1053             m_token.beginDOCTYPE();
1054             m_token.setForceQuirks();
1055             return emitAndReconsumeInDataState();
1056         }
1057         parseError();
1058         RECONSUME_IN(BeforeDOCTYPENameState);
1059     END_STATE()
1060
1061     BEGIN_STATE(BeforeDOCTYPENameState)
1062         if (isTokenizerWhitespace(character))
1063             ADVANCE_TO(BeforeDOCTYPENameState);
1064         if (character == '>') {
1065             parseError();
1066             m_token.beginDOCTYPE();
1067             m_token.setForceQuirks();
1068             return emitAndResumeInDataState(source);
1069         }
1070         if (character == kEndOfFileMarker) {
1071             parseError();
1072             m_token.beginDOCTYPE();
1073             m_token.setForceQuirks();
1074             return emitAndReconsumeInDataState();
1075         }
1076         m_token.beginDOCTYPE(toASCIILower(character));
1077         ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState);
1078     END_STATE()
1079
1080     BEGIN_STATE(DOCTYPENameState)
1081         if (isTokenizerWhitespace(character))
1082             ADVANCE_TO(AfterDOCTYPENameState);
1083         if (character == '>')
1084             return emitAndResumeInDataState(source);
1085         if (character == kEndOfFileMarker) {
1086             parseError();
1087             m_token.setForceQuirks();
1088             return emitAndReconsumeInDataState();
1089         }
1090         m_token.appendToName(toASCIILower(character));
1091         ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPENameState);
1092     END_STATE()
1093
1094     BEGIN_STATE(AfterDOCTYPENameState)
1095         if (isTokenizerWhitespace(character))
1096             ADVANCE_TO(AfterDOCTYPENameState);
1097         if (character == '>')
1098             return emitAndResumeInDataState(source);
1099         if (character == kEndOfFileMarker) {
1100             parseError();
1101             m_token.setForceQuirks();
1102             return emitAndReconsumeInDataState();
1103         }
1104         if (isASCIIAlphaCaselessEqual(character, 'p')) {
1105             auto result = source.advancePastLettersIgnoringASCIICase("public");
1106             if (result == SegmentedString::DidMatch)
1107                 SWITCH_TO(AfterDOCTYPEPublicKeywordState);
1108             if (result == SegmentedString::NotEnoughCharacters)
1109                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
1110         } else if (isASCIIAlphaCaselessEqual(character, 's')) {
1111             auto result = source.advancePastLettersIgnoringASCIICase("system");
1112             if (result == SegmentedString::DidMatch)
1113                 SWITCH_TO(AfterDOCTYPESystemKeywordState);
1114             if (result == SegmentedString::NotEnoughCharacters)
1115                 RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
1116         }
1117         parseError();
1118         m_token.setForceQuirks();
1119         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1120     END_STATE()
1121
1122     BEGIN_STATE(AfterDOCTYPEPublicKeywordState)
1123         if (isTokenizerWhitespace(character))
1124             ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
1125         if (character == '"') {
1126             parseError();
1127             m_token.setPublicIdentifierToEmptyString();
1128             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
1129         }
1130         if (character == '\'') {
1131             parseError();
1132             m_token.setPublicIdentifierToEmptyString();
1133             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
1134         }
1135         if (character == '>') {
1136             parseError();
1137             m_token.setForceQuirks();
1138             return emitAndResumeInDataState(source);
1139         }
1140         if (character == kEndOfFileMarker) {
1141             parseError();
1142             m_token.setForceQuirks();
1143             return emitAndReconsumeInDataState();
1144         }
1145         parseError();
1146         m_token.setForceQuirks();
1147         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1148     END_STATE()
1149
1150     BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState)
1151         if (isTokenizerWhitespace(character))
1152             ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
1153         if (character == '"') {
1154             m_token.setPublicIdentifierToEmptyString();
1155             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
1156         }
1157         if (character == '\'') {
1158             m_token.setPublicIdentifierToEmptyString();
1159             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
1160         }
1161         if (character == '>') {
1162             parseError();
1163             m_token.setForceQuirks();
1164             return emitAndResumeInDataState(source);
1165         }
1166         if (character == kEndOfFileMarker) {
1167             parseError();
1168             m_token.setForceQuirks();
1169             return emitAndReconsumeInDataState();
1170         }
1171         parseError();
1172         m_token.setForceQuirks();
1173         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1174     END_STATE()
1175
1176     BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState)
1177         if (character == '"')
1178             ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);
1179         if (character == '>') {
1180             parseError();
1181             m_token.setForceQuirks();
1182             return emitAndResumeInDataState(source);
1183         }
1184         if (character == kEndOfFileMarker) {
1185             parseError();
1186             m_token.setForceQuirks();
1187             return emitAndReconsumeInDataState();
1188         }
1189         m_token.appendToPublicIdentifier(character);
1190         ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
1191     END_STATE()
1192
1193     BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState)
1194         if (character == '\'')
1195             ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPEPublicIdentifierState);
1196         if (character == '>') {
1197             parseError();
1198             m_token.setForceQuirks();
1199             return emitAndResumeInDataState(source);
1200         }
1201         if (character == kEndOfFileMarker) {
1202             parseError();
1203             m_token.setForceQuirks();
1204             return emitAndReconsumeInDataState();
1205         }
1206         m_token.appendToPublicIdentifier(character);
1207         ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
1208     END_STATE()
1209
1210     BEGIN_STATE(AfterDOCTYPEPublicIdentifierState)
1211         if (isTokenizerWhitespace(character))
1212             ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
1213         if (character == '>')
1214             return emitAndResumeInDataState(source);
1215         if (character == '"') {
1216             parseError();
1217             m_token.setSystemIdentifierToEmptyString();
1218             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1219         }
1220         if (character == '\'') {
1221             parseError();
1222             m_token.setSystemIdentifierToEmptyString();
1223             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1224         }
1225         if (character == kEndOfFileMarker) {
1226             parseError();
1227             m_token.setForceQuirks();
1228             return emitAndReconsumeInDataState();
1229         }
1230         parseError();
1231         m_token.setForceQuirks();
1232         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1233     END_STATE()
1234
1235     BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState)
1236         if (isTokenizerWhitespace(character))
1237             ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
1238         if (character == '>')
1239             return emitAndResumeInDataState(source);
1240         if (character == '"') {
1241             m_token.setSystemIdentifierToEmptyString();
1242             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1243         }
1244         if (character == '\'') {
1245             m_token.setSystemIdentifierToEmptyString();
1246             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1247         }
1248         if (character == kEndOfFileMarker) {
1249             parseError();
1250             m_token.setForceQuirks();
1251             return emitAndReconsumeInDataState();
1252         }
1253         parseError();
1254         m_token.setForceQuirks();
1255         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1256     END_STATE()
1257
1258     BEGIN_STATE(AfterDOCTYPESystemKeywordState)
1259         if (isTokenizerWhitespace(character))
1260             ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
1261         if (character == '"') {
1262             parseError();
1263             m_token.setSystemIdentifierToEmptyString();
1264             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1265         }
1266         if (character == '\'') {
1267             parseError();
1268             m_token.setSystemIdentifierToEmptyString();
1269             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1270         }
1271         if (character == '>') {
1272             parseError();
1273             m_token.setForceQuirks();
1274             return emitAndResumeInDataState(source);
1275         }
1276         if (character == kEndOfFileMarker) {
1277             parseError();
1278             m_token.setForceQuirks();
1279             return emitAndReconsumeInDataState();
1280         }
1281         parseError();
1282         m_token.setForceQuirks();
1283         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1284     END_STATE()
1285
1286     BEGIN_STATE(BeforeDOCTYPESystemIdentifierState)
1287         if (isTokenizerWhitespace(character))
1288             ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
1289         if (character == '"') {
1290             m_token.setSystemIdentifierToEmptyString();
1291             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1292         }
1293         if (character == '\'') {
1294             m_token.setSystemIdentifierToEmptyString();
1295             ADVANCE_PAST_NON_NEWLINE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1296         }
1297         if (character == '>') {
1298             parseError();
1299             m_token.setForceQuirks();
1300             return emitAndResumeInDataState(source);
1301         }
1302         if (character == kEndOfFileMarker) {
1303             parseError();
1304             m_token.setForceQuirks();
1305             return emitAndReconsumeInDataState();
1306         }
1307         parseError();
1308         m_token.setForceQuirks();
1309         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1310     END_STATE()
1311
1312     BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState)
1313         if (character == '"')
1314             ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);
1315         if (character == '>') {
1316             parseError();
1317             m_token.setForceQuirks();
1318             return emitAndResumeInDataState(source);
1319         }
1320         if (character == kEndOfFileMarker) {
1321             parseError();
1322             m_token.setForceQuirks();
1323             return emitAndReconsumeInDataState();
1324         }
1325         m_token.appendToSystemIdentifier(character);
1326         ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
1327     END_STATE()
1328
1329     BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState)
1330         if (character == '\'')
1331             ADVANCE_PAST_NON_NEWLINE_TO(AfterDOCTYPESystemIdentifierState);
1332         if (character == '>') {
1333             parseError();
1334             m_token.setForceQuirks();
1335             return emitAndResumeInDataState(source);
1336         }
1337         if (character == kEndOfFileMarker) {
1338             parseError();
1339             m_token.setForceQuirks();
1340             return emitAndReconsumeInDataState();
1341         }
1342         m_token.appendToSystemIdentifier(character);
1343         ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
1344     END_STATE()
1345
1346     BEGIN_STATE(AfterDOCTYPESystemIdentifierState)
1347         if (isTokenizerWhitespace(character))
1348             ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
1349         if (character == '>')
1350             return emitAndResumeInDataState(source);
1351         if (character == kEndOfFileMarker) {
1352             parseError();
1353             m_token.setForceQuirks();
1354             return emitAndReconsumeInDataState();
1355         }
1356         parseError();
1357         ADVANCE_PAST_NON_NEWLINE_TO(BogusDOCTYPEState);
1358     END_STATE()
1359
1360     BEGIN_STATE(BogusDOCTYPEState)
1361         if (character == '>')
1362             return emitAndResumeInDataState(source);
1363         if (character == kEndOfFileMarker)
1364             return emitAndReconsumeInDataState();
1365         ADVANCE_TO(BogusDOCTYPEState);
1366     END_STATE()
1367
1368     BEGIN_STATE(CDATASectionState)
1369         if (character == ']')
1370             ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionRightSquareBracketState);
1371         if (character == kEndOfFileMarker)
1372             RECONSUME_IN(DataState);
1373         bufferCharacter(character);
1374         ADVANCE_TO(CDATASectionState);
1375     END_STATE()
1376
1377     BEGIN_STATE(CDATASectionRightSquareBracketState)
1378         if (character == ']')
1379             ADVANCE_PAST_NON_NEWLINE_TO(CDATASectionDoubleRightSquareBracketState);
1380         bufferASCIICharacter(']');
1381         RECONSUME_IN(CDATASectionState);
1382     END_STATE()
1383
1384     BEGIN_STATE(CDATASectionDoubleRightSquareBracketState)
1385         if (character == '>')
1386             ADVANCE_PAST_NON_NEWLINE_TO(DataState);
1387         bufferASCIICharacter(']');
1388         bufferASCIICharacter(']');
1389         RECONSUME_IN(CDATASectionState);
1390     END_STATE()
1391
1392     }
1393
1394     ASSERT_NOT_REACHED();
1395     return false;
1396 }
1397
1398 String HTMLTokenizer::bufferedCharacters() const
1399 {
1400     // FIXME: Add an assert about m_state.
1401     StringBuilder characters;
1402     characters.reserveCapacity(numberOfBufferedCharacters());
1403     characters.append('<');
1404     characters.append('/');
1405     characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size());
1406     return characters.toString();
1407 }
1408
1409 void HTMLTokenizer::updateStateFor(const AtomicString& tagName)
1410 {
1411     if (tagName == textareaTag || tagName == titleTag)
1412         m_state = RCDATAState;
1413     else if (tagName == plaintextTag)
1414         m_state = PLAINTEXTState;
1415     else if (tagName == scriptTag)
1416         m_state = ScriptDataState;
1417     else if (tagName == styleTag
1418         || tagName == iframeTag
1419         || tagName == xmpTag
1420         || (tagName == noembedTag && m_options.pluginsEnabled)
1421         || tagName == noframesTag
1422         || (tagName == noscriptTag && m_options.scriptEnabled))
1423         m_state = RAWTEXTState;
1424 }
1425
1426 inline void HTMLTokenizer::appendToTemporaryBuffer(UChar character)
1427 {
1428     ASSERT(isASCII(character));
1429     m_temporaryBuffer.append(character);
1430 }
1431
1432 inline bool HTMLTokenizer::temporaryBufferIs(const char* expectedString)
1433 {
1434     return vectorEqualsString(m_temporaryBuffer, expectedString);
1435 }
1436
1437 inline void HTMLTokenizer::appendToPossibleEndTag(UChar character)
1438 {
1439     ASSERT(isASCII(character));
1440     m_bufferedEndTagName.append(character);
1441 }
1442
1443 inline bool HTMLTokenizer::isAppropriateEndTag() const
1444 {
1445     if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
1446         return false;
1447
1448     unsigned size = m_bufferedEndTagName.size();
1449
1450     for (unsigned i = 0; i < size; i++) {
1451         if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
1452             return false;
1453     }
1454
1455     return true;
1456 }
1457
1458 inline void HTMLTokenizer::parseError()
1459 {
1460 }
1461
1462 }