2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef HTMLTreeBuilder_h
27 #define HTMLTreeBuilder_h
30 #include "FragmentScriptingPermission.h"
31 #include "HTMLConstructionSite.h"
32 #include "HTMLElementStack.h"
33 #include "HTMLFormattingElementList.h"
34 #include "HTMLTokenizer.h"
35 #include <wtf/Noncopyable.h>
36 #include <wtf/OwnPtr.h>
37 #include <wtf/PassOwnPtr.h>
38 #include <wtf/PassRefPtr.h>
39 #include <wtf/RefPtr.h>
40 #include <wtf/unicode/Unicode.h>
44 class AtomicHTMLToken;
46 class DocumentFragment;
52 class HTMLTreeBuilder : public Noncopyable {
// Factory for the whole-document case: allocates an HTMLTreeBuilder through
// the (HTMLTokenizer*, HTMLDocument*, ...) constructor overload, forwarding
// all four arguments unchanged, and returns the result of adoptPtr() as a
// PassOwnPtr.
54 static PassOwnPtr<HTMLTreeBuilder> create(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
56 return adoptPtr(new HTMLTreeBuilder(tokenizer, document, reportErrors, usePreHTML5ParserQuirks));
// Factory for the fragment case: allocates an HTMLTreeBuilder through the
// (HTMLTokenizer*, DocumentFragment*, Element* contextElement, ...)
// constructor overload, forwarding all five arguments unchanged, and returns
// the result of adoptPtr() as a PassOwnPtr.
58 static PassOwnPtr<HTMLTreeBuilder> create(HTMLTokenizer* tokenizer, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
60 return adoptPtr(new HTMLTreeBuilder(tokenizer, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks));
// Stores the given flag in m_isPaused; this inline setter does nothing else.
66 void setPaused(bool paused) { m_isPaused = paused; }
// Returns the current value of m_isPaused.
67 bool isPaused() const { return m_isPaused; }
69 // The token really should be passed as a const& since it's never modified.
70 void constructTreeFromToken(HTMLToken&);
71 void constructTreeFromAtomicToken(AtomicHTMLToken&);
73 // Must be called when parser is paused before calling the parser again.
74 PassRefPtr<Element> takeScriptToProcess(int& scriptStartLine);
76 // Done, close any open tags, etc.
79 static bool scriptEnabled(Frame*);
80 static bool pluginsEnabled(Frame*);
83 class FakeInsertionMode;
84 class ExternalCharacterTokenBuffer;
85 // Represents HTML5 "insertion mode"
86 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
105 InForeignContentMode,
110 AfterAfterFramesetMode,
113 HTMLTreeBuilder(HTMLTokenizer*, HTMLDocument*, bool reportErrors, bool usePreHTML5ParserQuirks);
114 HTMLTreeBuilder(HTMLTokenizer*, DocumentFragment*, Element* contextElement, FragmentScriptingPermission, bool usePreHTML5ParserQuirks);
// True exactly when m_fragmentContext holds a non-null fragment pointer.
116 bool isParsingFragment() const { return m_fragmentContext.fragment() != 0; }
118 void processToken(AtomicHTMLToken&);
120 void processDoctypeToken(AtomicHTMLToken&);
121 void processStartTag(AtomicHTMLToken&);
122 void processEndTag(AtomicHTMLToken&);
123 void processComment(AtomicHTMLToken&);
124 void processCharacter(AtomicHTMLToken&);
125 void processEndOfFile(AtomicHTMLToken&);
127 bool processStartTagForInHead(AtomicHTMLToken&);
128 void processStartTagForInBody(AtomicHTMLToken&);
129 void processStartTagForInTable(AtomicHTMLToken&);
130 void processEndTagForInBody(AtomicHTMLToken&);
131 void processEndTagForInTable(AtomicHTMLToken&);
132 void processEndTagForInTableBody(AtomicHTMLToken&);
133 void processEndTagForInRow(AtomicHTMLToken&);
134 void processEndTagForInCell(AtomicHTMLToken&);
136 void processIsindexStartTagForInBody(AtomicHTMLToken&);
137 bool processBodyEndTagForInBody(AtomicHTMLToken&);
138 bool processTableEndTagForInTable();
139 bool processCaptionEndTagForInCaption();
140 bool processColgroupEndTagForInColumnGroup();
141 bool processTrEndTagForInRow();
142 // FIXME: This function should be inlined into its one call site or it
143 // needs to assert which tokens it can be called with.
144 void processAnyOtherEndTagForInBody(AtomicHTMLToken&);
146 void processCharacterBuffer(ExternalCharacterTokenBuffer&);
148 void processFakeStartTag(const QualifiedName&, PassRefPtr<NamedNodeMap> attributes = 0);
149 void processFakeEndTag(const QualifiedName&);
150 void processFakeCharacters(const String&);
151 void processFakePEndTagIfPInButtonScope();
153 void processGenericRCDATAStartTag(AtomicHTMLToken&);
154 void processGenericRawTextStartTag(AtomicHTMLToken&);
155 void processScriptStartTag(AtomicHTMLToken&);
157 // Default processing for the different insertion modes.
158 void defaultForInitial();
159 void defaultForBeforeHTML();
160 void defaultForBeforeHead();
161 void defaultForInHead();
162 void defaultForInHeadNoscript();
163 void defaultForAfterHead();
164 void defaultForInTableText();
166 void prepareToReprocessToken();
168 void reprocessStartTag(AtomicHTMLToken&);
169 void reprocessEndTag(AtomicHTMLToken&);
171 PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&);
173 HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
174 void callTheAdoptionAgency(AtomicHTMLToken&);
178 template <bool shouldClose(const Element*)>
179 void processCloseWhenNestedTag(AtomicHTMLToken&);
183 // FIXME: Implement error reporting.
// Currently a no-op stub: the token parameter is unnamed and the body is empty.
184 void parseError(AtomicHTMLToken&) { }
// Returns the current value of m_insertionMode.
186 InsertionMode insertionMode() const { return m_insertionMode; }
// Sets m_insertionMode to the given mode and clears m_isFakeInsertionMode,
// so isFakeInsertionMode() reports false afterwards.
187 void setInsertionMode(InsertionMode mode)
189 m_insertionMode = mode;
190 m_isFakeInsertionMode = false;
// Returns m_isFakeInsertionMode, which setFakeInsertionMode() sets to true and
// setInsertionMode() resets to false. Declared const, like insertionMode(),
// because it only reads state.
193 bool isFakeInsertionMode() const { return m_isFakeInsertionMode; }
// Sets m_insertionMode to the given mode and sets m_isFakeInsertionMode to
// true, so isFakeInsertionMode() reports true afterwards.
194 void setFakeInsertionMode(InsertionMode mode)
196 m_insertionMode = mode;
197 m_isFakeInsertionMode = true;
200 void resetInsertionModeAppropriately();
202 void processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token);
203 void resetForeignInsertionMode();
205 class FragmentParsingContext : public Noncopyable {
207 FragmentParsingContext();
208 FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
209 ~FragmentParsingContext();
211 Document* document() const;
// Returns the stored m_fragment pointer without asserting on it;
// HTMLTreeBuilder::isParsingFragment() tests this result for null.
212 DocumentFragment* fragment() const { return m_fragment; }
// Returns m_contextElement, after ASSERTing that m_fragment is non-null.
213 Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; }
// Returns m_scriptingPermission, after ASSERTing that m_fragment is non-null.
214 FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; }
219 RefPtr<Document> m_dummyDocumentForFragmentParsing;
220 DocumentFragment* m_fragment;
221 Element* m_contextElement;
223 // FragmentScriptingNotAllowed causes the parser to remove children
224 // from <script> tags (so JavaScript doesn't show up in pastes).
225 FragmentScriptingPermission m_scriptingPermission;
228 FragmentParsingContext m_fragmentContext;
230 Document* m_document;
231 HTMLConstructionSite m_tree;
235 bool m_isFakeInsertionMode;
237 // FIXME: InsertionModes should be a separate object to prevent direct
238 // manipulation of these variables. For now, be careful to always use
239 // setInsertionMode and never set m_insertionMode directly.
240 InsertionMode m_insertionMode;
241 InsertionMode m_originalInsertionMode;
243 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
244 Vector<UChar> m_pendingTableCharacters;
246 // HTML5 spec requires that we be able to change the state of the tokenizer
247 // from within parser actions.
248 HTMLTokenizer* m_tokenizer;
250 RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
251 int m_scriptToProcessStartLine; // Starting line number of the script tag needing processing.
253 // FIXME: We probably want to remove this member. Originally, it was
254 // created to service the legacy tree builder, but it seems to be used for
255 // some other things now.
256 int m_lastScriptElementStartLine;
258 bool m_usePreHTML5ParserQuirks;
260 bool m_hasPendingForeignInsertionModeSteps;