2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef HTMLTreeBuilder_h
27 #define HTMLTreeBuilder_h
30 #include "FragmentScriptingPermission.h"
31 #include "HTMLConstructionSite.h"
32 #include "HTMLElementStack.h"
33 #include "HTMLFormattingElementList.h"
34 #include "HTMLTokenizer.h"
35 #include <wtf/text/TextPosition.h>
36 #include <wtf/Noncopyable.h>
37 #include <wtf/OwnPtr.h>
38 #include <wtf/PassOwnPtr.h>
39 #include <wtf/PassRefPtr.h>
40 #include <wtf/RefPtr.h>
41 #include <wtf/unicode/Unicode.h>
45 class AtomicHTMLToken;
47 class DocumentFragment;
52 class HTMLDocumentParser;
54 class HTMLTreeBuilder {
55 WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED;
// Factory overload taking an HTMLDocument*: allocates an HTMLTreeBuilder with
// `new`, forwarding all four arguments unchanged to the constructor, and
// returns the result wrapped by adoptPtr() as a PassOwnPtr.
57 static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
59 return adoptPtr(new HTMLTreeBuilder(parser, document, reportErrors, usePreHTML5ParserQuirks));
// Factory overload taking a DocumentFragment* plus its context element and
// scripting permission: allocates an HTMLTreeBuilder with `new`, forwarding all
// five arguments unchanged to the constructor, and returns the result wrapped
// by adoptPtr() as a PassOwnPtr.
61 static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
63 return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks));
// True when m_fragmentContext holds a non-null fragment; `!!` collapses the
// DocumentFragment* returned by fragment() to a bool.
67 bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
// Stores the requested paused state in m_isPaused; read back via isPaused().
71 void setPaused(bool paused) { m_isPaused = paused; }
// Returns the current value of m_isPaused.
72 bool isPaused() const { return m_isPaused; }
// Tree-construction entry points, one per token representation. Both take the
// token by non-const reference.
// NOTE(review): how the HTMLToken overload relates to the AtomicHTMLToken one
// is not visible in this header - presumably it converts and forwards; confirm
// in the implementation file.
74 // The token really should be passed as a const& since it's never modified.
75 void constructTreeFromToken(HTMLToken&);
76 void constructTreeFromAtomicToken(AtomicHTMLToken&);
78 // Must be called when parser is paused before calling the parser again.
// Returns an Element via PassRefPtr and takes scriptStartPosition by non-const
// reference.
// NOTE(review): presumably the returned element comes from m_scriptToProcess
// and scriptStartPosition is an out-parameter filled from
// m_scriptToProcessStartPosition - confirm in the implementation.
79 PassRefPtr<Element> takeScriptToProcess(TextPosition1& scriptStartPosition);
81 // Done, close any open tags, etc.
// Static predicates that take the Frame to query, so they need no builder
// instance.
// NOTE(review): which settings they consult, and how a null Frame is treated,
// is decided in the implementation and is not visible in this header.
84 static bool scriptEnabled(Frame*);
85 static bool pluginsEnabled(Frame*);
88 class FakeInsertionMode;
89 class ExternalCharacterTokenBuffer;
90 // Represents HTML5 "insertion mode"
91 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
110 InForeignContentMode,
115 AfterAfterFramesetMode,
// Constructors. Their parameter lists match the two create() overloads, which
// forward their arguments unchanged to `new HTMLTreeBuilder(...)`: one variant
// targets an HTMLDocument, the other a DocumentFragment with a context element.
118 HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument*, bool reportErrors, bool usePreHTML5ParserQuirks);
119 HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment*, Element* contextElement, FragmentScriptingPermission, bool usePreHTML5ParserQuirks);
// Token handlers; every one takes the AtomicHTMLToken by non-const reference.
// NOTE(review): processToken presumably dispatches on the token type to the
// type-specific handlers declared below - confirm in the implementation.
121 void processToken(AtomicHTMLToken&);
// One handler per token type named in the function (doctype, start tag, end
// tag, comment, character, end of file).
123 void processDoctypeToken(AtomicHTMLToken&);
124 void processStartTag(AtomicHTMLToken&);
125 void processEndTag(AtomicHTMLToken&);
126 void processComment(AtomicHTMLToken&);
127 void processCharacter(AtomicHTMLToken&);
128 void processEndOfFile(AtomicHTMLToken&);
// Start/end tag handlers named after individual insertion modes.
// processStartTagForInHead is the only one in this group that returns a value.
// NOTE(review): the meaning of that bool is not visible here - presumably
// "token was handled"; confirm in the implementation.
130 bool processStartTagForInHead(AtomicHTMLToken&);
131 void processStartTagForInBody(AtomicHTMLToken&);
132 void processStartTagForInTable(AtomicHTMLToken&);
133 void processEndTagForInBody(AtomicHTMLToken&);
134 void processEndTagForInTable(AtomicHTMLToken&);
135 void processEndTagForInTableBody(AtomicHTMLToken&);
136 void processEndTagForInRow(AtomicHTMLToken&);
137 void processEndTagForInCell(AtomicHTMLToken&);
// Handlers for individual tags within a given insertion mode.
139 void processIsindexStartTagForInBody(AtomicHTMLToken&);
// The bool-returning handlers below report a result to the caller; four of
// them take no token argument at all.
// NOTE(review): presumably the bool says whether the end tag was actually
// processed (as opposed to ignored) - confirm in the implementation.
140 bool processBodyEndTagForInBody(AtomicHTMLToken&);
141 bool processTableEndTagForInTable();
142 bool processCaptionEndTagForInCaption();
143 bool processColgroupEndTagForInColumnGroup();
144 bool processTrEndTagForInRow();
145 // FIXME: This function should be inlined into its one call site or it
146 // needs to assert which tokens it can be called with.
147 void processAnyOtherEndTagForInBody(AtomicHTMLToken&);
// Takes an ExternalCharacterTokenBuffer (the nested helper class
// forward-declared in this class) by non-const reference.
// NOTE(review): presumably consumes characters from the buffer as it goes -
// confirm in the implementation.
149 void processCharacterBuffer(ExternalCharacterTokenBuffer&);
// "Fake" token helpers: they are given a tag name or a string rather than a
// token object.
// NOTE(review): presumably each synthesizes a token and runs it through the
// regular handlers - confirm in the implementation.
// `attributes` defaults to 0 (null) when the caller supplies none.
151 void processFakeStartTag(const QualifiedName&, PassRefPtr<NamedNodeMap> attributes = 0);
152 void processFakeEndTag(const QualifiedName&);
153 void processFakeCharacters(const String&);
154 void processFakePEndTagIfPInButtonScope();
// Start-tag handlers for RCDATA elements, raw-text elements and <script>.
// NOTE(review): this class keeps an HTMLDocumentParser* so it can change the
// tokenizer state; presumably these handlers do exactly that - confirm.
156 void processGenericRCDATAStartTag(AtomicHTMLToken&);
157 void processGenericRawTextStartTag(AtomicHTMLToken&);
158 void processScriptStartTag(AtomicHTMLToken&);
160 // Default processing for the different insertion modes.
// One function per mode named in the suffix; none takes a token or returns a
// value.
161 void defaultForInitial();
162 void defaultForBeforeHTML();
163 void defaultForBeforeHead();
164 void defaultForInHead();
165 void defaultForInHeadNoscript();
166 void defaultForAfterHead();
167 void defaultForInTableText();
// Reprocessing helpers.
// NOTE(review): presumably used after the insertion mode has changed, to run
// the same token through the handlers again - confirm in the implementation.
169 void prepareToReprocessToken();
171 void reprocessStartTag(AtomicHTMLToken&);
172 void reprocessEndTag(AtomicHTMLToken&);
// Produces a NamedNodeMap (returned via PassRefPtr) from the given token.
// NOTE(review): presumably the attribute set for the <input> synthesized while
// handling <isindex> (see processIsindexStartTagForInBody) - confirm.
174 PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&);
// Given a formatting element, returns a record from the open-element stack.
// NOTE(review): whether this can return null when no furthest block exists is
// not visible in this header - confirm before dereferencing.
176 HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
// NOTE(review): presumably implements the HTML5 "adoption agency algorithm"
// for the given end-tag token - confirm in the implementation.
177 void callTheAdoptionAgency(AtomicHTMLToken&);
// Member template parameterised at compile time on a predicate function of
// type bool(const Element*), referred to as shouldClose.
// NOTE(review): presumably shouldClose is applied to open elements to decide
// which one to close before the new tag is inserted - confirm.
181 template <bool shouldClose(const Element*)>
182 void processCloseWhenNestedTag(AtomicHTMLToken&);
186 // FIXME: Implement error reporting.
// Currently a no-op: the body is empty and the token argument is unused.
187 void parseError(AtomicHTMLToken&) { }
// Returns the current value of m_insertionMode, whether it was installed by
// setInsertionMode() or setFakeInsertionMode().
189 InsertionMode insertionMode() const { return m_insertionMode; }
// Installs `mode` as the current insertion mode and clears the fake-mode flag,
// so isFakeInsertionMode() reports false afterwards.
190 void setInsertionMode(InsertionMode mode)
192 m_insertionMode = mode;
193 m_isFakeInsertionMode = false;
// Returns m_isFakeInsertionMode: true after setFakeInsertionMode(), false
// after setInsertionMode(). Const-qualified, like insertionMode() and
// isPaused(), because it only reads state; this also makes it callable from
// const member functions.
196 bool isFakeInsertionMode() const { return m_isFakeInsertionMode; }
// Installs `mode` as the current insertion mode and sets the fake-mode flag,
// so isFakeInsertionMode() reports true until setInsertionMode() is called.
197 void setFakeInsertionMode(InsertionMode mode)
199 m_insertionMode = mode;
200 m_isFakeInsertionMode = true;
// Insertion-mode reset helpers.
// NOTE(review): presumably corresponds to the HTML5 "reset the insertion mode
// appropriately" steps - confirm in the implementation.
203 void resetInsertionModeAppropriately();
// NOTE(review): the next two names suggest handling of tokens seen while in
// InForeignContentMode; the exact contract is not visible in this header.
205 void processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token);
206 void resetForeignInsertionMode();
// Groups the state used when parsing into a DocumentFragment: the fragment,
// its context element, the scripting permission and a dummy document pointer.
// Non-copyable.
208 class FragmentParsingContext {
209 WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
// NOTE(review): the default constructor presumably leaves m_fragment null,
// which is the state in which HTMLTreeBuilder::isParsingFragment() returns
// false - confirm in the implementation.
211 FragmentParsingContext();
212 FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
213 ~FragmentParsingContext();
215 Document* document() const;
// fragment() has no precondition and simply returns the stored pointer.
216 DocumentFragment* fragment() const { return m_fragment; }
// The two accessors below ASSERT(m_fragment) first, so they are only meant to
// be called when a fragment is present.
217 Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; }
218 FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; }
223 // Use a shared dummy document to avoid expensive Document creation.
224 // Hold a raw pointer to the document since there is no need to ref it.
225 HTMLDocument* m_dummyDocumentForFragmentParsing;
// Raw (non-RefPtr) pointers, returned by fragment() and contextElement().
226 DocumentFragment* m_fragment;
227 Element* m_contextElement;
229 // FragmentScriptingNotAllowed causes the Parser to remove children
230 // from <script> tags (so javascript doesn't show up in pastes).
231 FragmentScriptingPermission m_scriptingPermission;
// Fragment-parsing state; isParsingFragment() tests its fragment() pointer.
234 FragmentParsingContext m_fragmentContext;
// Raw pointer to the document. NOTE(review): ownership and lifetime are not
// visible in this header.
236 Document* m_document;
237 HTMLConstructionSite m_tree;
// Set to true by setFakeInsertionMode() and to false by setInsertionMode().
241 bool m_isFakeInsertionMode;
243 // FIXME: InsertionModes should be a separate object to prevent direct
244 // manipulation of these variables. For now, be careful to always use
245 // setInsertionMode and never set m_insertionMode directly.
246 InsertionMode m_insertionMode;
247 InsertionMode m_originalInsertionMode;
249 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
250 Vector<UChar> m_pendingTableCharacters;
252 // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
253 // from within parser actions. We also need it to track the current position.
254 HTMLDocumentParser* m_parser;
256 RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
257 TextPosition1 m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
259 // FIXME: We probably want to remove this member. Originally, it was
260 // created to service the legacy tree builder, but it seems to be used for
261 // some other things now.
// Note the type: TextPosition0 here, whereas m_scriptToProcessStartPosition
// above is a TextPosition1.
262 TextPosition0 m_lastScriptElementStartPosition;
// NOTE(review): presumably initialised from the constructor argument of the
// same name - confirm in the implementation.
264 bool m_usePreHTML5ParserQuirks;
266 bool m_hasPendingForeignInsertionModeSteps;