HTMLDocumentParser::insert should be aware of threaded parsing
authorabarth@webkit.org <abarth@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 30 Jan 2013 23:16:03 +0000 (23:16 +0000)
committerabarth@webkit.org <abarth@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 30 Jan 2013 23:16:03 +0000 (23:16 +0000)
https://bugs.webkit.org/show_bug.cgi?id=107764

Reviewed by Eric Seidel.

This patch is an incremental step towards recovering from
document.write invalidating our speculative parsing buffer. The
approach I've taken is to make it possible to transfer the
HTMLDocumentParser's HTMLTokenizer and HTMLToken to the background
thread. To make that possible, I've taught the HTMLDocumentParser how
to operate without a tokenizer or a token.

Not having a tokenizer or a token while parsing in the background also
helps us avoid accidentially feeding input to the main thread's
tokenizer when we're supposed to feed it to the background thread.

This patch shouldn't have any behavior change (other than possibly
fixing a crash in fast/parser when threading parsing is enabled).

* html/parser/HTMLDocumentParser.cpp:
(WebCore::HTMLDocumentParser::HTMLDocumentParser):
(WebCore::HTMLDocumentParser::didFailSpeculation):
(WebCore):
(WebCore::HTMLDocumentParser::insert):
(WebCore::HTMLDocumentParser::finish):
(WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):
* html/parser/HTMLDocumentParser.h:
(HTMLDocumentParser):
* html/parser/HTMLTreeBuilder.cpp:
(WebCore::HTMLTreeBuilder::constructTree):
(WebCore::HTMLTreeBuilder::processStartTagForInBody):
(WebCore::HTMLTreeBuilder::processEndTag):
(WebCore::HTMLTreeBuilder::processGenericRCDATAStartTag):
(WebCore::HTMLTreeBuilder::processGenericRawTextStartTag):
(WebCore::HTMLTreeBuilder::processScriptStartTag):
* html/parser/TextDocumentParser.cpp:
(WebCore::TextDocumentParser::TextDocumentParser):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@141328 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/html/parser/HTMLDocumentParser.cpp
Source/WebCore/html/parser/HTMLDocumentParser.h
Source/WebCore/html/parser/HTMLTreeBuilder.cpp
Source/WebCore/html/parser/TextDocumentParser.cpp

index bb8c292..fa024c6 100644 (file)
@@ -1,3 +1,43 @@
+2013-01-30  Adam Barth  <abarth@webkit.org>
+
+        HTMLDocumentParser::insert should be aware of threaded parsing
+        https://bugs.webkit.org/show_bug.cgi?id=107764
+
+        Reviewed by Eric Seidel.
+
+        This patch is an incremental step towards recovering from
+        document.write invalidating our speculative parsing buffer. The
+        approach I've taken is to make it possible to transfer the
+        HTMLDocumentParser's HTMLTokenizer and HTMLToken to the background
+        thread. To make that possible, I've taught the HTMLDocumentParser how
+        to operate without a tokenizer or a token.
+
+        Not having a tokenizer or a token while parsing in the background also
+        helps us avoid accidentially feeding input to the main thread's
+        tokenizer when we're supposed to feed it to the background thread.
+
+        This patch shouldn't have any behavior change (other than possibly
+        fixing a crash in fast/parser when threading parsing is enabled).
+
+        * html/parser/HTMLDocumentParser.cpp:
+        (WebCore::HTMLDocumentParser::HTMLDocumentParser):
+        (WebCore::HTMLDocumentParser::didFailSpeculation):
+        (WebCore):
+        (WebCore::HTMLDocumentParser::insert):
+        (WebCore::HTMLDocumentParser::finish):
+        (WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):
+        * html/parser/HTMLDocumentParser.h:
+        (HTMLDocumentParser):
+        * html/parser/HTMLTreeBuilder.cpp:
+        (WebCore::HTMLTreeBuilder::constructTree):
+        (WebCore::HTMLTreeBuilder::processStartTagForInBody):
+        (WebCore::HTMLTreeBuilder::processEndTag):
+        (WebCore::HTMLTreeBuilder::processGenericRCDATAStartTag):
+        (WebCore::HTMLTreeBuilder::processGenericRawTextStartTag):
+        (WebCore::HTMLTreeBuilder::processScriptStartTag):
+        * html/parser/TextDocumentParser.cpp:
+        (WebCore::TextDocumentParser::TextDocumentParser):
+
 2013-01-30  Rafael Weinstein  <rafaelw@chromium.org>
 
         [HTMLTemplateElement] prevent the parser from removing nodes from the content when the foster agency is processing formatting elements
index 5b8be2f..c000766 100644 (file)
@@ -77,8 +77,8 @@ static HTMLTokenizerState::State tokenizerStateForContextElement(Element* contex
 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
     : ScriptableDocumentParser(document)
     , m_options(document)
-    , m_token(adoptPtr(new HTMLToken))
-    , m_tokenizer(HTMLTokenizer::create(m_options))
+    , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken))
+    , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options))
     , m_scriptRunner(HTMLScriptRunner::create(document, this))
     , m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, m_options))
     , m_parserScheduler(HTMLParserScheduler::create(this))
@@ -90,6 +90,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors
     , m_haveBackgroundParser(false)
     , m_pumpSessionNestingLevel(0)
 {
+    ASSERT(shouldUseThreading() || (m_token && m_tokenizer));
 }
 
 // FIXME: Member variables should be grouped into self-initializing structs to
@@ -108,6 +109,7 @@ HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* cont
     , m_haveBackgroundParser(false)
     , m_pumpSessionNestingLevel(0)
 {
+    ASSERT(!shouldUseThreading());
     bool reportErrors = false; // For now document fragment parsing never reports errors.
     m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options));
 }
@@ -284,6 +286,11 @@ void HTMLDocumentParser::didReceiveTokensFromBackgroundParser(PassOwnPtr<Compact
     processTokensFromBackgroundParser(tokens);
 }
 
+void HTMLDocumentParser::didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>)
+{
+    // FIXME: Tell the background parser to resume parsing with this token and tokenizer.
+}
+
 void HTMLDocumentParser::processTokensFromBackgroundParser(PassOwnPtr<CompactHTMLTokenStream> tokens)
 {
     ASSERT(shouldUseThreading());
@@ -446,11 +453,29 @@ void HTMLDocumentParser::insert(const SegmentedString& source)
     // but we need to ensure it isn't deleted yet.
     RefPtr<HTMLDocumentParser> protect(this);
 
+#if ENABLE(THREADED_HTML_PARSER)
+    if (!m_tokenizer) {
+        ASSERT(!inPumpSession());
+        ASSERT(m_haveBackgroundParser || wasCreatedByScript());
+        m_token = adoptPtr(new HTMLToken);
+        m_tokenizer = HTMLTokenizer::create(m_options);
+    }
+#endif
+
     SegmentedString excludedLineNumberSource(source);
     excludedLineNumberSource.setExcludeLineNumbers();
     m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
     pumpTokenizerIfPossible(ForceSynchronous);
-    
+
+#if ENABLE(THREADED_HTML_PARSER)
+    if (!inPumpSession() && m_haveBackgroundParser) {
+        // FIXME: If the tokenizer is in the same state as when we started this function,
+        // then we haven't necessarily failed our speculation.
+        didFailSpeculation(m_token.release(), m_tokenizer.release());
+        return;
+    }
+#endif
+
     if (isWaitingForScripts()) {
         // Check the document.write() output with a separate preload scanner as
         // the main scanner can't deal with insertions.
@@ -604,6 +629,14 @@ void HTMLDocumentParser::finish()
         HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finishPartial, ParserMap::identifierForParser(this)));
         return;
     }
+    if (shouldUseThreading() && !wasCreatedByScript()) {
+        ASSERT(!m_tokenizer && !m_token);
+        // We're finishing before receiving any data. Rather than booting up
+        // the background parser just to spin it down, we finish parsing
+        // synchronously.
+        m_token = adoptPtr(new HTMLToken);
+        m_tokenizer = HTMLTokenizer::create(m_options);
+    }
 #endif
 
     attemptToEnd();
@@ -670,7 +703,7 @@ void HTMLDocumentParser::resumeParsingAfterScriptExecution()
     if (shouldUseThreading()) {
         while (!m_pendingTokens.isEmpty()) {
             processTokensFromBackgroundParser(m_pendingTokens.takeFirst());
-            if (isWaitingForScripts())
+            if (isWaitingForScripts() || isStopped())
                 return;
         }
         return;
index 754f81e..a83b29c 100644 (file)
@@ -124,6 +124,7 @@ private:
 #if ENABLE(THREADED_HTML_PARSER)
     void startBackgroundParser();
     void stopBackgroundParser();
+    void didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
     void processTokensFromBackgroundParser(PassOwnPtr<CompactHTMLTokenStream>);
 #endif
 
index 78313ee..e78e5c7 100644 (file)
@@ -367,13 +367,15 @@ void HTMLTreeBuilder::constructTree(AtomicHTMLToken* token)
     else
         processToken(token);
 
-    bool inForeignContent = !m_tree.isEmpty()
-        && !m_tree.currentStackItem()->isInHTMLNamespace()
-        && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
-        && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
+    if (m_parser->tokenizer()) {
+        bool inForeignContent = !m_tree.isEmpty()
+            && !m_tree.currentStackItem()->isInHTMLNamespace()
+            && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
+            && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
 
-    m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
-    m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
+        m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
+        m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
+    }
 
     m_tree.executeQueuedTasks();
     // We might be detached now.
@@ -740,7 +742,8 @@ void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token)
     if (token->name() == plaintextTag) {
         processFakePEndTagIfPInButtonScope();
         m_tree.insertHTMLElement(token);
-        m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
+        if (m_parser->tokenizer())
+            m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
         return;
     }
     if (token->name() == buttonTag) {
@@ -850,7 +853,8 @@ void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token)
     if (token->name() == textareaTag) {
         m_tree.insertHTMLElement(token);
         m_shouldSkipLeadingNewline = true;
-        m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
+        if (m_parser->tokenizer())
+            m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
         m_originalInsertionMode = m_insertionMode;
         m_framesetOk = false;
         setInsertionMode(TextMode);
@@ -2163,12 +2167,14 @@ void HTMLTreeBuilder::processEndTag(AtomicHTMLToken* token)
                 m_scriptToProcess->removeChildren();
             setInsertionMode(m_originalInsertionMode);
 
-            // This token will not have been created by the tokenizer if a
-            // self-closing script tag was encountered and pre-HTML5 parser
-            // quirks are enabled. We must set the tokenizer's state to
-            // DataState explicitly if the tokenizer didn't have a chance to.
-            ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_options.usePreHTML5ParserQuirks || m_options.useThreading);
-            m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
+            if (m_parser->tokenizer()) {
+                // This token will not have been created by the tokenizer if a
+                // self-closing script tag was encountered and pre-HTML5 parser
+                // quirks are enabled. We must set the tokenizer's state to
+                // DataState explicitly if the tokenizer didn't have a chance to.
+                ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_options.usePreHTML5ParserQuirks || m_options.useThreading);
+                m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
+            }
             return;
         }
         m_tree.openElements()->pop();
@@ -2706,7 +2712,8 @@ void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken* token)
 {
     ASSERT(token->type() == HTMLTokenTypes::StartTag);
     m_tree.insertHTMLElement(token);
-    m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
+    if (m_parser->tokenizer())
+        m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
     m_originalInsertionMode = m_insertionMode;
     setInsertionMode(TextMode);
 }
@@ -2715,7 +2722,8 @@ void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken* token)
 {
     ASSERT(token->type() == HTMLTokenTypes::StartTag);
     m_tree.insertHTMLElement(token);
-    m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
+    if (m_parser->tokenizer())
+        m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
     m_originalInsertionMode = m_insertionMode;
     setInsertionMode(TextMode);
 }
@@ -2724,7 +2732,8 @@ void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken* token)
 {
     ASSERT(token->type() == HTMLTokenTypes::StartTag);
     m_tree.insertScriptElement(token);
-    m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
+    if (m_parser->tokenizer())
+        m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
     m_originalInsertionMode = m_insertionMode;
 
     TextPosition position = m_parser->textPosition();
index a0797b9..796c7ec 100644 (file)
@@ -38,7 +38,9 @@ TextDocumentParser::TextDocumentParser(HTMLDocument* document)
     : HTMLDocumentParser(document, false)
     , m_haveInsertedFakePreElement(false)
 {
-    tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
+    // FIXME: If we're using threading, we need to tell the BackgroundHTMLParser to use PLAINTEXTState.
+    if (tokenizer())
+        tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
 }
 
 TextDocumentParser::~TextDocumentParser()