https://bugs.webkit.org/show_bug.cgi?id=109240
Reviewed by Adam Barth.
Before the HTML5 parser rewrite, the text document parser
was completely custom. With the HTML5 parser, we just made
the TextDocumentParser use the HTMLDocumentParser with an
artificial script tag.
However, our solution was slightly over-engineered to avoid
lying about the column numbers of the first line of the text document
during parsing. :)
This change makes us use a simpler (and threading-compatible)
solution by just inserting a real "<pre>" tag into the
input stream instead of hacking one together with the treebuilder
and manually setting the Tokenizer state.
fast/parser/empty-text-resource.html covers this case.
* html/parser/TextDocumentParser.cpp:
(WebCore::TextDocumentParser::TextDocumentParser):
(WebCore::TextDocumentParser::insertFakePreElement):
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@142363 268f45cc-cd09-0410-ab3c-d52691b4dbfc
+2013-02-09 Eric Seidel <eric@webkit.org>
+
+ Fix TextDocumentParser to play nice with threading
+ https://bugs.webkit.org/show_bug.cgi?id=109240
+
+ Reviewed by Adam Barth.
+
+ Before the HTML5 parser rewrite, the text document parser
+ was completely custom. With the HTML5 parser, we just made
+ the TextDocumentParser use the HTMLDocumentParser with an
+ artificial script tag.
+
+ However, our solution was slightly over-engineered to avoid
+ lying about the column numbers of the first line of the text document
+ during parsing. :)
+
+ This change makes us use a simpler (and threading-compatible)
+ solution by just inserting a real "<pre>" tag into the
+ input stream instead of hacking one together with the treebuilder
+ and manually setting the Tokenizer state.
+
+ fast/parser/empty-text-resource.html covers this case.
+
+ * html/parser/TextDocumentParser.cpp:
+ (WebCore::TextDocumentParser::TextDocumentParser):
+ (WebCore::TextDocumentParser::insertFakePreElement):
+
2013-02-09 Kent Tamura <tkent@chromium.org>
Add missing copyright header
delete this;
}
+void BackgroundHTMLParser::forcePlaintextForTextDocument()
+{
+ // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser)
+ // to force this parser's tokenizer into PLAINTEXTState without using a <plaintext> tag.
+ // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons.
+ m_tokenizer->setState(HTMLTokenizerState::PLAINTEXTState);
+}
+
void BackgroundHTMLParser::markEndOfFile()
{
// FIXME: This should use InputStreamPreprocessor::endOfFileMarker
void finish();
void stop();
+ void forcePlaintextForTextDocument();
+
private:
BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> >, const HTMLParserOptions&, const WeakPtr<HTMLDocumentParser>&, PassOwnPtr<XSSAuditor>);
#endif // ENABLE(THREADED_HTML_PARSER)
+void HTMLDocumentParser::forcePlaintextForTextDocument()
+{
+#if ENABLE(THREADED_HTML_PARSER)
+ if (shouldUseThreading()) {
+ // This method is called before any data is appended, so the background parser
+ // may not have been started yet; start it ourselves before posting the state change to it.
+ if (!m_haveBackgroundParser)
+ startBackgroundParser();
+
+ HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::forcePlaintextForTextDocument, m_backgroundParser));
+ } else
+#endif
+ m_tokenizer->setState(HTMLTokenizerState::PLAINTEXTState);
+}
+
void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
{
ASSERT(!isStopped());
HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
+ void forcePlaintextForTextDocument();
+
private:
static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission permission)
{
: HTMLDocumentParser(document, false)
, m_haveInsertedFakePreElement(false)
{
- // FIXME: If we're using threading, we need to tell the BackgroundHTMLParser to use PLAINTEXTState.
- if (tokenizer())
- tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
}
TextDocumentParser::~TextDocumentParser()
// We create a fake token and give it to the tree builder rather than
// sending fake bytes through the front-end of the parser to avoid
// disturbing the line/column number calculations.
-
Vector<Attribute> attributes;
attributes.append(Attribute(styleAttr, "word-wrap: break-word; white-space: pre-wrap;"));
RefPtr<AtomicHTMLToken> fakePre = AtomicHTMLToken::create(HTMLTokenTypes::StartTag, preTag.localName(), attributes);
-
treeBuilder()->constructTree(fakePre.get());
+
// Normally we would skip the first \n after a <pre> element, but we don't
// want to skip the first \n for text documents!
treeBuilder()->setShouldSkipLeadingNewline(false);
+ // Although Text Documents expose a "pre" element in their DOM, they
+ // act like a <plaintext> tag, so we have to force plaintext mode.
+ forcePlaintextForTextDocument();
+
m_haveInsertedFakePreElement = true;
}