2010-07-24 Adam Barth <abarth@webkit.org>
author abarth@webkit.org <abarth@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 24 Jul 2010 17:33:12 +0000 (17:33 +0000)
committer abarth@webkit.org <abarth@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 24 Jul 2010 17:33:12 +0000 (17:33 +0000)
        Reviewed by Eric Seidel.

        Save a per-character branch in the HTML parser
        https://bugs.webkit.org/show_bug.cgi?id=42921

        The parsing benchmark claims this is a wash, but it makes me feel
        better.

        * html/HTMLToken.h:
        (WebCore::HTMLToken::clear):
        (WebCore::HTMLToken::beginStartTag):
        (WebCore::HTMLToken::beginEndTag):
        (WebCore::HTMLToken::ensureIsCharacterToken):
        (WebCore::HTMLToken::beginComment):
        (WebCore::HTMLToken::beginDOCTYPE):
        * html/HTMLTokenizer.cpp:
        (WebCore::HTMLTokenizer::bufferCharacter):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@64010 268f45cc-cd09-0410-ab3c-d52691b4dbfc

WebCore/ChangeLog
WebCore/html/HTMLToken.h
WebCore/html/HTMLTokenizer.cpp

index c8ce79d..e3dfb70 100644 (file)
@@ -1,3 +1,23 @@
+2010-07-24  Adam Barth  <abarth@webkit.org>
+
+        Reviewed by Eric Seidel.
+
+        Save a per-character branch in the HTML parser
+        https://bugs.webkit.org/show_bug.cgi?id=42921
+
+        The parsing benchmark claims this is a wash, but it makes me feel
+        better.
+
+        * html/HTMLToken.h:
+        (WebCore::HTMLToken::clear):
+        (WebCore::HTMLToken::beginStartTag):
+        (WebCore::HTMLToken::beginEndTag):
+        (WebCore::HTMLToken::ensureIsCharacterToken):
+        (WebCore::HTMLToken::beginComment):
+        (WebCore::HTMLToken::beginDOCTYPE):
+        * html/HTMLTokenizer.cpp:
+        (WebCore::HTMLTokenizer::bufferCharacter):
+
 2010-07-23  Patrick Gansterer  <paroga@paroga.com>
 
         Reviewed by Darin Adler.
index 407e5d7..8465a35 100644 (file)
@@ -59,6 +59,7 @@ public:
     void clear()
     {
         m_type = Uninitialized;
+        m_data.clear();
     }
 
     void makeEndOfFile()
@@ -72,7 +73,6 @@ public:
         ASSERT(character);
         ASSERT(m_type == Uninitialized);
         m_type = StartTag;
-        m_data.clear();
         m_selfClosing = false;
         m_currentAttribute = 0;
         m_attributes.clear();
@@ -85,7 +85,6 @@ public:
     {
         ASSERT(m_type == Uninitialized);
         m_type = EndTag;
-        m_data.clear();
         m_selfClosing = false;
         m_currentAttribute = 0;
         m_attributes.clear();
@@ -93,27 +92,24 @@ public:
         m_data.append(characters);
     }
 
-    void beginCharacter(UChar character)
+    // Starting a character token works slightly differently than starting
+    // other types of tokens because we want to save a per-character branch.
+    void ensureIsCharacterToken()
     {
-        ASSERT(character);
-        ASSERT(m_type == Uninitialized);
+        ASSERT(m_type == Uninitialized || m_type == Character);
         m_type = Character;
-        m_data.clear();
-        m_data.append(character);
     }
 
     void beginComment()
     {
         ASSERT(m_type == Uninitialized);
         m_type = Comment;
-        m_data.clear();
     }
 
     void beginDOCTYPE()
     {
         ASSERT(m_type == Uninitialized);
         m_type = DOCTYPE;
-        m_data.clear();
         m_doctypeData.set(new DoctypeData());
     }
 
index 55fd1ca..3829580 100644 (file)
@@ -1608,10 +1608,7 @@ inline bool HTMLTokenizer::isAppropriateEndTag()
 inline void HTMLTokenizer::bufferCharacter(UChar character)
 {
     ASSERT(character != InputStreamPreprocessor::endOfFileMarker);
-    if (m_token->type() != HTMLToken::Character) {
-        m_token->beginCharacter(character);
-        return;
-    }
+    m_token->ensureIsCharacterToken();
     m_token->appendToCharacter(character);
 }