Deduplicate shortish Text node strings during tree construction.
author: akling@apple.com <akling@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 25 Nov 2013 21:53:32 +0000 (21:53 +0000)
committer: akling@apple.com <akling@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 25 Nov 2013 21:53:32 +0000 (21:53 +0000)
<https://webkit.org/b/124855>

Let HTMLConstructionSite keep a hash set of already seen strings over
its lifetime. Use this to deduplicate the strings inside Text nodes
for any string up to 64 characters of length.

This optimization already sort-of existed for whitespace-only Texts,
but those are laundered in the AtomicString table which we definitely
don't want to pollute with every single Text. It might be a good idea
to stop using the AtomicString table for all-whitespace Text too.

3.82 MB progression on HTML5-8266 locally.

Reviewed by Anders Carlsson.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@159764 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/html/parser/HTMLConstructionSite.cpp
Source/WebCore/html/parser/HTMLConstructionSite.h

index 538c420338d5b09527523fc749f24a5f565db2ac..db2e78f915b71000300e0fe92f32f0fdd9196f7c 100644 (file)
@@ -1,3 +1,21 @@
+2013-11-25  Andreas Kling  <akling@apple.com>
+
+        Deduplicate shortish Text node strings during tree construction.
+        <https://webkit.org/b/124855>
+
+        Let HTMLConstructionSite keep a hash set of already seen strings over
+        its lifetime. Use this to deduplicate the strings inside Text nodes
+        for any string up to 64 characters of length.
+
+        This optimization already sort-of existed for whitespace-only Texts,
+        but those are laundered in the AtomicString table which we definitely
+        don't want to pollute with every single Text. It might be a good idea
+        to stop using the AtomicString table for all-whitespace Text too.
+
+        3.82 MB progression on HTML5-8266 locally.
+
+        Reviewed by Anders Carlsson.
+
 2013-11-25  Nick Diego Yamane  <nick.yamane@openbossa.org>
 
         Remove unnecessary MediaStreamTrackDescriptor forward declaration
index 1963b2927911095dfc0fe7357d973e35d9b04912..f39dda8e3eaf4c4d9bb0860ef4c7667a2e61557b 100644 (file)
@@ -505,11 +505,11 @@ void HTMLConstructionSite::insertTextNode(const String& characters, WhitespaceMo
     }
 
     while (currentPosition < characters.length()) {
-        RefPtr<Text> textNode = Text::createWithLengthLimit(task.parent->document(), shouldUseAtomicString ? AtomicString(characters).string() : characters, currentPosition, lengthLimit);
+        RefPtr<Text> textNode = Text::createWithLengthLimit(task.parent->document(), stringForTextNode(characters, shouldUseAtomicString), currentPosition, lengthLimit);
         // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
         if (!textNode->length()) {
             String substring = characters.substring(currentPosition);
-            textNode = Text::create(task.parent->document(), shouldUseAtomicString ? AtomicString(substring).string() : substring);
+            textNode = Text::create(task.parent->document(), stringForTextNode(substring, shouldUseAtomicString));
         }
 
         currentPosition += textNode->length();
@@ -666,4 +666,14 @@ void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node)
     m_attachmentQueue.append(task);
 }
 
+String HTMLConstructionSite::stringForTextNode(const String& string, bool shouldUseAtomicString)
+{
+    static const unsigned maximumLengthForDeduplication = 64;
+    if (shouldUseAtomicString)
+        return AtomicString(string).string();
+    if (string.length() > maximumLengthForDeduplication)
+        return string;
+    return *m_stringsForDeduplication.add(string).iterator;
+}
+
 }
index d4c03130dbde2e5f11fca76a78d142619f21b99e..ffdcd5477a109a9bdd8d403548da5b02d418cdb1 100644 (file)
@@ -170,6 +170,8 @@ private:
     void mergeAttributesFromTokenIntoElement(AtomicHTMLToken*, Element*);
     void dispatchDocumentElementAvailableIfNeeded();
 
+    String stringForTextNode(const String&, bool shouldUseAtomicString);
+
     Document* m_document;
     
     // This is the root ContainerNode to which the parser attaches all newly
@@ -196,6 +198,8 @@ private:
     unsigned m_maximumDOMTreeDepth;
 
     bool m_inQuirksMode;
+
+    HashSet<String> m_stringsForDeduplication;
 };
 
 } // namespace WebCore