BackgroundHTMLParser should be able to atomize well-known strings
author eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 9 Mar 2013 01:06:36 +0000 (01:06 +0000)
committer eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 9 Mar 2013 01:06:36 +0000 (01:06 +0000)
https://bugs.webkit.org/show_bug.cgi?id=107337

Reviewed by Adam Barth.

Testing this patch easily requires applying the patch from bug 107236 locally
to remove all the rendering noise from Parser/html-threaded-parser.html.

This patch adds a new class HTMLIdentifier which allows us to avoid
allocating strings for known tag/attribute names from HTMLNames.

There is still a lot of meat on this bone, but I think it's important to
land something "smallish" to start and iterate from there.

This took Parser/html-threaded-parser.html from:
median= 443.726500002 ms, stdev= 7.25002679952 ms, min= 430.244000047 ms, max= 455.511000007 ms
to:
median= 427.849500004 ms, stdev= 9.96967058292 ms, min= 417.914000049 ms, max= 461.528000014 ms
on my MBP.

* CMakeLists.txt:
* GNUmakefile.list.am:
* Target.pri:
* WebCore.gypi:
* WebCore.vcproj/WebCore.vcproj:
* WebCore.vcxproj/WebCore.vcxproj:
* html/parser/AtomicHTMLToken.h:
(WebCore::AtomicHTMLToken::AtomicHTMLToken):
* html/parser/BackgroundHTMLParser.cpp:
(WebCore::tokenExitsForeignContent):
(WebCore::tokenExitsSVG):
(WebCore::tokenExitsMath):
(WebCore::BackgroundHTMLParser::simulateTreeBuilder):
* html/parser/CSSPreloadScanner.cpp:
(WebCore::CSSPreloadScanner::scan):
* html/parser/CSSPreloadScanner.h:
(WebCore):
(CSSPreloadScanner):
* html/parser/CompactHTMLToken.cpp:
(SameSizeAsCompactHTMLToken):
(WebCore::CompactHTMLToken::CompactHTMLToken):
* html/parser/CompactHTMLToken.h:
(WebCore::CompactHTMLToken::Attribute::Attribute):
(Attribute):
(WebCore::CompactHTMLToken::data):
(WebCore::CompactHTMLToken::publicIdentifier):
(CompactHTMLToken):
* html/parser/HTMLDocumentParser.cpp:
(WebCore::HTMLDocumentParser::startBackgroundParser):
* html/parser/HTMLIdentifier.cpp: Added.
(WebCore):
(WebCore::identifierTable):
(WebCore::HTMLIdentifier::hasIndex):
(WebCore::HTMLIdentifier::findIndex):
(WebCore::nameForIndex):
(WebCore::HTMLIdentifier::asString):
(WebCore::HTMLIdentifier::asStringImpl):
(WebCore::HTMLIdentifier::addNames):
(WebCore::HTMLIdentifier::init):
* html/parser/HTMLIdentifier.h: Added.
(WebCore):
(HTMLIdentifier):
(WebCore::HTMLIdentifier::HTMLIdentifier):
(WebCore::HTMLIdentifier::isSafeToSendToAnotherThread):
* html/parser/HTMLParserIdioms.cpp:
(WebCore::threadSafeEqual):
(WebCore::threadSafeMatch):
* html/parser/HTMLParserIdioms.h:
(WebCore):
(WebCore::threadSafeHTMLNamesMatch):
* html/parser/HTMLPreloadScanner.cpp:
(WebCore::TokenPreloadScanner::tagIdFor):
(WebCore::TokenPreloadScanner::StartTagScanner::match):
(TokenPreloadScanner::StartTagScanner):
(WebCore::TokenPreloadScanner::StartTagScanner::processAttribute):
* html/parser/HTMLPreloadScanner.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@145292 268f45cc-cd09-0410-ab3c-d52691b4dbfc

21 files changed:
Source/WebCore/CMakeLists.txt
Source/WebCore/ChangeLog
Source/WebCore/GNUmakefile.list.am
Source/WebCore/Target.pri
Source/WebCore/WebCore.gypi
Source/WebCore/WebCore.vcproj/WebCore.vcproj
Source/WebCore/WebCore.vcxproj/WebCore.vcxproj
Source/WebCore/WebCore.xcodeproj/project.pbxproj
Source/WebCore/html/parser/AtomicHTMLToken.h
Source/WebCore/html/parser/BackgroundHTMLParser.cpp
Source/WebCore/html/parser/CSSPreloadScanner.cpp
Source/WebCore/html/parser/CSSPreloadScanner.h
Source/WebCore/html/parser/CompactHTMLToken.cpp
Source/WebCore/html/parser/CompactHTMLToken.h
Source/WebCore/html/parser/HTMLDocumentParser.cpp
Source/WebCore/html/parser/HTMLIdentifier.cpp [new file with mode: 0644]
Source/WebCore/html/parser/HTMLIdentifier.h [new file with mode: 0644]
Source/WebCore/html/parser/HTMLParserIdioms.cpp
Source/WebCore/html/parser/HTMLParserIdioms.h
Source/WebCore/html/parser/HTMLPreloadScanner.cpp
Source/WebCore/html/parser/HTMLPreloadScanner.h

index f3bdb7c..645bbfd 100644 (file)
@@ -1554,6 +1554,7 @@ set(WebCore_SOURCES
     html/parser/HTMLParserScheduler.cpp
     html/parser/HTMLParserThread.cpp
     html/parser/HTMLFormattingElementList.cpp
+    html/parser/HTMLIdentifier.cpp
     html/parser/HTMLMetaCharsetParser.cpp
     html/parser/HTMLPreloadScanner.cpp
     html/parser/HTMLResourcePreloader.cpp
index 8ded75e..5bda1a8 100644 (file)
@@ -1,3 +1,82 @@
+2013-03-08  Eric Seidel  <eric@webkit.org>
+
+        BackgroundHTMLParser should be able to atomize well-known strings
+        https://bugs.webkit.org/show_bug.cgi?id=107337
+
+        Reviewed by Adam Barth.
+
+        Testing this patch easily requires applying the patch from bug 107236 locally
+        to remove all the rendering noise from Parser/html-threaded-parser.html.
+
+        This patch adds a new class HTMLIdentifier which allows us to avoid
+        allocating strings for known tag/attribute names from HTMLNames.
+
+        There is still a lot of meat on this bone, but I think it's important to
+        land something "smallish" to start and iterate from there.
+
+        This took Parser/html-threaded-parser.html from:
+        median= 443.726500002 ms, stdev= 7.25002679952 ms, min= 430.244000047 ms, max= 455.511000007 ms
+        to:
+        median= 427.849500004 ms, stdev= 9.96967058292 ms, min= 417.914000049 ms, max= 461.528000014 ms
+        on my MBP.
+
+        * CMakeLists.txt:
+        * GNUmakefile.list.am:
+        * Target.pri:
+        * WebCore.gypi:
+        * WebCore.vcproj/WebCore.vcproj:
+        * WebCore.vcxproj/WebCore.vcxproj:
+        * html/parser/AtomicHTMLToken.h:
+        (WebCore::AtomicHTMLToken::AtomicHTMLToken):
+        * html/parser/BackgroundHTMLParser.cpp:
+        (WebCore::tokenExitsForeignContent):
+        (WebCore::tokenExitsSVG):
+        (WebCore::tokenExitsMath):
+        (WebCore::BackgroundHTMLParser::simulateTreeBuilder):
+        * html/parser/CSSPreloadScanner.cpp:
+        (WebCore::CSSPreloadScanner::scan):
+        * html/parser/CSSPreloadScanner.h:
+        (WebCore):
+        (CSSPreloadScanner):
+        * html/parser/CompactHTMLToken.cpp:
+        (SameSizeAsCompactHTMLToken):
+        (WebCore::CompactHTMLToken::CompactHTMLToken):
+        * html/parser/CompactHTMLToken.h:
+        (WebCore::CompactHTMLToken::Attribute::Attribute):
+        (Attribute):
+        (WebCore::CompactHTMLToken::data):
+        (WebCore::CompactHTMLToken::publicIdentifier):
+        (CompactHTMLToken):
+        * html/parser/HTMLDocumentParser.cpp:
+        (WebCore::HTMLDocumentParser::startBackgroundParser):
+        * html/parser/HTMLIdentifier.cpp: Added.
+        (WebCore):
+        (WebCore::identifierTable):
+        (WebCore::HTMLIdentifier::hasIndex):
+        (WebCore::HTMLIdentifier::findIndex):
+        (WebCore::nameForIndex):
+        (WebCore::HTMLIdentifier::asString):
+        (WebCore::HTMLIdentifier::asStringImpl):
+        (WebCore::HTMLIdentifier::addNames):
+        (WebCore::HTMLIdentifier::init):
+        * html/parser/HTMLIdentifier.h: Added.
+        (WebCore):
+        (HTMLIdentifier):
+        (WebCore::HTMLIdentifier::HTMLIdentifier):
+        (WebCore::HTMLIdentifier::isSafeToSendToAnotherThread):
+        * html/parser/HTMLParserIdioms.cpp:
+        (WebCore::threadSafeEqual):
+        (WebCore::threadSafeMatch):
+        * html/parser/HTMLParserIdioms.h:
+        (WebCore):
+        (WebCore::threadSafeHTMLNamesMatch):
+        * html/parser/HTMLPreloadScanner.cpp:
+        (WebCore::TokenPreloadScanner::tagIdFor):
+        (WebCore::TokenPreloadScanner::StartTagScanner::match):
+        (TokenPreloadScanner::StartTagScanner):
+        (WebCore::TokenPreloadScanner::StartTagScanner::processAttribute):
+        * html/parser/HTMLPreloadScanner.h:
+
 2013-03-08  Brandon Jones  <bajones@google.com>
 
         Check to ensure MultisampleRenderbuffer creation succeeds
index f569fd1..30877d6 100644 (file)
@@ -3616,6 +3616,8 @@ webcore_sources += \
        Source/WebCore/html/parser/HTMLEntityTable.h \
        Source/WebCore/html/parser/HTMLFormattingElementList.cpp \
        Source/WebCore/html/parser/HTMLFormattingElementList.h \
+       Source/WebCore/html/parser/HTMLIdentifier.cpp \
+       Source/WebCore/html/parser/HTMLIdentifier.h \
        Source/WebCore/html/parser/HTMLInputStream.h \
        Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp \
        Source/WebCore/html/parser/HTMLMetaCharsetParser.h \
index 1b655fb..244bcb1 100644 (file)
@@ -735,6 +735,7 @@ SOURCES += \
     html/parser/HTMLEntityParser.cpp \
     html/parser/HTMLEntitySearch.cpp \
     html/parser/HTMLFormattingElementList.cpp \
+    html/parser/HTMLIdentifier.cpp \
     html/parser/HTMLMetaCharsetParser.cpp \
     html/parser/HTMLParserIdioms.cpp \
     html/parser/HTMLParserOptions.cpp \
index e80aaf4..98d3aef 100644 (file)
             'html/parser/HTMLEntityTable.h',
             'html/parser/HTMLFormattingElementList.cpp',
             'html/parser/HTMLFormattingElementList.h',
+            'html/parser/HTMLIdentifier.cpp',
+            'html/parser/HTMLIdentifier.h',
             'html/parser/HTMLInputStream.h',
             'html/parser/HTMLMetaCharsetParser.cpp',
             'html/parser/HTMLMetaCharsetParser.h',
index edd0015..c64bea9 100755 (executable)
                                        >
                                </File>
                                <File
+                                       RelativePath="..\html\parser\HTMLIdentifier.cpp"
+                                       >
+                               </File>
+                               <File
+                                       RelativePath="..\html\parser\HTMLIdentifier.h"
+                                       >
+                               </File>
+                               <File
                                        RelativePath="..\html\parser\HTMLInputStream.h"
                                        >
                                </File>
index c1eff3b..ec48217 100644 (file)
     <ClCompile Include="..\html\parser\HTMLEntityParser.cpp" />
     <ClCompile Include="..\html\parser\HTMLEntitySearch.cpp" />
     <ClCompile Include="..\html\parser\HTMLFormattingElementList.cpp" />
+    <ClCompile Include="..\html\parser\HTMLIdentifier.cpp" />
     <ClCompile Include="..\html\parser\HTMLMetaCharsetParser.cpp" />
     <ClCompile Include="..\html\parser\HTMLParserIdioms.cpp" />
     <ClCompile Include="..\html\parser\HTMLParserOptions.cpp" />
     <ClInclude Include="..\html\parser\HTMLEntityParser.h" />
     <ClInclude Include="..\html\parser\HTMLEntitySearch.h" />
     <ClInclude Include="..\html\parser\HTMLFormattingElementList.h" />
+    <ClInclude Include="..\html\parser\HTMLIdentifier.h" />
     <ClInclude Include="..\html\parser\HTMLInputStream.h" />
     <ClInclude Include="..\html\parser\HTMLMetaCharsetParser.h" />
     <ClInclude Include="..\html\parser\HTMLParserIdioms.h" />
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
index 9679f62..17cae31 100644 (file)
                A88AD52F09524B92001DD196 /* QuartzCore.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = A85D7A2F0879EBA9006A9172 /* QuartzCore.framework */; };
                A88DD4870B4629A300C02990 /* PathTraversalState.h in Headers */ = {isa = PBXBuildFile; fileRef = A88DD4860B4629A300C02990 /* PathTraversalState.h */; };
                A88DD4890B4629B000C02990 /* PathTraversalState.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A88DD4880B4629B000C02990 /* PathTraversalState.cpp */; };
+               A895709F16E9BD5900184E55 /* HTMLIdentifier.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A895709D16E9BD5900184E55 /* HTMLIdentifier.cpp */; };
+               A89570A016E9BD5900184E55 /* HTMLIdentifier.h in Headers */ = {isa = PBXBuildFile; fileRef = A895709E16E9BD5900184E55 /* HTMLIdentifier.h */; settings = {ATTRIBUTES = (Private, ); }; };
                A89943280B42338800D7C802 /* BitmapImage.h in Headers */ = {isa = PBXBuildFile; fileRef = A89943260B42338700D7C802 /* BitmapImage.h */; settings = {ATTRIBUTES = (Private, ); }; };
                A89943290B42338800D7C802 /* BitmapImage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A89943270B42338700D7C802 /* BitmapImage.cpp */; };
                A89CCC520F44E98100B5DA10 /* ReplaceNodeWithSpanCommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A89CCC500F44E98100B5DA10 /* ReplaceNodeWithSpanCommand.cpp */; };
                A886CDC214FBBAA300D279F4 /* WorkerContextWebSocket.idl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = WorkerContextWebSocket.idl; path = Modules/websockets/WorkerContextWebSocket.idl; sourceTree = "<group>"; };
                A88DD4860B4629A300C02990 /* PathTraversalState.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PathTraversalState.h; sourceTree = "<group>"; };
                A88DD4880B4629B000C02990 /* PathTraversalState.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PathTraversalState.cpp; sourceTree = "<group>"; };
+               A895709D16E9BD5900184E55 /* HTMLIdentifier.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = HTMLIdentifier.cpp; path = parser/HTMLIdentifier.cpp; sourceTree = "<group>"; };
+               A895709E16E9BD5900184E55 /* HTMLIdentifier.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HTMLIdentifier.h; path = parser/HTMLIdentifier.h; sourceTree = "<group>"; };
                A89943260B42338700D7C802 /* BitmapImage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BitmapImage.h; sourceTree = "<group>"; };
                A89943270B42338700D7C802 /* BitmapImage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = BitmapImage.cpp; sourceTree = "<group>"; };
                A89CCC500F44E98100B5DA10 /* ReplaceNodeWithSpanCommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ReplaceNodeWithSpanCommand.cpp; sourceTree = "<group>"; };
                97C1F5511228558800EDE616 /* parser */ = {
                        isa = PBXGroup;
                        children = (
+                               97C1F552122855CB00EDE617 /* AtomicHTMLToken.h */,
                                97DC69E916B9F50E008CFD6F /* BackgroundHTMLInputStream.cpp */,
                                97DC69EA16B9F50E008CFD6F /* BackgroundHTMLInputStream.h */,
                                979DC03116A7992100807794 /* BackgroundHTMLParser.cpp */,
                                977B3854122883E900B81FF8 /* HTMLEntityTable.h */,
                                977B3855122883E900B81FF8 /* HTMLFormattingElementList.cpp */,
                                977B3856122883E900B81FF8 /* HTMLFormattingElementList.h */,
+                               A895709D16E9BD5900184E55 /* HTMLIdentifier.cpp */,
+                               A895709E16E9BD5900184E55 /* HTMLIdentifier.h */,
                                97BC849A12370A4B000C6161 /* HTMLInputStream.h */,
                                2BE8E2C812A58A0100FAD550 /* HTMLMetaCharsetParser.cpp */,
                                2BE8E2C612A589EC00FAD550 /* HTMLMetaCharsetParser.h */,
                                97C1F552122855CB00EDE616 /* HTMLToken.h */,
                                977B385E122883E900B81FF8 /* HTMLTokenizer.cpp */,
                                977B385F122883E900B81FF8 /* HTMLTokenizer.h */,
-                               97C1F552122855CB00EDE617 /* AtomicHTMLToken.h */,
                                977B37211228721700B81FF8 /* HTMLTreeBuilder.cpp */,
                                977B37221228721700B81FF8 /* HTMLTreeBuilder.h */,
                                977B3860122883E900B81FF8 /* HTMLViewSourceParser.cpp */,
                                7E66E23416D6EB6C00F7E7FF /* WebGLCompressedTextureATC.h in Headers */,
                                7EA30F6A16DFFE7500257D0B /* JSWebGLCompressedTextureATC.h in Headers */,
                                2D8287F716E4A0380086BD00 /* HitTestLocation.h in Headers */,
+                               A89570A016E9BD5900184E55 /* HTMLIdentifier.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                7E66E23316D6EB6C00F7E7FF /* WebGLCompressedTextureATC.cpp in Sources */,
                                7EA30F6916DFFE7500257D0B /* JSWebGLCompressedTextureATC.cpp in Sources */,
                                2D8287F616E4A0380086BD00 /* HitTestLocation.cpp in Sources */,
+                               A895709F16E9BD5900184E55 /* HTMLIdentifier.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index a6d237d..415b0d2 100644 (file)
@@ -163,10 +163,10 @@ public:
             ASSERT_NOT_REACHED();
             break;
         case HTMLToken::DOCTYPE:
-            m_name = token.data();
+            m_name = token.data().asString();
             m_doctypeData = adoptPtr(new DoctypeData());
             m_doctypeData->m_hasPublicIdentifier = true;
-            append(m_doctypeData->m_publicIdentifier, token.publicIdentifier());
+            append(m_doctypeData->m_publicIdentifier, token.publicIdentifier().asString());
             m_doctypeData->m_hasSystemIdentifier = true;
             append(m_doctypeData->m_systemIdentifier, token.systemIdentifier());
             m_doctypeData->m_forceQuirks = token.doctypeForcesQuirks();
@@ -176,7 +176,7 @@ public:
         case HTMLToken::StartTag:
             m_attributes.reserveInitialCapacity(token.attributes().size());
             for (Vector<CompactHTMLToken::Attribute>::const_iterator it = token.attributes().begin(); it != token.attributes().end(); ++it) {
-                QualifiedName name(nullAtom, it->name, nullAtom);
+                QualifiedName name(nullAtom, it->name.asString(), nullAtom);
                 // FIXME: This is N^2 for the number of attributes.
                 if (!findAttributeInVector(m_attributes, name))
                     m_attributes.append(Attribute(name, it->value));
@@ -184,17 +184,24 @@ public:
             // Fall through!
         case HTMLToken::EndTag:
             m_selfClosing = token.selfClosing();
-            m_name = token.data();
+            m_name = token.data().asString();
             break;
         case HTMLToken::Comment:
-            m_data = token.data();
+            m_data = token.data().asString();
             break;
-        case HTMLToken::Character:
-            m_externalCharacters = token.data().characters();
-            m_externalCharactersLength = token.data().length();
+        case HTMLToken::Character: {
+            const String& string = token.data().asString();
+            m_externalCharacters = string.characters();
+            m_externalCharactersLength = string.length();
             m_isAll8BitData = token.isAll8BitData();
+            // FIXME: We would like a stronger ASSERT here:
+            // ASSERT(string.is8Bit() == token.isAll8BitData());
+            // but currently that fires, likely due to bugs in HTMLTokenizer
+            // not setting isAll8BitData in every case where it could.
+            ASSERT(!token.isAll8BitData() || string.is8Bit());
             break;
         }
+        }
     }
 
 #endif
index 6925e10..eef293a 100644 (file)
@@ -69,65 +69,68 @@ static void checkThatXSSInfosAreSafeToSendToAnotherThread(const XSSInfoStream& x
 
 static bool tokenExitsForeignContent(const CompactHTMLToken& token)
 {
-    // FIXME: This is copied from HTMLTreeBuilder::processTokenInForeignContent and changed to use threadSafeMatch.
-    const String& tagName = token.data();
-    return threadSafeMatch(tagName, bTag)
-        || threadSafeMatch(tagName, bigTag)
-        || threadSafeMatch(tagName, blockquoteTag)
-        || threadSafeMatch(tagName, bodyTag)
-        || threadSafeMatch(tagName, brTag)
-        || threadSafeMatch(tagName, centerTag)
-        || threadSafeMatch(tagName, codeTag)
-        || threadSafeMatch(tagName, ddTag)
-        || threadSafeMatch(tagName, divTag)
-        || threadSafeMatch(tagName, dlTag)
-        || threadSafeMatch(tagName, dtTag)
-        || threadSafeMatch(tagName, emTag)
-        || threadSafeMatch(tagName, embedTag)
-        || threadSafeMatch(tagName, h1Tag)
-        || threadSafeMatch(tagName, h2Tag)
-        || threadSafeMatch(tagName, h3Tag)
-        || threadSafeMatch(tagName, h4Tag)
-        || threadSafeMatch(tagName, h5Tag)
-        || threadSafeMatch(tagName, h6Tag)
-        || threadSafeMatch(tagName, headTag)
-        || threadSafeMatch(tagName, hrTag)
-        || threadSafeMatch(tagName, iTag)
-        || threadSafeMatch(tagName, imgTag)
-        || threadSafeMatch(tagName, liTag)
-        || threadSafeMatch(tagName, listingTag)
-        || threadSafeMatch(tagName, menuTag)
-        || threadSafeMatch(tagName, metaTag)
-        || threadSafeMatch(tagName, nobrTag)
-        || threadSafeMatch(tagName, olTag)
-        || threadSafeMatch(tagName, pTag)
-        || threadSafeMatch(tagName, preTag)
-        || threadSafeMatch(tagName, rubyTag)
-        || threadSafeMatch(tagName, sTag)
-        || threadSafeMatch(tagName, smallTag)
-        || threadSafeMatch(tagName, spanTag)
-        || threadSafeMatch(tagName, strongTag)
-        || threadSafeMatch(tagName, strikeTag)
-        || threadSafeMatch(tagName, subTag)
-        || threadSafeMatch(tagName, supTag)
-        || threadSafeMatch(tagName, tableTag)
-        || threadSafeMatch(tagName, ttTag)
-        || threadSafeMatch(tagName, uTag)
-        || threadSafeMatch(tagName, ulTag)
-        || threadSafeMatch(tagName, varTag)
-        || (threadSafeMatch(tagName, fontTag) && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)));
+    // FIXME: This is copied from HTMLTreeBuilder::processTokenInForeignContent and changed to use threadSafeHTMLNamesMatch.
+    const HTMLIdentifier& tagName = token.data();
+    return threadSafeHTMLNamesMatch(tagName, bTag)
+        || threadSafeHTMLNamesMatch(tagName, bigTag)
+        || threadSafeHTMLNamesMatch(tagName, blockquoteTag)
+        || threadSafeHTMLNamesMatch(tagName, bodyTag)
+        || threadSafeHTMLNamesMatch(tagName, brTag)
+        || threadSafeHTMLNamesMatch(tagName, centerTag)
+        || threadSafeHTMLNamesMatch(tagName, codeTag)
+        || threadSafeHTMLNamesMatch(tagName, ddTag)
+        || threadSafeHTMLNamesMatch(tagName, divTag)
+        || threadSafeHTMLNamesMatch(tagName, dlTag)
+        || threadSafeHTMLNamesMatch(tagName, dtTag)
+        || threadSafeHTMLNamesMatch(tagName, emTag)
+        || threadSafeHTMLNamesMatch(tagName, embedTag)
+        || threadSafeHTMLNamesMatch(tagName, h1Tag)
+        || threadSafeHTMLNamesMatch(tagName, h2Tag)
+        || threadSafeHTMLNamesMatch(tagName, h3Tag)
+        || threadSafeHTMLNamesMatch(tagName, h4Tag)
+        || threadSafeHTMLNamesMatch(tagName, h5Tag)
+        || threadSafeHTMLNamesMatch(tagName, h6Tag)
+        || threadSafeHTMLNamesMatch(tagName, headTag)
+        || threadSafeHTMLNamesMatch(tagName, hrTag)
+        || threadSafeHTMLNamesMatch(tagName, iTag)
+        || threadSafeHTMLNamesMatch(tagName, imgTag)
+        || threadSafeHTMLNamesMatch(tagName, liTag)
+        || threadSafeHTMLNamesMatch(tagName, listingTag)
+        || threadSafeHTMLNamesMatch(tagName, menuTag)
+        || threadSafeHTMLNamesMatch(tagName, metaTag)
+        || threadSafeHTMLNamesMatch(tagName, nobrTag)
+        || threadSafeHTMLNamesMatch(tagName, olTag)
+        || threadSafeHTMLNamesMatch(tagName, pTag)
+        || threadSafeHTMLNamesMatch(tagName, preTag)
+        || threadSafeHTMLNamesMatch(tagName, rubyTag)
+        || threadSafeHTMLNamesMatch(tagName, sTag)
+        || threadSafeHTMLNamesMatch(tagName, smallTag)
+        || threadSafeHTMLNamesMatch(tagName, spanTag)
+        || threadSafeHTMLNamesMatch(tagName, strongTag)
+        || threadSafeHTMLNamesMatch(tagName, strikeTag)
+        || threadSafeHTMLNamesMatch(tagName, subTag)
+        || threadSafeHTMLNamesMatch(tagName, supTag)
+        || threadSafeHTMLNamesMatch(tagName, tableTag)
+        || threadSafeHTMLNamesMatch(tagName, ttTag)
+        || threadSafeHTMLNamesMatch(tagName, uTag)
+        || threadSafeHTMLNamesMatch(tagName, ulTag)
+        || threadSafeHTMLNamesMatch(tagName, varTag)
+        || (threadSafeHTMLNamesMatch(tagName, fontTag) && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)));
 }
 
 static bool tokenExitsSVG(const CompactHTMLToken& token)
 {
-    const String& tagName = token.data();
-    return equalIgnoringCase(tagName, SVGNames::foreignObjectTag.localName());
+    // FIXME: It's very fragile that we special case foreignObject here to be case-insensitive.
+    // FIXME: Using CaseFoldingHash::equal instead of equalIgnoringCase, as equalIgnoringCase
+    // wants non-const StringImpl* (even though it never modifies them).
+    // https://bugs.webkit.org/show_bug.cgi?id=111892 is for fixing equalIgnoringCase.
+    return CaseFoldingHash::equal(token.data().asStringImpl(), SVGNames::foreignObjectTag.localName().impl());
 }
 
 static bool tokenExitsMath(const CompactHTMLToken& token)
 {
     // FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint and changed to use threadSafeMatch.
-    const String& tagName = token.data();
+    const HTMLIdentifier& tagName = token.data();
     return threadSafeMatch(tagName, MathMLNames::miTag)
         || threadSafeMatch(tagName, MathMLNames::moTag)
         || threadSafeMatch(tagName, MathMLNames::mnTag)
@@ -203,7 +206,7 @@ void BackgroundHTMLParser::markEndOfFile()
 bool BackgroundHTMLParser::simulateTreeBuilder(const CompactHTMLToken& token)
 {
     if (token.type() == HTMLToken::StartTag) {
-        const String& tagName = token.data();
+        const HTMLIdentifier& tagName = token.data();
         if (threadSafeMatch(tagName, SVGNames::svgTag))
             m_namespaceStack.append(SVG);
         if (threadSafeMatch(tagName, MathMLNames::mathTag))
@@ -215,30 +218,30 @@ bool BackgroundHTMLParser::simulateTreeBuilder(const CompactHTMLToken& token)
             m_namespaceStack.append(HTML);
         if (!inForeignContent()) {
             // FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches.
-            if (threadSafeMatch(tagName, textareaTag) || threadSafeMatch(tagName, titleTag))
+            if (threadSafeHTMLNamesMatch(tagName, textareaTag) || threadSafeHTMLNamesMatch(tagName, titleTag))
                 m_tokenizer->setState(HTMLTokenizer::RCDATAState);
-            else if (threadSafeMatch(tagName, plaintextTag))
+            else if (threadSafeHTMLNamesMatch(tagName, plaintextTag))
                 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
-            else if (threadSafeMatch(tagName, scriptTag))
+            else if (threadSafeHTMLNamesMatch(tagName, scriptTag))
                 m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
-            else if (threadSafeMatch(tagName, styleTag)
-                || threadSafeMatch(tagName, iframeTag)
-                || threadSafeMatch(tagName, xmpTag)
-                || (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled)
-                || threadSafeMatch(tagName, noframesTag)
-                || (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))
+            else if (threadSafeHTMLNamesMatch(tagName, styleTag)
+                || threadSafeHTMLNamesMatch(tagName, iframeTag)
+                || threadSafeHTMLNamesMatch(tagName, xmpTag)
+                || (threadSafeHTMLNamesMatch(tagName, noembedTag) && m_options.pluginsEnabled)
+                || threadSafeHTMLNamesMatch(tagName, noframesTag)
+                || (threadSafeHTMLNamesMatch(tagName, noscriptTag) && m_options.scriptEnabled))
                 m_tokenizer->setState(HTMLTokenizer::RAWTEXTState);
         }
     }
 
     if (token.type() == HTMLToken::EndTag) {
-        const String& tagName = token.data();
+        const HTMLIdentifier& tagName = token.data();
         if ((m_namespaceStack.last() == SVG && threadSafeMatch(tagName, SVGNames::svgTag))
             || (m_namespaceStack.last() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag))
             || (m_namespaceStack.contains(SVG) && m_namespaceStack.last() == HTML && tokenExitsSVG(token))
             || (m_namespaceStack.contains(MathML) && m_namespaceStack.last() == HTML && tokenExitsMath(token)))
             m_namespaceStack.removeLast();
-        if (threadSafeMatch(tagName, scriptTag)) {
+        if (threadSafeHTMLNamesMatch(tagName, scriptTag)) {
             if (!inForeignContent())
                 m_tokenizer->setState(HTMLTokenizer::DataState);
             return false;
index 0c66fc1..a60b90c 100644 (file)
@@ -29,6 +29,7 @@
 #include "CSSPreloadScanner.h"
 
 #include "CachedResourceRequestInitiators.h"
+#include "HTMLIdentifier.h"
 #include "HTMLParserIdioms.h"
 
 namespace WebCore {
@@ -64,16 +65,19 @@ void CSSPreloadScanner::scan(const HTMLToken::DataVector& data, PreloadRequestSt
     scanCommon(data.data(), data.data() + data.size(), requests);
 }
 
-void CSSPreloadScanner::scan(const String& data, PreloadRequestStream& requests)
+#if ENABLE(THREADED_HTML_PARSER)
+void CSSPreloadScanner::scan(const HTMLIdentifier& identifier, PreloadRequestStream& requests)
 {
-    if (data.is8Bit()) {
-        const LChar* begin = data.characters8();
-        scanCommon(begin, begin + data.length(), requests);
+    const StringImpl* data = identifier.asStringImpl();
+    if (data->is8Bit()) {
+        const LChar* begin = data->characters8();
+        scanCommon(begin, begin + data->length(), requests);
         return;
     }
-    const UChar* begin = data.characters16();
-    scanCommon(begin, begin + data.length(), requests);
+    const UChar* begin = data->characters16();
+    scanCommon(begin, begin + data->length(), requests);
 }
+#endif
 
 inline void CSSPreloadScanner::tokenize(UChar c)
 {
index 4b69bc3..9c20910 100644 (file)
@@ -33,6 +33,8 @@
 
 namespace WebCore {
 
+class HTMLIdentifier;
+
 class CSSPreloadScanner {
     WTF_MAKE_NONCOPYABLE(CSSPreloadScanner);
 public:
@@ -42,7 +44,7 @@ public:
     void reset();
 
     void scan(const HTMLToken::DataVector&, PreloadRequestStream&);
-    void scan(const String&, PreloadRequestStream&);
+    void scan(const HTMLIdentifier&, PreloadRequestStream&);
 
 private:
     enum State {
index b4788a0..76ddc6b 100644 (file)
@@ -38,7 +38,7 @@ namespace WebCore {
 
 struct SameSizeAsCompactHTMLToken  {
     unsigned bitfields;
-    String name;
+    HTMLIdentifier data;
     Vector<Attribute> vector;
     TextPosition textPosition;
 };
@@ -56,10 +56,10 @@ CompactHTMLToken::CompactHTMLToken(const HTMLToken* token, const TextPosition& t
         ASSERT_NOT_REACHED();
         break;
     case HTMLToken::DOCTYPE: {
-        m_data = String(token->name());
+        m_data = HTMLIdentifier(token->name(), Likely8Bit);
         // There is only 1 DOCTYPE token per document, so to avoid increasing the
         // size of CompactHTMLToken, we just use the m_attributes vector.
-        m_attributes.append(Attribute(String(token->publicIdentifier()), String(token->systemIdentifier())));
+        m_attributes.append(Attribute(HTMLIdentifier(token->publicIdentifier(), Likely8Bit), String(token->systemIdentifier())));
         m_doctypeForcesQuirks = token->forceQuirks();
         break;
     }
@@ -68,19 +68,17 @@ CompactHTMLToken::CompactHTMLToken(const HTMLToken* token, const TextPosition& t
     case HTMLToken::StartTag:
         m_attributes.reserveInitialCapacity(token->attributes().size());
         for (Vector<HTMLToken::Attribute>::const_iterator it = token->attributes().begin(); it != token->attributes().end(); ++it)
-            m_attributes.append(Attribute(StringImpl::create8BitIfPossible(it->name), StringImpl::create8BitIfPossible(it->value)));
+            m_attributes.append(Attribute(HTMLIdentifier(it->name, Likely8Bit), StringImpl::create8BitIfPossible(it->value)));
         // Fall through!
     case HTMLToken::EndTag:
         m_selfClosing = token->selfClosing();
         // Fall through!
     case HTMLToken::Comment:
-    case HTMLToken::Character:
-        if (token->isAll8BitData()) {
-            m_data = String::make8BitFrom16BitSource(token->data());
-            m_isAll8BitData = true;
-        } else
-            m_data = String(token->data());
+    case HTMLToken::Character: {
+        m_isAll8BitData = token->isAll8BitData();
+        m_data = HTMLIdentifier(token->data(), token->isAll8BitData() ? Force8Bit : Force16Bit);
         break;
+    }
     default:
         ASSERT_NOT_REACHED();
         break;
index c898f6e..ffa4ae4 100644 (file)
@@ -28,6 +28,7 @@
 
 #if ENABLE(THREADED_HTML_PARSER)
 
+#include "HTMLIdentifier.h"
 #include "HTMLToken.h"
 #include <wtf/OwnPtr.h>
 #include <wtf/PassOwnPtr.h>
@@ -44,13 +45,13 @@ class QualifiedName;
 class CompactHTMLToken {
 public:
     struct Attribute {
-        Attribute(const String& name, const String& value)
+        Attribute(const HTMLIdentifier& name, const String& value)
             : name(name)
             , value(value)
         {
         }
 
-        String name;
+        HTMLIdentifier name;
         String value;
     };
 
@@ -59,7 +60,7 @@ public:
     bool isSafeToSendToAnotherThread() const;
 
     HTMLToken::Type type() const { return static_cast<HTMLToken::Type>(m_type); }
-    const String& data() const { return m_data; }
+    const HTMLIdentifier& data() const { return m_data; }
     bool selfClosing() const { return m_selfClosing; }
     bool isAll8BitData() const { return m_isAll8BitData; }
     const Vector<Attribute>& attributes() const { return m_attributes; }
@@ -68,7 +69,7 @@ public:
 
     // There is only 1 DOCTYPE token per document, so to avoid increasing the
     // size of CompactHTMLToken, we just use the m_attributes vector.
-    const String& publicIdentifier() const { return m_attributes[0].name; }
+    const HTMLIdentifier& publicIdentifier() const { return m_attributes[0].name; }
     const String& systemIdentifier() const { return m_attributes[0].value; }
     bool doctypeForcesQuirks() const { return m_doctypeForcesQuirks; }
 
@@ -78,7 +79,7 @@ private:
     unsigned m_isAll8BitData : 1;
     unsigned m_doctypeForcesQuirks: 1;
 
-    String m_data; // "name", "characters", or "data" depending on m_type
+    HTMLIdentifier m_data; // "name", "characters", or "data" depending on m_type
     Vector<Attribute> m_attributes;
     TextPosition m_textPosition;
 };
index b04ef5b..263c4ef 100644 (file)
@@ -34,6 +34,7 @@
 #include "DocumentLoader.h"
 #include "Element.h"
 #include "Frame.h"
+#include "HTMLIdentifier.h"
 #include "HTMLNames.h"
 #include "HTMLParserScheduler.h"
 #include "HTMLParserThread.h"
@@ -663,6 +664,8 @@ void HTMLDocumentParser::startBackgroundParser()
     ASSERT(!m_haveBackgroundParser);
     m_haveBackgroundParser = true;
 
+    HTMLIdentifier::init();
+
     RefPtr<WeakReference<BackgroundHTMLParser> > reference = WeakReference<BackgroundHTMLParser>::createUnbound();
     m_backgroundParser = WeakPtr<BackgroundHTMLParser>(reference);
 
diff --git a/Source/WebCore/html/parser/HTMLIdentifier.cpp b/Source/WebCore/html/parser/HTMLIdentifier.cpp
new file mode 100644 (file)
index 0000000..7a8ec6d
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2013 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#if ENABLE(THREADED_HTML_PARSER)
+
+#include "HTMLIdentifier.h"
+
+#include "HTMLNames.h"
+#include <wtf/HashMap.h>
+#include <wtf/MainThread.h>
+#include <wtf/text/StringHash.h>
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+typedef std::pair<unsigned, StringImpl*> IdentifierEntry;
+typedef HashMap<unsigned, IdentifierEntry, AlreadyHashed> IdentifierTable;
+
+unsigned HTMLIdentifier::maxNameLength = 0;
+
+// Accessor for the single shared table of well-known identifiers, which is
+// keyed by string hash. The assertion only tolerates a caller that is off
+// the main thread once the table has been populated.
+static IdentifierTable& identifierTable()
+{
+    DEFINE_STATIC_LOCAL(IdentifierTable, knownIdentifiers, ());
+    ASSERT(isMainThread() || !knownIdentifiers.isEmpty());
+    return knownIdentifiers;
+}
+
+#ifndef NDEBUG
+// Debug-only helper: reports whether the identifier table holds an entry
+// for the hash of |string|.
+bool HTMLIdentifier::hasIndex(const StringImpl* string)
+{
+    return identifierTable().contains(string->hash());
+}
+#endif
+
+// Looks up the |length| characters at |characters| among the names that
+// addNames() registered. Returns the index stored for the name, or
+// invalidIndex when the string is not one of the registered names.
+unsigned HTMLIdentifier::findIndex(const UChar* characters, unsigned length)
+{
+    // We don't need to try hashing if we know the string is too long.
+    if (length > maxNameLength)
+        return invalidIndex;
+    // computeHashAndMaskTop8Bits is the function StringImpl::hash() uses.
+    unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length);
+    const IdentifierTable& table = identifierTable();
+    ASSERT(!table.isEmpty());
+
+    IdentifierTable::const_iterator it = table.find(hash);
+    if (it == table.end())
+        return invalidIndex;
+    // A matching hash is not proof of a matching string: arbitrary input can
+    // collide with the hash of a registered name. Compare against the
+    // characters of the stored StringImpl so that a colliding string is never
+    // mistaken for a well-known name.
+    if (!equal(it->value.second, characters, length))
+        return invalidIndex;
+    return it->value.first;
+}
+
+const unsigned kHTMLNamesIndexOffset = 0;
+const unsigned kHTMLAttrsIndexOffset = 1000;
+COMPILE_ASSERT(kHTMLAttrsIndexOffset > HTMLTagsCount, kHTMLAttrsIndexOffset_should_be_larger_than_HTMLTagsCount);
+
+// Maps a table index back to the local name it was registered for. Indices
+// below kHTMLAttrsIndexOffset select from getHTMLTags(); all other indices
+// select from getHTMLAttrs().
+//
+// HTMLAttrs and HTMLNames may have collisions, but we shouldn't care which
+// one ended up stored, since their components are all AtomicStrings and
+// should use the same underlying StringImpl*.
+static const String& nameForIndex(unsigned index)
+{
+    if (index >= kHTMLAttrsIndexOffset) {
+        ASSERT(index < kHTMLAttrsIndexOffset + HTMLAttrsCount);
+        return getHTMLAttrs()[index - kHTMLAttrsIndexOffset]->localName().string();
+    }
+
+    ASSERT(index < kHTMLNamesIndexOffset + HTMLTagsCount);
+    return getHTMLTags()[index - kHTMLNamesIndexOffset]->localName().string();
+}
+
+// Returns the identifier as a String: the registered name when this
+// identifier carries a valid index, otherwise the string it stores itself.
+// Asserts that the caller is on the main thread.
+const String& HTMLIdentifier::asString() const
+{
+    ASSERT(isMainThread());
+    return m_index == invalidIndex ? m_string : nameForIndex(m_index);
+}
+
+// Returns the StringImpl behind the identifier: that of the registered name
+// when this identifier carries a valid index, otherwise that of m_string.
+const StringImpl* HTMLIdentifier::asStringImpl() const
+{
+    const String& name = m_index == invalidIndex ? m_string : nameForIndex(m_index);
+    return name.impl();
+}
+
+// Registers the first |namesCount| entries of |names| in the identifier
+// table, keyed by the hash of each local name. Entry i is stored with index
+// i + indexOffset. maxNameLength is raised to cover the longest name seen.
+void HTMLIdentifier::addNames(QualifiedName** names, unsigned namesCount, unsigned indexOffset)
+{
+    IdentifierTable& table = identifierTable();
+    for (unsigned i = 0; i < namesCount; ++i) {
+        StringImpl* name = names[i]->localName().impl();
+        unsigned hash = name->hash();
+        unsigned index = i + indexOffset;
+        IdentifierEntry entry(index, name);
+        IdentifierTable::AddResult addResult = table.add(hash, entry);
+        maxNameLength = std::max(maxNameLength, name->length());
+        // Ensure we're using the same hashing algorithm to get and set.
+        ASSERT_UNUSED(addResult, !addResult.isNewEntry || HTMLIdentifier::findIndex(name->characters(), name->length()) == index);
+        // We expect some hash collisions, but only for identical strings.
+        // Since all of these names are AtomicStrings, pointers should be equal.
+        // The check has to run when the hash was already present, because that
+        // is the collision case; for a freshly added entry it holds trivially.
+        // Note: If you hit this ASSERT, then we had a hash collision among
+        // HTMLNames strings, and we need to re-design how we use this hash!
+        ASSERT_UNUSED(addResult, name == addResult.iterator->value.second);
+    }
+}
+
+// Registers the HTML tag and attribute names once; every call after the
+// first returns without touching the table.
+void HTMLIdentifier::init()
+{
+    ASSERT(isMainThread()); // Not technically necessary, but this is our current expected usage.
+    static bool didInitialize = false;
+    if (!didInitialize) {
+        didInitialize = true;
+
+        // FIXME: We should atomize small whitespace (\n, \n\n, etc.)
+        addNames(getHTMLTags(), HTMLTagsCount, kHTMLNamesIndexOffset);
+        addNames(getHTMLAttrs(), HTMLAttrsCount, kHTMLAttrsIndexOffset);
+    }
+}
+
+}
+
+#endif // ENABLE(THREADED_HTML_PARSER)
diff --git a/Source/WebCore/html/parser/HTMLIdentifier.h b/Source/WebCore/html/parser/HTMLIdentifier.h
new file mode 100644 (file)
index 0000000..721f747
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2013 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLIdentifier_h
+#define HTMLIdentifier_h
+
+#if ENABLE(THREADED_HTML_PARSER)
+
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+class QualifiedName;
+
+enum CharacterWidth {
+    Likely8Bit, // HTMLIdentifier stores the text via StringImpl::create8BitIfPossible().
+    Force8Bit, // HTMLIdentifier stores the text via String::make8BitFrom16BitSource().
+    Force16Bit // HTMLIdentifier stores the text via String(vector).
+};
+
+// A name or chunk of text produced while tokenizing. It is held either as
+// the index of a well-known name registered by init() (no string is stored),
+// or, when m_index is invalidIndex, as a string of its own in m_string.
+class HTMLIdentifier {
+public:
+    // Creates an identifier backed by a default-constructed m_string.
+    // m_index has to be invalidIndex here: 0 is a real table index, so
+    // initializing it to 0 would make an empty identifier read back as
+    // whichever name was registered with index 0.
+    HTMLIdentifier() : m_index(invalidIndex) { }
+
+    // Builds an identifier from tokenizer characters. When the characters
+    // match a registered name only the index is kept; otherwise the text is
+    // copied into m_string using the representation selected by |width|.
+    template<size_t inlineCapacity>
+    HTMLIdentifier(const Vector<UChar, inlineCapacity>& vector, CharacterWidth width)
+        : m_index(findIndex(vector.data(), vector.size()))
+    {
+        if (m_index != invalidIndex)
+            return;
+        if (width == Likely8Bit)
+            m_string = StringImpl::create8BitIfPossible(vector);
+        else if (width == Force8Bit)
+            m_string = String::make8BitFrom16BitSource(vector);
+        else
+            m_string = String(vector);
+    }
+
+    // asString should only be used on the main thread.
+    const String& asString() const;
+    // asStringImpl() is safe to call from any thread.
+    const StringImpl* asStringImpl() const;
+
+    // Populates the table of well-known names.
+    static void init();
+
+    // Only m_string is consulted; an indexed identifier stores no string.
+    bool isSafeToSendToAnotherThread() const { return m_string.isSafeToSendToAnotherThread(); }
+
+#ifndef NDEBUG
+    static bool hasIndex(const StringImpl*);
+#endif
+
+private:
+    // Marks an identifier that is not a registered name.
+    static const unsigned invalidIndex = static_cast<unsigned>(-1);
+    // Length of the longest registered name; longer input is never hashed.
+    static unsigned maxNameLength;
+    static unsigned findIndex(const UChar* characters, unsigned length);
+    static void addNames(QualifiedName** names, unsigned namesCount, unsigned indexOffset);
+
+    // FIXME: This could be a union.
+    unsigned m_index;
+    String m_string;
+};
+
+}
+
+#endif // ENABLE(THREADED_HTML_PARSER)
+
+#endif
index 9447cfe..b77d58b 100644 (file)
@@ -26,6 +26,7 @@
 #include "HTMLParserIdioms.h"
 
 #include "Decimal.h"
+#include "HTMLIdentifier.h"
 #include "QualifiedName.h"
 #include <limits>
 #include <wtf/MathExtras.h>
@@ -277,8 +278,10 @@ bool parseHTMLNonNegativeInteger(const String& input, unsigned& value)
     return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
 }
 
-static bool threadSafeEqual(StringImpl* a, StringImpl* b)
+static bool threadSafeEqual(const StringImpl* a, const StringImpl* b)
 {
+    if (a == b)
+        return true;
     if (a->hash() != b->hash())
         return false;
     return StringHash::equal(a, b);
@@ -289,9 +292,11 @@ bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
     return threadSafeEqual(a.localName().impl(), b.localName().impl());
 }
 
-bool threadSafeMatch(const String& localName, const QualifiedName& qName)
+#if ENABLE(THREADED_HTML_PARSER)
+bool threadSafeMatch(const HTMLIdentifier& localName, const QualifiedName& qName)
 {
-    return threadSafeEqual(localName.impl(), qName.localName().impl());
+    return threadSafeEqual(localName.asStringImpl(), qName.localName().impl());
 }
+#endif
 
 }
index a031e45..bc58089 100644 (file)
@@ -25,6 +25,8 @@
 #ifndef HTMLParserIdioms_h
 #define HTMLParserIdioms_h
 
+#include "HTMLIdentifier.h"
+#include "QualifiedName.h"
 #include <wtf/Forward.h>
 #include <wtf/text/WTFString.h>
 #include <wtf/unicode/Unicode.h>
@@ -32,7 +34,6 @@
 namespace WebCore {
 
 class Decimal;
-class QualifiedName;
 
 // Space characters as defined by the HTML specification.
 bool isHTMLSpace(UChar);
@@ -93,7 +94,16 @@ inline bool isNotHTMLSpace(UChar character)
 }
 
 bool threadSafeMatch(const QualifiedName&, const QualifiedName&);
-bool threadSafeMatch(const String&, const QualifiedName&);
+#if ENABLE(THREADED_HTML_PARSER)
+bool threadSafeMatch(const HTMLIdentifier&, const QualifiedName&);
+inline bool threadSafeHTMLNamesMatch(const HTMLIdentifier& tagName, const QualifiedName& qName)
+{
+    // A QualifiedName that HTMLIdentifier knows about can be matched by
+    // comparing StringImpl pointers; the characters need not be examined.
+    const StringImpl* knownName = qName.localName().impl();
+    ASSERT(HTMLIdentifier::hasIndex(knownName));
+    return tagName.asStringImpl() == knownName;
+}
+#endif
 
 }
 
index a57c5d3..e011ea2 100644 (file)
@@ -63,24 +63,26 @@ TokenPreloadScanner::TagId TokenPreloadScanner::tagIdFor(const HTMLToken::DataVe
     return UnknownTagId;
 }
 
-TokenPreloadScanner::TagId TokenPreloadScanner::tagIdFor(const String& tagName)
+#if ENABLE(THREADED_HTML_PARSER)
+TokenPreloadScanner::TagId TokenPreloadScanner::tagIdFor(const HTMLIdentifier& tagName)
 {
-    if (threadSafeMatch(tagName, imgTag))
+    if (threadSafeHTMLNamesMatch(tagName, imgTag))
         return ImgTagId;
-    if (threadSafeMatch(tagName, inputTag))
+    if (threadSafeHTMLNamesMatch(tagName, inputTag))
         return InputTagId;
-    if (threadSafeMatch(tagName, linkTag))
+    if (threadSafeHTMLNamesMatch(tagName, linkTag))
         return LinkTagId;
-    if (threadSafeMatch(tagName, scriptTag))
+    if (threadSafeHTMLNamesMatch(tagName, scriptTag))
         return ScriptTagId;
-    if (threadSafeMatch(tagName, styleTag))
+    if (threadSafeHTMLNamesMatch(tagName, styleTag))
         return StyleTagId;
-    if (threadSafeMatch(tagName, baseTag))
+    if (threadSafeHTMLNamesMatch(tagName, baseTag))
         return BaseTagId;
-    if (threadSafeMatch(tagName, templateTag))
+    if (threadSafeHTMLNamesMatch(tagName, templateTag))
         return TemplateTagId;
     return UnknownTagId;
 }
+#endif
 
 String TokenPreloadScanner::initiatorFor(TagId tagId)
 {
@@ -147,28 +149,42 @@ public:
         return request.release();
     }
 
+// Name comparison for AtomicString names; asserts that it runs on the main
+// thread.
+static bool match(const AtomicString& name, const QualifiedName& qName)
+{
+    ASSERT(isMainThread());
+    const AtomicString& localName = qName.localName();
+    return localName == name;
+}
+
+#if ENABLE(THREADED_HTML_PARSER)
+// Name comparison for HTMLIdentifier names; forwards to
+// threadSafeHTMLNamesMatch().
+static bool match(const HTMLIdentifier& name, const QualifiedName& qName)
+{
+    return threadSafeHTMLNamesMatch(name, qName);
+}
+#endif
+
 private:
-    void processAttribute(const String& attributeName, const String& attributeValue)
+    template<typename NameType>
+    void processAttribute(const NameType& attributeName, const String& attributeValue)
     {
-        if (threadSafeMatch(attributeName, charsetAttr))
+        if (match(attributeName, charsetAttr))
             m_charset = attributeValue;
 
         if (m_tagId == ScriptTagId || m_tagId == ImgTagId) {
-            if (threadSafeMatch(attributeName, srcAttr))
+            if (match(attributeName, srcAttr))
                 setUrlToLoad(attributeValue);
-            else if (threadSafeMatch(attributeName, crossoriginAttr) && !attributeValue.isNull())
+            else if (match(attributeName, crossoriginAttr) && !attributeValue.isNull())
                 m_crossOriginMode = stripLeadingAndTrailingHTMLSpaces(attributeValue);
         } else if (m_tagId == LinkTagId) {
-            if (threadSafeMatch(attributeName, hrefAttr))
+            if (match(attributeName, hrefAttr))
                 setUrlToLoad(attributeValue);
-            else if (threadSafeMatch(attributeName, relAttr))
+            else if (match(attributeName, relAttr))
                 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
-            else if (threadSafeMatch(attributeName, mediaAttr))
+            else if (match(attributeName, mediaAttr))
                 m_linkMediaAttributeIsScreen = linkMediaAttributeIsScreen(attributeValue);
         } else if (m_tagId == InputTagId) {
-            if (threadSafeMatch(attributeName, srcAttr))
+            if (match(attributeName, srcAttr))
                 setUrlToLoad(attributeValue);
-            else if (threadSafeMatch(attributeName, typeAttr))
+            else if (match(attributeName, typeAttr))
                 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image());
         }
     }
index 47a5444..d2f2c11 100644 (file)
@@ -86,7 +86,7 @@ private:
     inline void scanCommon(const Token&, PreloadRequestStream& requests);
 
     static TagId tagIdFor(const HTMLToken::DataVector&);
-    static TagId tagIdFor(const String&);
+    static TagId tagIdFor(const HTMLIdentifier&);
 
     static String initiatorFor(TagId);