Make URL filters case-insensitive by default
author benjamin@webkit.org <benjamin@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 16 Jan 2015 21:01:00 +0000 (21:01 +0000)
committer benjamin@webkit.org <benjamin@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 16 Jan 2015 21:01:00 +0000 (21:01 +0000)
https://bugs.webkit.org/show_bug.cgi?id=140531

Reviewed by Andreas Kling.

Safari's filters were case-insensitive. Adopt that convention in the engine,
and add a flag in case someone ever needs a case-sensitive filter.

* contentextensions/ContentExtensionRule.h:
* contentextensions/ContentExtensionsBackend.cpp:
(WebCore::ContentExtensions::ContentExtensionsBackend::setRuleList):
* contentextensions/ContentExtensionsManager.cpp:
(WebCore::ContentExtensions::ExtensionsManager::loadTrigger):
* contentextensions/URLFilterParser.cpp:
(WebCore::ContentExtensions::trivialAtomFromASCIICharacter):
(WebCore::ContentExtensions::GraphBuilder::GraphBuilder):
(WebCore::ContentExtensions::GraphBuilder::atomPatternCharacter):
(WebCore::ContentExtensions::GraphBuilder::generateTransition):
(WebCore::ContentExtensions::URLFilterParser::addPattern):
* contentextensions/URLFilterParser.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@178593 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/contentextensions/ContentExtensionRule.h
Source/WebCore/contentextensions/ContentExtensionsBackend.cpp
Source/WebCore/contentextensions/ContentExtensionsManager.cpp
Source/WebCore/contentextensions/URLFilterParser.cpp
Source/WebCore/contentextensions/URLFilterParser.h

index 0aa7bc0..ed13085 100644 (file)
@@ -1,3 +1,26 @@
+2015-01-16  Benjamin Poulain  <benjamin@webkit.org>
+
+        Make URL filters case-insensitive by default
+        https://bugs.webkit.org/show_bug.cgi?id=140531
+
+        Reviewed by Andreas Kling.
+
+        Safari's filters were case-insensitive. Adopt that convention in the engine,
+        and add a flag in case someone ever needs a case-sensitive filter.
+
+        * contentextensions/ContentExtensionRule.h:
+        * contentextensions/ContentExtensionsBackend.cpp:
+        (WebCore::ContentExtensions::ContentExtensionsBackend::setRuleList):
+        * contentextensions/ContentExtensionsManager.cpp:
+        (WebCore::ContentExtensions::ExtensionsManager::loadTrigger):
+        * contentextensions/URLFilterParser.cpp:
+        (WebCore::ContentExtensions::trivialAtomFromASCIICharacter):
+        (WebCore::ContentExtensions::GraphBuilder::GraphBuilder):
+        (WebCore::ContentExtensions::GraphBuilder::atomPatternCharacter):
+        (WebCore::ContentExtensions::GraphBuilder::generateTransition):
+        (WebCore::ContentExtensions::URLFilterParser::addPattern):
+        * contentextensions/URLFilterParser.h:
+
 2015-01-16  Benjamin Poulain  <bpoulain@apple.com>
 
         Add the alternative syntax for CSS Selector's descendant combinator (">>")
index 98d3ecc..80b8c36 100644 (file)
@@ -46,6 +46,7 @@ class ContentExtensionRule {
 public:
     struct Trigger {
         String urlFilter;
+        bool urlFilterIsCaseSensitive = false;
     };
 
     struct Action {
index e032964..4906bdc 100644 (file)
@@ -70,7 +70,7 @@ void ContentExtensionsBackend::setRuleList(const String& identifier, const Vecto
         const ContentExtensionRule::Trigger& trigger = contentExtensionRule.trigger();
         ASSERT(trigger.urlFilter.length());
 
-        String error = urlFilterParser.addPattern(trigger.urlFilter, ruleIndex);
+        String error = urlFilterParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, ruleIndex);
 
         if (!error.isNull()) {
             dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), error.utf8().data());
index 4e4c83a..3ca10c2 100644 (file)
@@ -67,8 +67,12 @@ static bool loadTrigger(ExecState& exec, JSObject& ruleObject, ContentExtensionR
         WTFLogAlways("Invalid url-filter object. The url is empty.");
         return false;
     }
-
     trigger.urlFilter = urlFilter;
+
+    JSValue urlFilterCaseObject = triggerObject.get(&exec, Identifier(&exec, "url-filter-is-case-sensitive"));
+    if (urlFilterCaseObject && !exec.hadException() && urlFilterCaseObject.isBoolean())
+        trigger.urlFilterIsCaseSensitive = urlFilterCaseObject.toBoolean(&exec);
+
     return true;
 }
 
index 2b8f786..ea557cb 100644 (file)
@@ -38,12 +38,16 @@ namespace ContentExtensions {
 const uint16_t hasNonCharacterMask = 0x0080;
 const uint16_t characterMask = 0x0007F;
 const uint16_t newlineClassIDBuiltinMask = 0x100;
+const uint16_t caseInsensitiveMask = 0x200;
 
-static TrivialAtom trivialAtomFromASCIICharacter(char character)
+static TrivialAtom trivialAtomFromASCIICharacter(char character, bool caseSensitive)
 {
     ASSERT(isASCII(character));
 
-    return static_cast<uint16_t>(character);
+    if (caseSensitive || !isASCIIAlpha(character))
+        return static_cast<uint16_t>(character);
+
+    return static_cast<uint16_t>(toASCIILower(character)) | caseInsensitiveMask;
 }
 
 enum class TrivialAtomQuantifier : uint16_t {
@@ -71,8 +75,9 @@ private:
         unsigned end;
     };
 public:
-    GraphBuilder(NFA& nfa, PrefixTreeEntry& prefixTreeRoot, uint64_t patternId)
+    GraphBuilder(NFA& nfa, PrefixTreeEntry& prefixTreeRoot, bool patternIsCaseSensitive, uint64_t patternId)
         : m_nfa(nfa)
+        , m_patternIsCaseSensitive(patternIsCaseSensitive)
         , m_patternId(patternId)
         , m_activeGroup({ nfa.root(), nfa.root() })
         , m_lastPrefixTreeEntry(&prefixTreeRoot)
@@ -113,7 +118,7 @@ public:
         m_hasValidAtom = true;
 
         ASSERT(m_lastPrefixTreeEntry);
-        m_pendingTrivialAtom = trivialAtomFromASCIICharacter(asciiChararacter);
+        m_pendingTrivialAtom = trivialAtomFromASCIICharacter(asciiChararacter, m_patternIsCaseSensitive);
     }
 
     void atomBuiltInCharacterClass(JSC::Yarr::BuiltInCharacterClassID builtInCharacterClassID, bool inverted)
@@ -240,8 +245,14 @@ private:
             ASSERT(trivialAtom & newlineClassIDBuiltinMask);
             for (unsigned i = 1; i < 128; ++i)
                 m_nfa.addTransition(source, target, i);
-        } else
-            m_nfa.addTransition(source, target, static_cast<char>(trivialAtom & characterMask));
+        } else {
+            if (trivialAtom & caseInsensitiveMask) {
+                char character = static_cast<char>(trivialAtom & characterMask);
+                m_nfa.addTransition(source, target, character);
+                m_nfa.addTransition(source, target, toASCIIUpper(character));
+            } else
+                m_nfa.addTransition(source, target, static_cast<char>(trivialAtom & characterMask));
+        }
     }
 
     BoundedSubGraph sinkTrivialAtom(TrivialAtom trivialAtom, unsigned start)
@@ -330,6 +341,7 @@ private:
     }
 
     NFA& m_nfa;
+    bool m_patternIsCaseSensitive;
     const uint64_t m_patternId;
 
     BoundedSubGraph m_activeGroup;
@@ -350,7 +362,7 @@ URLFilterParser::URLFilterParser(NFA& nfa)
     m_prefixTreeRoot.nfaNode = nfa.root();
 }
 
-String URLFilterParser::addPattern(const String& pattern, uint64_t patternId)
+String URLFilterParser::addPattern(const String& pattern, bool patternIsCaseSensitive, uint64_t patternId)
 {
     if (!pattern.containsOnlyASCII())
         return ASCIILiteral("URLFilterParser only supports ASCII patterns.");
@@ -363,7 +375,7 @@ String URLFilterParser::addPattern(const String& pattern, uint64_t patternId)
 
     String error;
 
-    GraphBuilder graphBuilder(m_nfa, m_prefixTreeRoot, patternId);
+    GraphBuilder graphBuilder(m_nfa, m_prefixTreeRoot, patternIsCaseSensitive, patternId);
     error = String(JSC::Yarr::parse(graphBuilder, pattern, 0));
     if (error.isNull())
         graphBuilder.finalize();
index a66b630..5bfb3f1 100644 (file)
@@ -48,7 +48,7 @@ struct PrefixTreeEntry {
 class URLFilterParser {
 public:
     explicit URLFilterParser(NFA&);
-    String addPattern(const String& pattern, uint64_t patternId);
+    String addPattern(const String& pattern, bool patternIsCaseSensitive, uint64_t patternId);
 
 private:
     NFA& m_nfa;