+2015-01-16 Benjamin Poulain <benjamin@webkit.org>
+
+ Make URL filters case-insensitive by default
+ https://bugs.webkit.org/show_bug.cgi?id=140531
+
+ Reviewed by Andreas Kling.
+
+ Safari's filters were case-insensitive. Adopt that convention in the engine,
+ and add a flag in case someone ever needs a case-sensitive filter.
+
+ * contentextensions/ContentExtensionRule.h:
+ * contentextensions/ContentExtensionsBackend.cpp:
+ (WebCore::ContentExtensions::ContentExtensionsBackend::setRuleList):
+ * contentextensions/ContentExtensionsManager.cpp:
+ (WebCore::ContentExtensions::ExtensionsManager::loadTrigger):
+ * contentextensions/URLFilterParser.cpp:
+ (WebCore::ContentExtensions::trivialAtomFromASCIICharacter):
+ (WebCore::ContentExtensions::GraphBuilder::GraphBuilder):
+ (WebCore::ContentExtensions::GraphBuilder::atomPatternCharacter):
+ (WebCore::ContentExtensions::GraphBuilder::generateTransition):
+ (WebCore::ContentExtensions::URLFilterParser::addPattern):
+ * contentextensions/URLFilterParser.h:
+
2015-01-16 Benjamin Poulain <bpoulain@apple.com>
Add the alternative syntax for CSS Selector's descendant combinator (">>")
const uint16_t hasNonCharacterMask = 0x0080;
const uint16_t characterMask = 0x0007F;
const uint16_t newlineClassIDBuiltinMask = 0x100;
+const uint16_t caseInsensitiveMask = 0x200;
-static TrivialAtom trivialAtomFromASCIICharacter(char character)
+static TrivialAtom trivialAtomFromASCIICharacter(char character, bool caseSensitive)
{
ASSERT(isASCII(character));
- return static_cast<uint16_t>(character);
+ if (caseSensitive || !isASCIIAlpha(character))
+ return static_cast<uint16_t>(character);
+
+ return static_cast<uint16_t>(toASCIILower(character)) | caseInsensitiveMask;
}
enum class TrivialAtomQuantifier : uint16_t {
unsigned end;
};
public:
- GraphBuilder(NFA& nfa, PrefixTreeEntry& prefixTreeRoot, uint64_t patternId)
+ GraphBuilder(NFA& nfa, PrefixTreeEntry& prefixTreeRoot, bool patternIsCaseSensitive, uint64_t patternId)
: m_nfa(nfa)
+ , m_patternIsCaseSensitive(patternIsCaseSensitive)
, m_patternId(patternId)
, m_activeGroup({ nfa.root(), nfa.root() })
, m_lastPrefixTreeEntry(&prefixTreeRoot)
m_hasValidAtom = true;
ASSERT(m_lastPrefixTreeEntry);
- m_pendingTrivialAtom = trivialAtomFromASCIICharacter(asciiChararacter);
+ m_pendingTrivialAtom = trivialAtomFromASCIICharacter(asciiChararacter, m_patternIsCaseSensitive);
}
void atomBuiltInCharacterClass(JSC::Yarr::BuiltInCharacterClassID builtInCharacterClassID, bool inverted)
ASSERT(trivialAtom & newlineClassIDBuiltinMask);
for (unsigned i = 1; i < 128; ++i)
m_nfa.addTransition(source, target, i);
- } else
- m_nfa.addTransition(source, target, static_cast<char>(trivialAtom & characterMask));
+ } else {
+ if (trivialAtom & caseInsensitiveMask) {
+ char character = static_cast<char>(trivialAtom & characterMask);
+ m_nfa.addTransition(source, target, character);
+ m_nfa.addTransition(source, target, toASCIIUpper(character));
+ } else
+ m_nfa.addTransition(source, target, static_cast<char>(trivialAtom & characterMask));
+ }
}
BoundedSubGraph sinkTrivialAtom(TrivialAtom trivialAtom, unsigned start)
}
NFA& m_nfa;
+ bool m_patternIsCaseSensitive;
const uint64_t m_patternId;
BoundedSubGraph m_activeGroup;
m_prefixTreeRoot.nfaNode = nfa.root();
}
-String URLFilterParser::addPattern(const String& pattern, uint64_t patternId)
+String URLFilterParser::addPattern(const String& pattern, bool patternIsCaseSensitive, uint64_t patternId)
{
if (!pattern.containsOnlyASCII())
return ASCIILiteral("URLFilterParser only supports ASCII patterns.");
String error;
- GraphBuilder graphBuilder(m_nfa, m_prefixTreeRoot, patternId);
+ GraphBuilder graphBuilder(m_nfa, m_prefixTreeRoot, patternIsCaseSensitive, patternId);
error = String(JSC::Yarr::parse(graphBuilder, pattern, 0));
if (error.isNull())
graphBuilder.finalize();