Prepare to use CSS selectors in content extensions.
[WebKit-https.git] / Source / WebCore / contentextensions / ContentExtensionsBackend.cpp
1 /*
2  * Copyright (C) 2014 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ContentExtensionsBackend.h"
28
29 #if ENABLE(CONTENT_EXTENSIONS)
30
31 #include "ContentExtensionsDebugging.h"
32 #include "DFABytecodeCompiler.h"
33 #include "DFABytecodeInterpreter.h"
34 #include "NFA.h"
35 #include "NFAToDFA.h"
36 #include "URL.h"
37 #include "URLFilterParser.h"
38 #include <wtf/CurrentTime.h>
39 #include <wtf/DataLog.h>
40 #include <wtf/NeverDestroyed.h>
41 #include <wtf/text/CString.h>
42
43 namespace WebCore {
44
45 namespace ContentExtensions {
46     
47 Vector<unsigned> ContentExtensionsBackend::serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions)
48 {
49     ASSERT(!actions.size());
50     
51     Vector<unsigned> actionLocations;
52         
53     for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
54         const ContentExtensionRule& rule = ruleList[ruleIndex];
55         actionLocations.append(actions.size());
56         
57         switch (rule.action().type()) {
58         case ActionType::InvalidAction:
59             RELEASE_ASSERT_NOT_REACHED();
60
61         case ActionType::BlockLoad:
62         case ActionType::BlockCookies:
63         case ActionType::IgnorePreviousRules:
64             actions.append(static_cast<SerializedActionByte>(rule.action().type()));
65             break;
66
67         case ActionType::CSSDisplayNone: {
68             const String& selector = rule.action().cssSelector();
69             // Append action type (1 byte).
70             actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNone));
71             // Append Selector length (4 bytes).
72             unsigned selectorLength = selector.length();
73             actions.resize(actions.size() + sizeof(unsigned));
74             *reinterpret_cast<unsigned*>(&actions[actions.size() - sizeof(unsigned)]) = selectorLength;
75             bool wideCharacters = !selector.is8Bit();
76             actions.append(wideCharacters);
77             // Append Selector.
78             if (wideCharacters) {
79                 for (unsigned i = 0; i < selectorLength; i++) {
80                     actions.resize(actions.size() + sizeof(UChar));
81                     *reinterpret_cast<UChar*>(&actions[actions.size() - sizeof(UChar)]) = selector[i];
82                 }
83             } else {
84                 for (unsigned i = 0; i < selectorLength; i++)
85                     actions.append(selector[i]);
86             }
87             break;
88         }
89         }
90     }
91     return actionLocations;
92 }
93
94 void ContentExtensionsBackend::setRuleList(const String& identifier, const Vector<ContentExtensionRule>& ruleList)
95 {
96     ASSERT(!identifier.isEmpty());
97     if (identifier.isEmpty())
98         return;
99
100     if (ruleList.isEmpty()) {
101         removeRuleList(identifier);
102         return;
103     }
104
105 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
106     double nfaBuildTimeStart = monotonicallyIncreasingTime();
107 #endif
108
109     Vector<SerializedActionByte> actions;
110     Vector<unsigned> actionLocations = serializeActions(ruleList, actions);
111
112     NFA nfa;
113     URLFilterParser urlFilterParser(nfa);
114     for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
115         const ContentExtensionRule& contentExtensionRule = ruleList[ruleIndex];
116         const Trigger& trigger = contentExtensionRule.trigger();
117         ASSERT(trigger.urlFilter.length());
118
119         String error = urlFilterParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocations[ruleIndex]);
120
121         if (!error.isNull()) {
122             dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), error.utf8().data());
123             continue;
124         }
125     }
126
127 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
128     double nfaBuildTimeEnd = monotonicallyIncreasingTime();
129     dataLogF("    Time spent building the NFA: %f\n", (nfaBuildTimeEnd - nfaBuildTimeStart));
130 #endif
131
132 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
133     nfa.debugPrintDot();
134 #endif
135
136 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
137     double dfaBuildTimeStart = monotonicallyIncreasingTime();
138 #endif
139
140     const DFA dfa = NFAToDFA::convert(nfa);
141
142 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
143     double dfaBuildTimeEnd = monotonicallyIncreasingTime();
144     dataLogF("    Time spent building the DFA: %f\n", (dfaBuildTimeEnd - dfaBuildTimeStart));
145 #endif
146
147     // FIXME: never add a DFA that only matches the empty set.
148
149 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
150     dfa.debugPrintDot();
151 #endif
152
153     Vector<DFABytecode> bytecode;
154     DFABytecodeCompiler compiler(dfa, bytecode);
155     compiler.compile();
156     CompiledContentExtension compiledContentExtension = { bytecode, actions };
157     m_ruleLists.set(identifier, compiledContentExtension);
158 }
159
160 void ContentExtensionsBackend::removeRuleList(const String& identifier)
161 {
162     m_ruleLists.remove(identifier);
163 }
164
165 void ContentExtensionsBackend::removeAllRuleLists()
166 {
167     m_ruleLists.clear();
168 }
169
170 Vector<Action> ContentExtensionsBackend::actionsForURL(const URL& url)
171 {
172     const String& urlString = url.string();
173     ASSERT_WITH_MESSAGE(urlString.containsOnlyASCII(), "A decoded URL should only contain ASCII characters. The matching algorithm assumes the input is ASCII.");
174     const CString& urlCString = urlString.utf8();
175
176     Vector<Action> actions;
177     for (auto& ruleListSlot : m_ruleLists) {
178         const CompiledContentExtension& compiledContentExtension = ruleListSlot.value;
179         DFABytecodeInterpreter interpreter(compiledContentExtension.bytecode);
180         DFABytecodeInterpreter::Actions triggeredActions = interpreter.interpret(urlCString);
181         
182         if (!triggeredActions.isEmpty()) {
183             Vector<unsigned> actionLocations;
184             actionLocations.reserveInitialCapacity(triggeredActions.size());
185             for (auto actionLocation : triggeredActions)
186                 actionLocations.append(static_cast<unsigned>(actionLocation));
187             std::sort(actionLocations.begin(), actionLocations.end());
188             
189             // Add actions in reverse order to properly deal with IgnorePreviousRules.
190             for (unsigned i = actionLocations.size(); i; i--) {
191                 Action action = Action::deserialize(ruleListSlot.value.actions, actionLocations[i - 1]);
192                 if (action.type() == ActionType::IgnorePreviousRules)
193                     break;
194                 actions.append(action);
195                 if (action.type() == ActionType::BlockLoad)
196                     break;
197             }
198         }
199     }
200     return actions;
201 }
202
203 } // namespace ContentExtensions
204
205 } // namespace WebCore
206
207 #endif // ENABLE(CONTENT_EXTENSIONS)