060cef8abe926f35f35e690f6b74f1e19cd8662b
[WebKit-https.git] / Source / WebCore / contentextensions / ContentExtensionCompiler.cpp
1 /*
2  * Copyright (C) 2015 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ContentExtensionCompiler.h"
28
29 #if ENABLE(CONTENT_EXTENSIONS)
30
31 #include "CombinedURLFilters.h"
32 #include "CompiledContentExtension.h"
33 #include "ContentExtensionActions.h"
34 #include "ContentExtensionError.h"
35 #include "ContentExtensionParser.h"
36 #include "ContentExtensionRule.h"
37 #include "ContentExtensionsDebugging.h"
38 #include "DFABytecodeCompiler.h"
39 #include "NFA.h"
40 #include "NFAToDFA.h"
41 #include "URLFilterParser.h"
42 #include <wtf/CurrentTime.h>
43 #include <wtf/DataLog.h>
44 #include <wtf/text/CString.h>
45 #include <wtf/text/StringBuilder.h>
46
47 namespace WebCore {
48 namespace ContentExtensions {
49
50 static void serializeSelector(Vector<SerializedActionByte>& actions, const String& selector)
51 {
52     // Append action type (1 byte).
53     actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNoneSelector));
54     // Append Selector length (4 bytes).
55     unsigned selectorLength = selector.length();
56     actions.resize(actions.size() + sizeof(unsigned));
57     *reinterpret_cast<unsigned*>(&actions[actions.size() - sizeof(unsigned)]) = selectorLength;
58     bool wideCharacters = !selector.is8Bit();
59     actions.append(wideCharacters);
60     // Append Selector.
61     if (wideCharacters) {
62         unsigned startIndex = actions.size();
63         actions.resize(actions.size() + sizeof(UChar) * selectorLength);
64         for (unsigned i = 0; i < selectorLength; ++i)
65             *reinterpret_cast<UChar*>(&actions[startIndex + i * sizeof(UChar)]) = selector[i];
66     } else {
67         for (unsigned i = 0; i < selectorLength; ++i)
68             actions.append(selector[i]);
69     }
70 }
71     
72 static Vector<unsigned> serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions)
73 {
74     ASSERT(!actions.size());
75     
76     Vector<unsigned> actionLocations;
77         
78     for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
79         const ContentExtensionRule& rule = ruleList[ruleIndex];
80         
81         // Consolidate css selectors with identical triggers.
82         if (rule.action().type() == ActionType::CSSDisplayNoneSelector) {
83             StringBuilder selector;
84             selector.append(rule.action().stringArgument());
85             actionLocations.append(actions.size());
86             for (unsigned i = ruleIndex + 1; i < ruleList.size(); i++) {
87                 if (rule.trigger() == ruleList[i].trigger() && ruleList[i].action().type() == ActionType::CSSDisplayNoneSelector) {
88                     actionLocations.append(actions.size());
89                     ruleIndex++;
90                     selector.append(',');
91                     selector.append(ruleList[i].action().stringArgument());
92                 } else
93                     break;
94             }
95             serializeSelector(actions, selector.toString());
96             continue;
97         }
98         
99         // Identical sequential actions should not be rewritten.
100         if (ruleIndex && rule.action() == ruleList[ruleIndex - 1].action()) {
101             actionLocations.append(actionLocations[ruleIndex - 1]);
102             continue;
103         }
104
105         actionLocations.append(actions.size());
106         switch (rule.action().type()) {
107         case ActionType::CSSDisplayNoneSelector:
108         case ActionType::CSSDisplayNoneStyleSheet:
109         case ActionType::InvalidAction:
110             RELEASE_ASSERT_NOT_REACHED();
111
112         case ActionType::BlockLoad:
113         case ActionType::BlockCookies:
114         case ActionType::IgnorePreviousRules:
115             actions.append(static_cast<SerializedActionByte>(rule.action().type()));
116             break;
117         }
118     }
119     return actionLocations;
120 }
121
122 typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> UniversalActionLocationsSet;
123
124 static void addUniversalActionsToDFA(DFA& dfa, const UniversalActionLocationsSet& universalActionLocations)
125 {
126     if (universalActionLocations.isEmpty())
127         return;
128
129     unsigned actionsStart = dfa.actions.size();
130     dfa.actions.reserveCapacity(dfa.actions.size() + universalActionLocations.size());
131     for (uint64_t actionLocation : universalActionLocations)
132         dfa.actions.uncheckedAppend(actionLocation);
133     unsigned actionsEnd = dfa.actions.size();
134     unsigned actionsLength = actionsEnd - actionsStart;
135     RELEASE_ASSERT_WITH_MESSAGE(actionsLength < std::numeric_limits<uint16_t>::max(), "Too many uncombined actions that match everything");
136     dfa.nodes[dfa.root].setActions(actionsStart, static_cast<uint16_t>(actionsLength));
137 }
138
139 std::error_code compileRuleList(ContentExtensionCompilationClient& client, const String& ruleList)
140 {
141     Vector<ContentExtensionRule> parsedRuleList;
142     auto parserError = parseRuleList(ruleList, parsedRuleList);
143     if (parserError)
144         return parserError;
145
146 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
147     double patternPartitioningStart = monotonicallyIncreasingTime();
148 #endif
149
150     Vector<SerializedActionByte> actions;
151     Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions);
152     client.writeActions(WTF::move(actions));
153     LOG_LARGE_STRUCTURES(actions, actions.capacity() * sizeof(SerializedActionByte));
154     actions.clear();
155
156     UniversalActionLocationsSet universalActionLocations;
157
158     CombinedURLFilters combinedURLFilters;
159     URLFilterParser urlFilterParser(combinedURLFilters);
160     bool ignorePreviousRulesSeen = false;
161     for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) {
162         const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex];
163         const Trigger& trigger = contentExtensionRule.trigger();
164         ASSERT(trigger.urlFilter.length());
165
166         // High bits are used for flags. This should match how they are used in DFABytecodeCompiler::compileNode.
167         uint64_t actionLocationAndFlags = (static_cast<uint64_t>(trigger.flags) << 32) | static_cast<uint64_t>(actionLocations[ruleIndex]);
168         URLFilterParser::ParseStatus status = urlFilterParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
169
170         if (status == URLFilterParser::MatchesEverything) {
171             if (ignorePreviousRulesSeen)
172                 return ContentExtensionError::RegexMatchesEverythingAfterIgnorePreviousRules;
173             universalActionLocations.add(actionLocationAndFlags);
174         }
175
176         if (status != URLFilterParser::Ok && status != URLFilterParser::MatchesEverything) {
177             dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
178             return ContentExtensionError::JSONInvalidRegex;
179         }
180         
181         if (contentExtensionRule.action().type() == ActionType::IgnorePreviousRules)
182             ignorePreviousRulesSeen = true;
183     }
184     LOG_LARGE_STRUCTURES(parsedRuleList, parsedRuleList.capacity() * sizeof(ContentExtensionRule)); // Doesn't include strings.
185     LOG_LARGE_STRUCTURES(actionLocations, actionLocations.capacity() * sizeof(unsigned));
186     parsedRuleList.clear();
187     actionLocations.clear();
188
189 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
190     double patternPartitioningEnd = monotonicallyIncreasingTime();
191     dataLogF("    Time spent partitioning the rules into groups: %f\n", (patternPartitioningEnd - patternPartitioningStart));
192 #endif
193
194     LOG_LARGE_STRUCTURES(combinedURLFilters, combinedURLFilters.memoryUsed());
195
196 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
197     double totalNFAToByteCodeBuildTimeStart = monotonicallyIncreasingTime();
198 #endif
199
200     Vector<DFABytecode> bytecode;
201     bool firstNFASeen = false;
202     combinedURLFilters.processNFAs([&](NFA&& nfa) {
203 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
204         nfa.debugPrintDot();
205 #endif
206
207         LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed());
208
209 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
210         double dfaBuildTimeStart = monotonicallyIncreasingTime();
211 #endif
212         DFA dfa = NFAToDFA::convert(nfa);
213         LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
214
215 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
216         double dfaBuildTimeEnd = monotonicallyIncreasingTime();
217         dataLogF("    Time spent building the DFA %zu: %f\n", i, (dfaBuildTimeEnd - dfaBuildTimeStart));
218 #endif
219
220 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
221         double dfaMinimizationTimeStart = monotonicallyIncreasingTime();
222 #endif
223         dfa.minimize();
224 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
225         double dfaMinimizationTimeEnd = monotonicallyIncreasingTime();
226         dataLogF("    Time spent miniminizing the DFA %zu: %f\n", i, (dfaMinimizationTimeEnd - dfaMinimizationTimeStart));
227 #endif
228
229 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
230         WTFLogAlways("DFA %zu", i);
231         dfa.debugPrintDot();
232 #endif
233         ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "All actions on the DFA root should come from regular expressions that match everything.");
234
235         if (!firstNFASeen) {
236             // Put all the universal actions on the first DFA.
237             addUniversalActionsToDFA(dfa, universalActionLocations);
238         }
239
240         DFABytecodeCompiler compiler(dfa, bytecode);
241         compiler.compile();
242
243         firstNFASeen = true;
244     });
245
246     if (!firstNFASeen) {
247         // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any
248         // create a dummy one and add any universal actions.
249
250         NFA dummyNFA;
251         DFA dummyDFA = NFAToDFA::convert(dummyNFA);
252
253         addUniversalActionsToDFA(dummyDFA, universalActionLocations);
254
255         DFABytecodeCompiler compiler(dummyDFA, bytecode);
256         compiler.compile();
257     }
258
259     // FIXME: combinedURLFilters should be cleared incrementally as it is processing NFAs. 
260     combinedURLFilters.clear();
261
262     LOG_LARGE_STRUCTURES(universalActionLocations, universalActionLocations.capacity() * sizeof(unsigned));
263     universalActionLocations.clear();
264
265 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
266     double totalNFAToByteCodeBuildTimeEnd = monotonicallyIncreasingTime();
267     dataLogF("    Time spent building and compiling the DFAs: %f\n", (totalNFAToByteCodeBuildTimeEnd - totalNFAToByteCodeBuildTimeStart));
268     dataLogF("    Bytecode size %zu\n", bytecode.size());
269     dataLogF("    DFA count %zu\n", nfas.size());
270 #endif
271
272     LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
273     client.writeBytecode(WTF::move(bytecode));
274     bytecode.clear();
275
276     return { };
277 }
278
279 } // namespace ContentExtensions
280 } // namespace WebCore
281
282 #endif // ENABLE(CONTENT_EXTENSIONS)