[WTF] Move currentCPUTime and sleep(Seconds) to CPUTime.h and Seconds.h respectively
[WebKit-https.git] / Source / WebCore / contentextensions / ContentExtensionCompiler.cpp
1 /*
2  * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ContentExtensionCompiler.h"
28
29 #if ENABLE(CONTENT_EXTENSIONS)
30
31 #include "CombinedURLFilters.h"
32 #include "CompiledContentExtension.h"
33 #include "ContentExtensionActions.h"
34 #include "ContentExtensionError.h"
35 #include "ContentExtensionParser.h"
36 #include "ContentExtensionRule.h"
37 #include "ContentExtensionsDebugging.h"
38 #include "DFABytecodeCompiler.h"
39 #include "DFACombiner.h"
40 #include "NFA.h"
41 #include "NFAToDFA.h"
42 #include "URLFilterParser.h"
43 #include <wtf/DataLog.h>
44 #include <wtf/text/CString.h>
45 #include <wtf/text/StringBuilder.h>
46
47 namespace WebCore {
48 namespace ContentExtensions {
49
50 static void serializeString(Vector<SerializedActionByte>& actions, const String& string)
51 {
52     // Append Selector length (4 bytes).
53     uint32_t stringLength = string.length();
54     actions.grow(actions.size() + sizeof(uint32_t));
55     *reinterpret_cast<uint32_t*>(&actions[actions.size() - sizeof(uint32_t)]) = stringLength;
56     bool wideCharacters = !string.is8Bit();
57     actions.append(wideCharacters);
58     // Append Selector.
59     if (wideCharacters) {
60         uint32_t startIndex = actions.size();
61         actions.grow(actions.size() + sizeof(UChar) * stringLength);
62         for (uint32_t i = 0; i < stringLength; ++i)
63             *reinterpret_cast<UChar*>(&actions[startIndex + i * sizeof(UChar)]) = string[i];
64     } else {
65         for (uint32_t i = 0; i < stringLength; ++i)
66             actions.append(string[i]);
67     }
68 }
69
70 // css-display-none combining is special because we combine the string arguments with commas because we know they are css selectors.
71 struct PendingDisplayNoneActions {
72     StringBuilder combinedSelectors;
73     Vector<uint32_t> clientLocations;
74 };
75
76 using PendingDisplayNoneActionsMap = HashMap<Trigger, PendingDisplayNoneActions, TriggerHash, TriggerHashTraits>;
77
78 static void resolvePendingDisplayNoneActions(Vector<SerializedActionByte>& actions, Vector<uint32_t>& actionLocations, PendingDisplayNoneActionsMap& map)
79 {
80     for (auto& pendingDisplayNoneActions : map.values()) {
81         uint32_t actionLocation = actions.size();
82         actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNoneSelector));
83         serializeString(actions, pendingDisplayNoneActions.combinedSelectors.toString());
84         for (uint32_t clientLocation : pendingDisplayNoneActions.clientLocations)
85             actionLocations[clientLocation] = actionLocation;
86     }
87     map.clear();
88 }
89
90 static Vector<unsigned> serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions)
91 {
92     ASSERT(!actions.size());
93
94     Vector<uint32_t> actionLocations;
95
96     using ActionLocation = uint32_t;
97     using ActionMap = HashMap<ResourceFlags, ActionLocation, DefaultHash<ResourceFlags>::Hash, WTF::UnsignedWithZeroKeyHashTraits<ResourceFlags>>;
98     using StringActionMap = HashMap<std::pair<String, ResourceFlags>, ActionLocation, DefaultHash<std::pair<String, ResourceFlags>>::Hash, PairHashTraits<HashTraits<String>, WTF::UnsignedWithZeroKeyHashTraits<ResourceFlags>>>;
99     ActionMap blockLoadActionsMap;
100     ActionMap blockCookiesActionsMap;
101     PendingDisplayNoneActionsMap cssDisplayNoneActionsMap;
102     ActionMap ignorePreviousRuleActionsMap;
103     ActionMap makeHTTPSActionsMap;
104     StringActionMap notifyActionsMap;
105
106     for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
107         const ContentExtensionRule& rule = ruleList[ruleIndex];
108         ActionType actionType = rule.action().type();
109
110         if (actionType == ActionType::IgnorePreviousRules) {
111             resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap);
112
113             blockLoadActionsMap.clear();
114             blockCookiesActionsMap.clear();
115             cssDisplayNoneActionsMap.clear();
116             makeHTTPSActionsMap.clear();
117             notifyActionsMap.clear();
118         } else
119             ignorePreviousRuleActionsMap.clear();
120
121         // Anything with condition is just pushed.
122         // We could try to merge conditions but that case is not common in practice.
123         if (!rule.trigger().conditions.isEmpty()) {
124             actionLocations.append(actions.size());
125
126             actions.append(static_cast<SerializedActionByte>(actionType));
127             if (hasStringArgument(actionType))
128                 serializeString(actions, rule.action().stringArgument());
129             else
130                 ASSERT(rule.action().stringArgument().isNull());
131             continue;
132         }
133
134         ResourceFlags flags = rule.trigger().flags;
135         unsigned actionLocation = std::numeric_limits<unsigned>::max();
136         
137         auto findOrMakeActionLocation = [&] (ActionMap& map) {
138             const auto existingAction = map.find(flags);
139             if (existingAction == map.end()) {
140                 actionLocation = actions.size();
141                 actions.append(static_cast<SerializedActionByte>(actionType));
142                 map.set(flags, actionLocation);
143             } else
144                 actionLocation = existingAction->value;
145         };
146         
147         auto findOrMakeStringActionLocation = [&] (StringActionMap& map) {
148             const String& argument = rule.action().stringArgument();
149             auto existingAction = map.find(std::make_pair(argument, flags));
150             if (existingAction == map.end()) {
151                 actionLocation = actions.size();
152                 actions.append(static_cast<SerializedActionByte>(actionType));
153                 serializeString(actions, argument);
154                 map.set(std::make_pair(argument, flags), actionLocation);
155             } else
156                 actionLocation = existingAction->value;
157         };
158
159         switch (actionType) {
160         case ActionType::CSSDisplayNoneSelector: {
161             const auto addResult = cssDisplayNoneActionsMap.add(rule.trigger(), PendingDisplayNoneActions());
162             auto& pendingStringActions = addResult.iterator->value;
163             if (!pendingStringActions.combinedSelectors.isEmpty())
164                 pendingStringActions.combinedSelectors.append(',');
165             pendingStringActions.combinedSelectors.append(rule.action().stringArgument());
166             pendingStringActions.clientLocations.append(actionLocations.size());
167
168             actionLocation = std::numeric_limits<unsigned>::max();
169             break;
170         }
171         case ActionType::IgnorePreviousRules:
172             findOrMakeActionLocation(ignorePreviousRuleActionsMap);
173             break;
174         case ActionType::BlockLoad:
175             findOrMakeActionLocation(blockLoadActionsMap);
176             break;
177         case ActionType::BlockCookies:
178             findOrMakeActionLocation(blockCookiesActionsMap);
179             break;
180         case ActionType::MakeHTTPS:
181             findOrMakeActionLocation(makeHTTPSActionsMap);
182             break;
183         case ActionType::Notify:
184             findOrMakeStringActionLocation(notifyActionsMap);
185             break;
186         }
187
188         actionLocations.append(actionLocation);
189     }
190     resolvePendingDisplayNoneActions(actions, actionLocations, cssDisplayNoneActionsMap);
191     return actionLocations;
192 }
193
194 typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> UniversalActionSet;
195
196 static void addUniversalActionsToDFA(DFA& dfa, UniversalActionSet&& universalActions)
197 {
198     if (universalActions.isEmpty())
199         return;
200
201     DFANode& root = dfa.nodes[dfa.root];
202     ASSERT(!root.actionsLength());
203     unsigned actionsStart = dfa.actions.size();
204     dfa.actions.reserveCapacity(dfa.actions.size() + universalActions.size());
205     for (uint64_t action : universalActions)
206         dfa.actions.uncheckedAppend(action);
207     unsigned actionsEnd = dfa.actions.size();
208
209     unsigned actionsLength = actionsEnd - actionsStart;
210     RELEASE_ASSERT_WITH_MESSAGE(actionsLength < std::numeric_limits<uint16_t>::max(), "Too many uncombined actions that match everything");
211     root.setActions(actionsStart, static_cast<uint16_t>(actionsLength));
212 }
213
214 template<typename Functor>
215 static void compileToBytecode(CombinedURLFilters&& filters, UniversalActionSet&& universalActions, Functor writeBytecodeToClient)
216 {
217     // Smaller maxNFASizes risk high compiling and interpreting times from having too many DFAs,
218     // larger maxNFASizes use too much memory when compiling.
219     const unsigned maxNFASize = 75000;
220
221     bool firstNFASeen = false;
222
223     auto lowerDFAToBytecode = [&](DFA&& dfa)
224     {
225 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
226         dataLogF("DFA\n");
227         dfa.debugPrintDot();
228 #endif
229         ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "All actions on the DFA root should come from regular expressions that match everything.");
230
231         if (!firstNFASeen) {
232             // Put all the universal actions on the first DFA.
233             addUniversalActionsToDFA(dfa, WTFMove(universalActions));
234         }
235
236         Vector<DFABytecode> bytecode;
237         DFABytecodeCompiler compiler(dfa, bytecode);
238         compiler.compile();
239         LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
240         writeBytecodeToClient(WTFMove(bytecode));
241         firstNFASeen = true;
242     };
243
244     const unsigned smallDFASize = 100;
245     DFACombiner smallDFACombiner;
246     filters.processNFAs(maxNFASize, [&](NFA&& nfa) {
247 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
248         dataLogF("NFA\n");
249         nfa.debugPrintDot();
250 #endif
251         LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed());
252         DFA dfa = NFAToDFA::convert(nfa);
253         LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
254
255         if (dfa.graphSize() < smallDFASize)
256             smallDFACombiner.addDFA(WTFMove(dfa));
257         else {
258             dfa.minimize();
259             lowerDFAToBytecode(WTFMove(dfa));
260         }
261     });
262
263     smallDFACombiner.combineDFAs(smallDFASize, [&](DFA&& dfa) {
264         LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
265         lowerDFAToBytecode(WTFMove(dfa));
266     });
267
268     ASSERT(filters.isEmpty());
269
270     if (!firstNFASeen) {
271         // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any
272         // create a dummy one and add any universal actions.
273
274         DFA dummyDFA = DFA::empty();
275         addUniversalActionsToDFA(dummyDFA, WTFMove(universalActions));
276
277         Vector<DFABytecode> bytecode;
278         DFABytecodeCompiler compiler(dummyDFA, bytecode);
279         compiler.compile();
280         LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
281         writeBytecodeToClient(WTFMove(bytecode));
282     }
283     LOG_LARGE_STRUCTURES(universalActions, universalActions.capacity() * sizeof(unsigned));
284 }
285
286 std::error_code compileRuleList(ContentExtensionCompilationClient& client, String&& ruleJSON)
287 {
288     auto ruleList = parseRuleList(WTFMove(ruleJSON));
289     if (!ruleList.has_value())
290         return ruleList.error();
291     Vector<ContentExtensionRule> parsedRuleList = WTFMove(ruleList.value());
292
293     bool domainConditionSeen = false;
294     bool topURLConditionSeen = false;
295     for (const auto& rule : parsedRuleList) {
296         switch (rule.trigger().conditionType) {
297         case Trigger::ConditionType::None:
298             break;
299         case Trigger::ConditionType::IfDomain:
300         case Trigger::ConditionType::UnlessDomain:
301             domainConditionSeen = true;
302             break;
303         case Trigger::ConditionType::IfTopURL:
304         case Trigger::ConditionType::UnlessTopURL:
305             topURLConditionSeen = true;
306             break;
307         }
308     }
309     if (topURLConditionSeen && domainConditionSeen)
310         return ContentExtensionError::JSONTopURLAndDomainConditions;
311
312 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
313     MonotonicTime patternPartitioningStart = MonotonicTime::now();
314 #endif
315     
316     client.writeSource(ruleJSON);
317
318     Vector<SerializedActionByte> actions;
319     Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions);
320     LOG_LARGE_STRUCTURES(actions, actions.capacity() * sizeof(SerializedActionByte));
321     client.writeActions(WTFMove(actions), domainConditionSeen);
322
323     UniversalActionSet universalActionsWithoutConditions;
324     UniversalActionSet universalActionsWithConditions;
325     UniversalActionSet universalTopURLActions;
326
327     // FIXME: These don't all need to be in memory at the same time.
328     CombinedURLFilters filtersWithoutConditions;
329     CombinedURLFilters filtersWithConditions;
330     CombinedURLFilters topURLFilters;
331     URLFilterParser filtersWithoutConditionParser(filtersWithoutConditions);
332     URLFilterParser filtersWithConditionParser(filtersWithConditions);
333     URLFilterParser topURLFilterParser(topURLFilters);
334     
335     for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) {
336         const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex];
337         const Trigger& trigger = contentExtensionRule.trigger();
338         ASSERT(trigger.urlFilter.length());
339
340         // High bits are used for flags. This should match how they are used in DFABytecodeCompiler::compileNode.
341         ASSERT(!trigger.flags || ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32));
342         ASSERT(!(~ActionFlagMask & (static_cast<uint64_t>(trigger.flags) << 32)));
343         uint64_t actionLocationAndFlags = (static_cast<uint64_t>(trigger.flags) << 32) | static_cast<uint64_t>(actionLocations[ruleIndex]);
344         URLFilterParser::ParseStatus status = URLFilterParser::Ok;
345         if (trigger.conditions.isEmpty()) {
346             ASSERT(trigger.conditionType == Trigger::ConditionType::None);
347             status = filtersWithoutConditionParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
348             if (status == URLFilterParser::MatchesEverything) {
349                 universalActionsWithoutConditions.add(actionLocationAndFlags);
350                 status = URLFilterParser::Ok;
351             }
352             if (status != URLFilterParser::Ok) {
353                 dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
354                 return ContentExtensionError::JSONInvalidRegex;
355             }
356         } else {
357             switch (trigger.conditionType) {
358             case Trigger::ConditionType::IfDomain:
359             case Trigger::ConditionType::IfTopURL:
360                 actionLocationAndFlags |= IfConditionFlag;
361                 break;
362             case Trigger::ConditionType::None:
363             case Trigger::ConditionType::UnlessDomain:
364             case Trigger::ConditionType::UnlessTopURL:
365                 ASSERT(!(actionLocationAndFlags & IfConditionFlag));
366                 break;
367             }
368             
369             status = filtersWithConditionParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
370             if (status == URLFilterParser::MatchesEverything) {
371                 universalActionsWithConditions.add(actionLocationAndFlags);
372                 status = URLFilterParser::Ok;
373             }
374             if (status != URLFilterParser::Ok) {
375                 dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
376                 return ContentExtensionError::JSONInvalidRegex;
377             }
378             for (const String& condition : trigger.conditions) {
379                 if (domainConditionSeen) {
380                     ASSERT(!topURLConditionSeen);
381                     topURLFilters.addDomain(actionLocationAndFlags, condition);
382                 } else {
383                     ASSERT(topURLConditionSeen);
384                     status = topURLFilterParser.addPattern(condition, trigger.topURLConditionIsCaseSensitive, actionLocationAndFlags);
385                     if (status == URLFilterParser::MatchesEverything) {
386                         universalTopURLActions.add(actionLocationAndFlags);
387                         status = URLFilterParser::Ok;
388                     }
389                     if (status != URLFilterParser::Ok) {
390                         dataLogF("Error while parsing %s: %s\n", condition.utf8().data(), URLFilterParser::statusString(status).utf8().data());
391                         return ContentExtensionError::JSONInvalidRegex;
392                     }
393                 }
394             }
395         }
396         ASSERT(status == URLFilterParser::Ok);
397     }
398     LOG_LARGE_STRUCTURES(parsedRuleList, parsedRuleList.capacity() * sizeof(ContentExtensionRule)); // Doesn't include strings.
399     LOG_LARGE_STRUCTURES(actionLocations, actionLocations.capacity() * sizeof(unsigned));
400     parsedRuleList.clear();
401     actionLocations.clear();
402
403 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
404     MonotonicTime patternPartitioningEnd = MonotonicTime::now();
405     dataLogF("    Time spent partitioning the rules into groups: %f\n", (patternPartitioningEnd - patternPartitioningStart).seconds());
406 #endif
407
408     LOG_LARGE_STRUCTURES(filtersWithoutConditions, filtersWithoutConditions.memoryUsed());
409     LOG_LARGE_STRUCTURES(filtersWithConditions, filtersWithConditions.memoryUsed());
410     LOG_LARGE_STRUCTURES(topURLFilters, topURLFilters.memoryUsed());
411
412 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
413     MonotonicTime totalNFAToByteCodeBuildTimeStart = MonotonicTime::now();
414 #endif
415
416     compileToBytecode(WTFMove(filtersWithoutConditions), WTFMove(universalActionsWithoutConditions), [&](Vector<DFABytecode>&& bytecode) {
417         client.writeFiltersWithoutConditionsBytecode(WTFMove(bytecode));
418     });
419     compileToBytecode(WTFMove(filtersWithConditions), WTFMove(universalActionsWithConditions), [&](Vector<DFABytecode>&& bytecode) {
420         client.writeFiltersWithConditionsBytecode(WTFMove(bytecode));
421     });
422     compileToBytecode(WTFMove(topURLFilters), WTFMove(universalTopURLActions), [&](Vector<DFABytecode>&& bytecode) {
423         client.writeTopURLFiltersBytecode(WTFMove(bytecode));
424     });
425     
426 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
427     MonotonicTime totalNFAToByteCodeBuildTimeEnd = MonotonicTime::now();
428     dataLogF("    Time spent building and compiling the DFAs: %f\n", (totalNFAToByteCodeBuildTimeEnd - totalNFAToByteCodeBuildTimeStart).seconds());
429
430     dataLogF("    Number of machines without condition filters: %d (total bytecode size = %d)\n", machinesWithoutConditionsCount, totalBytecodeSizeForMachinesWithoutConditions);
431     dataLogF("    Number of machines with condition filters: %d (total bytecode size = %d)\n", machinesWithConditionsCount, totalBytecodeSizeForMachinesWithConditions);
432 #endif
433
434     client.finalize();
435
436     return { };
437 }
438
439 } // namespace ContentExtensions
440 } // namespace WebCore
441
442 #endif // ENABLE(CONTENT_EXTENSIONS)