Optimize content extensions interpreting speed.
[WebKit-https.git] / Source / WebCore / contentextensions / ContentExtensionCompiler.cpp
1 /*
2  * Copyright (C) 2015 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ContentExtensionCompiler.h"
28
29 #if ENABLE(CONTENT_EXTENSIONS)
30
31 #include "CombinedURLFilters.h"
32 #include "CompiledContentExtension.h"
33 #include "ContentExtensionActions.h"
34 #include "ContentExtensionError.h"
35 #include "ContentExtensionParser.h"
36 #include "ContentExtensionRule.h"
37 #include "ContentExtensionsDebugging.h"
38 #include "DFABytecodeCompiler.h"
39 #include "NFA.h"
40 #include "NFAToDFA.h"
41 #include "URLFilterParser.h"
42 #include <wtf/CurrentTime.h>
43 #include <wtf/DataLog.h>
44 #include <wtf/text/CString.h>
45 #include <wtf/text/StringBuilder.h>
46
47 namespace WebCore {
48 namespace ContentExtensions {
49
50 static void serializeSelector(Vector<SerializedActionByte>& actions, const String& selector)
51 {
52     // Append action type (1 byte).
53     actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNoneSelector));
54     // Append Selector length (4 bytes).
55     unsigned selectorLength = selector.length();
56     actions.resize(actions.size() + sizeof(unsigned));
57     *reinterpret_cast<unsigned*>(&actions[actions.size() - sizeof(unsigned)]) = selectorLength;
58     bool wideCharacters = !selector.is8Bit();
59     actions.append(wideCharacters);
60     // Append Selector.
61     if (wideCharacters) {
62         unsigned startIndex = actions.size();
63         actions.resize(actions.size() + sizeof(UChar) * selectorLength);
64         for (unsigned i = 0; i < selectorLength; ++i)
65             *reinterpret_cast<UChar*>(&actions[startIndex + i * sizeof(UChar)]) = selector[i];
66     } else {
67         for (unsigned i = 0; i < selectorLength; ++i)
68             actions.append(selector[i]);
69     }
70 }
71     
72 static Vector<unsigned> serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions)
73 {
74     ASSERT(!actions.size());
75     
76     Vector<unsigned> actionLocations;
77         
78     for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
79         const ContentExtensionRule& rule = ruleList[ruleIndex];
80         
81         // Consolidate css selectors with identical triggers.
82         if (rule.action().type() == ActionType::CSSDisplayNoneSelector) {
83             StringBuilder selector;
84             selector.append(rule.action().stringArgument());
85             actionLocations.append(actions.size());
86             for (unsigned i = ruleIndex + 1; i < ruleList.size(); i++) {
87                 if (rule.trigger() == ruleList[i].trigger() && ruleList[i].action().type() == ActionType::CSSDisplayNoneSelector) {
88                     actionLocations.append(actions.size());
89                     ruleIndex++;
90                     selector.append(',');
91                     selector.append(ruleList[i].action().stringArgument());
92                 } else
93                     break;
94             }
95             serializeSelector(actions, selector.toString());
96             continue;
97         }
98         
99         // Identical sequential actions should not be rewritten.
100         if (ruleIndex && rule.action() == ruleList[ruleIndex - 1].action()) {
101             actionLocations.append(actionLocations[ruleIndex - 1]);
102             continue;
103         }
104
105         actionLocations.append(actions.size());
106         switch (rule.action().type()) {
107         case ActionType::CSSDisplayNoneSelector:
108         case ActionType::CSSDisplayNoneStyleSheet:
109         case ActionType::InvalidAction:
110             RELEASE_ASSERT_NOT_REACHED();
111
112         case ActionType::BlockLoad:
113         case ActionType::BlockCookies:
114         case ActionType::IgnorePreviousRules:
115             actions.append(static_cast<SerializedActionByte>(rule.action().type()));
116             break;
117         }
118     }
119     return actionLocations;
120 }
121
122
123 std::error_code compileRuleList(ContentExtensionCompilationClient& client, const String& ruleList)
124 {
125     Vector<ContentExtensionRule> parsedRuleList;
126     auto parserError = parseRuleList(ruleList, parsedRuleList);
127     if (parserError)
128         return parserError;
129
130 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
131     double patternPartitioningStart = monotonicallyIncreasingTime();
132 #endif
133
134     Vector<SerializedActionByte> actions;
135     Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions);
136     HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> universalActionLocations;
137
138     CombinedURLFilters combinedURLFilters;
139     URLFilterParser urlFilterParser(combinedURLFilters);
140     bool ignorePreviousRulesSeen = false;
141     for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) {
142         const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex];
143         const Trigger& trigger = contentExtensionRule.trigger();
144         ASSERT(trigger.urlFilter.length());
145
146         // High bits are used for flags. This should match how they are used in DFABytecodeCompiler::compileNode.
147         uint64_t actionLocationAndFlags = (static_cast<uint64_t>(trigger.flags) << 32) | static_cast<uint64_t>(actionLocations[ruleIndex]);
148         URLFilterParser::ParseStatus status = urlFilterParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
149
150         if (status == URLFilterParser::MatchesEverything) {
151             if (ignorePreviousRulesSeen)
152                 return ContentExtensionError::RegexMatchesEverythingAfterIgnorePreviousRules;
153             universalActionLocations.add(actionLocationAndFlags);
154         }
155
156         if (status != URLFilterParser::Ok && status != URLFilterParser::MatchesEverything) {
157             dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
158             return ContentExtensionError::JSONInvalidRegex;
159         }
160         
161         if (contentExtensionRule.action().type() == ActionType::IgnorePreviousRules)
162             ignorePreviousRulesSeen = true;
163     }
164
165 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
166     double patternPartitioningEnd = monotonicallyIncreasingTime();
167     dataLogF("    Time spent partitioning the rules into groups: %f\n", (patternPartitioningEnd - patternPartitioningStart));
168 #endif
169
170 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
171     double nfaBuildTimeStart = monotonicallyIncreasingTime();
172 #endif
173
174     Vector<NFA> nfas = combinedURLFilters.createNFAs();
175     if (!nfas.size() && universalActionLocations.size())
176         nfas.append(NFA());
177
178 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
179     double nfaBuildTimeEnd = monotonicallyIncreasingTime();
180     dataLogF("    Time spent building the NFAs: %f\n", (nfaBuildTimeEnd - nfaBuildTimeStart));
181 #endif
182
183 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
184     for (size_t i = 0; i < nfas.size(); ++i) {
185         WTFLogAlways("NFA %zu", i);
186         const NFA& nfa = nfas[i];
187         nfa.debugPrintDot();
188     }
189 #endif
190
191 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
192     double dfaBuildTimeStart = monotonicallyIncreasingTime();
193 #endif
194     Vector<DFABytecode> bytecode;
195     for (size_t i = 0; i < nfas.size(); ++i) {
196         DFA dfa = NFAToDFA::convert(nfas[i]);
197
198 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
199         WTFLogAlways("DFA %zu", i);
200         dfa.debugPrintDot();
201 #endif
202
203         ASSERT_WITH_MESSAGE(!dfa.nodeAt(dfa.root()).actions.size(), "All actions on the DFA root should come from regular expressions that match everything.");
204         if (!i) {
205             // Put all the universal actions on the first DFA.
206             for (uint64_t actionLocation : universalActionLocations)
207                 dfa.nodeAt(dfa.root()).actions.append(actionLocation);
208         }
209         DFABytecodeCompiler compiler(dfa, bytecode);
210         compiler.compile();
211     }
212
213 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
214     double dfaBuildTimeEnd = monotonicallyIncreasingTime();
215     dataLogF("    Time spent building and compiling the DFAs: %f\n", (dfaBuildTimeEnd - dfaBuildTimeStart));
216     dataLogF("    Bytecode size %zu\n", bytecode.size());
217     dataLogF("    DFA count %zu\n", nfas.size());
218 #endif
219
220     client.writeBytecode(WTF::move(bytecode));
221     client.writeActions(WTF::move(actions));
222
223     return { };
224 }
225
226 } // namespace ContentExtensions
227 } // namespace WebCore
228
229 #endif // ENABLE(CONTENT_EXTENSIONS)