Use less memory when compiling content extensions
[WebKit-https.git] / Source / WebCore / contentextensions / ContentExtensionCompiler.cpp
1 /*
2  * Copyright (C) 2015 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ContentExtensionCompiler.h"
28
29 #if ENABLE(CONTENT_EXTENSIONS)
30
31 #include "CombinedURLFilters.h"
32 #include "CompiledContentExtension.h"
33 #include "ContentExtensionActions.h"
34 #include "ContentExtensionError.h"
35 #include "ContentExtensionParser.h"
36 #include "ContentExtensionRule.h"
37 #include "ContentExtensionsDebugging.h"
38 #include "DFABytecodeCompiler.h"
39 #include "NFA.h"
40 #include "NFAToDFA.h"
41 #include "URLFilterParser.h"
42 #include <wtf/CurrentTime.h>
43 #include <wtf/DataLog.h>
44 #include <wtf/text/CString.h>
45 #include <wtf/text/StringBuilder.h>
46
47 namespace WebCore {
48 namespace ContentExtensions {
49
50 static void serializeSelector(Vector<SerializedActionByte>& actions, const String& selector)
51 {
52     // Append action type (1 byte).
53     actions.append(static_cast<SerializedActionByte>(ActionType::CSSDisplayNoneSelector));
54     // Append Selector length (4 bytes).
55     unsigned selectorLength = selector.length();
56     actions.resize(actions.size() + sizeof(unsigned));
57     *reinterpret_cast<unsigned*>(&actions[actions.size() - sizeof(unsigned)]) = selectorLength;
58     bool wideCharacters = !selector.is8Bit();
59     actions.append(wideCharacters);
60     // Append Selector.
61     if (wideCharacters) {
62         unsigned startIndex = actions.size();
63         actions.resize(actions.size() + sizeof(UChar) * selectorLength);
64         for (unsigned i = 0; i < selectorLength; ++i)
65             *reinterpret_cast<UChar*>(&actions[startIndex + i * sizeof(UChar)]) = selector[i];
66     } else {
67         for (unsigned i = 0; i < selectorLength; ++i)
68             actions.append(selector[i]);
69     }
70 }
71     
72 static Vector<unsigned> serializeActions(const Vector<ContentExtensionRule>& ruleList, Vector<SerializedActionByte>& actions)
73 {
74     ASSERT(!actions.size());
75     
76     Vector<unsigned> actionLocations;
77         
78     for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
79         const ContentExtensionRule& rule = ruleList[ruleIndex];
80         
81         // Consolidate css selectors with identical triggers.
82         if (rule.action().type() == ActionType::CSSDisplayNoneSelector) {
83             StringBuilder selector;
84             selector.append(rule.action().stringArgument());
85             actionLocations.append(actions.size());
86             for (unsigned i = ruleIndex + 1; i < ruleList.size(); i++) {
87                 if (rule.trigger() == ruleList[i].trigger() && ruleList[i].action().type() == ActionType::CSSDisplayNoneSelector) {
88                     actionLocations.append(actions.size());
89                     ruleIndex++;
90                     selector.append(',');
91                     selector.append(ruleList[i].action().stringArgument());
92                 } else
93                     break;
94             }
95             serializeSelector(actions, selector.toString());
96             continue;
97         }
98         
99         // Identical sequential actions should not be rewritten.
100         if (ruleIndex && rule.action() == ruleList[ruleIndex - 1].action()) {
101             actionLocations.append(actionLocations[ruleIndex - 1]);
102             continue;
103         }
104
105         actionLocations.append(actions.size());
106         switch (rule.action().type()) {
107         case ActionType::CSSDisplayNoneSelector:
108         case ActionType::CSSDisplayNoneStyleSheet:
109         case ActionType::InvalidAction:
110             RELEASE_ASSERT_NOT_REACHED();
111
112         case ActionType::BlockLoad:
113         case ActionType::BlockCookies:
114         case ActionType::IgnorePreviousRules:
115             actions.append(static_cast<SerializedActionByte>(rule.action().type()));
116             break;
117         }
118     }
119     return actionLocations;
120 }
121
122
123 std::error_code compileRuleList(ContentExtensionCompilationClient& client, const String& ruleList)
124 {
125     Vector<ContentExtensionRule> parsedRuleList;
126     auto parserError = parseRuleList(ruleList, parsedRuleList);
127     if (parserError)
128         return parserError;
129
130 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
131     double patternPartitioningStart = monotonicallyIncreasingTime();
132 #endif
133
134     Vector<SerializedActionByte> actions;
135     Vector<unsigned> actionLocations = serializeActions(parsedRuleList, actions);
136     client.writeActions(WTF::move(actions));
137     actions.clear();
138
139     HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> universalActionLocations;
140
141     CombinedURLFilters combinedURLFilters;
142     URLFilterParser urlFilterParser(combinedURLFilters);
143     bool ignorePreviousRulesSeen = false;
144     for (unsigned ruleIndex = 0; ruleIndex < parsedRuleList.size(); ++ruleIndex) {
145         const ContentExtensionRule& contentExtensionRule = parsedRuleList[ruleIndex];
146         const Trigger& trigger = contentExtensionRule.trigger();
147         ASSERT(trigger.urlFilter.length());
148
149         // High bits are used for flags. This should match how they are used in DFABytecodeCompiler::compileNode.
150         uint64_t actionLocationAndFlags = (static_cast<uint64_t>(trigger.flags) << 32) | static_cast<uint64_t>(actionLocations[ruleIndex]);
151         URLFilterParser::ParseStatus status = urlFilterParser.addPattern(trigger.urlFilter, trigger.urlFilterIsCaseSensitive, actionLocationAndFlags);
152
153         if (status == URLFilterParser::MatchesEverything) {
154             if (ignorePreviousRulesSeen)
155                 return ContentExtensionError::RegexMatchesEverythingAfterIgnorePreviousRules;
156             universalActionLocations.add(actionLocationAndFlags);
157         }
158
159         if (status != URLFilterParser::Ok && status != URLFilterParser::MatchesEverything) {
160             dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), URLFilterParser::statusString(status).utf8().data());
161             return ContentExtensionError::JSONInvalidRegex;
162         }
163         
164         if (contentExtensionRule.action().type() == ActionType::IgnorePreviousRules)
165             ignorePreviousRulesSeen = true;
166     }
167     parsedRuleList.clear();
168     actionLocations.clear();
169
170 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
171     double patternPartitioningEnd = monotonicallyIncreasingTime();
172     dataLogF("    Time spent partitioning the rules into groups: %f\n", (patternPartitioningEnd - patternPartitioningStart));
173 #endif
174
175 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
176     double nfaBuildTimeStart = monotonicallyIncreasingTime();
177 #endif
178
179     Vector<NFA> nfas = combinedURLFilters.createNFAs();
180     combinedURLFilters.clear();
181     if (!nfas.size() && universalActionLocations.size())
182         nfas.append(NFA());
183
184 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
185     double nfaBuildTimeEnd = monotonicallyIncreasingTime();
186     dataLogF("    Time spent building the NFAs: %f\n", (nfaBuildTimeEnd - nfaBuildTimeStart));
187 #endif
188
189 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
190     for (size_t i = 0; i < nfas.size(); ++i) {
191         WTFLogAlways("NFA %zu", i);
192         const NFA& nfa = nfas[i];
193         nfa.debugPrintDot();
194     }
195 #endif
196
197 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
198     double totalNFAToByteCodeBuildTimeStart = monotonicallyIncreasingTime();
199 #endif
200
201     Vector<DFABytecode> bytecode;
202     for (size_t i = 0; i < nfas.size(); ++i) {
203 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
204         double dfaBuildTimeStart = monotonicallyIncreasingTime();
205 #endif
206         DFA dfa = NFAToDFA::convert(nfas[i]);
207 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
208         double dfaBuildTimeEnd = monotonicallyIncreasingTime();
209         dataLogF("    Time spent building the DFA %zu: %f\n", i, (dfaBuildTimeEnd - dfaBuildTimeStart));
210 #endif
211
212 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
213         double dfaMinimizationTimeStart = monotonicallyIncreasingTime();
214 #endif
215         dfa.minimize();
216 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
217         double dfaMinimizationTimeEnd = monotonicallyIncreasingTime();
218         dataLogF("    Time spent miniminizing the DFA %zu: %f\n", i, (dfaMinimizationTimeEnd - dfaMinimizationTimeStart));
219 #endif
220
221 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
222         WTFLogAlways("DFA %zu", i);
223         dfa.debugPrintDot();
224 #endif
225
226         ASSERT_WITH_MESSAGE(!dfa.nodeAt(dfa.root()).actions.size(), "All actions on the DFA root should come from regular expressions that match everything.");
227         if (!i) {
228             // Put all the universal actions on the first DFA.
229             for (uint64_t actionLocation : universalActionLocations)
230                 dfa.nodeAt(dfa.root()).actions.append(actionLocation);
231         }
232         DFABytecodeCompiler compiler(dfa, bytecode);
233         compiler.compile();
234     }
235     universalActionLocations.clear();
236
237 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
238     double totalNFAToByteCodeBuildTimeEnd = monotonicallyIncreasingTime();
239     dataLogF("    Time spent building and compiling the DFAs: %f\n", (totalNFAToByteCodeBuildTimeEnd - totalNFAToByteCodeBuildTimeStart));
240     dataLogF("    Bytecode size %zu\n", bytecode.size());
241     dataLogF("    DFA count %zu\n", nfas.size());
242 #endif
243     nfas.clear();
244
245     client.writeBytecode(WTF::move(bytecode));
246
247     return { };
248 }
249
250 } // namespace ContentExtensions
251 } // namespace WebCore
252
253 #endif // ENABLE(CONTENT_EXTENSIONS)