[Content Extensions] Add CombinedURLFilters debugging code.
[WebKit-https.git] / Source / WebCore / contentextensions / CombinedURLFilters.cpp
1 /*
2  * Copyright (C) 2015 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "CombinedURLFilters.h"
28
29 #if ENABLE(CONTENT_EXTENSIONS)
30
31 #include "Term.h"
32 #include <wtf/DataLog.h>
33 #include <wtf/Vector.h>
34 #include <wtf/text/CString.h>
35
36 namespace WebCore {
37
38 namespace ContentExtensions {
39
40 struct PrefixTreeEdge {
41     Term term;
42     std::unique_ptr<PrefixTreeVertex> child;
43 };
44     
45 typedef Vector<PrefixTreeEdge, 0, WTF::CrashOnOverflow, 1> PrefixTreeEdges;
46
47 struct PrefixTreeVertex {
48     PrefixTreeEdges edges;
49     ActionList finalActions;
50     bool inVariableLengthPrefix { false };
51 };
52
53 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
54 static size_t recursiveMemoryUsed(const PrefixTreeVertex& vertex)
55 {
56     size_t size = sizeof(PrefixTreeVertex)
57         + vertex.edges.capacity() * sizeof(PrefixTreeEdge)
58         + vertex.finalActions.capacity() * sizeof(uint64_t);
59     for (const auto& edge : vertex.edges) {
60         ASSERT(edge.child);
61         size += recursiveMemoryUsed(*edge.child.get());
62     }
63     return size;
64 }
65
66 size_t CombinedURLFilters::memoryUsed() const
67 {
68     ASSERT(m_prefixTreeRoot);
69     return recursiveMemoryUsed(*m_prefixTreeRoot.get());
70 }
71 #endif
72     
73 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
74 static String prefixTreeVertexToString(const PrefixTreeVertex& vertex, unsigned depth)
75 {
76     StringBuilder builder;
77     while (depth--)
78         builder.append("  ");
79     builder.append("vertex actions: ");
80     for (auto action : vertex.finalActions) {
81         builder.appendNumber(action);
82         builder.append(',');
83     }
84     builder.append('\n');
85     return builder.toString();
86 }
87
88 static void recursivePrint(const PrefixTreeVertex& vertex, unsigned depth)
89 {
90     dataLogF("%s", prefixTreeVertexToString(vertex, depth).utf8().data());
91     for (const auto& edge : vertex.edges) {
92         StringBuilder builder;
93         for (unsigned i = 0; i < depth * 2; ++i)
94             builder.append(' ');
95         builder.append("vertex edge: ");
96         builder.append(edge.term.toString());
97         builder.append('\n');
98         dataLogF("%s", builder.toString().utf8().data());
99         ASSERT(edge.child);
100         recursivePrint(*edge.child.get(), depth + 1);
101     }
102 }
103
104 void CombinedURLFilters::print() const
105 {
106     recursivePrint(*m_prefixTreeRoot.get(), 0);
107 }
108 #endif
109
110 CombinedURLFilters::CombinedURLFilters()
111     : m_prefixTreeRoot(std::make_unique<PrefixTreeVertex>())
112 {
113 }
114
115 CombinedURLFilters::~CombinedURLFilters()
116 {
117 }
118
119 void CombinedURLFilters::clear()
120 {
121     m_prefixTreeRoot = std::make_unique<PrefixTreeVertex>();
122 }
123
124 void CombinedURLFilters::addPattern(uint64_t actionId, const Vector<Term>& pattern)
125 {
126     ASSERT_WITH_MESSAGE(!pattern.isEmpty(), "The parser should have excluded empty patterns before reaching CombinedURLFilters.");
127
128     if (pattern.isEmpty())
129         return;
130
131     Vector<PrefixTreeVertex*, 128> prefixTreeVerticesForPattern;
132     prefixTreeVerticesForPattern.reserveInitialCapacity(pattern.size() + 1);
133
134     // Extend the prefix tree with the new pattern.
135     bool hasNewTerm = false;
136     PrefixTreeVertex* lastPrefixTree = m_prefixTreeRoot.get();
137     prefixTreeVerticesForPattern.append(lastPrefixTree);
138
139     for (const Term& term : pattern) {
140         size_t nextEntryIndex = WTF::notFound;
141         for (size_t i = 0; i < lastPrefixTree->edges.size(); ++i) {
142             if (lastPrefixTree->edges[i].term == term) {
143                 nextEntryIndex = i;
144                 break;
145             }
146         }
147         if (nextEntryIndex != WTF::notFound)
148             lastPrefixTree = lastPrefixTree->edges[nextEntryIndex].child.get();
149         else {
150             hasNewTerm = true;
151
152             lastPrefixTree->edges.append(PrefixTreeEdge({term, std::make_unique<PrefixTreeVertex>()}));
153             lastPrefixTree = lastPrefixTree->edges.last().child.get();
154         }
155         prefixTreeVerticesForPattern.append(lastPrefixTree);
156     }
157
158     ActionList& actions = prefixTreeVerticesForPattern.last()->finalActions;
159     if (actions.find(actionId) == WTF::notFound)
160         actions.append(actionId);
161
162     if (!hasNewTerm)
163         return;
164
165     bool hasSeenVariableLengthTerms = false;
166     for (unsigned i = pattern.size(); i--;) {
167         const Term& term = pattern[i];
168         hasSeenVariableLengthTerms |= !term.hasFixedLength();
169         prefixTreeVerticesForPattern[i + 1]->inVariableLengthPrefix |= hasSeenVariableLengthTerms;
170     }
171     prefixTreeVerticesForPattern[0]->inVariableLengthPrefix |= hasSeenVariableLengthTerms;
172 }
173
174 struct ActiveSubtree {
175     const PrefixTreeVertex* vertex;
176     PrefixTreeEdges::const_iterator iterator;
177 };
178
179 static void generateNFAForSubtree(NFA& nfa, unsigned rootId, const PrefixTreeVertex& prefixTreeVertex)
180 {
181     ASSERT_WITH_MESSAGE(!prefixTreeVertex.inVariableLengthPrefix, "This code assumes the subtrees with variable prefix length have already been handled.");
182
183     struct ActiveNFASubtree : ActiveSubtree {
184         ActiveNFASubtree(const PrefixTreeVertex* vertex, PrefixTreeEdges::const_iterator iterator, unsigned nodeIndex)
185             : ActiveSubtree({ vertex, iterator })
186             , lastNodeIndex(nodeIndex)
187         {
188         }
189         unsigned lastNodeIndex;
190     };
191
192     Vector<ActiveNFASubtree> activeStack;
193     activeStack.append(ActiveNFASubtree(&prefixTreeVertex, prefixTreeVertex.edges.begin(), rootId));
194
195     while (true) {
196     ProcessSubtree:
197         for (ActiveNFASubtree& activeSubtree = activeStack.last(); activeSubtree.iterator != activeSubtree.vertex->edges.end(); ++activeSubtree.iterator) {
198             if (activeSubtree.iterator->child->inVariableLengthPrefix)
199                 continue;
200
201             const Term& term = activeSubtree.iterator->term;
202             unsigned newEndNodeIndex = term.generateGraph(nfa, activeSubtree.lastNodeIndex, activeSubtree.iterator->child->finalActions);
203
204             PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator->child.get();
205             if (!prefixTreeVertex->edges.isEmpty()) {
206                 activeStack.append(ActiveNFASubtree(prefixTreeVertex, prefixTreeVertex->edges.begin(), newEndNodeIndex));
207                 goto ProcessSubtree;
208             }
209         }
210
211         activeStack.removeLast();
212         if (activeStack.isEmpty())
213             break;
214         ++activeStack.last().iterator;
215     }
216 }
217
218 void CombinedURLFilters::processNFAs(std::function<void(NFA&&)> handler) const
219 {
220     Vector<ActiveSubtree> activeStack;
221     activeStack.append(ActiveSubtree({ m_prefixTreeRoot.get(), m_prefixTreeRoot->edges.begin() }));
222
223     while (true) {
224     ProcessSubtree:
225         ActiveSubtree& activeSubtree = activeStack.last();
226
227         // We go depth first into the subtrees with variable prefix. Find the next subtree.
228         for (; activeSubtree.iterator != activeSubtree.vertex->edges.end(); ++activeSubtree.iterator) {
229             PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator->child.get();
230             if (prefixTreeVertex->inVariableLengthPrefix) {
231                 activeStack.append(ActiveSubtree({ prefixTreeVertex, prefixTreeVertex->edges.begin() }));
232                 goto ProcessSubtree;
233             }
234         }
235
236         // After we reached here, we know that all the subtrees with variable prefixes have been processed,
237         // time to generate the NFA for the graph rooted here.
238         bool needToGenerate = activeSubtree.vertex->edges.isEmpty() && !activeSubtree.vertex->finalActions.isEmpty();
239         if (!needToGenerate) {
240             for (const auto& edge : activeSubtree.vertex->edges) {
241                 if (!edge.child->inVariableLengthPrefix) {
242                     needToGenerate = true;
243                     break;
244                 }
245             }
246         }
247
248         if (needToGenerate) {
249             NFA nfa;
250
251             unsigned prefixEnd = nfa.root();
252
253             for (unsigned i = 0; i < activeStack.size() - 1; ++i) {
254                 const Term& term = activeStack[i].iterator->term;
255                 prefixEnd = term.generateGraph(nfa, prefixEnd, activeStack[i].iterator->child->finalActions);
256             }
257
258             for (const auto& edge : activeSubtree.vertex->edges) {
259                 if (!edge.child->inVariableLengthPrefix) {
260                     unsigned newSubtreeStart = edge.term.generateGraph(nfa, prefixEnd, edge.child->finalActions);
261                     generateNFAForSubtree(nfa, newSubtreeStart, *edge.child);
262                 }
263             }
264             
265             handler(WTF::move(nfa));
266         }
267
268         // We have processed all the subtrees of this level, pop the stack and move on to the next sibling.
269         activeStack.removeLast();
270         if (activeStack.isEmpty())
271             break;
272         ++activeStack.last().iterator;
273     }
274 }
275
276 } // namespace ContentExtensions
277 } // namespace WebCore
278
279 #endif // ENABLE(CONTENT_EXTENSIONS)