479d3d7205c6f7d1a25f8e6de28962f9487d39c4
[WebKit-https.git] / Source / WebCore / contentextensions / CombinedURLFilters.cpp
1 /*
2  * Copyright (C) 2015 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "CombinedURLFilters.h"
28
29 #if ENABLE(CONTENT_EXTENSIONS)
30
31 #include "Term.h"
32 #include <wtf/Vector.h>
33
34 namespace WebCore {
35
36 namespace ContentExtensions {
37
38 typedef Vector<std::pair<Term, std::unique_ptr<PrefixTreeVertex>>, 0, WTF::CrashOnOverflow, 1> PrefixTreeEdges;
39
40 struct PrefixTreeVertex {
41     PrefixTreeEdges edges;
42     ActionList finalActions;
43     bool inVariableLengthPrefix { false };
44 };
45
46 static size_t recursiveMemoryUsed(const std::unique_ptr<PrefixTreeVertex>& node)
47 {
48     size_t size = sizeof(PrefixTreeVertex)
49         + node->edges.capacity() * sizeof(std::pair<Term, std::unique_ptr<PrefixTreeVertex>>)
50         + node->finalActions.capacity() * sizeof(uint64_t);
51     for (const auto& child : node->edges)
52         size += recursiveMemoryUsed(child.second);
53     return size;
54 }
55
56 size_t CombinedURLFilters::memoryUsed() const
57 {
58     return recursiveMemoryUsed(m_prefixTreeRoot);
59 }
60     
61 CombinedURLFilters::CombinedURLFilters()
62     : m_prefixTreeRoot(std::make_unique<PrefixTreeVertex>())
63 {
64 }
65
66 CombinedURLFilters::~CombinedURLFilters()
67 {
68 }
69
70 void CombinedURLFilters::clear()
71 {
72     m_prefixTreeRoot = std::make_unique<PrefixTreeVertex>();
73 }
74
75 void CombinedURLFilters::addPattern(uint64_t actionId, const Vector<Term>& pattern)
76 {
77     ASSERT_WITH_MESSAGE(!pattern.isEmpty(), "The parser should have excluded empty patterns before reaching CombinedURLFilters.");
78
79     if (pattern.isEmpty())
80         return;
81
82     Vector<PrefixTreeVertex*, 128> prefixTreeVerticesForPattern;
83     prefixTreeVerticesForPattern.reserveInitialCapacity(pattern.size() + 1);
84
85     // Extend the prefix tree with the new pattern.
86     bool hasNewTerm = false;
87     PrefixTreeVertex* lastPrefixTree = m_prefixTreeRoot.get();
88     prefixTreeVerticesForPattern.append(lastPrefixTree);
89
90     for (const Term& term : pattern) {
91         size_t nextEntryIndex = WTF::notFound;
92         for (size_t i = 0; i < lastPrefixTree->edges.size(); ++i) {
93             if (lastPrefixTree->edges[i].first == term) {
94                 nextEntryIndex = i;
95                 break;
96             }
97         }
98         if (nextEntryIndex != WTF::notFound)
99             lastPrefixTree = lastPrefixTree->edges[nextEntryIndex].second.get();
100         else {
101             hasNewTerm = true;
102
103             std::unique_ptr<PrefixTreeVertex> nextPrefixTreeVertex = std::make_unique<PrefixTreeVertex>();
104
105             ASSERT(lastPrefixTree->edges.find(std::make_pair(term, std::make_unique<PrefixTreeVertex>())) == WTF::notFound);
106             lastPrefixTree->edges.append(std::make_pair(term, WTF::move(nextPrefixTreeVertex)));
107             lastPrefixTree = lastPrefixTree->edges.last().second.get();
108         }
109         prefixTreeVerticesForPattern.append(lastPrefixTree);
110     }
111
112     ActionList& actions = prefixTreeVerticesForPattern.last()->finalActions;
113     if (actions.find(actionId) == WTF::notFound)
114         actions.append(actionId);
115
116     if (!hasNewTerm)
117         return;
118
119     bool hasSeenVariableLengthTerms = false;
120     for (unsigned i = pattern.size(); i--;) {
121         const Term& term = pattern[i];
122         hasSeenVariableLengthTerms |= !term.hasFixedLength();
123         prefixTreeVerticesForPattern[i + 1]->inVariableLengthPrefix |= hasSeenVariableLengthTerms;
124     }
125     prefixTreeVerticesForPattern[0]->inVariableLengthPrefix |= hasSeenVariableLengthTerms;
126 }
127
128 struct ActiveSubtree {
129     const PrefixTreeVertex* vertex;
130     PrefixTreeEdges::const_iterator iterator;
131 };
132
133 static void generateNFAForSubtree(NFA& nfa, unsigned rootId, const PrefixTreeVertex& prefixTreeVertex)
134 {
135     ASSERT_WITH_MESSAGE(!prefixTreeVertex.inVariableLengthPrefix, "This code assumes the subtrees with variable prefix length have already been handled.");
136
137     struct ActiveNFASubtree : ActiveSubtree {
138         ActiveNFASubtree(const PrefixTreeVertex* vertex, PrefixTreeEdges::const_iterator iterator, unsigned nodeIndex)
139             : ActiveSubtree({ vertex, iterator })
140             , lastNodeIndex(nodeIndex)
141         {
142         }
143         unsigned lastNodeIndex;
144     };
145
146     Vector<ActiveNFASubtree> activeStack;
147     activeStack.append(ActiveNFASubtree(&prefixTreeVertex, prefixTreeVertex.edges.begin(), rootId));
148
149     while (true) {
150     ProcessSubtree:
151         for (ActiveNFASubtree& activeSubtree = activeStack.last(); activeSubtree.iterator != activeSubtree.vertex->edges.end(); ++activeSubtree.iterator) {
152             if (activeSubtree.iterator->second->inVariableLengthPrefix)
153                 continue;
154
155             const Term& term = activeSubtree.iterator->first;
156             unsigned newEndNodeIndex = term.generateGraph(nfa, activeSubtree.lastNodeIndex, activeSubtree.iterator->second->finalActions);
157
158             PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator->second.get();
159             if (!prefixTreeVertex->edges.isEmpty()) {
160                 activeStack.append(ActiveNFASubtree(prefixTreeVertex, prefixTreeVertex->edges.begin(), newEndNodeIndex));
161                 goto ProcessSubtree;
162             }
163         }
164
165         activeStack.removeLast();
166         if (activeStack.isEmpty())
167             break;
168         ++activeStack.last().iterator;
169     }
170 }
171
172 void CombinedURLFilters::processNFAs(std::function<void(NFA&&)> handler) const
173 {
174     Vector<ActiveSubtree> activeStack;
175     activeStack.append(ActiveSubtree({ m_prefixTreeRoot.get(), m_prefixTreeRoot->edges.begin() }));
176
177     while (true) {
178     ProcessSubtree:
179         ActiveSubtree& activeSubtree = activeStack.last();
180
181         // We go depth first into the subtrees with variable prefix. Find the next subtree.
182         for (; activeSubtree.iterator != activeSubtree.vertex->edges.end(); ++activeSubtree.iterator) {
183             PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator->second.get();
184             if (prefixTreeVertex->inVariableLengthPrefix) {
185                 activeStack.append(ActiveSubtree({ prefixTreeVertex, prefixTreeVertex->edges.begin() }));
186                 goto ProcessSubtree;
187             }
188         }
189
190         // After we reached here, we know that all the subtrees with variable prefixes have been processed,
191         // time to generate the NFA for the graph rooted here.
192         bool needToGenerate = activeSubtree.vertex->edges.isEmpty() && !activeSubtree.vertex->finalActions.isEmpty();
193         if (!needToGenerate) {
194             for (const auto& edge : activeSubtree.vertex->edges) {
195                 if (!edge.second->inVariableLengthPrefix) {
196                     needToGenerate = true;
197                     break;
198                 }
199             }
200         }
201
202         if (needToGenerate) {
203             NFA nfa;
204
205             unsigned prefixEnd = nfa.root();
206
207             for (unsigned i = 0; i < activeStack.size() - 1; ++i) {
208                 const Term& term = activeStack[i].iterator->first;
209                 prefixEnd = term.generateGraph(nfa, prefixEnd, activeStack[i].iterator->second->finalActions);
210             }
211
212             for (const auto& edge : activeSubtree.vertex->edges) {
213                 if (!edge.second->inVariableLengthPrefix) {
214                     const Term& term = edge.first;
215                     unsigned newSubtreeStart = term.generateGraph(nfa, prefixEnd, edge.second->finalActions);
216                     generateNFAForSubtree(nfa, newSubtreeStart, *edge.second);
217                 }
218             }
219             
220             handler(WTF::move(nfa));
221         }
222
223         // We have processed all the subtrees of this level, pop the stack and move on to the next sibling.
224         activeStack.removeLast();
225         if (activeStack.isEmpty())
226             break;
227         ++activeStack.last().iterator;
228     }
229 }
230
231 } // namespace ContentExtensions
232 } // namespace WebCore
233
234 #endif // ENABLE(CONTENT_EXTENSIONS)