Use less memory when compiling content extensions
[WebKit-https.git] / Source / WebCore / contentextensions / CombinedURLFilters.cpp
1 /*
2  * Copyright (C) 2015 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "CombinedURLFilters.h"
28
29 #if ENABLE(CONTENT_EXTENSIONS)
30
31 #include "Term.h"
32 #include <wtf/HashMap.h>
33
34 namespace WebCore {
35
36 namespace ContentExtensions {
37
38 typedef HashMap<Term, std::unique_ptr<PrefixTreeVertex>, TermHash, TermHashTraits> PrefixTreeEdges;
39
40 struct PrefixTreeVertex {
41     PrefixTreeEdges edges;
42     ActionSet finalActions;
43     bool inVariableLengthPrefix { false };
44 };
45
46 CombinedURLFilters::CombinedURLFilters()
47     : m_prefixTreeRoot(std::make_unique<PrefixTreeVertex>())
48 {
49 }
50
51 CombinedURLFilters::~CombinedURLFilters()
52 {
53 }
54
55 void CombinedURLFilters::clear()
56 {
57     m_prefixTreeRoot = std::make_unique<PrefixTreeVertex>();
58 }
59
60 void CombinedURLFilters::addPattern(uint64_t actionId, const Vector<Term>& pattern)
61 {
62     ASSERT_WITH_MESSAGE(!pattern.isEmpty(), "The parser should have excluded empty patterns before reaching CombinedURLFilters.");
63
64     if (pattern.isEmpty())
65         return;
66
67     Vector<PrefixTreeVertex*, 128> prefixTreeVerticesForPattern;
68     prefixTreeVerticesForPattern.reserveInitialCapacity(pattern.size() + 1);
69
70     // Extend the prefix tree with the new pattern.
71     bool hasNewTerm = false;
72     PrefixTreeVertex* lastPrefixTree = m_prefixTreeRoot.get();
73     prefixTreeVerticesForPattern.append(lastPrefixTree);
74
75     for (const Term& term : pattern) {
76         auto nextEntry = lastPrefixTree->edges.find(term);
77         if (nextEntry != lastPrefixTree->edges.end())
78             lastPrefixTree = nextEntry->value.get();
79         else {
80             hasNewTerm = true;
81
82             std::unique_ptr<PrefixTreeVertex> nextPrefixTreeVertex = std::make_unique<PrefixTreeVertex>();
83
84             auto addResult = lastPrefixTree->edges.set(term, WTF::move(nextPrefixTreeVertex));
85             ASSERT(addResult.isNewEntry);
86
87             lastPrefixTree = addResult.iterator->value.get();
88         }
89         prefixTreeVerticesForPattern.append(lastPrefixTree);
90     }
91
92     prefixTreeVerticesForPattern.last()->finalActions.add(actionId);
93
94     if (!hasNewTerm)
95         return;
96
97     bool hasSeenVariableLengthTerms = false;
98     for (unsigned i = pattern.size(); i--;) {
99         const Term& term = pattern[i];
100         hasSeenVariableLengthTerms |= !term.hasFixedLength();
101         prefixTreeVerticesForPattern[i + 1]->inVariableLengthPrefix |= hasSeenVariableLengthTerms;
102     }
103     prefixTreeVerticesForPattern[0]->inVariableLengthPrefix |= hasSeenVariableLengthTerms;
104 }
105
106 struct ActiveSubtree {
107     const PrefixTreeVertex* vertex;
108     PrefixTreeEdges::const_iterator iterator;
109 };
110
111 static void generateNFAForSubtree(NFA& nfa, unsigned rootId, const PrefixTreeVertex& prefixTreeVertex)
112 {
113     ASSERT_WITH_MESSAGE(!prefixTreeVertex.inVariableLengthPrefix, "This code assumes the subtrees with variable prefix length have already been handled.");
114
115     struct ActiveNFASubtree : ActiveSubtree {
116         ActiveNFASubtree(const PrefixTreeVertex* vertex, PrefixTreeEdges::const_iterator iterator, unsigned nodeIndex)
117             : ActiveSubtree({ vertex, iterator })
118             , lastNodeIndex(nodeIndex)
119         {
120         }
121         unsigned lastNodeIndex;
122     };
123
124     Vector<ActiveNFASubtree> activeStack;
125     activeStack.append(ActiveNFASubtree(&prefixTreeVertex, prefixTreeVertex.edges.begin(), rootId));
126
127     while (true) {
128     ProcessSubtree:
129         for (ActiveNFASubtree& activeSubtree = activeStack.last(); activeSubtree.iterator != activeSubtree.vertex->edges.end(); ++activeSubtree.iterator) {
130             if (activeSubtree.iterator->value->inVariableLengthPrefix)
131                 continue;
132
133             const Term& term = activeSubtree.iterator->key;
134             unsigned newEndNodeIndex = term.generateGraph(nfa, activeSubtree.lastNodeIndex, activeSubtree.iterator->value->finalActions);
135
136             PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator->value.get();
137             if (!prefixTreeVertex->edges.isEmpty()) {
138                 activeStack.append(ActiveNFASubtree(prefixTreeVertex, prefixTreeVertex->edges.begin(), newEndNodeIndex));
139                 goto ProcessSubtree;
140             }
141         }
142
143         activeStack.removeLast();
144         if (activeStack.isEmpty())
145             break;
146         ++activeStack.last().iterator;
147     }
148 }
149
150 Vector<NFA> CombinedURLFilters::createNFAs() const
151 {
152     Vector<ActiveSubtree> activeStack;
153     activeStack.append(ActiveSubtree({ m_prefixTreeRoot.get(), m_prefixTreeRoot->edges.begin() }));
154
155     Vector<NFA> nfas;
156
157     while (true) {
158     ProcessSubtree:
159         ActiveSubtree& activeSubtree = activeStack.last();
160
161         // We go depth first into the subtrees with variable prefix. Find the next subtree.
162         for (; activeSubtree.iterator != activeSubtree.vertex->edges.end(); ++activeSubtree.iterator) {
163             PrefixTreeVertex* prefixTreeVertex = activeSubtree.iterator->value.get();
164             if (prefixTreeVertex->inVariableLengthPrefix) {
165                 activeStack.append(ActiveSubtree({ prefixTreeVertex, prefixTreeVertex->edges.begin() }));
166                 goto ProcessSubtree;
167             }
168         }
169
170         // After we reached here, we know that all the subtrees with variable prefixes have been processed,
171         // time to generate the NFA for the graph rooted here.
172         bool needToGenerate = activeSubtree.vertex->edges.isEmpty() && !activeSubtree.vertex->finalActions.isEmpty();
173         if (!needToGenerate) {
174             for (const auto& edge : activeSubtree.vertex->edges) {
175                 if (!edge.value->inVariableLengthPrefix) {
176                     needToGenerate = true;
177                     break;
178                 }
179             }
180         }
181
182         if (needToGenerate) {
183             nfas.append(NFA());
184             NFA& generatingNFA = nfas.last();
185
186             unsigned prefixEnd = generatingNFA.root();
187
188             for (unsigned i = 0; i < activeStack.size() - 1; ++i) {
189                 const Term& term = activeStack[i].iterator->key;
190                 prefixEnd = term.generateGraph(generatingNFA, prefixEnd, activeStack[i].iterator->value->finalActions);
191             }
192
193             for (const auto& edge : activeSubtree.vertex->edges) {
194                 if (!edge.value->inVariableLengthPrefix) {
195                     const Term& term = edge.key;
196                     unsigned newSubtreeStart = term.generateGraph(generatingNFA, prefixEnd, edge.value->finalActions);
197                     generateNFAForSubtree(generatingNFA, newSubtreeStart, *edge.value);
198                 }
199             }
200         }
201
202         // We have processed all the subtrees of this level, pop the stack and move on to the next sibling.
203         activeStack.removeLast();
204         if (activeStack.isEmpty())
205             break;
206         ++activeStack.last().iterator;
207     }
208
209     return nfas;
210 }
211
212 } // namespace ContentExtensions
213 } // namespace WebCore
214
215 #endif // ENABLE(CONTENT_EXTENSIONS)