2 * Copyright (C) 2015 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
27 #include "CombinedURLFilters.h"
29 #if ENABLE(CONTENT_EXTENSIONS)
32 #include <wtf/DataLog.h>
33 #include <wtf/Vector.h>
34 #include <wtf/text/CString.h>
38 namespace ContentExtensions {
40 struct PrefixTreeEdge {
42 std::unique_ptr<PrefixTreeVertex> child;
45 typedef Vector<PrefixTreeEdge, 0, WTF::CrashOnOverflow, 1> PrefixTreeEdges;
47 struct PrefixTreeVertex {
48 PrefixTreeEdges edges;
49 ActionList finalActions;
52 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
53 static size_t recursiveMemoryUsed(const PrefixTreeVertex& vertex)
55 size_t size = sizeof(PrefixTreeVertex)
56 + vertex.edges.capacity() * sizeof(PrefixTreeEdge)
57 + vertex.finalActions.capacity() * sizeof(uint64_t);
58 for (const auto& edge : vertex.edges) {
60 size += recursiveMemoryUsed(*edge.child.get());
65 size_t CombinedURLFilters::memoryUsed() const
67 ASSERT(m_prefixTreeRoot);
68 return recursiveMemoryUsed(*m_prefixTreeRoot.get());
72 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
73 static String prefixTreeVertexToString(const PrefixTreeVertex& vertex, unsigned depth)
75 StringBuilder builder;
78 builder.append("vertex actions: ");
79 for (auto action : vertex.finalActions) {
80 builder.appendNumber(action);
84 return builder.toString();
87 static void recursivePrint(const PrefixTreeVertex& vertex, unsigned depth)
89 dataLogF("%s", prefixTreeVertexToString(vertex, depth).utf8().data());
90 for (const auto& edge : vertex.edges) {
91 StringBuilder builder;
92 for (unsigned i = 0; i < depth * 2; ++i)
94 builder.append("vertex edge: ");
95 builder.append(edge.term.toString());
97 dataLogF("%s", builder.toString().utf8().data());
99 recursivePrint(*edge.child.get(), depth + 1);
103 void CombinedURLFilters::print() const
105 recursivePrint(*m_prefixTreeRoot.get(), 0);
109 CombinedURLFilters::CombinedURLFilters()
110 : m_prefixTreeRoot(std::make_unique<PrefixTreeVertex>())
114 CombinedURLFilters::~CombinedURLFilters()
118 bool CombinedURLFilters::isEmpty() const
120 return m_prefixTreeRoot->edges.isEmpty();
123 void CombinedURLFilters::addDomain(uint64_t actionId, const String& domain)
125 unsigned domainLength = domain.length();
126 if (domainLength && domain[0] == '*') {
127 // If domain starts with a '*' then it means match domain and its subdomains, like (^|.)domain$
128 // This way a domain of "*webkit.org" will match "bugs.webkit.org" and "webkit.org".
129 Vector<Term> prependDot;
130 Vector<Term> prependBeginningOfLine;
131 prependDot.reserveInitialCapacity(domainLength + 2);
132 prependBeginningOfLine.reserveInitialCapacity(domainLength); // This is just no .* at the beginning.
134 Term canonicalDotStar(Term::UniversalTransition);
135 canonicalDotStar.quantify(AtomQuantifier::ZeroOrMore);
136 prependDot.uncheckedAppend(canonicalDotStar);
137 prependDot.uncheckedAppend(Term('.', true));
139 for (unsigned i = 1; i < domainLength; i++) {
140 ASSERT(isASCII(domain[i]));
141 ASSERT(!isASCIIUpper(domain[i]));
142 prependDot.uncheckedAppend(Term(domain[i], true));
143 prependBeginningOfLine.uncheckedAppend(Term(domain[i], true));
145 prependDot.uncheckedAppend(Term::EndOfLineAssertionTerm);
146 prependBeginningOfLine.uncheckedAppend(Term::EndOfLineAssertionTerm);
148 addPattern(actionId, prependDot);
149 addPattern(actionId, prependBeginningOfLine);
151 // This is like adding ^domain$, but interpreting domain as a series of characters, not a regular expression.
152 // "webkit.org" will match "webkit.org" but not "bugs.webkit.org".
153 Vector<Term> prependBeginningOfLine;
154 prependBeginningOfLine.reserveInitialCapacity(domainLength + 1); // This is just no .* at the beginning.
155 for (unsigned i = 0; i < domainLength; i++) {
156 ASSERT(isASCII(domain[i]));
157 ASSERT(!isASCIIUpper(domain[i]));
158 prependBeginningOfLine.uncheckedAppend(Term(domain[i], true));
160 prependBeginningOfLine.uncheckedAppend(Term::EndOfLineAssertionTerm);
161 addPattern(actionId, prependBeginningOfLine);
165 void CombinedURLFilters::addPattern(uint64_t actionId, const Vector<Term>& pattern)
167 ASSERT_WITH_MESSAGE(!pattern.isEmpty(), "The parser should have excluded empty patterns before reaching CombinedURLFilters.");
169 if (pattern.isEmpty())
172 // Extend the prefix tree with the new pattern.
173 PrefixTreeVertex* lastPrefixTree = m_prefixTreeRoot.get();
175 for (const Term& term : pattern) {
176 size_t nextEntryIndex = WTF::notFound;
177 for (size_t i = 0; i < lastPrefixTree->edges.size(); ++i) {
178 if (lastPrefixTree->edges[i].term == term) {
183 if (nextEntryIndex != WTF::notFound)
184 lastPrefixTree = lastPrefixTree->edges[nextEntryIndex].child.get();
186 lastPrefixTree->edges.append(PrefixTreeEdge({term, std::make_unique<PrefixTreeVertex>()}));
187 lastPrefixTree = lastPrefixTree->edges.last().child.get();
191 ActionList& actions = lastPrefixTree->finalActions;
192 if (actions.find(actionId) == WTF::notFound)
193 actions.append(actionId);
196 static void generateNFAForSubtree(NFA& nfa, unsigned nfaRootId, PrefixTreeVertex& root, size_t maxNFASize)
198 // This recurses the subtree of the prefix tree.
199 // For each edge that has fixed length (no quantifiers like ?, *, or +) it generates the nfa graph,
200 // recurses into children, and deletes any processed leaf nodes.
201 struct ActiveSubtree {
202 ActiveSubtree(PrefixTreeVertex& vertex, unsigned nfaNodeId, unsigned edgeIndex)
204 , nfaNodeId(nfaNodeId)
205 , edgeIndex(edgeIndex)
208 PrefixTreeVertex& vertex;
212 Vector<ActiveSubtree> stack;
213 if (!root.edges.isEmpty())
214 stack.append(ActiveSubtree(root, nfaRootId, 0));
215 bool nfaTooBig = false;
217 // Generate graphs for each subtree that does not contain any quantifiers.
218 while (!stack.isEmpty()) {
219 PrefixTreeVertex& vertex = stack.last().vertex;
220 const unsigned edgeIndex = stack.last().edgeIndex;
222 // Only stop generating an NFA at a leaf to ensure we have a correct NFA. We could go slightly over the maxNFASize.
223 if (vertex.edges.isEmpty() && nfa.graphSize() > maxNFASize)
226 if (edgeIndex < vertex.edges.size()) {
227 auto& edge = vertex.edges[edgeIndex];
229 // Clean up any processed leaves and return early if we are past the maxNFASize.
231 stack.last().edgeIndex = stack.last().vertex.edges.size();
235 // Quantified edges in the subtree will be a part of another NFA.
236 if (!edge.term.hasFixedLength()) {
237 stack.last().edgeIndex++;
241 unsigned subtreeRootId = edge.term.generateGraph(nfa, stack.last().nfaNodeId, edge.child->finalActions);
242 ASSERT(edge.child.get());
243 stack.append(ActiveSubtree(*edge.child.get(), subtreeRootId, 0));
245 ASSERT(edgeIndex == vertex.edges.size());
246 vertex.edges.removeAllMatching([](PrefixTreeEdge& edge)
248 return edge.term.isDeletedValue();
251 if (!stack.isEmpty()) {
252 auto& activeSubtree = stack.last();
253 auto& edge = activeSubtree.vertex.edges[stack.last().edgeIndex];
254 if (edge.child->edges.isEmpty())
255 edge.term = Term(Term::DeletedValue); // Mark this leaf for deleting.
256 activeSubtree.edgeIndex++;
262 void CombinedURLFilters::processNFAs(size_t maxNFASize, std::function<void(NFA&&)> handler)
264 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
268 // Traverse out to a leaf.
269 Vector<PrefixTreeVertex*, 128> stack;
270 PrefixTreeVertex* vertex = m_prefixTreeRoot.get();
273 stack.append(vertex);
274 if (vertex->edges.isEmpty())
276 vertex = vertex->edges.last().child.get();
278 if (stack.size() == 1)
279 break; // We're done once we have processed and removed all the edges in the prefix tree.
281 // Find the prefix root for this NFA. This is the vertex after the last term with a quantifier if there is one,
282 // or the root if there are no quantifiers left.
283 while (stack.size() > 1) {
284 if (!stack[stack.size() - 2]->edges.last().term.hasFixedLength())
288 ASSERT_WITH_MESSAGE(!stack.isEmpty(), "At least the root should be in the stack");
290 // Make an NFA with the subtrees for whom this is also the last quantifier (or who also have no quantifier).
292 // Put the prefix into the NFA.
293 unsigned prefixEnd = nfa.root();
294 for (unsigned i = 0; i < stack.size() - 1; ++i) {
295 ASSERT(!stack[i]->edges.isEmpty());
296 const PrefixTreeEdge& edge = stack[i]->edges.last();
297 prefixEnd = edge.term.generateGraph(nfa, prefixEnd, edge.child->finalActions);
299 // Put the non-quantified vertices in the subtree into the NFA and delete them.
300 ASSERT(stack.last());
301 generateNFAForSubtree(nfa, prefixEnd, *stack.last(), maxNFASize);
303 handler(WTF::move(nfa));
305 // Clean up any processed leaf nodes.
307 if (stack.size() > 1) {
308 if (stack[stack.size() - 1]->edges.isEmpty()) {
309 stack[stack.size() - 2]->edges.removeLast();
312 break; // Vertex is not a leaf.
314 break; // Leave the empty root.
319 } // namespace ContentExtensions
320 } // namespace WebCore
322 #endif // ENABLE(CONTENT_EXTENSIONS)