Unreviewed, fix -Wmisleading-indentation warning introduced in r246764
/*
 * Copyright (C) 2015 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "DFAMinimizer.h"

#if ENABLE(CONTENT_EXTENSIONS)

#include "DFA.h"
#include "DFANode.h"
#include "MutableRangeList.h"
#include <wtf/HashMap.h>
#include <wtf/Hasher.h>
#include <wtf/Vector.h>

namespace WebCore {
namespace ContentExtensions {

namespace {

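// Helper: walk a node's transition ranges against the sorted list of "singular"
// (non-overlapping) range starts. intersectionHandler(singularRangeIndex, transitionIterator)
// is invoked once for every singular range covered by a transition, which lets callers bucket
// each transition by the symbol ranges it spans.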
template<typename VectorType, typename Iterable, typename Function>
static inline void iterateIntersections(const VectorType& singularTransitionsFirsts, const Iterable& iterableTransitionList, const Function& intersectionHandler)
{
    ASSERT(!singularTransitionsFirsts.isEmpty());
    auto otherIterator = iterableTransitionList.begin();
    auto otherEnd = iterableTransitionList.end();

    if (otherIterator == otherEnd)
        return;

    unsigned singularTransitionsLength = singularTransitionsFirsts.size();
    unsigned singularTransitionsFirstsIndex = 0;
    for (; otherIterator != otherEnd; ++otherIterator) {
        auto firstCharacter = otherIterator.first();
        while (singularTransitionsFirstsIndex < singularTransitionsLength
            && singularTransitionsFirsts[singularTransitionsFirstsIndex] != firstCharacter)
            ++singularTransitionsFirstsIndex;

        intersectionHandler(singularTransitionsFirstsIndex, otherIterator);
        ++singularTransitionsFirstsIndex;

        auto lastCharacter = otherIterator.last();
        while (singularTransitionsFirstsIndex < singularTransitionsLength
            && singularTransitionsFirsts[singularTransitionsFirstsIndex] <= lastCharacter) {
            intersectionHandler(singularTransitionsFirstsIndex, otherIterator);
            ++singularTransitionsFirstsIndex;
        }
    }
}

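// A refinable partition of element indices, in the style of Hopcroft's minimization algorithm:
// elements are stored grouped by set in m_partitionedElements, markElementInCurrentGeneration()
// flags elements for splitting, and refineGeneration() splits every set that ends up with a mix
// of marked and unmarked elements.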
class Partition {
public:
    void initialize(unsigned size)
    {
        if (!size)
            return;

        m_sets.reserveInitialCapacity(size);
        m_partitionedElements.resize(size);
        m_elementPositionInPartitionedNodes.resize(size);
        m_elementToSetMap.resize(size);

        for (unsigned i = 0; i < size; ++i) {
            m_partitionedElements[i] = i;
            m_elementPositionInPartitionedNodes[i] = i;
            m_elementToSetMap[i] = 0;
        }
        m_sets.uncheckedAppend(SetDescriptor { 0, size, 0 });
    }

    void reserveUninitializedCapacity(unsigned elementCount)
    {
        m_partitionedElements.resize(elementCount);
        m_elementPositionInPartitionedNodes.resize(elementCount);
        m_elementToSetMap.resize(elementCount);
    }

    void addSetUnchecked(unsigned start, unsigned size)
    {
        m_sets.append(SetDescriptor { start, size, 0 });
    }

    void setElementUnchecked(unsigned elementIndex, unsigned positionInPartition, unsigned setIndex)
    {
        ASSERT(setIndex < m_sets.size());
        m_partitionedElements[positionInPartition] = elementIndex;
        m_elementPositionInPartitionedNodes[elementIndex] = positionInPartition;
        m_elementToSetMap[elementIndex] = setIndex;
    }

    unsigned startOffsetOfSet(unsigned setIndex) const
    {
        return m_sets[setIndex].start;
    }

    ALWAYS_INLINE void markElementInCurrentGeneration(unsigned elementIndex)
    {
        // Swap the node with the first unmarked node.
        unsigned setIndex = m_elementToSetMap[elementIndex];
        SetDescriptor& setDescriptor = m_sets[setIndex];

        unsigned elementPositionInPartition = m_elementPositionInPartitionedNodes[elementIndex];
        ASSERT(elementPositionInPartition >= setDescriptor.start);
        ASSERT(elementPositionInPartition < setDescriptor.end());

        unsigned firstUnmarkedElementPositionInPartition = setDescriptor.indexAfterMarkedElements();
        ASSERT(firstUnmarkedElementPositionInPartition >= setDescriptor.start && firstUnmarkedElementPositionInPartition < setDescriptor.end());
        ASSERT(elementPositionInPartition >= firstUnmarkedElementPositionInPartition);

        // Swap the nodes in the set.
        unsigned firstUnmarkedElement = m_partitionedElements[firstUnmarkedElementPositionInPartition];
        m_partitionedElements[firstUnmarkedElementPositionInPartition] = elementIndex;
        m_partitionedElements[elementPositionInPartition] = firstUnmarkedElement;

        // Update their index.
        m_elementPositionInPartitionedNodes[elementIndex] = firstUnmarkedElementPositionInPartition;
        m_elementPositionInPartitionedNodes[firstUnmarkedElement] = elementPositionInPartition;

        if (!setDescriptor.markedCount) {
            ASSERT(!m_setsMarkedInCurrentGeneration.contains(setIndex));
            m_setsMarkedInCurrentGeneration.append(setIndex);
        }
        ++setDescriptor.markedCount;
    }

    // The function passed as argument MUST not modify the partition.
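    // Every set that received marks in this generation is split into its marked and unmarked
    // elements. The new set always takes the smaller (or equal) half, which keeps the overall
    // refinement cost bounded; the callback receives the index of each newly created set.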
    template<typename Function>
    void refineGeneration(const Function& function)
    {
        for (unsigned setIndex : m_setsMarkedInCurrentGeneration) {
            SetDescriptor& setDescriptor = m_sets[setIndex];
            if (setDescriptor.markedCount == setDescriptor.size) {
                // Everything is marked, there is nothing to refine.
                setDescriptor.markedCount = 0;
                continue;
            }

            SetDescriptor newSet;
            bool newSetIsMarkedSet = setDescriptor.markedCount * 2 <= setDescriptor.size;
            if (newSetIsMarkedSet) {
                // At most half of the nodes have been marked.
                newSet = { setDescriptor.start, setDescriptor.markedCount, 0 };
                setDescriptor.start = setDescriptor.start + setDescriptor.markedCount;
            } else
                newSet = { setDescriptor.start + setDescriptor.markedCount, setDescriptor.size - setDescriptor.markedCount, 0 };
            setDescriptor.size -= newSet.size;
            setDescriptor.markedCount = 0;

            unsigned newSetIndex = m_sets.size();
            m_sets.append(newSet);

            for (unsigned i = newSet.start; i < newSet.end(); ++i)
                m_elementToSetMap[m_partitionedElements[i]] = newSetIndex;

            function(newSetIndex);
        }
        m_setsMarkedInCurrentGeneration.clear();
    }

    // Call Function() on every node of a given subset.
    template<typename Function>
    void iterateSet(unsigned setIndex, const Function& function)
    {
        SetDescriptor& setDescriptor = m_sets[setIndex];
        for (unsigned i = setDescriptor.start; i < setDescriptor.end(); ++i)
            function(m_partitionedElements[i]);
    }

    // Index of the set containing the Node.
    unsigned setIndex(unsigned elementIndex) const
    {
        return m_elementToSetMap[elementIndex];
    }

    // NodeIndex of the first element in the set.
    unsigned firstElementInSet(unsigned setIndex) const
    {
        return m_partitionedElements[m_sets[setIndex].start];
    }

    unsigned size() const
    {
        return m_sets.size();
    }

private:
    struct SetDescriptor {
        unsigned start;
        unsigned size;
        unsigned markedCount;

        unsigned indexAfterMarkedElements() const { return start + markedCount; }
        unsigned end() const { return start + size; }
    };

    // List of sets.
    Vector<SetDescriptor, 0, ContentExtensionsOverflowHandler> m_sets;

    // All the element indices, ordered such that two elements of the same set never have an element of a different set between them.
    Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_partitionedElements;

    // Map elementIndex->position in the partitionedElements.
    Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_elementPositionInPartitionedNodes;

    // Map elementIndex->SetIndex.
    Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_elementToSetMap;

    // List of sets with any marked node. Each set can appear at most once.
    // FIXME: find a good inline size for this.
    Vector<unsigned, 128, ContentExtensionsOverflowHandler> m_setsMarkedInCurrentGeneration;
};

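// FullGraphPartition owns the two partitions used for minimization: one over the DFA nodes and
// one over the flattened incoming transitions, where transitions are grouped by the singular
// (non-overlapping) symbol ranges derived from every node's transition list. Refining one
// partition drives refinement of the other until no further split is possible.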
class FullGraphPartition {
    typedef MutableRangeList<char, uint32_t, 128> SingularTransitionsMutableRangeList;
public:
    FullGraphPartition(const DFA& dfa)
    {
        m_nodePartition.initialize(dfa.nodes.size());

        SingularTransitionsMutableRangeList singularTransitions;
        CounterConverter counterConverter;
        for (const DFANode& node : dfa.nodes) {
            if (node.isKilled())
                continue;
            auto transitions = node.transitions(dfa);
            singularTransitions.extend(transitions.begin(), transitions.end(), counterConverter);
        }

        // Count the number of transitions for each singular range. This will give us the bucket size
        // for the transition partition, where transitions are partitioned by "symbol".
        unsigned rangeIndexAccumulator = 0;
        for (const auto& transition : singularTransitions) {
            m_transitionPartition.addSetUnchecked(rangeIndexAccumulator, transition.data);
            rangeIndexAccumulator += transition.data;
        }

        // Count the number of incoming transitions per node.
        m_flattenedTransitionsStartOffsetPerNode.resize(dfa.nodes.size());
        memset(m_flattenedTransitionsStartOffsetPerNode.data(), 0, m_flattenedTransitionsStartOffsetPerNode.size() * sizeof(unsigned));

        Vector<char, 0, ContentExtensionsOverflowHandler> singularTransitionsFirsts;
        singularTransitionsFirsts.reserveInitialCapacity(singularTransitions.m_ranges.size());
        for (const auto& transition : singularTransitions)
            singularTransitionsFirsts.uncheckedAppend(transition.first);

        for (const DFANode& node : dfa.nodes) {
            if (node.isKilled())
                continue;
            auto transitions = node.transitions(dfa);
            iterateIntersections(singularTransitionsFirsts, transitions, [&](unsigned, const DFANode::ConstRangeIterator& origin) {
                uint32_t targetNodeIndex = origin.target();
                ++m_flattenedTransitionsStartOffsetPerNode[targetNodeIndex];
            });
        }

        // Accumulate the offsets. This gives us the start position of each bucket.
        unsigned transitionAccumulator = 0;
        for (unsigned i = 0; i < m_flattenedTransitionsStartOffsetPerNode.size(); ++i) {
            unsigned transitionsCountForNode = m_flattenedTransitionsStartOffsetPerNode[i];
            m_flattenedTransitionsStartOffsetPerNode[i] = transitionAccumulator;
            transitionAccumulator += transitionsCountForNode;
        }
        unsigned flattenedTransitionsSize = transitionAccumulator;
        ASSERT_WITH_MESSAGE(flattenedTransitionsSize == rangeIndexAccumulator, "The number of transitions should be the same, regardless of how they are arranged in buckets.");

        m_transitionPartition.reserveUninitializedCapacity(flattenedTransitionsSize);

        // Next, let's fill the transition table and set up the size of each group at the same time.
        m_flattenedTransitionsSizePerNode.resize(dfa.nodes.size());
        for (unsigned& counter : m_flattenedTransitionsSizePerNode)
            counter = 0;
        m_flattenedTransitions.resize(flattenedTransitionsSize);

        Vector<uint32_t> transitionPerRangeOffset(m_transitionPartition.size());
        memset(transitionPerRangeOffset.data(), 0, transitionPerRangeOffset.size() * sizeof(uint32_t));

        for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
            const DFANode& node = dfa.nodes[i];
            if (node.isKilled())
                continue;

            auto transitions = node.transitions(dfa);
            iterateIntersections(singularTransitionsFirsts, transitions, [&](unsigned singularTransitionIndex, const DFANode::ConstRangeIterator& origin) {
                uint32_t targetNodeIndex = origin.target();

                unsigned start = m_flattenedTransitionsStartOffsetPerNode[targetNodeIndex];
                unsigned offset = m_flattenedTransitionsSizePerNode[targetNodeIndex];
                unsigned positionInFlattenedTransitions = start + offset;
                m_flattenedTransitions[positionInFlattenedTransitions] = Transition({ i });

                uint32_t& inRangeOffset = transitionPerRangeOffset[singularTransitionIndex];
                unsigned positionInTransitionPartition = m_transitionPartition.startOffsetOfSet(singularTransitionIndex) + inRangeOffset;
                ++inRangeOffset;

                m_transitionPartition.setElementUnchecked(positionInFlattenedTransitions, positionInTransitionPartition, singularTransitionIndex);

                ++m_flattenedTransitionsSizePerNode[targetNodeIndex];
            });
        }
    }

    void markNode(unsigned nodeIndex)
    {
        m_nodePartition.markElementInCurrentGeneration(nodeIndex);
    }

    void refinePartitions()
    {
        m_nodePartition.refineGeneration([&](unsigned smallestSetIndex) {
            m_nodePartition.iterateSet(smallestSetIndex, [&](unsigned nodeIndex) {
                unsigned incomingTransitionsStartForNode = m_flattenedTransitionsStartOffsetPerNode[nodeIndex];
                unsigned incomingTransitionsSizeForNode = m_flattenedTransitionsSizePerNode[nodeIndex];

                for (unsigned i = 0; i < incomingTransitionsSizeForNode; ++i)
                    m_transitionPartition.markElementInCurrentGeneration(incomingTransitionsStartForNode + i);
            });

            // We only need to split the transitions; we handle the new sets through the main loop.
            m_transitionPartition.refineGeneration([](unsigned) { });
        });
    }

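    // Main refinement loop: every transition set is used as a splitter. Marking the sources of
    // the transitions in a set and refining the node partition may create new transition sets,
    // which are processed here in turn until no new set appears.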
    void splitByUniqueTransitions()
    {
        for (; m_nextTransitionSetToProcess < m_transitionPartition.size(); ++m_nextTransitionSetToProcess) {
            // We use the known splitters to refine the set.
            m_transitionPartition.iterateSet(m_nextTransitionSetToProcess, [&](unsigned transitionIndex) {
                unsigned sourceNodeIndex = m_flattenedTransitions[transitionIndex].source;
                m_nodePartition.markElementInCurrentGeneration(sourceNodeIndex);
            });

            refinePartitions();
        }
    }

    unsigned nodeReplacement(unsigned nodeIndex)
    {
        unsigned setIndex = m_nodePartition.setIndex(nodeIndex);
        return m_nodePartition.firstElementInSet(setIndex);
    }

private:
    struct Transition {
        unsigned source;
    };

    struct CounterConverter {
        uint32_t convert(uint32_t)
        {
            return 1;
        }

        void extend(uint32_t& destination, uint32_t)
        {
            ++destination;
        }
    };

    Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_flattenedTransitionsStartOffsetPerNode;
    Vector<unsigned, 0, ContentExtensionsOverflowHandler> m_flattenedTransitionsSizePerNode;
    Vector<Transition, 0, ContentExtensionsOverflowHandler> m_flattenedTransitions;

    Partition m_nodePartition;
    Partition m_transitionPartition;

    unsigned m_nextTransitionSetToProcess { 0 };
};

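// Key used to group final states by the actions they trigger. Two keys compare equal when they
// point into the same DFA and reference action ranges of the same length with identical
// contents; the hash is computed once over the raw action bytes in the constructor.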
struct ActionKey {
    enum DeletedValueTag { DeletedValue };
    explicit ActionKey(DeletedValueTag) { state = Deleted; }

    enum EmptyValueTag { EmptyValue };
    explicit ActionKey(EmptyValueTag) { state = Empty; }

    explicit ActionKey(const DFA* dfa, uint32_t actionsStart, uint16_t actionsLength)
        : dfa(dfa)
        , actionsStart(actionsStart)
        , actionsLength(actionsLength)
        , state(Valid)
    {
        StringHasher hasher;
        hasher.addCharactersAssumingAligned(reinterpret_cast<const UChar*>(&dfa->actions[actionsStart]), actionsLength * sizeof(uint64_t) / sizeof(UChar));
        hash = hasher.hash();
    }

    bool isEmptyValue() const { return state == Empty; }
    bool isDeletedValue() const { return state == Deleted; }

    unsigned hash;

    const DFA* dfa;
    uint32_t actionsStart;
    uint16_t actionsLength;

    enum {
        Valid,
        Empty,
        Deleted
    } state;
};

struct ActionKeyHash {
    static unsigned hash(const ActionKey& actionKey)
    {
        return actionKey.hash;
    }

    static bool equal(const ActionKey& a, const ActionKey& b)
    {
        if (a.state != b.state
            || a.dfa != b.dfa
            || a.actionsLength != b.actionsLength)
            return false;
        for (uint16_t i = 0; i < a.actionsLength; ++i) {
            if (a.dfa->actions[a.actionsStart + i] != a.dfa->actions[b.actionsStart + i])
                return false;
        }
        return true;
    }
    static const bool safeToCompareToEmptyOrDeleted = false;
};

struct ActionKeyHashTraits : public WTF::CustomHashTraits<ActionKey> {
    static const bool emptyValueIsZero = true;
};

} // anonymous namespace.

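// Minimize the DFA in place: group final states by their actions, refine the node partition
// until equivalent states share a set, then redirect every transition to the first node of its
// set and kill the now-redundant nodes.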
void DFAMinimizer::minimize(DFA& dfa)
{
    FullGraphPartition fullGraphPartition(dfa);

    // Unlike traditional minimization, final states can be differentiated by their actions.
    // Instead of creating a single set for all final states, we partition by actions from
    // the start.
    HashMap<ActionKey, Vector<unsigned>, ActionKeyHash, ActionKeyHashTraits> finalStates;
    for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
        const DFANode& node = dfa.nodes[i];
        if (node.hasActions()) {
            // FIXME: Sort the actions in the dfa to make nodes that have the same actions in different order equal.
            auto addResult = finalStates.add(ActionKey(&dfa, node.actionsStart(), node.actionsLength()), Vector<unsigned>());
            addResult.iterator->value.append(i);
        }
    }

    for (const auto& slot : finalStates) {
        for (unsigned finalStateIndex : slot.value)
            fullGraphPartition.markNode(finalStateIndex);
        fullGraphPartition.refinePartitions();
    }

    // Use every splitter to refine the node partitions.
    fullGraphPartition.splitByUniqueTransitions();

    Vector<unsigned> relocationVector;
    relocationVector.reserveInitialCapacity(dfa.nodes.size());
    for (unsigned i = 0; i < dfa.nodes.size(); ++i)
        relocationVector.uncheckedAppend(i);

    // Update all the transitions.
    for (unsigned i = 0; i < dfa.nodes.size(); ++i) {
        unsigned replacement = fullGraphPartition.nodeReplacement(i);
        if (i != replacement) {
            relocationVector[i] = replacement;
            dfa.nodes[i].kill(dfa);
        }
    }

    dfa.root = relocationVector[dfa.root];
    for (DFANode& node : dfa.nodes) {
        if (node.isKilled())
            continue;

        for (auto& transition : node.transitions(dfa)) {
            uint32_t target = transition.target();
            uint32_t relocatedTarget = relocationVector[target];
            if (target != relocatedTarget)
                transition.resetTarget(relocatedTarget);
        }
    }
}

} // namespace ContentExtensions
} // namespace WebCore

#endif // ENABLE(CONTENT_EXTENSIONS)