2 * Copyright (C) 2015 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
27 #include "DFABytecodeCompiler.h"
29 #if ENABLE(CONTENT_EXTENSIONS)
31 #include "ContentExtensionRule.h"
37 namespace ContentExtensions {
39 template <typename IntType>
40 inline void append(Vector<DFABytecode>& bytecode, IntType value)
42 bytecode.resize(bytecode.size() + sizeof(IntType));
43 *reinterpret_cast<IntType*>(&bytecode[bytecode.size() - sizeof(IntType)]) = value;
46 inline void set32Bits(Vector<DFABytecode>& bytecode, unsigned index, unsigned value)
48 *reinterpret_cast<unsigned*>(&bytecode[index]) = value;
51 void DFABytecodeCompiler::emitAppendAction(unsigned action)
53 append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendAction);
54 append<unsigned>(m_bytecode, action);
57 void DFABytecodeCompiler::emitTestFlagsAndAppendAction(uint16_t flags, unsigned action)
60 append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendAction);
61 append<uint16_t>(m_bytecode, flags);
62 append<unsigned>(m_bytecode, action);
65 void DFABytecodeCompiler::emitJump(unsigned destinationNodeIndex)
67 append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::Jump);
68 m_linkRecords.append(std::make_pair(m_bytecode.size(), destinationNodeIndex));
69 append<unsigned>(m_bytecode, 0); // This value will be set when linking.
72 void DFABytecodeCompiler::emitCheckValue(uint8_t value, unsigned destinationNodeIndex, bool caseSensitive)
74 append<DFABytecodeInstruction>(m_bytecode, caseSensitive ? DFABytecodeInstruction::CheckValueCaseSensitive : DFABytecodeInstruction::CheckValueCaseInsensitive);
75 append<uint8_t>(m_bytecode, value);
76 m_linkRecords.append(std::make_pair(m_bytecode.size(), destinationNodeIndex));
77 append<unsigned>(m_bytecode, 0); // This value will be set when linking.
80 void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, unsigned destinationNodeIndex, bool caseSensitive)
82 ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantic impose lowValue is strictly less than highValue.");
84 append<DFABytecodeInstruction>(m_bytecode, caseSensitive ? DFABytecodeInstruction::CheckValueRangeCaseSensitive : DFABytecodeInstruction::CheckValueRangeCaseInsensitive);
85 append<uint8_t>(m_bytecode, lowValue);
86 append<uint8_t>(m_bytecode, highValue);
87 m_linkRecords.append(std::make_pair(m_bytecode.size(), destinationNodeIndex));
88 append<unsigned>(m_bytecode, 0); // This value will be set when linking.
91 void DFABytecodeCompiler::emitTerminate()
93 append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::Terminate);
96 void DFABytecodeCompiler::compileNode(unsigned index)
98 const DFANode& node = m_dfa.nodeAt(index);
100 // Record starting index for linking.
101 m_nodeStartOffsets[index] = m_bytecode.size();
103 for (uint64_t action : node.actions) {
104 // High bits are used to store flags. See compileRuleList.
105 if (action & 0xFFFF00000000)
106 emitTestFlagsAndAppendAction(static_cast<uint16_t>(action >> 32), static_cast<unsigned>(action));
108 emitAppendAction(static_cast<unsigned>(action));
110 compileNodeTransitions(node);
113 void DFABytecodeCompiler::compileNodeTransitions(const DFANode& node)
115 unsigned destinations[128];
116 const unsigned noDestination = std::numeric_limits<unsigned>::max();
117 for (uint16_t i = 0; i < 128; i++) {
118 auto it = node.transitions.find(i);
119 if (it == node.transitions.end())
120 destinations[i] = noDestination;
122 destinations[i] = it->value;
125 Vector<Range> ranges;
127 bool hasRangeMin = false;
128 for (uint8_t i = 0; i < 128; i++) {
130 ASSERT_WITH_MESSAGE(!(node.hasFallbackTransition && node.fallbackTransition == destinations[rangeMin]), "Individual transitions to the fallback transitions should have been eliminated by the optimizer.");
131 if (destinations[i] != destinations[rangeMin]) {
133 // This is the end of a range. Check if it can be case insensitive.
134 uint8_t rangeMax = i - 1;
135 bool caseSensitive = true;
136 if (rangeMin >= 'A' && rangeMax <= 'Z') {
137 caseSensitive = false;
138 for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
139 if (destinations[rangeMin] != destinations[toASCIILower(rangeIndex)]) {
140 caseSensitive = true;
146 if (!caseSensitive) {
147 // If all the lower-case destinations are the same as the upper-case destinations,
148 // then they will be covered by a case-insensitive range and will not need their own range.
149 for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
150 ASSERT(destinations[rangeMin] == destinations[toASCIILower(rangeIndex)]);
151 destinations[toASCIILower(rangeIndex)] = noDestination;
153 ranges.append(Range(toASCIILower(rangeMin), toASCIILower(rangeMax), destinations[rangeMin], caseSensitive));
155 ranges.append(Range(rangeMin, rangeMax, destinations[rangeMin], caseSensitive));
157 if (destinations[i] == noDestination)
163 if (destinations[i] != noDestination) {
170 // Ranges are appended after passing the end of them.
171 // If a range goes to 127, we will have an uncommitted rangeMin because the loop does not check 128.
172 // If a range goes to 127, there will never be values higher than it, so checking for case-insensitive ranges would always fail.
173 ranges.append(Range(rangeMin, 127, destinations[rangeMin], true));
176 for (const auto& range : ranges)
177 compileCheckForRange(range);
178 if (node.hasFallbackTransition)
179 emitJump(node.fallbackTransition);
184 void DFABytecodeCompiler::compileCheckForRange(const Range& range)
186 ASSERT_WITH_MESSAGE(range.max < 128, "The DFA engine only supports the ASCII alphabet.");
187 ASSERT(range.min <= range.max);
189 if (range.min == range.max)
190 emitCheckValue(range.min, range.destination, range.caseSensitive);
192 emitCheckValueRange(range.min, range.max, range.destination, range.caseSensitive);
195 void DFABytecodeCompiler::compile()
198 unsigned startLocation = m_bytecode.size();
199 append<unsigned>(m_bytecode, 0);
200 m_nodeStartOffsets.resize(m_dfa.size());
202 // Make sure the root is always at the beginning of the bytecode.
203 compileNode(m_dfa.root());
204 for (unsigned i = 0; i < m_dfa.size(); i++) {
205 if (i != m_dfa.root())
210 for (const auto& linkRecord : m_linkRecords)
211 set32Bits(m_bytecode, linkRecord.first, m_nodeStartOffsets[linkRecord.second]);
214 set32Bits(m_bytecode, startLocation, m_bytecode.size() - startLocation);
217 } // namespace ContentExtensions
219 } // namespace WebCore
221 #endif // ENABLE(CONTENT_EXTENSIONS)