781a74247a423feb86058f72a174851736061b6a
[WebKit-https.git] / Source / WebCore / contentextensions / DFABytecodeInterpreter.cpp
1 /*
2  * Copyright (C) 2015 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "DFABytecodeInterpreter.h"
28
29 #include "ContentExtensionsDebugging.h"
30 #include <wtf/text/CString.h>
31
32 #if ENABLE(CONTENT_EXTENSIONS)
33
34 namespace WebCore {
35     
36 namespace ContentExtensions {
37
38 template <typename IntType>
39 static inline IntType getBits(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index, Vector<bool>& pagesUsed)
40 {
41 #if CONTENT_EXTENSIONS_MEMORY_REPORTING
42     uint32_t page = index / CONTENT_EXTENSIONS_PAGE_SIZE;
43     while (pagesUsed.size() < (bytecodeLength + CONTENT_EXTENSIONS_PAGE_SIZE - 1) / CONTENT_EXTENSIONS_PAGE_SIZE)
44         pagesUsed.append(false);
45     pagesUsed[page] = true;
46 #else
47     UNUSED_PARAM(pagesUsed);
48 #endif
49     ASSERT_UNUSED(bytecodeLength, index + sizeof(IntType) <= bytecodeLength);
50     return *reinterpret_cast<const IntType*>(&bytecode[index]);
51 }
52
53 // FIXME: Get rid of pagesUsed. We don't need to keep track of the pages used.
54 static inline DFABytecodeInstruction getInstruction(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index, Vector<bool>& pagesUsed)
55 {
56     return static_cast<DFABytecodeInstruction>(getBits<uint8_t>(bytecode, bytecodeLength, index, pagesUsed) & DFABytecodeInstructionMask);
57 }
58
59 static inline size_t jumpSizeInBytes(DFABytecodeJumpSize jumpSize)
60 {
61     switch (jumpSize) {
62     case Int8:
63         return sizeof(int8_t);
64     case Int16:
65         return sizeof(int16_t);
66     case Int32:
67         return sizeof(int32_t);
68     default:
69         RELEASE_ASSERT_NOT_REACHED();
70     }
71 }
72
73 static inline DFABytecodeJumpSize getJumpSize(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index, Vector<bool>& pagesUsed)
74 {
75     DFABytecodeJumpSize jumpSize = static_cast<DFABytecodeJumpSize>(getBits<uint8_t>(bytecode, bytecodeLength, index, pagesUsed) & DFABytecodeJumpSizeMask);
76     ASSERT(jumpSize == DFABytecodeJumpSize::Int32 || jumpSize == DFABytecodeJumpSize::Int16 || jumpSize == DFABytecodeJumpSize::Int8);
77     return jumpSize;
78 }
79
80 static inline int32_t getJumpDistance(const DFABytecode* bytecode, uint32_t bytecodeLength, uint32_t index, Vector<bool>& pagesUsed, DFABytecodeJumpSize jumpSize)
81 {
82     switch (jumpSize) {
83     case Int8:
84         return getBits<int8_t>(bytecode, bytecodeLength, index, pagesUsed);
85     case Int16:
86         return getBits<int16_t>(bytecode, bytecodeLength, index, pagesUsed);
87     case Int32:
88         return getBits<int32_t>(bytecode, bytecodeLength, index, pagesUsed);
89     default:
90         RELEASE_ASSERT_NOT_REACHED();
91     }
92 }
93
94 void DFABytecodeInterpreter::interpretAppendAction(uint32_t& programCounter, Actions& actions, bool ifDomain)
95 {
96     ASSERT(getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed) == DFABytecodeInstruction::AppendAction
97         || getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed) == DFABytecodeInstruction::AppendActionWithIfDomain
98         || getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed) == DFABytecodeInstruction::AppendActionDefaultStylesheet);
99     actions.add((ifDomain ? IfDomainFlag : 0) | static_cast<uint64_t>(getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction), m_pagesUsed)));
100     programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendAction);
101     ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::AppendAction) == instructionSizeWithArguments(DFABytecodeInstruction::AppendActionWithIfDomain));
102     ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::AppendAction) == instructionSizeWithArguments(DFABytecodeInstruction::AppendActionDefaultStylesheet));
103 }
104
105 void DFABytecodeInterpreter::interpretTestFlagsAndAppendAction(uint32_t& programCounter, uint16_t flags, Actions& actions, bool ifDomain)
106 {
107     ASSERT(getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed) == DFABytecodeInstruction::TestFlagsAndAppendAction
108         || getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed) == DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain);
109     uint16_t flagsToCheck = getBits<uint16_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction), m_pagesUsed);
110
111     uint16_t loadTypeFlags = flagsToCheck & LoadTypeMask;
112     uint16_t ressourceTypeFlags = flagsToCheck & ResourceTypeMask;
113     
114     bool loadTypeMatches = loadTypeFlags ? (loadTypeFlags & flags) : true;
115     bool ressourceTypeMatches = ressourceTypeFlags ? (ressourceTypeFlags & flags) : true;
116     
117     if (loadTypeMatches && ressourceTypeMatches)
118         actions.add((ifDomain ? IfDomainFlag : 0) | static_cast<uint64_t>(getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint16_t), m_pagesUsed)));
119     programCounter += instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction);
120     ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction) == instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain));
121 }
122
123 DFABytecodeInterpreter::Actions DFABytecodeInterpreter::actionsForDefaultStylesheetFromDFARoot()
124 {
125     Actions actions;
126
127     // DFA header.
128     uint32_t dfaBytecodeLength = getBits<uint32_t>(m_bytecode, m_bytecodeLength, 0, m_pagesUsed);
129     uint32_t programCounter = sizeof(uint32_t);
130
131     while (programCounter < dfaBytecodeLength) {
132         DFABytecodeInstruction instruction = getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed);
133         if (instruction == DFABytecodeInstruction::AppendActionDefaultStylesheet)
134             interpretAppendAction(programCounter, actions, false);
135         else if (instruction == DFABytecodeInstruction::AppendAction)
136             programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendAction);
137         else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendAction)
138             programCounter += instructionSizeWithArguments(DFABytecodeInstruction::TestFlagsAndAppendAction);
139         else {
140             // actionsForDefaultStylesheetFromDFARoot should only be called on the DFA without domains,
141             // which should never have any actions with if-domain.
142             ASSERT(instruction != DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain);
143             ASSERT(instruction != DFABytecodeInstruction::AppendActionWithIfDomain);
144             break;
145         }
146     }
147     return actions;
148 }
149     
150 DFABytecodeInterpreter::Actions DFABytecodeInterpreter::interpret(const CString& urlCString, uint16_t flags)
151 {
152     const char* url = urlCString.data();
153     ASSERT(url);
154     
155     Actions actions;
156     
157     uint32_t programCounter = 0;
158     while (programCounter < m_bytecodeLength) {
159
160         // DFA header.
161         uint32_t dfaStart = programCounter;
162         uint32_t dfaBytecodeLength = getBits<uint32_t>(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed);
163         programCounter += sizeof(uint32_t);
164
165         // Skip the default stylesheet actions on the DFA root. These are accessed via actionsForDefaultStylesheetFromDFARoot.
166         if (!dfaStart) {
167             while (programCounter < dfaBytecodeLength) {
168                 DFABytecodeInstruction instruction = getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed);
169                 if (instruction == DFABytecodeInstruction::AppendActionDefaultStylesheet)
170                     programCounter += instructionSizeWithArguments(DFABytecodeInstruction::AppendActionDefaultStylesheet);
171                 else if (instruction == DFABytecodeInstruction::AppendAction)
172                     interpretAppendAction(programCounter, actions, false);
173                 else if (instruction == DFABytecodeInstruction::TestFlagsAndAppendAction)
174                     interpretTestFlagsAndAppendAction(programCounter, flags, actions, false);
175                 else
176                     break;
177             }
178             if (programCounter >= m_bytecodeLength)
179                 return actions;
180         } else {
181             ASSERT_WITH_MESSAGE(getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed) != DFABytecodeInstruction::AppendAction
182                 && getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed) != DFABytecodeInstruction::TestFlagsAndAppendAction,
183                 "Triggers that match everything should only be in the first DFA.");
184         }
185         
186         // Interpret the bytecode from this DFA.
187         // This should always terminate if interpreting correctly compiled bytecode.
188         uint32_t urlIndex = 0;
189         bool urlIndexIsAfterEndOfString = false;
190         while (true) {
191             ASSERT(programCounter <= m_bytecodeLength);
192             switch (getInstruction(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed)) {
193
194             case DFABytecodeInstruction::Terminate:
195                 goto nextDFA;
196                     
197             case DFABytecodeInstruction::CheckValueCaseSensitive: {
198                 if (urlIndexIsAfterEndOfString)
199                     goto nextDFA;
200
201                 // Check to see if the next character in the url is the value stored with the bytecode.
202                 char character = url[urlIndex];
203                 DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed);
204                 if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction), m_pagesUsed)) {
205                     uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t);
206                     programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, m_pagesUsed, jumpSize);
207                     if (!character)
208                         urlIndexIsAfterEndOfString = true;
209                     urlIndex++; // This represents an edge in the DFA.
210                 } else
211                     programCounter += sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
212                 break;
213             }
214
215             case DFABytecodeInstruction::CheckValueCaseInsensitive: {
216                 if (urlIndexIsAfterEndOfString)
217                     goto nextDFA;
218
219                 // Check to see if the next character in the url is the value stored with the bytecode.
220                 char character = toASCIILower(url[urlIndex]);
221                 DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed);
222                 if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction), m_pagesUsed)) {
223                     uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t);
224                     programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, m_pagesUsed, jumpSize);
225                     if (!character)
226                         urlIndexIsAfterEndOfString = true;
227                     urlIndex++; // This represents an edge in the DFA.
228                 } else
229                     programCounter += sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
230                 break;
231             }
232                     
233             case DFABytecodeInstruction::CheckValueRangeCaseSensitive: {
234                 if (urlIndexIsAfterEndOfString)
235                     goto nextDFA;
236                 
237                 char character = url[urlIndex];
238                 DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed);
239                 if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction), m_pagesUsed)
240                     && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t), m_pagesUsed)) {
241                     uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t);
242                     programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, m_pagesUsed, jumpSize);
243                     if (!character)
244                         urlIndexIsAfterEndOfString = true;
245                     urlIndex++; // This represents an edge in the DFA.
246                 } else
247                     programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
248                 break;
249             }
250
251             case DFABytecodeInstruction::CheckValueRangeCaseInsensitive: {
252                 if (urlIndexIsAfterEndOfString)
253                     goto nextDFA;
254                 
255                 char character = toASCIILower(url[urlIndex]);
256                 DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed);
257                 if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction), m_pagesUsed)
258                     && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t), m_pagesUsed)) {
259                     uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t);
260                     programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, m_pagesUsed, jumpSize);
261                     if (!character)
262                         urlIndexIsAfterEndOfString = true;
263                     urlIndex++; // This represents an edge in the DFA.
264                 } else
265                     programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
266                 break;
267             }
268
269             case DFABytecodeInstruction::Jump: {
270                 if (!url[urlIndex] || urlIndexIsAfterEndOfString)
271                     goto nextDFA;
272                 
273                 uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction);
274                 DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, m_bytecodeLength, programCounter, m_pagesUsed);
275                 programCounter += getJumpDistance(m_bytecode, m_bytecodeLength, jumpLocation, m_pagesUsed, jumpSize);
276                 urlIndex++; // This represents an edge in the DFA.
277                 break;
278             }
279                     
280             case DFABytecodeInstruction::AppendAction:
281                 interpretAppendAction(programCounter, actions, false);
282                 break;
283                     
284             case DFABytecodeInstruction::AppendActionWithIfDomain:
285                 interpretAppendAction(programCounter, actions, true);
286                 break;
287                     
288             case DFABytecodeInstruction::TestFlagsAndAppendAction:
289                 interpretTestFlagsAndAppendAction(programCounter, flags, actions, false);
290                 break;
291             
292             case DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain:
293                 interpretTestFlagsAndAppendAction(programCounter, flags, actions, true);
294                 break;
295                     
296             default:
297                 RELEASE_ASSERT_NOT_REACHED(); // Invalid bytecode.
298             }
299             // We should always terminate before or at a null character at the end of a String.
300             ASSERT(urlIndex <= urlCString.length() || (urlIndexIsAfterEndOfString && urlIndex <= urlCString.length() + 1));
301         }
302         RELEASE_ASSERT_NOT_REACHED(); // The while loop can only be exited using goto nextDFA.
303         nextDFA:
304         ASSERT(dfaBytecodeLength);
305         programCounter = dfaStart + dfaBytecodeLength;
306     }
307     return actions;
308 }
309
310 } // namespace ContentExtensions
311     
312 } // namespace WebCore
313
314 #endif // ENABLE(CONTENT_EXTENSIONS)