Unreviewed, rolling out r234489.
[WebKit-https.git] / Source / JavaScriptCore / parser / Lexer.cpp
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2006-2017 Apple Inc. All Rights Reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
7  *
8  *  This library is free software; you can redistribute it and/or
9  *  modify it under the terms of the GNU Library General Public
10  *  License as published by the Free Software Foundation; either
11  *  version 2 of the License, or (at your option) any later version.
12  *
13  *  This library is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  *  Library General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Library General Public License
19  *  along with this library; see the file COPYING.LIB.  If not, write to
20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  *  Boston, MA 02110-1301, USA.
22  *
23  */
24
25 #include "config.h"
26 #include "Lexer.h"
27
28 #include "BuiltinNames.h"
29 #include "Identifier.h"
30 #include "JSCInlines.h"
31 #include "JSFunctionInlines.h"
32 #include "KeywordLookup.h"
33 #include "Lexer.lut.h"
34 #include "Nodes.h"
35 #include "ParseInt.h"
36 #include "Parser.h"
37 #include <ctype.h>
38 #include <limits.h>
39 #include <string.h>
40 #include <wtf/Assertions.h>
41 #include <wtf/Variant.h>
42 #include <wtf/dtoa.h>
43
44 namespace JSC {
45
46 bool isLexerKeyword(const Identifier& identifier)
47 {
48     return JSC::mainTable.entry(identifier);
49 }
50
// Classification assigned to each Latin-1 character by typesOfLatin1Characters.
// The enumerator ORDER is load-bearing; see the note on the first group.
enum CharacterType {
    // Identifier-forming classes. These three must remain first, in this
    // order: isIdentStart() tests for equality with CharacterIdentifierStart
    // and isIdentPart() tests "<= CharacterNumber".
    CharacterIdentifierStart = 0,
    CharacterZero,
    CharacterNumber,

    // Classes dispatched on by the lexer's main switch.
    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterBackQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Operator characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Remaining classes.
    CharacterWhiteSpace,
    CharacterPrivateIdentifierStart
};
95
96 // 256 Latin-1 codes
97 static constexpr const unsigned short typesOfLatin1Characters[256] = {
98 /*   0 - Null               */ CharacterInvalid,
99 /*   1 - Start of Heading   */ CharacterInvalid,
100 /*   2 - Start of Text      */ CharacterInvalid,
101 /*   3 - End of Text        */ CharacterInvalid,
102 /*   4 - End of Transm.     */ CharacterInvalid,
103 /*   5 - Enquiry            */ CharacterInvalid,
104 /*   6 - Acknowledgment     */ CharacterInvalid,
105 /*   7 - Bell               */ CharacterInvalid,
106 /*   8 - Back Space         */ CharacterInvalid,
107 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
108 /*  10 - Line Feed          */ CharacterLineTerminator,
109 /*  11 - Vertical Tab       */ CharacterWhiteSpace,
110 /*  12 - Form Feed          */ CharacterWhiteSpace,
111 /*  13 - Carriage Return    */ CharacterLineTerminator,
112 /*  14 - Shift Out          */ CharacterInvalid,
113 /*  15 - Shift In           */ CharacterInvalid,
114 /*  16 - Data Line Escape   */ CharacterInvalid,
115 /*  17 - Device Control 1   */ CharacterInvalid,
116 /*  18 - Device Control 2   */ CharacterInvalid,
117 /*  19 - Device Control 3   */ CharacterInvalid,
118 /*  20 - Device Control 4   */ CharacterInvalid,
119 /*  21 - Negative Ack.      */ CharacterInvalid,
120 /*  22 - Synchronous Idle   */ CharacterInvalid,
121 /*  23 - End of Transmit    */ CharacterInvalid,
122 /*  24 - Cancel             */ CharacterInvalid,
123 /*  25 - End of Medium      */ CharacterInvalid,
124 /*  26 - Substitute         */ CharacterInvalid,
125 /*  27 - Escape             */ CharacterInvalid,
126 /*  28 - File Separator     */ CharacterInvalid,
127 /*  29 - Group Separator    */ CharacterInvalid,
128 /*  30 - Record Separator   */ CharacterInvalid,
129 /*  31 - Unit Separator     */ CharacterInvalid,
130 /*  32 - Space              */ CharacterWhiteSpace,
131 /*  33 - !                  */ CharacterExclamationMark,
132 /*  34 - "                  */ CharacterQuote,
133 /*  35 - #                  */ CharacterInvalid,
134 /*  36 - $                  */ CharacterIdentifierStart,
135 /*  37 - %                  */ CharacterModulo,
136 /*  38 - &                  */ CharacterAnd,
137 /*  39 - '                  */ CharacterQuote,
138 /*  40 - (                  */ CharacterOpenParen,
139 /*  41 - )                  */ CharacterCloseParen,
140 /*  42 - *                  */ CharacterMultiply,
141 /*  43 - +                  */ CharacterAdd,
142 /*  44 - ,                  */ CharacterComma,
143 /*  45 - -                  */ CharacterSub,
144 /*  46 - .                  */ CharacterDot,
145 /*  47 - /                  */ CharacterSlash,
146 /*  48 - 0                  */ CharacterZero,
147 /*  49 - 1                  */ CharacterNumber,
148 /*  50 - 2                  */ CharacterNumber,
149 /*  51 - 3                  */ CharacterNumber,
150 /*  52 - 4                  */ CharacterNumber,
151 /*  53 - 5                  */ CharacterNumber,
152 /*  54 - 6                  */ CharacterNumber,
153 /*  55 - 7                  */ CharacterNumber,
154 /*  56 - 8                  */ CharacterNumber,
155 /*  57 - 9                  */ CharacterNumber,
156 /*  58 - :                  */ CharacterColon,
157 /*  59 - ;                  */ CharacterSemicolon,
158 /*  60 - <                  */ CharacterLess,
159 /*  61 - =                  */ CharacterEqual,
160 /*  62 - >                  */ CharacterGreater,
161 /*  63 - ?                  */ CharacterQuestion,
162 /*  64 - @                  */ CharacterPrivateIdentifierStart,
163 /*  65 - A                  */ CharacterIdentifierStart,
164 /*  66 - B                  */ CharacterIdentifierStart,
165 /*  67 - C                  */ CharacterIdentifierStart,
166 /*  68 - D                  */ CharacterIdentifierStart,
167 /*  69 - E                  */ CharacterIdentifierStart,
168 /*  70 - F                  */ CharacterIdentifierStart,
169 /*  71 - G                  */ CharacterIdentifierStart,
170 /*  72 - H                  */ CharacterIdentifierStart,
171 /*  73 - I                  */ CharacterIdentifierStart,
172 /*  74 - J                  */ CharacterIdentifierStart,
173 /*  75 - K                  */ CharacterIdentifierStart,
174 /*  76 - L                  */ CharacterIdentifierStart,
175 /*  77 - M                  */ CharacterIdentifierStart,
176 /*  78 - N                  */ CharacterIdentifierStart,
177 /*  79 - O                  */ CharacterIdentifierStart,
178 /*  80 - P                  */ CharacterIdentifierStart,
179 /*  81 - Q                  */ CharacterIdentifierStart,
180 /*  82 - R                  */ CharacterIdentifierStart,
181 /*  83 - S                  */ CharacterIdentifierStart,
182 /*  84 - T                  */ CharacterIdentifierStart,
183 /*  85 - U                  */ CharacterIdentifierStart,
184 /*  86 - V                  */ CharacterIdentifierStart,
185 /*  87 - W                  */ CharacterIdentifierStart,
186 /*  88 - X                  */ CharacterIdentifierStart,
187 /*  89 - Y                  */ CharacterIdentifierStart,
188 /*  90 - Z                  */ CharacterIdentifierStart,
189 /*  91 - [                  */ CharacterOpenBracket,
190 /*  92 - \                  */ CharacterBackSlash,
191 /*  93 - ]                  */ CharacterCloseBracket,
192 /*  94 - ^                  */ CharacterXor,
193 /*  95 - _                  */ CharacterIdentifierStart,
194 /*  96 - `                  */ CharacterBackQuote,
195 /*  97 - a                  */ CharacterIdentifierStart,
196 /*  98 - b                  */ CharacterIdentifierStart,
197 /*  99 - c                  */ CharacterIdentifierStart,
198 /* 100 - d                  */ CharacterIdentifierStart,
199 /* 101 - e                  */ CharacterIdentifierStart,
200 /* 102 - f                  */ CharacterIdentifierStart,
201 /* 103 - g                  */ CharacterIdentifierStart,
202 /* 104 - h                  */ CharacterIdentifierStart,
203 /* 105 - i                  */ CharacterIdentifierStart,
204 /* 106 - j                  */ CharacterIdentifierStart,
205 /* 107 - k                  */ CharacterIdentifierStart,
206 /* 108 - l                  */ CharacterIdentifierStart,
207 /* 109 - m                  */ CharacterIdentifierStart,
208 /* 110 - n                  */ CharacterIdentifierStart,
209 /* 111 - o                  */ CharacterIdentifierStart,
210 /* 112 - p                  */ CharacterIdentifierStart,
211 /* 113 - q                  */ CharacterIdentifierStart,
212 /* 114 - r                  */ CharacterIdentifierStart,
213 /* 115 - s                  */ CharacterIdentifierStart,
214 /* 116 - t                  */ CharacterIdentifierStart,
215 /* 117 - u                  */ CharacterIdentifierStart,
216 /* 118 - v                  */ CharacterIdentifierStart,
217 /* 119 - w                  */ CharacterIdentifierStart,
218 /* 120 - x                  */ CharacterIdentifierStart,
219 /* 121 - y                  */ CharacterIdentifierStart,
220 /* 122 - z                  */ CharacterIdentifierStart,
221 /* 123 - {                  */ CharacterOpenBrace,
222 /* 124 - |                  */ CharacterOr,
223 /* 125 - }                  */ CharacterCloseBrace,
224 /* 126 - ~                  */ CharacterTilde,
225 /* 127 - Delete             */ CharacterInvalid,
226 /* 128 - Cc category        */ CharacterInvalid,
227 /* 129 - Cc category        */ CharacterInvalid,
228 /* 130 - Cc category        */ CharacterInvalid,
229 /* 131 - Cc category        */ CharacterInvalid,
230 /* 132 - Cc category        */ CharacterInvalid,
231 /* 133 - Cc category        */ CharacterInvalid,
232 /* 134 - Cc category        */ CharacterInvalid,
233 /* 135 - Cc category        */ CharacterInvalid,
234 /* 136 - Cc category        */ CharacterInvalid,
235 /* 137 - Cc category        */ CharacterInvalid,
236 /* 138 - Cc category        */ CharacterInvalid,
237 /* 139 - Cc category        */ CharacterInvalid,
238 /* 140 - Cc category        */ CharacterInvalid,
239 /* 141 - Cc category        */ CharacterInvalid,
240 /* 142 - Cc category        */ CharacterInvalid,
241 /* 143 - Cc category        */ CharacterInvalid,
242 /* 144 - Cc category        */ CharacterInvalid,
243 /* 145 - Cc category        */ CharacterInvalid,
244 /* 146 - Cc category        */ CharacterInvalid,
245 /* 147 - Cc category        */ CharacterInvalid,
246 /* 148 - Cc category        */ CharacterInvalid,
247 /* 149 - Cc category        */ CharacterInvalid,
248 /* 150 - Cc category        */ CharacterInvalid,
249 /* 151 - Cc category        */ CharacterInvalid,
250 /* 152 - Cc category        */ CharacterInvalid,
251 /* 153 - Cc category        */ CharacterInvalid,
252 /* 154 - Cc category        */ CharacterInvalid,
253 /* 155 - Cc category        */ CharacterInvalid,
254 /* 156 - Cc category        */ CharacterInvalid,
255 /* 157 - Cc category        */ CharacterInvalid,
256 /* 158 - Cc category        */ CharacterInvalid,
257 /* 159 - Cc category        */ CharacterInvalid,
258 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
259 /* 161 - Po category        */ CharacterInvalid,
260 /* 162 - Sc category        */ CharacterInvalid,
261 /* 163 - Sc category        */ CharacterInvalid,
262 /* 164 - Sc category        */ CharacterInvalid,
263 /* 165 - Sc category        */ CharacterInvalid,
264 /* 166 - So category        */ CharacterInvalid,
265 /* 167 - So category        */ CharacterInvalid,
266 /* 168 - Sk category        */ CharacterInvalid,
267 /* 169 - So category        */ CharacterInvalid,
268 /* 170 - Ll category        */ CharacterIdentifierStart,
269 /* 171 - Pi category        */ CharacterInvalid,
270 /* 172 - Sm category        */ CharacterInvalid,
271 /* 173 - Cf category        */ CharacterInvalid,
272 /* 174 - So category        */ CharacterInvalid,
273 /* 175 - Sk category        */ CharacterInvalid,
274 /* 176 - So category        */ CharacterInvalid,
275 /* 177 - Sm category        */ CharacterInvalid,
276 /* 178 - No category        */ CharacterInvalid,
277 /* 179 - No category        */ CharacterInvalid,
278 /* 180 - Sk category        */ CharacterInvalid,
279 /* 181 - Ll category        */ CharacterIdentifierStart,
280 /* 182 - So category        */ CharacterInvalid,
281 /* 183 - Po category        */ CharacterInvalid,
282 /* 184 - Sk category        */ CharacterInvalid,
283 /* 185 - No category        */ CharacterInvalid,
284 /* 186 - Ll category        */ CharacterIdentifierStart,
285 /* 187 - Pf category        */ CharacterInvalid,
286 /* 188 - No category        */ CharacterInvalid,
287 /* 189 - No category        */ CharacterInvalid,
288 /* 190 - No category        */ CharacterInvalid,
289 /* 191 - Po category        */ CharacterInvalid,
290 /* 192 - Lu category        */ CharacterIdentifierStart,
291 /* 193 - Lu category        */ CharacterIdentifierStart,
292 /* 194 - Lu category        */ CharacterIdentifierStart,
293 /* 195 - Lu category        */ CharacterIdentifierStart,
294 /* 196 - Lu category        */ CharacterIdentifierStart,
295 /* 197 - Lu category        */ CharacterIdentifierStart,
296 /* 198 - Lu category        */ CharacterIdentifierStart,
297 /* 199 - Lu category        */ CharacterIdentifierStart,
298 /* 200 - Lu category        */ CharacterIdentifierStart,
299 /* 201 - Lu category        */ CharacterIdentifierStart,
300 /* 202 - Lu category        */ CharacterIdentifierStart,
301 /* 203 - Lu category        */ CharacterIdentifierStart,
302 /* 204 - Lu category        */ CharacterIdentifierStart,
303 /* 205 - Lu category        */ CharacterIdentifierStart,
304 /* 206 - Lu category        */ CharacterIdentifierStart,
305 /* 207 - Lu category        */ CharacterIdentifierStart,
306 /* 208 - Lu category        */ CharacterIdentifierStart,
307 /* 209 - Lu category        */ CharacterIdentifierStart,
308 /* 210 - Lu category        */ CharacterIdentifierStart,
309 /* 211 - Lu category        */ CharacterIdentifierStart,
310 /* 212 - Lu category        */ CharacterIdentifierStart,
311 /* 213 - Lu category        */ CharacterIdentifierStart,
312 /* 214 - Lu category        */ CharacterIdentifierStart,
313 /* 215 - Sm category        */ CharacterInvalid,
314 /* 216 - Lu category        */ CharacterIdentifierStart,
315 /* 217 - Lu category        */ CharacterIdentifierStart,
316 /* 218 - Lu category        */ CharacterIdentifierStart,
317 /* 219 - Lu category        */ CharacterIdentifierStart,
318 /* 220 - Lu category        */ CharacterIdentifierStart,
319 /* 221 - Lu category        */ CharacterIdentifierStart,
320 /* 222 - Lu category        */ CharacterIdentifierStart,
321 /* 223 - Ll category        */ CharacterIdentifierStart,
322 /* 224 - Ll category        */ CharacterIdentifierStart,
323 /* 225 - Ll category        */ CharacterIdentifierStart,
324 /* 226 - Ll category        */ CharacterIdentifierStart,
325 /* 227 - Ll category        */ CharacterIdentifierStart,
326 /* 228 - Ll category        */ CharacterIdentifierStart,
327 /* 229 - Ll category        */ CharacterIdentifierStart,
328 /* 230 - Ll category        */ CharacterIdentifierStart,
329 /* 231 - Ll category        */ CharacterIdentifierStart,
330 /* 232 - Ll category        */ CharacterIdentifierStart,
331 /* 233 - Ll category        */ CharacterIdentifierStart,
332 /* 234 - Ll category        */ CharacterIdentifierStart,
333 /* 235 - Ll category        */ CharacterIdentifierStart,
334 /* 236 - Ll category        */ CharacterIdentifierStart,
335 /* 237 - Ll category        */ CharacterIdentifierStart,
336 /* 238 - Ll category        */ CharacterIdentifierStart,
337 /* 239 - Ll category        */ CharacterIdentifierStart,
338 /* 240 - Ll category        */ CharacterIdentifierStart,
339 /* 241 - Ll category        */ CharacterIdentifierStart,
340 /* 242 - Ll category        */ CharacterIdentifierStart,
341 /* 243 - Ll category        */ CharacterIdentifierStart,
342 /* 244 - Ll category        */ CharacterIdentifierStart,
343 /* 245 - Ll category        */ CharacterIdentifierStart,
344 /* 246 - Ll category        */ CharacterIdentifierStart,
345 /* 247 - Sm category        */ CharacterInvalid,
346 /* 248 - Ll category        */ CharacterIdentifierStart,
347 /* 249 - Ll category        */ CharacterIdentifierStart,
348 /* 250 - Ll category        */ CharacterIdentifierStart,
349 /* 251 - Ll category        */ CharacterIdentifierStart,
350 /* 252 - Ll category        */ CharacterIdentifierStart,
351 /* 253 - Ll category        */ CharacterIdentifierStart,
352 /* 254 - Ll category        */ CharacterIdentifierStart,
353 /* 255 - Ll category        */ CharacterIdentifierStart
354 };
355
356 // This table provides the character that results from \X where X is the index in the table beginning
357 // with SPACE. A table value of 0 means that more processing needs to be done.
358 static constexpr const LChar singleCharacterEscapeValuesForASCII[128] = {
359 /*   0 - Null               */ 0,
360 /*   1 - Start of Heading   */ 0,
361 /*   2 - Start of Text      */ 0,
362 /*   3 - End of Text        */ 0,
363 /*   4 - End of Transm.     */ 0,
364 /*   5 - Enquiry            */ 0,
365 /*   6 - Acknowledgment     */ 0,
366 /*   7 - Bell               */ 0,
367 /*   8 - Back Space         */ 0,
368 /*   9 - Horizontal Tab     */ 0,
369 /*  10 - Line Feed          */ 0,
370 /*  11 - Vertical Tab       */ 0,
371 /*  12 - Form Feed          */ 0,
372 /*  13 - Carriage Return    */ 0,
373 /*  14 - Shift Out          */ 0,
374 /*  15 - Shift In           */ 0,
375 /*  16 - Data Line Escape   */ 0,
376 /*  17 - Device Control 1   */ 0,
377 /*  18 - Device Control 2   */ 0,
378 /*  19 - Device Control 3   */ 0,
379 /*  20 - Device Control 4   */ 0,
380 /*  21 - Negative Ack.      */ 0,
381 /*  22 - Synchronous Idle   */ 0,
382 /*  23 - End of Transmit    */ 0,
383 /*  24 - Cancel             */ 0,
384 /*  25 - End of Medium      */ 0,
385 /*  26 - Substitute         */ 0,
386 /*  27 - Escape             */ 0,
387 /*  28 - File Separator     */ 0,
388 /*  29 - Group Separator    */ 0,
389 /*  30 - Record Separator   */ 0,
390 /*  31 - Unit Separator     */ 0,
391 /*  32 - Space              */ ' ',
392 /*  33 - !                  */ '!',
393 /*  34 - "                  */ '"',
394 /*  35 - #                  */ '#',
395 /*  36 - $                  */ '$',
396 /*  37 - %                  */ '%',
397 /*  38 - &                  */ '&',
398 /*  39 - '                  */ '\'',
399 /*  40 - (                  */ '(',
400 /*  41 - )                  */ ')',
401 /*  42 - *                  */ '*',
402 /*  43 - +                  */ '+',
403 /*  44 - ,                  */ ',',
404 /*  45 - -                  */ '-',
405 /*  46 - .                  */ '.',
406 /*  47 - /                  */ '/',
407 /*  48 - 0                  */ 0,
408 /*  49 - 1                  */ 0,
409 /*  50 - 2                  */ 0,
410 /*  51 - 3                  */ 0,
411 /*  52 - 4                  */ 0,
412 /*  53 - 5                  */ 0,
413 /*  54 - 6                  */ 0,
414 /*  55 - 7                  */ 0,
415 /*  56 - 8                  */ 0,
416 /*  57 - 9                  */ 0,
417 /*  58 - :                  */ ':',
418 /*  59 - ;                  */ ';',
419 /*  60 - <                  */ '<',
420 /*  61 - =                  */ '=',
421 /*  62 - >                  */ '>',
422 /*  63 - ?                  */ '?',
423 /*  64 - @                  */ '@',
424 /*  65 - A                  */ 'A',
425 /*  66 - B                  */ 'B',
426 /*  67 - C                  */ 'C',
427 /*  68 - D                  */ 'D',
428 /*  69 - E                  */ 'E',
429 /*  70 - F                  */ 'F',
430 /*  71 - G                  */ 'G',
431 /*  72 - H                  */ 'H',
432 /*  73 - I                  */ 'I',
433 /*  74 - J                  */ 'J',
434 /*  75 - K                  */ 'K',
435 /*  76 - L                  */ 'L',
436 /*  77 - M                  */ 'M',
437 /*  78 - N                  */ 'N',
438 /*  79 - O                  */ 'O',
439 /*  80 - P                  */ 'P',
440 /*  81 - Q                  */ 'Q',
441 /*  82 - R                  */ 'R',
442 /*  83 - S                  */ 'S',
443 /*  84 - T                  */ 'T',
444 /*  85 - U                  */ 'U',
445 /*  86 - V                  */ 'V',
446 /*  87 - W                  */ 'W',
447 /*  88 - X                  */ 'X',
448 /*  89 - Y                  */ 'Y',
449 /*  90 - Z                  */ 'Z',
450 /*  91 - [                  */ '[',
451 /*  92 - \                  */ '\\',
452 /*  93 - ]                  */ ']',
453 /*  94 - ^                  */ '^',
454 /*  95 - _                  */ '_',
455 /*  96 - `                  */ '`',
456 /*  97 - a                  */ 'a',
457 /*  98 - b                  */ 0x08,
458 /*  99 - c                  */ 'c',
459 /* 100 - d                  */ 'd',
460 /* 101 - e                  */ 'e',
461 /* 102 - f                  */ 0x0C,
462 /* 103 - g                  */ 'g',
463 /* 104 - h                  */ 'h',
464 /* 105 - i                  */ 'i',
465 /* 106 - j                  */ 'j',
466 /* 107 - k                  */ 'k',
467 /* 108 - l                  */ 'l',
468 /* 109 - m                  */ 'm',
469 /* 110 - n                  */ 0x0A,
470 /* 111 - o                  */ 'o',
471 /* 112 - p                  */ 'p',
472 /* 113 - q                  */ 'q',
473 /* 114 - r                  */ 0x0D,
474 /* 115 - s                  */ 's',
475 /* 116 - t                  */ 0x09,
476 /* 117 - u                  */ 0,
477 /* 118 - v                  */ 0x0B,
478 /* 119 - w                  */ 'w',
479 /* 120 - x                  */ 0,
480 /* 121 - y                  */ 'y',
481 /* 122 - z                  */ 'z',
482 /* 123 - {                  */ '{',
483 /* 124 - |                  */ '|',
484 /* 125 - }                  */ '}',
485 /* 126 - ~                  */ '~',
486 /* 127 - Delete             */ 0
487 };
488
489 template <typename T>
490 Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
491     : m_isReparsingFunction(false)
492     , m_vm(vm)
493     , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
494     , m_scriptMode(scriptMode)
495 {
496 }
497
498 static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
499 {
500     if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
501         return INTEGER;
502     return DOUBLE;
503 }
504
505 template <typename T>
506 Lexer<T>::~Lexer()
507 {
508 }
509
510 template <typename T>
511 String Lexer<T>::invalidCharacterMessage() const
512 {
513     switch (m_current) {
514     case 0:
515         return "Invalid character: '\\0'"_s;
516     case 10:
517         return "Invalid character: '\\n'"_s;
518     case 11:
519         return "Invalid character: '\\v'"_s;
520     case 13:
521         return "Invalid character: '\\r'"_s;
522     case 35:
523         return "Invalid character: '#'"_s;
524     case 64:
525         return "Invalid character: '@'"_s;
526     case 96:
527         return "Invalid character: '`'"_s;
528     default:
529         return String::format("Invalid character '\\u%04x'", static_cast<unsigned>(m_current));
530     }
531 }
532
533 template <typename T>
534 ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
535 {
536     ASSERT(m_code <= m_codeEnd);
537     return m_code;
538 }
539
540 template <typename T>
541 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
542 {
543     m_arena = &arena->identifierArena();
544     
545     m_lineNumber = source.firstLine().oneBasedInt();
546     m_lastToken = -1;
547     
548     StringView sourceString = source.provider()->source();
549
550     if (!sourceString.isNull())
551         setCodeStart(sourceString);
552     else
553         m_codeStart = 0;
554
555     m_source = &source;
556     m_sourceOffset = source.startOffset();
557     m_codeStartPlusOffset = m_codeStart + source.startOffset();
558     m_code = m_codeStartPlusOffset;
559     m_codeEnd = m_codeStart + source.endOffset();
560     m_error = false;
561     m_atLineStart = true;
562     m_lineStart = m_code;
563     m_lexErrorMessage = String();
564     m_sourceURLDirective = String();
565     m_sourceMappingURLDirective = String();
566     
567     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
568     m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
569     m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
570     
571     if (LIKELY(m_code < m_codeEnd))
572         m_current = *m_code;
573     else
574         m_current = 0;
575     ASSERT(currentOffset() == source.startOffset());
576 }
577
578 template <typename T>
579 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
580 {
581     m_code += shiftAmount;
582     ASSERT(currentOffset() >= currentLineStartOffset());
583     m_current = *m_code;
584 }
585
586 template <typename T>
587 ALWAYS_INLINE void Lexer<T>::shift()
588 {
589     // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
590     m_current = 0;
591     ++m_code;
592     if (LIKELY(m_code < m_codeEnd))
593         m_current = *m_code;
594 }
595
596 template <typename T>
597 ALWAYS_INLINE bool Lexer<T>::atEnd() const
598 {
599     ASSERT(!m_current || m_code < m_codeEnd);
600     return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
601 }
602
603 template <typename T>
604 ALWAYS_INLINE T Lexer<T>::peek(int offset) const
605 {
606     ASSERT(offset > 0 && offset < 5);
607     const T* code = m_code + offset;
608     return (code < m_codeEnd) ? *code : 0;
609 }
610
611 struct ParsedUnicodeEscapeValue {
612     ParsedUnicodeEscapeValue(UChar32 value)
613         : m_value(value)
614     {
615         ASSERT(isValid());
616     }
617
618     enum SpecialValueType { Incomplete = -2, Invalid = -1 };
619     ParsedUnicodeEscapeValue(SpecialValueType type)
620         : m_value(type)
621     {
622     }
623
624     bool isValid() const { return m_value >= 0; }
625     bool isIncomplete() const { return m_value == Incomplete; }
626
627     UChar32 value() const
628     {
629         ASSERT(isValid());
630         return m_value;
631     }
632
633 private:
634     UChar32 m_value;
635 };
636
637 template<typename CharacterType>
638 ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
639 {
640     if (m_current == '{') {
641         shift();
642         UChar32 codePoint = 0;
643         do {
644             if (!isASCIIHexDigit(m_current))
645                 return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
646             codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
647             if (codePoint > UCHAR_MAX_VALUE) {
648                 // For raw template literal syntax, we consume `NotEscapeSequence`.
649                 // Here, we consume NotCodePoint's HexDigits.
650                 //
651                 // NotEscapeSequence ::
652                 //     u { [lookahread not one of HexDigit]
653                 //     u { NotCodePoint
654                 //     u { CodePoint [lookahead != }]
655                 //
656                 // NotCodePoint ::
657                 //     HexDigits but not if MV of HexDigits <= 0x10FFFF
658                 //
659                 // CodePoint ::
660                 //     HexDigits but not if MV of HexDigits > 0x10FFFF
661                 shift();
662                 while (isASCIIHexDigit(m_current))
663                     shift();
664
665                 return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
666             }
667             shift();
668         } while (m_current != '}');
669         shift();
670         return codePoint;
671     }
672
673     auto character2 = peek(1);
674     auto character3 = peek(2);
675     auto character4 = peek(3);
676     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) {
677         auto result = (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
678
679         // For raw template literal syntax, we consume `NotEscapeSequence`.
680         //
681         // NotEscapeSequence ::
682         //     u [lookahead not one of HexDigit][lookahead != {]
683         //     u HexDigit [lookahead not one of HexDigit]
684         //     u HexDigit HexDigit [lookahead not one of HexDigit]
685         //     u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
686         while (isASCIIHexDigit(m_current))
687             shift();
688
689         return result;
690     }
691
692     auto result = convertUnicode(m_current, character2, character3, character4);
693     shift();
694     shift();
695     shift();
696     shift();
697     return result;
698 }
699
700 template <typename T>
701 void Lexer<T>::shiftLineTerminator()
702 {
703     ASSERT(isLineTerminator(m_current));
704
705     m_positionBeforeLastNewline = currentPosition();
706     T prev = m_current;
707     shift();
708
709     if (prev == '\r' && m_current == '\n')
710         shift();
711
712     ++m_lineNumber;
713 }
714
715 template <typename T>
716 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
717 {
718     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
719 }
720
721 template <typename T>
722 ALWAYS_INLINE void Lexer<T>::skipWhitespace()
723 {
724     while (isWhiteSpace(m_current))
725         shift();
726 }
727
728 static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
729 {
730     return U_GET_GC_MASK(c) & U_GC_L_MASK;
731 }
732
733 static ALWAYS_INLINE bool isLatin1(LChar)
734 {
735     return true;
736 }
737
738 static ALWAYS_INLINE bool isLatin1(UChar c)
739 {
740     return c < 256;
741 }
742
743 static ALWAYS_INLINE bool isLatin1(UChar32 c)
744 {
745     return !(c & ~0xFF);
746 }
747
748 static inline bool isIdentStart(LChar c)
749 {
750     return typesOfLatin1Characters[c] == CharacterIdentifierStart;
751 }
752
753 static inline bool isIdentStart(UChar32 c)
754 {
755     return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
756 }
757
758 static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
759 {
760     // FIXME: ES6 says this should be based on the Unicode property ID_Continue now instead.
761     return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
762 }
763
764 static ALWAYS_INLINE bool isIdentPart(LChar c)
765 {
766     // Character types are divided into two groups depending on whether they can be part of an
767     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
768     // part of an identifier. (See the CharacterType definition for more details.)
769     return typesOfLatin1Characters[c] <= CharacterNumber;
770 }
771
772 static ALWAYS_INLINE bool isIdentPart(UChar32 c)
773 {
774     return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
775 }
776
777 static ALWAYS_INLINE bool isIdentPart(UChar c)
778 {
779     return isIdentPart(static_cast<UChar32>(c));
780 }
781
782 template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
783 {
784     if (isIdentPart(code[0]))
785         return true;
786
787     // Shortest sequence handled below is \u{0}, which is 5 characters.
788     if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u'))
789         return false;
790
791     if (code[2] == '{') {
792         UChar32 codePoint = 0;
793         const CharacterType* pointer;
794         for (pointer = &code[3]; pointer < codeEnd; ++pointer) {
795             auto digit = *pointer;
796             if (!isASCIIHexDigit(digit))
797                 break;
798             codePoint = (codePoint << 4) | toASCIIHexValue(digit);
799             if (codePoint > UCHAR_MAX_VALUE)
800                 return false;
801         }
802         return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}';
803     }
804
805     // Shortest sequence handled below is \uXXXX, which is 6 characters.
806     if (codeEnd - code < 6)
807         return false;
808
809     auto character1 = code[2];
810     auto character2 = code[3];
811     auto character3 = code[4];
812     auto character4 = code[5];
813     return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
814         && isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
815 }
816
817 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
818 {
819     return isIdentPartIncludingEscapeTemplate(code, codeEnd);
820 }
821
822 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
823 {
824     return isIdentPartIncludingEscapeTemplate(code, codeEnd);
825 }
826
827 static inline LChar singleEscape(int c)
828 {
829     if (c < 128) {
830         ASSERT(static_cast<size_t>(c) < WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII));
831         return singleCharacterEscapeValuesForASCII[c];
832     }
833     return 0;
834 }
835
836 template <typename T>
837 inline void Lexer<T>::record8(int c)
838 {
839     ASSERT(c >= 0);
840     ASSERT(c <= 0xFF);
841     m_buffer8.append(static_cast<LChar>(c));
842 }
843
844 template <typename T>
845 inline void assertCharIsIn8BitRange(T c)
846 {
847     UNUSED_PARAM(c);
848     ASSERT(c >= 0);
849     ASSERT(c <= 0xFF);
850 }
851
852 template <>
853 inline void assertCharIsIn8BitRange(UChar c)
854 {
855     UNUSED_PARAM(c);
856     ASSERT(c <= 0xFF);
857 }
858
859 template <>
860 inline void assertCharIsIn8BitRange(LChar)
861 {
862 }
863
864 template <typename T>
865 inline void Lexer<T>::append8(const T* p, size_t length)
866 {
867     size_t currentSize = m_buffer8.size();
868     m_buffer8.grow(currentSize + length);
869     LChar* rawBuffer = m_buffer8.data() + currentSize;
870
871     for (size_t i = 0; i < length; i++) {
872         T c = p[i];
873         assertCharIsIn8BitRange(c);
874         rawBuffer[i] = c;
875     }
876 }
877
878 template <typename T>
879 inline void Lexer<T>::append16(const LChar* p, size_t length)
880 {
881     size_t currentSize = m_buffer16.size();
882     m_buffer16.grow(currentSize + length);
883     UChar* rawBuffer = m_buffer16.data() + currentSize;
884
885     for (size_t i = 0; i < length; i++)
886         rawBuffer[i] = p[i];
887 }
888
889 template <typename T>
890 inline void Lexer<T>::record16(T c)
891 {
892     m_buffer16.append(c);
893 }
894
895 template <typename T>
896 inline void Lexer<T>::record16(int c)
897 {
898     ASSERT(c >= 0);
899     ASSERT(c <= static_cast<int>(USHRT_MAX));
900     m_buffer16.append(static_cast<UChar>(c));
901 }
902     
903 template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
904 {
905     ASSERT(codePoint >= 0);
906     ASSERT(codePoint <= UCHAR_MAX_VALUE);
907     if (U_IS_BMP(codePoint))
908         record16(codePoint);
909     else {
910         UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
911         append16(codeUnits, 2);
912     }
913 }
914
915 #if !ASSERT_DISABLED
916 bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
917 {
918     if (!ident)
919         return true;
920     /* Just block any use of suspicious identifiers.  This is intended to
921      * be used as a safety net while implementing builtins.
922      */
923     // FIXME: How can a debug-only assertion be a safety net?
924     if (*ident == vm.propertyNames->builtinNames().callPublicName())
925         return false;
926     if (*ident == vm.propertyNames->builtinNames().applyPublicName())
927         return false;
928     if (*ident == vm.propertyNames->eval)
929         return false;
930     if (*ident == vm.propertyNames->Function)
931         return false;
932     return true;
933 }
934 #endif
935     
936 template <>
937 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
938 {
939     tokenData->escaped = false;
940     const ptrdiff_t remaining = m_codeEnd - m_code;
941     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
942         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
943         if (keyword != IDENT) {
944             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
945             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
946         }
947     }
948     
949     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
950     if (isPrivateName)
951         shift();
952     
953     const LChar* identifierStart = currentSourcePtr();
954     unsigned identifierLineStart = currentLineStartOffset();
955     
956     while (isIdentPart(m_current))
957         shift();
958     
959     if (UNLIKELY(m_current == '\\')) {
960         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
961         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
962     }
963
964     const Identifier* ident = nullptr;
965     
966     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
967         int identifierLength = currentSourcePtr() - identifierStart;
968         ident = makeIdentifier(identifierStart, identifierLength);
969         if (m_parsingBuiltinFunction) {
970             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
971                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
972                 return ERRORTOK;
973             }
974             if (isPrivateName)
975                 ident = m_vm->propertyNames->lookUpPrivateName(*ident);
976             else if (*ident == m_vm->propertyNames->undefinedKeyword)
977                 tokenData->ident = &m_vm->propertyNames->builtinNames().undefinedPrivateName();
978             if (!ident)
979                 return INVALID_PRIVATE_NAME_ERRORTOK;
980         }
981         tokenData->ident = ident;
982     } else
983         tokenData->ident = nullptr;
984
985     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
986         ASSERT(shouldCreateIdentifier);
987         if (remaining < maxTokenLength) {
988             const HashTableValue* entry = JSC::mainTable.entry(*ident);
989             ASSERT((remaining < maxTokenLength) || !entry);
990             if (!entry)
991                 return IDENT;
992             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
993             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
994         }
995         return IDENT;
996     }
997
998     return IDENT;
999 }
1000
1001 template <>
1002 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
1003 {
1004     tokenData->escaped = false;
1005     const ptrdiff_t remaining = m_codeEnd - m_code;
1006     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
1007         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
1008         if (keyword != IDENT) {
1009             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
1010             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
1011         }
1012     }
1013     
1014     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
1015     if (isPrivateName)
1016         shift();
1017
1018     const UChar* identifierStart = currentSourcePtr();
1019     int identifierLineStart = currentLineStartOffset();
1020
1021     UChar orAllChars = 0;
1022     
1023     while (isIdentPart(m_current)) {
1024         orAllChars |= m_current;
1025         shift();
1026     }
1027     
1028     if (UNLIKELY(m_current == '\\')) {
1029         ASSERT(!isPrivateName);
1030         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
1031         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
1032     }
1033
1034     bool isAll8Bit = false;
1035
1036     if (!(orAllChars & ~0xff))
1037         isAll8Bit = true;
1038
1039     const Identifier* ident = nullptr;
1040     
1041     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
1042         int identifierLength = currentSourcePtr() - identifierStart;
1043         if (isAll8Bit)
1044             ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1045         else
1046             ident = makeIdentifier(identifierStart, identifierLength);
1047         if (m_parsingBuiltinFunction) {
1048             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
1049                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
1050                 return ERRORTOK;
1051             }
1052             if (isPrivateName)
1053                 ident = m_vm->propertyNames->lookUpPrivateName(*ident);
1054             else if (*ident == m_vm->propertyNames->undefinedKeyword)
1055                 tokenData->ident = &m_vm->propertyNames->builtinNames().undefinedPrivateName();
1056             if (!ident)
1057                 return INVALID_PRIVATE_NAME_ERRORTOK;
1058         }
1059         tokenData->ident = ident;
1060     } else
1061         tokenData->ident = nullptr;
1062     
1063     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
1064         ASSERT(shouldCreateIdentifier);
1065         if (remaining < maxTokenLength) {
1066             const HashTableValue* entry = JSC::mainTable.entry(*ident);
1067             ASSERT((remaining < maxTokenLength) || !entry);
1068             if (!entry)
1069                 return IDENT;
1070             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1071             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1072         }
1073         return IDENT;
1074     }
1075
1076     return IDENT;
1077 }
1078
1079 template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
1080 {
1081     tokenData->escaped = true;
1082     auto identifierStart = currentSourcePtr();
1083     bool bufferRequired = false;
1084
1085     while (true) {
1086         if (LIKELY(isIdentPart(m_current))) {
1087             shift();
1088             continue;
1089         }
1090         if (LIKELY(m_current != '\\'))
1091             break;
1092
1093         // \uXXXX unicode characters.
1094         bufferRequired = true;
1095         if (identifierStart != currentSourcePtr())
1096             m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1097         shift();
1098         if (UNLIKELY(m_current != 'u'))
1099             return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
1100         shift();
1101         auto character = parseUnicodeEscape();
1102         if (UNLIKELY(!character.isValid()))
1103             return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1104         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
1105             return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1106         if (shouldCreateIdentifier)
1107             recordUnicodeCodePoint(character.value());
1108         identifierStart = currentSourcePtr();
1109     }
1110
1111     int identifierLength;
1112     const Identifier* ident = nullptr;
1113     if (shouldCreateIdentifier) {
1114         if (!bufferRequired) {
1115             identifierLength = currentSourcePtr() - identifierStart;
1116             ident = makeIdentifier(identifierStart, identifierLength);
1117         } else {
1118             if (identifierStart != currentSourcePtr())
1119                 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1120             ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1121         }
1122
1123         tokenData->ident = ident;
1124     } else
1125         tokenData->ident = nullptr;
1126
1127     m_buffer16.shrink(0);
1128
1129     if (LIKELY(!(lexerFlags & LexerFlagsIgnoreReservedWords))) {
1130         ASSERT(shouldCreateIdentifier);
1131         const HashTableValue* entry = JSC::mainTable.entry(*ident);
1132         if (!entry)
1133             return IDENT;
1134         JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1135         if ((token != RESERVED_IF_STRICT) || strictMode)
1136             return bufferRequired ? UNEXPECTED_ESCAPE_ERRORTOK : token;
1137     }
1138
1139     return IDENT;
1140 }
1141
1142 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1143 {
1144     return character < 0xE;
1145 }
1146
1147 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1148 {
1149     return character < 0xE || character > 0xFF;
1150 }
1151
1152 template <typename T>
1153 template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1154 {
1155     int startingOffset = currentOffset();
1156     int startingLineStartOffset = currentLineStartOffset();
1157     int startingLineNumber = lineNumber();
1158     T stringQuoteCharacter = m_current;
1159     shift();
1160
1161     const T* stringStart = currentSourcePtr();
1162
1163     while (m_current != stringQuoteCharacter) {
1164         if (UNLIKELY(m_current == '\\')) {
1165             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1166                 append8(stringStart, currentSourcePtr() - stringStart);
1167             shift();
1168
1169             LChar escape = singleEscape(m_current);
1170
1171             // Most common escape sequences first.
1172             if (escape) {
1173                 if (shouldBuildStrings)
1174                     record8(escape);
1175                 shift();
1176             } else if (UNLIKELY(isLineTerminator(m_current)))
1177                 shiftLineTerminator();
1178             else if (m_current == 'x') {
1179                 shift();
1180                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1181                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1182                     return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1183                 }
1184                 T prev = m_current;
1185                 shift();
1186                 if (shouldBuildStrings)
1187                     record8(convertHex(prev, m_current));
1188                 shift();
1189             } else {
1190                 setOffset(startingOffset, startingLineStartOffset);
1191                 setLineNumber(startingLineNumber);
1192                 m_buffer8.shrink(0);
1193                 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1194             }
1195             stringStart = currentSourcePtr();
1196             continue;
1197         }
1198
1199         if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1200             setOffset(startingOffset, startingLineStartOffset);
1201             setLineNumber(startingLineNumber);
1202             m_buffer8.shrink(0);
1203             return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1204         }
1205
1206         shift();
1207     }
1208
1209     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1210         append8(stringStart, currentSourcePtr() - stringStart);
1211     if (shouldBuildStrings) {
1212         tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1213         m_buffer8.shrink(0);
1214     } else
1215         tokenData->ident = 0;
1216
1217     return StringParsedSuccessfully;
1218 }
1219
1220 template <typename T>
1221 template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode, T stringQuoteCharacter) -> StringParseResult
1222 {
1223     if (m_current == 'x') {
1224         shift();
1225         if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1226             // For raw template literal syntax, we consume `NotEscapeSequence`.
1227             //
1228             // NotEscapeSequence ::
1229             //     x [lookahread not one of HexDigit]
1230             //     x HexDigit [lookahread not one of HexDigit]
1231             if (isASCIIHexDigit(m_current))
1232                 shift();
1233             ASSERT(!isASCIIHexDigit(m_current));
1234
1235             m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1236             return atEnd() ? StringUnterminated : StringCannotBeParsed;
1237         }
1238
1239         T prev = m_current;
1240         shift();
1241         if (shouldBuildStrings)
1242             record16(convertHex(prev, m_current));
1243         shift();
1244
1245         return StringParsedSuccessfully;
1246     }
1247
1248     if (m_current == 'u') {
1249         shift();
1250
1251         if (escapeParseMode == LexerEscapeParseMode::String && m_current == stringQuoteCharacter) {
1252             if (shouldBuildStrings)
1253                 record16('u');
1254             return StringParsedSuccessfully;
1255         }
1256
1257         auto character = parseUnicodeEscape();
1258         if (character.isValid()) {
1259             if (shouldBuildStrings)
1260                 recordUnicodeCodePoint(character.value());
1261             return StringParsedSuccessfully;
1262         }
1263
1264         m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
1265         return atEnd() ? StringUnterminated : StringCannotBeParsed;
1266     }
1267
1268     if (strictMode) {
1269         if (isASCIIDigit(m_current)) {
1270             // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1271             int character1 = m_current;
1272             shift();
1273             if (character1 != '0' || isASCIIDigit(m_current)) {
1274                 // For raw template literal syntax, we consume `NotEscapeSequence`.
1275                 //
1276                 // NotEscapeSequence ::
1277                 //     0 DecimalDigit
1278                 //     DecimalDigit but not 0
1279                 if (character1 == '0')
1280                     shift();
1281
1282                 m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
1283                 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1284             }
1285             if (shouldBuildStrings)
1286                 record16(0);
1287             return StringParsedSuccessfully;
1288         }
1289     } else {
1290         if (isASCIIOctalDigit(m_current)) {
1291             // Octal character sequences
1292             T character1 = m_current;
1293             shift();
1294             if (isASCIIOctalDigit(m_current)) {
1295                 // Two octal characters
1296                 T character2 = m_current;
1297                 shift();
1298                 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1299                     if (shouldBuildStrings)
1300                         record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1301                     shift();
1302                 } else {
1303                     if (shouldBuildStrings)
1304                         record16((character1 - '0') * 8 + character2 - '0');
1305                 }
1306             } else {
1307                 if (shouldBuildStrings)
1308                     record16(character1 - '0');
1309             }
1310             return StringParsedSuccessfully;
1311         }
1312     }
1313
1314     if (!atEnd()) {
1315         if (shouldBuildStrings)
1316             record16(m_current);
1317         shift();
1318         return StringParsedSuccessfully;
1319     }
1320
1321     m_lexErrorMessage = "Unterminated string constant"_s;
1322     return StringUnterminated;
1323 }
1324
1325 template <typename T>
1326 template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1327 {
1328     T stringQuoteCharacter = m_current;
1329     shift();
1330
1331     const T* stringStart = currentSourcePtr();
1332
1333     while (m_current != stringQuoteCharacter) {
1334         if (UNLIKELY(m_current == '\\')) {
1335             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1336                 append16(stringStart, currentSourcePtr() - stringStart);
1337             shift();
1338
1339             LChar escape = singleEscape(m_current);
1340
1341             // Most common escape sequences first
1342             if (escape) {
1343                 if (shouldBuildStrings)
1344                     record16(escape);
1345                 shift();
1346             } else if (UNLIKELY(isLineTerminator(m_current)))
1347                 shiftLineTerminator();
1348             else {
1349                 StringParseResult result = parseComplexEscape<shouldBuildStrings, LexerEscapeParseMode::String>(strictMode, stringQuoteCharacter);
1350                 if (result != StringParsedSuccessfully)
1351                     return result;
1352             }
1353
1354             stringStart = currentSourcePtr();
1355             continue;
1356         }
1357         // Fast check for characters that require special handling.
1358         // Catches 0, \n, and \r as efficiently as possible, and lets through all common ASCII characters.
1359         static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
1360         if (UNLIKELY(m_current < 0xE)) {
1361             // New-line or end of input is not allowed
1362             if (atEnd() || m_current == '\r' || m_current == '\n') {
1363                 m_lexErrorMessage = "Unexpected EOF"_s;
1364                 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1365             }
1366             // Anything else is just a normal character
1367         }
1368         shift();
1369     }
1370
1371     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1372         append16(stringStart, currentSourcePtr() - stringStart);
1373     if (shouldBuildStrings)
1374         tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1375     else
1376         tokenData->ident = 0;
1377
1378     m_buffer16.shrink(0);
1379     return StringParsedSuccessfully;
1380 }
1381
1382 template <typename T>
1383 typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1384 {
1385     bool parseCookedFailed = false;
1386     const T* stringStart = currentSourcePtr();
1387     const T* rawStringStart = currentSourcePtr();
1388
1389     while (m_current != '`') {
1390         if (UNLIKELY(m_current == '\\')) {
1391             if (stringStart != currentSourcePtr())
1392                 append16(stringStart, currentSourcePtr() - stringStart);
1393             shift();
1394
1395             LChar escape = singleEscape(m_current);
1396
1397             // Most common escape sequences first.
1398             if (escape) {
1399                 record16(escape);
1400                 shift();
1401             } else if (UNLIKELY(isLineTerminator(m_current))) {
1402                 // Normalize <CR>, <CR><LF> to <LF>.
1403                 if (m_current == '\r') {
1404                     ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
1405
1406                     if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
1407                         m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1408                         m_bufferForRawTemplateString16.append('\n');
1409                     }
1410
1411                     shiftLineTerminator();
1412                     rawStringStart = currentSourcePtr();
1413                 } else
1414                     shiftLineTerminator();
1415             } else {
1416                 bool strictMode = true;
1417                 StringParseResult result = parseComplexEscape<true, LexerEscapeParseMode::Template>(strictMode, '`');
1418                 if (result != StringParsedSuccessfully) {
1419                     if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
1420                         parseCookedFailed = true;
1421                     else
1422                         return result;
1423                 }
1424             }
1425
1426             stringStart = currentSourcePtr();
1427             continue;
1428         }
1429
1430         if (m_current == '$' && peek(1) == '{')
1431             break;
1432
1433         // Fast check for characters that require special handling.
1434         // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1435         // as possible, and lets through all common ASCII characters.
1436         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1437             // End of input is not allowed.
1438             // Unlike String, line terminator is allowed.
1439             if (atEnd()) {
1440                 m_lexErrorMessage = "Unexpected EOF"_s;
1441                 return StringUnterminated;
1442             }
1443
1444             if (isLineTerminator(m_current)) {
1445                 if (m_current == '\r') {
1446                     // Normalize <CR>, <CR><LF> to <LF>.
1447                     if (stringStart != currentSourcePtr())
1448                         append16(stringStart, currentSourcePtr() - stringStart);
1449                     if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1450                         m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1451
1452                     record16('\n');
1453                     if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1454                         m_bufferForRawTemplateString16.append('\n');
1455                     shiftLineTerminator();
1456                     stringStart = currentSourcePtr();
1457                     rawStringStart = currentSourcePtr();
1458                 } else
1459                     shiftLineTerminator();
1460                 continue;
1461             }
1462             // Anything else is just a normal character
1463         }
1464
1465         shift();
1466     }
1467
1468     bool isTail = m_current == '`';
1469
1470     if (currentSourcePtr() != stringStart)
1471         append16(stringStart, currentSourcePtr() - stringStart);
1472     if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1473         m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1474
1475     if (!parseCookedFailed)
1476         tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1477     else
1478         tokenData->cooked = nullptr;
1479
1480     // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1481     if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1482         tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1483     else
1484         tokenData->raw = nullptr;
1485
1486     tokenData->isTail = isTail;
1487
1488     m_buffer16.shrink(0);
1489     m_bufferForRawTemplateString16.shrink(0);
1490
1491     if (isTail) {
1492         // Skip `
1493         shift();
1494     } else {
1495         // Skip $ and {
1496         shift();
1497         shift();
1498     }
1499
1500     return StringParsedSuccessfully;
1501 }
1502
1503 template <typename T>
1504 ALWAYS_INLINE auto Lexer<T>::parseHex() -> NumberParseResult
1505 {
1506     // Optimization: most hexadecimal values fit into 4 bytes.
1507     uint32_t hexValue = 0;
1508     int maximumDigits = 7;
1509
1510     do {
1511         hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1512         shift();
1513         --maximumDigits;
1514     } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1515
1516     if (LIKELY(maximumDigits >= 0 && m_current != 'n'))
1517         return hexValue;
1518
1519     // No more place in the hexValue buffer.
1520     // The values are shifted out and placed into the m_buffer8 vector.
1521     for (int i = 0; i < 8; ++i) {
1522          int digit = hexValue >> 28;
1523          if (digit < 10)
1524              record8(digit + '0');
1525          else
1526              record8(digit - 10 + 'a');
1527          hexValue <<= 4;
1528     }
1529
1530     while (isASCIIHexDigit(m_current)) {
1531         record8(m_current);
1532         shift();
1533     }
1534
1535     if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1536         return makeIdentifier(m_buffer8.data(), m_buffer8.size());
1537     
1538     return parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1539 }
1540
1541 template <typename T>
1542 ALWAYS_INLINE auto Lexer<T>::parseBinary() -> std::optional<NumberParseResult>
1543 {
1544     // Optimization: most binary values fit into 4 bytes.
1545     uint32_t binaryValue = 0;
1546     const unsigned maximumDigits = 32;
1547     int digit = maximumDigits - 1;
1548     // Temporary buffer for the digits. Makes easier
1549     // to reconstruct the input characters when needed.
1550     LChar digits[maximumDigits];
1551
1552     do {
1553         binaryValue = (binaryValue << 1) + (m_current - '0');
1554         digits[digit] = m_current;
1555         shift();
1556         --digit;
1557     } while (isASCIIBinaryDigit(m_current) && digit >= 0);
1558
1559     if (LIKELY(!isASCIIDigit(m_current) && digit >= 0 && m_current != 'n'))
1560         return Variant<double, const Identifier*> { binaryValue };
1561
1562     for (int i = maximumDigits - 1; i > digit; --i)
1563         record8(digits[i]);
1564
1565     while (isASCIIBinaryDigit(m_current)) {
1566         record8(m_current);
1567         shift();
1568     }
1569
1570     if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1571         return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1572
1573     if (isASCIIDigit(m_current))
1574         return std::nullopt;
1575
1576     return Variant<double, const Identifier*> { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
1577 }
1578
1579 template <typename T>
1580 ALWAYS_INLINE auto Lexer<T>::parseOctal() -> std::optional<NumberParseResult>
1581 {
1582     // Optimization: most octal values fit into 4 bytes.
1583     uint32_t octalValue = 0;
1584     const unsigned maximumDigits = 10;
1585     int digit = maximumDigits - 1;
1586     // Temporary buffer for the digits. Makes easier
1587     // to reconstruct the input characters when needed.
1588     LChar digits[maximumDigits];
1589
1590     do {
1591         octalValue = octalValue * 8 + (m_current - '0');
1592         digits[digit] = m_current;
1593         shift();
1594         --digit;
1595     } while (isASCIIOctalDigit(m_current) && digit >= 0);
1596
1597     if (LIKELY(!isASCIIDigit(m_current) && digit >= 0 && m_current != 'n'))
1598         return Variant<double, const Identifier*> { octalValue };
1599
1600
1601     for (int i = maximumDigits - 1; i > digit; --i)
1602          record8(digits[i]);
1603
1604     while (isASCIIOctalDigit(m_current)) {
1605         record8(m_current);
1606         shift();
1607     }
1608
1609     if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1610         return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1611
1612     if (isASCIIDigit(m_current))
1613         return std::nullopt;
1614
1615     return Variant<double, const Identifier*> { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
1616 }
1617
1618 template <typename T>
1619 ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> std::optional<NumberParseResult>
1620 {
1621     // Optimization: most decimal values fit into 4 bytes.
1622     uint32_t decimalValue = 0;
1623
1624     // Since parseOctal may be executed before parseDecimal,
1625     // the m_buffer8 may hold ascii digits.
1626     if (!m_buffer8.size()) {
1627         const unsigned maximumDigits = 10;
1628         int digit = maximumDigits - 1;
1629         // Temporary buffer for the digits. Makes easier
1630         // to reconstruct the input characters when needed.
1631         LChar digits[maximumDigits];
1632
1633         do {
1634             decimalValue = decimalValue * 10 + (m_current - '0');
1635             digits[digit] = m_current;
1636             shift();
1637             --digit;
1638         } while (isASCIIDigit(m_current) && digit >= 0);
1639
1640         if (digit >= 0 && m_current != '.' && !isASCIIAlphaCaselessEqual(m_current, 'e') && m_current != 'n')
1641             return Variant<double, const Identifier*> { decimalValue };
1642
1643         for (int i = maximumDigits - 1; i > digit; --i)
1644             record8(digits[i]);
1645     }
1646
1647     while (isASCIIDigit(m_current)) {
1648         record8(m_current);
1649         shift();
1650     }
1651     
1652     if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1653         return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1654
1655     return std::nullopt;
1656 }
1657
1658 template <typename T>
1659 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1660 {
1661     record8('.');
1662     while (isASCIIDigit(m_current)) {
1663         record8(m_current);
1664         shift();
1665     }
1666 }
1667
1668 template <typename T>
1669 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1670 {
1671     record8('e');
1672     shift();
1673     if (m_current == '+' || m_current == '-') {
1674         record8(m_current);
1675         shift();
1676     }
1677
1678     if (!isASCIIDigit(m_current))
1679         return false;
1680
1681     do {
1682         record8(m_current);
1683         shift();
1684     } while (isASCIIDigit(m_current));
1685     return true;
1686 }
1687
1688 template <typename T>
1689 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1690 {
1691     while (true) {
1692         while (UNLIKELY(m_current == '*')) {
1693             shift();
1694             if (m_current == '/') {
1695                 shift();
1696                 return true;
1697             }
1698         }
1699
1700         if (atEnd())
1701             return false;
1702
1703         if (isLineTerminator(m_current)) {
1704             shiftLineTerminator();
1705             m_terminator = true;
1706         } else
1707             shift();
1708     }
1709 }
1710
1711 template <typename T>
1712 ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
1713 {
1714     // sourceURL and sourceMappingURL directives.
1715     if (!consume("source"))
1716         return;
1717
1718     if (consume("URL=")) {
1719         m_sourceURLDirective = parseCommentDirectiveValue();
1720         return;
1721     }
1722
1723     if (consume("MappingURL=")) {
1724         m_sourceMappingURLDirective = parseCommentDirectiveValue();
1725         return;
1726     }
1727 }
1728
1729 template <typename T>
1730 ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
1731 {
1732     skipWhitespace();
1733     const T* stringStart = currentSourcePtr();
1734     while (!isWhiteSpace(m_current) && !isLineTerminator(m_current) && m_current != '"' && m_current != '\'' && !atEnd())
1735         shift();
1736     const T* stringEnd = currentSourcePtr();
1737     skipWhitespace();
1738
1739     if (!isLineTerminator(m_current) && !atEnd())
1740         return String();
1741
1742     append8(stringStart, stringEnd - stringStart);
1743     String result = String(m_buffer8.data(), m_buffer8.size());
1744     m_buffer8.shrink(0);
1745     return result;
1746 }
1747
1748 template <typename T>
1749 template <unsigned length>
1750 ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
1751 {
1752     unsigned lengthToCheck = length - 1; // Ignore the ending NULL byte in the string literal.
1753
1754     unsigned i = 0;
1755     for (; i < lengthToCheck && m_current == input[i]; i++)
1756         shift();
1757
1758     return i == lengthToCheck;
1759 }
1760
1761 template <typename T>
1762 bool Lexer<T>::nextTokenIsColon()
1763 {
1764     const T* code = m_code;
1765     while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1766         code++;
1767     
1768     return code < m_codeEnd && *code == ':';
1769 }
1770
1771 template <typename T>
1772 void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
1773 {
1774     JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1775     tokenLocation->line = lineNumber;
1776     tokenLocation->endOffset = endOffset;
1777     tokenLocation->lineStartOffset = lineStartOffset;
1778     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1779     tokenRecord->m_endPosition = endPosition;
1780     m_lastToken = token;
1781 }
1782
// Scans the next token from the input and describes it in tokenRecord.
//
// tokenRecord: receives the token data (m_data), location (m_location) and the
//              start/end positions of the token.
// lexerFlags:  LexerFlagsDontBuildStrings selects parseString<false>;
//              LexexFlagsDontBuildKeywords selects parseIdentifier<false>.
//              The flags are also forwarded to parseIdentifier.
// strictMode:  forwarded to parseString/parseIdentifier; additionally rejects a
//              literal that starts with '0' and is directly followed by a digit.
//
// Returns the token type. Whitespace, comments and line terminators are skipped
// (line terminators set m_terminator and m_atLineStart). EOFTOK is returned
// directly, without calling fillTokenInfo(). On a lexing error m_error is set and
// a token carrying ErrorTokenFlag is returned; most error paths here also set
// m_lexErrorMessage.
1783 template <typename T>
1784 JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
1785 {
1786     JSTokenData* tokenData = &tokenRecord->m_data;
1787     JSTokenLocation* tokenLocation = &tokenRecord->m_location;
         // Keep a copy of the location currently stored in tokenRecord before it is overwritten below.
1788     m_lastTokenLocation = JSTokenLocation(tokenRecord->m_location);
1789     
1790     ASSERT(!m_error);
1791     ASSERT(m_buffer8.isEmpty());
1792     ASSERT(m_buffer16.isEmpty());
1793
1794     JSTokenType token = ERRORTOK;
1795     m_terminator = false;
1796
         // Re-entered after a closed multi-line comment, a line terminator, or a
         // single-line comment that does not produce a token.
1797 start:
1798     skipWhitespace();
1799
1800     if (atEnd())
1801         return EOFTOK;
1802     
1803     tokenLocation->startOffset = currentOffset();
1804     ASSERT(currentOffset() >= currentLineStartOffset());
1805     tokenRecord->m_startPosition = currentPosition();
1806
         // Classify the current character: Latin-1 characters use the lookup table,
         // anything else is an identifier start, a line terminator, or invalid.
1807     CharacterType type;
1808     if (LIKELY(isLatin1(m_current)))
1809         type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1810     else if (isNonLatin1IdentStart(m_current))
1811         type = CharacterIdentifierStart;
1812     else if (isLineTerminator(m_current))
1813         type = CharacterLineTerminator;
1814     else
1815         type = CharacterInvalid;
1816
1817     switch (type) {
         // '>'  '>>'  '>>>'  '>='  '>>='  '>>>='
1818     case CharacterGreater:
1819         shift();
1820         if (m_current == '>') {
1821             shift();
1822             if (m_current == '>') {
1823                 shift();
1824                 if (m_current == '=') {
1825                     shift();
1826                     token = URSHIFTEQUAL;
1827                     break;
1828                 }
1829                 token = URSHIFT;
1830                 break;
1831             }
1832             if (m_current == '=') {
1833                 shift();
1834                 token = RSHIFTEQUAL;
1835                 break;
1836             }
1837             token = RSHIFT;
1838             break;
1839         }
1840         if (m_current == '=') {
1841             shift();
1842             token = GE;
1843             break;
1844         }
1845         token = GT;
1846         break;
         // '=>'  '='  '=='  '==='
1847     case CharacterEqual: {
             // The arrow is detected by peeking, so its position is recorded
             // before either character is consumed.
1848         if (peek(1) == '>') {
1849             token = ARROWFUNCTION;
1850             tokenData->line = lineNumber();
1851             tokenData->offset = currentOffset();
1852             tokenData->lineStartOffset = currentLineStartOffset();
1853             ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1854             shift();
1855             shift();
1856             break;
1857         }
1858
1859         shift();
1860         if (m_current == '=') {
1861             shift();
1862             if (m_current == '=') {
1863                 shift();
1864                 token = STREQ;
1865                 break;
1866             }
1867             token = EQEQ;
1868             break;
1869         }
1870         token = EQUAL;
1871         break;
1872     }
         // '<'  '<<'  '<='  '<<='  and the '<!--' comment opener
1873     case CharacterLess:
1874         shift();
             // Outside Classic script mode '<!--' is not special: control falls
             // through to the checks below, which yield LT since m_current is '!'.
1875         if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1876             if (m_scriptMode == JSParserScriptMode::Classic) {
1877                 // <!-- marks the beginning of a line comment (for www usage)
1878                 goto inSingleLineComment;
1879             }
1880         }
1881         if (m_current == '<') {
1882             shift();
1883             if (m_current == '=') {
1884                 shift();
1885                 token = LSHIFTEQUAL;
1886                 break;
1887             }
1888             token = LSHIFT;
1889             break;
1890         }
1891         if (m_current == '=') {
1892             shift();
1893             token = LE;
1894             break;
1895         }
1896         token = LT;
1897         break;
         // '!'  '!='  '!=='
1898     case CharacterExclamationMark:
1899         shift();
1900         if (m_current == '=') {
1901             shift();
1902             if (m_current == '=') {
1903                 shift();
1904                 token = STRNEQ;
1905                 break;
1906             }
1907             token = NE;
1908             break;
1909         }
1910         token = EXCLAMATION;
1911         break;
         // '+'  '++'  '+='
1912     case CharacterAdd:
1913         shift();
1914         if (m_current == '+') {
1915             shift();
                 // AUTOPLUSPLUS is produced when a line terminator was crossed
                 // earlier in this lex() call (m_terminator is set).
1916             token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1917             break;
1918         }
1919         if (m_current == '=') {
1920             shift();
1921             token = PLUSEQUAL;
1922             break;
1923         }
1924         token = PLUS;
1925         break;
         // '-'  '--'  '-='  and the '-->' comment opener
1926     case CharacterSub:
1927         shift();
1928         if (m_current == '-') {
1929             shift();
                 // '-->' at the start of a line, or after a line terminator crossed in
                 // this call, begins a single-line comment in Classic script mode only.
1930             if ((m_atLineStart || m_terminator) && m_current == '>') {
1931                 if (m_scriptMode == JSParserScriptMode::Classic) {
1932                     shift();
1933                     goto inSingleLineComment;
1934                 }
1935             }
1936             token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1937             break;
1938         }
1939         if (m_current == '=') {
1940             shift();
1941             token = MINUSEQUAL;
1942             break;
1943         }
1944         token = MINUS;
1945         break;
         // '*'  '*='  '**'  '**='
1946     case CharacterMultiply:
1947         shift();
1948         if (m_current == '=') {
1949             shift();
1950             token = MULTEQUAL;
1951             break;
1952         }
1953         if (m_current == '*') {
1954             shift();
1955             if (m_current == '=') {
1956                 shift();
1957                 token = POWEQUAL;
1958                 break;
1959             }
1960             token = POW;
1961             break;
1962         }
1963         token = TIMES;
1964         break;
         // '/'  '/='  and the '//' and '/*' comment openers
1965     case CharacterSlash:
1966         shift();
1967         if (m_current == '/') {
1968             shift();
1969             goto inSingleLineCommentCheckForDirectives;
1970         }
1971         if (m_current == '*') {
1972             shift();
                 // A properly closed comment produces no token; restart the scan.
1973             if (parseMultilineComment())
1974                 goto start;
1975             m_lexErrorMessage = "Multiline comment was not closed properly"_s;
1976             token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
1977             goto returnError;
1978         }
1979         if (m_current == '=') {
1980             shift();
1981             token = DIVEQUAL;
1982             break;
1983         }
1984         token = DIVIDE;
1985         break;
         // '&'  '&&'  '&='
1986     case CharacterAnd:
1987         shift();
1988         if (m_current == '&') {
1989             shift();
1990             token = AND;
1991             break;
1992         }
1993         if (m_current == '=') {
1994             shift();
1995             token = ANDEQUAL;
1996             break;
1997         }
1998         token = BITAND;
1999         break;
         // '^'  '^='
2000     case CharacterXor:
2001         shift();
2002         if (m_current == '=') {
2003             shift();
2004             token = XOREQUAL;
2005             break;
2006         }
2007         token = BITXOR;
2008         break;
         // '%'  '%='
2009     case CharacterModulo:
2010         shift();
2011         if (m_current == '=') {
2012             shift();
2013             token = MODEQUAL;
2014             break;
2015         }
2016         token = MOD;
2017         break;
         // '|'  '|='  '||'
2018     case CharacterOr:
2019         shift();
2020         if (m_current == '=') {
2021             shift();
2022             token = OREQUAL;
2023             break;
2024         }
2025         if (m_current == '|') {
2026             shift();
2027             token = OR;
2028             break;
2029         }
2030         token = BITOR;
2031         break;
         // Single-character punctuators. '(', '{' and '}' also record their
         // line, offset and line start offset in tokenData before being consumed.
2032     case CharacterOpenParen:
2033         token = OPENPAREN;
2034         tokenData->line = lineNumber();
2035         tokenData->offset = currentOffset();
2036         tokenData->lineStartOffset = currentLineStartOffset();
2037         shift();
2038         break;
2039     case CharacterCloseParen:
2040         token = CLOSEPAREN;
2041         shift();
2042         break;
2043     case CharacterOpenBracket:
2044         token = OPENBRACKET;
2045         shift();
2046         break;
2047     case CharacterCloseBracket:
2048         token = CLOSEBRACKET;
2049         shift();
2050         break;
2051     case CharacterComma:
2052         token = COMMA;
2053         shift();
2054         break;
2055     case CharacterColon:
2056         token = COLON;
2057         shift();
2058         break;
2059     case CharacterQuestion:
2060         token = QUESTION;
2061         shift();
2062         break;
2063     case CharacterTilde:
2064         token = TILDE;
2065         shift();
2066         break;
2067     case CharacterSemicolon:
2068         shift();
2069         token = SEMICOLON;
2070         break;
2071     case CharacterBackQuote:
2072         shift();
2073         token = BACKQUOTE;
2074         break;
2075     case CharacterOpenBrace:
2076         tokenData->line = lineNumber();
2077         tokenData->offset = currentOffset();
2078         tokenData->lineStartOffset = currentLineStartOffset();
2079         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2080         shift();
2081         token = OPENBRACE;
2082         break;
2083     case CharacterCloseBrace:
2084         tokenData->line = lineNumber();
2085         tokenData->offset = currentOffset();
2086         tokenData->lineStartOffset = currentLineStartOffset();
2087         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2088         shift();
2089         token = CLOSEBRACE;
2090         break;
         // '.'  '...'  or a numeric literal that starts with a decimal point
2091     case CharacterDot:
2092         shift();
2093         if (!isASCIIDigit(m_current)) {
2094             if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
2095                 shift();
2096                 shift();
2097                 token = DOTDOTDOT;
2098                 break;
2099             }
2100             token = DOT;
2101             break;
2102         }
             // A digit follows the '.': buffer the fraction (and optional exponent)
             // in m_buffer8 and convert the buffered text with parseDouble().
2103         parseNumberAfterDecimalPoint();
2104         token = DOUBLE;
2105         if (isASCIIAlphaCaselessEqual(m_current, 'e')) {
2106             if (!parseNumberAfterExponentIndicator()) {
2107                 m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2108                 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2109                 goto returnError;
2110             }
2111         }
2112         size_t parsedLength;
2113         tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
             // NOTE(review): token was set to DOUBLE above and is not reassigned on
             // this path, so this INTEGER check does not fire here.
2114         if (token == INTEGER)
2115             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2116
2117         if (UNLIKELY(isIdentStart(m_current))) {
2118             m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2119             token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2120             goto returnError;
2121         }
2122         m_buffer8.shrink(0);
2123         break;
         // Literals starting with '0': 0x.. (hex), 0b.. (binary), 0o.. (octal), or a
         // leading zero followed by more digits / a plain decimal number.
2124     case CharacterZero:
2125         shift();
2126         if (isASCIIAlphaCaselessEqual(m_current, 'x')) {
2127             if (!isASCIIHexDigit(peek(1))) {
2128                 m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
2129                 token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2130                 goto returnError;
2131             }
2132
2133             // Shift out the 'x' prefix.
2134             shift();
2135
2136             auto parseNumberResult = parseHex();
2137             if (WTF::holds_alternative<double>(parseNumberResult))
2138                 tokenData->doubleValue = WTF::get<double>(parseNumberResult);
2139             else {
                     // An Identifier result means a BigInt literal; the shift()
                     // below advances past the character that follows the digits.
2140                 token = BIGINT;
2141                 shift();
2142                 tokenData->bigIntString = WTF::get<const Identifier*>(parseNumberResult);
2143                 tokenData->radix = 16;
2144             }
2145
2146             if (isIdentStart(m_current)) {
2147                 m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
2148                 token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2149                 goto returnError;
2150             }
2151             if (LIKELY(token != BIGINT))
2152                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2153             m_buffer8.shrink(0);
2154             break;
2155         }
2156         if (isASCIIAlphaCaselessEqual(m_current, 'b')) {
2157             if (!isASCIIBinaryDigit(peek(1))) {
2158                 m_lexErrorMessage = "No binary digits after '0b'"_s;
2159                 token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2160                 goto returnError;
2161             }
2162
2163             // Shift out the 'b' prefix.
2164             shift();
2165
2166             auto parseNumberResult = parseBinary();
                 // parseBinary() yields no value when a non-binary decimal digit
                 // follows the binary digits; the value is then taken to be 0.
2167             if (!parseNumberResult)
2168                 tokenData->doubleValue = 0;
2169             else if (WTF::holds_alternative<double>(*parseNumberResult))
2170                 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2171             else {
                     // BigInt: parseBinary() stopped at the 'n' suffix; consume it.
2172                 token = BIGINT;
2173                 shift();
2174                 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2175                 tokenData->radix = 2;
2176             }
2177
2178             if (isIdentStart(m_current)) {
2179                 m_lexErrorMessage = "No space between binary literal and identifier"_s;
2180                 token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2181                 goto returnError;
2182             }
2183             if (LIKELY(token != BIGINT))
2184                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2185             m_buffer8.shrink(0);
2186             break;
2187         }
2188
2189         if (isASCIIAlphaCaselessEqual(m_current, 'o')) {
2190             if (!isASCIIOctalDigit(peek(1))) {
2191                 m_lexErrorMessage = "No octal digits after '0o'"_s;
2192                 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2193                 goto returnError;
2194             }
2195
2196             // Shift out the 'o' prefix.
2197             shift();
2198
2199             auto parseNumberResult = parseOctal();
                 // parseOctal() yields no value when a non-octal decimal digit
                 // follows the octal digits; the value is then taken to be 0.
2200             if (!parseNumberResult)
2201                 tokenData->doubleValue = 0;
2202             else if (WTF::holds_alternative<double>(*parseNumberResult))
2203                 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2204             else {
                     // BigInt: parseOctal() stopped at the 'n' suffix; consume it.
2205                 token = BIGINT;
2206                 shift();
2207                 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2208                 tokenData->radix = 8;
2209             }
2210
2211             if (isIdentStart(m_current)) {
2212                 m_lexErrorMessage = "No space between octal literal and identifier"_s;
2213                 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2214                 goto returnError;
2215             }
2216             if (LIKELY(token != BIGINT))
2217                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2218             m_buffer8.shrink(0);
2219             break;
2220         }
2221
             // No radix prefix: buffer the leading '0' so the number paths below
             // see it in m_buffer8.
2222         record8('0');
2223         if (strictMode && isASCIIDigit(m_current)) {
2224             m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
2225             token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2226             goto returnError;
2227         }
             // A leading zero followed by octal digits is first tried as an octal
             // number. If parseOctal() does not produce a double, token stays
             // unchanged and the CharacterNumber code below takes over.
2228         if (isASCIIOctalDigit(m_current)) {
2229             auto parseNumberResult = parseOctal();
2230             if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
2231                 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2232                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2233             }
2234         }
2235         FALLTHROUGH;
2236     case CharacterNumber:
             // Skipped only when the leading-zero octal path above already set
             // token to INTEGER or DOUBLE.
2237         if (LIKELY(token != INTEGER && token != DOUBLE)) {
2238             auto parseNumberResult = parseDecimal();
2239             if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
2240                 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2241                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2242             } else {
2243                 if (parseNumberResult) {
                         // BigInt: parseDecimal() stopped at the 'n' suffix; consume it.
2244                     ASSERT(WTF::get<const Identifier*>(*parseNumberResult));
2245                     token = BIGINT;
2246                     shift();
2247                     tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2248                     tokenData->radix = 10;
2249                 } else {
                         // parseDecimal() left the digits in m_buffer8: append any
                         // fraction/exponent and convert the text with parseDouble().
2250                     token = INTEGER;
2251                     if (m_current == '.') {
2252                         shift();
2253                         parseNumberAfterDecimalPoint();
2254                         token = DOUBLE;
2255                     }
2256                     if (isASCIIAlphaCaselessEqual(m_current, 'e')) {
2257                         if (!parseNumberAfterExponentIndicator()) {
2258                             m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2259                             token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2260                             goto returnError;
2261                         }
2262                     }
2263                     size_t parsedLength;
2264                     tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2265                     if (token == INTEGER)
2266                         token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2267                 }
2268             }
2269         }
2270
2271         if (UNLIKELY(isIdentStart(m_current))) {
2272             m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2273             token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2274             goto returnError;
2275         }
2276         m_buffer8.shrink(0);
2277         break;
2278     case CharacterQuote: {
2279         StringParseResult result = StringCannotBeParsed;
2280         if (lexerFlags & LexerFlagsDontBuildStrings)
2281             result = parseString<false>(tokenData, strictMode);
2282         else
2283             result = parseString<true>(tokenData, strictMode);
2284
2285         if (UNLIKELY(result != StringParsedSuccessfully)) {
2286             token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2287             goto returnError;
2288         }
             // NOTE(review): presumably consumes the closing quote; parseString is
             // defined outside this view - confirm.
2289         shift();
2290         token = STRING;
2291         break;
2292         }
2293     case CharacterIdentifierStart:
2294         ASSERT(isIdentStart(m_current));
2295         FALLTHROUGH;
2296     case CharacterBackSlash:
             // Also the jump target for private identifiers in builtin functions.
2297         parseIdent:
2298         if (lexerFlags & LexexFlagsDontBuildKeywords)
2299             token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2300         else
2301             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2302         break;
2303     case CharacterLineTerminator:
2304         ASSERT(isLineTerminator(m_current));
             // Line terminators produce no token: update the line bookkeeping and rescan.
2305         shiftLineTerminator();
2306         m_atLineStart = true;
2307         m_terminator = true;
2308         m_lineStart = m_code;
2309         goto start;
2310     case CharacterPrivateIdentifierStart:
             // Accepted only while parsing a builtin function; otherwise it is
             // treated as an invalid character.
2311         if (m_parsingBuiltinFunction)
2312             goto parseIdent;
2313
2314         FALLTHROUGH;
2315     case CharacterInvalid:
2316         m_lexErrorMessage = invalidCharacterMessage();
2317         token = ERRORTOK;
2318         goto returnError;
2319     default:
2320         RELEASE_ASSERT_NOT_REACHED();
2321         m_lexErrorMessage = "Internal Error"_s;
2322         token = ERRORTOK;
2323         goto returnError;
2324     }
2325
         // Every case that leaves the switch with `break` has produced a token.
2326     m_atLineStart = false;
2327     goto returnToken;
2328
2329 inSingleLineCommentCheckForDirectives:
2330     // Script comment directives like "//# sourceURL=test.js".
2331     if (UNLIKELY((m_current == '#' || m_current == '@') && isWhiteSpace(peek(1)))) {
2332         shift();
2333         shift();
2334         parseCommentDirective();
2335     }
2336     // Fall through to complete single line comment parsing.
2337
2338 inSingleLineComment:
2339     {
             // Capture the position before skipping the rest of the line; these
             // values are used if a SEMICOLON token is synthesized below.
2340         auto lineNumber = m_lineNumber;
2341         auto endOffset = currentOffset();
2342         auto lineStartOffset = currentLineStartOffset();
2343         auto endPosition = currentPosition();
2344
2345         while (!isLineTerminator(m_current)) {
2346             if (atEnd())
2347                 return EOFTOK;
2348             shift();
2349         }
2350         shiftLineTerminator();
2351         m_atLineStart = true;
2352         m_terminator = true;
2353         m_lineStart = m_code;
             // Unless lastTokenWasRestrKeyword() holds, the comment yields no token
             // and scanning restarts; otherwise a SEMICOLON is returned in its place.
2354         if (!lastTokenWasRestrKeyword())
2355             goto start;
2356
2357         token = SEMICOLON;
2358         fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2359         return token;
2360     }
2361
2362 returnToken:
2363     fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2364     return token;
2365
2366 returnError:
         // Every error path must have selected a token that carries ErrorTokenFlag.
2367     m_error = true;
2368     fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2369     RELEASE_ASSERT(token & ErrorTokenFlag);
2370     return token;
2371 }
2372
2373 template <typename T>
2374 static inline void orCharacter(UChar&, UChar);
2375
2376 template <>
2377 inline void orCharacter<LChar>(UChar&, UChar) { }
2378
2379 template <>
2380 inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2381 {
2382     orAccumulator |= character;
2383 }
2384
2385 template <typename T>
2386 JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
2387 {
2388     JSTokenData* tokenData = &tokenRecord->m_data;
2389     ASSERT(m_buffer16.isEmpty());
2390
2391     bool lastWasEscape = false;
2392     bool inBrackets = false;
2393     UChar charactersOredTogether = 0;
2394
2395     if (patternPrefix) {
2396         ASSERT(!isLineTerminator(patternPrefix));
2397         ASSERT(patternPrefix != '/');
2398         ASSERT(patternPrefix != '[');
2399         record16(patternPrefix);
2400     }
2401
2402     while (true) {
2403         if (isLineTerminator(m_current) || atEnd()) {
2404             m_buffer16.shrink(0);
2405             JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
2406             fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2407             m_error = true;
2408             m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
2409             return token;
2410         }
2411
2412         T prev = m_current;
2413         
2414         shift();
2415
2416         if (prev == '/' && !lastWasEscape && !inBrackets)
2417             break;
2418
2419         record16(prev);
2420         orCharacter<T>(charactersOredTogether, prev);
2421
2422         if (lastWasEscape) {
2423             lastWasEscape = false;
2424             continue;
2425         }
2426
2427         switch (prev) {
2428         case '[':
2429             inBrackets = true;
2430             break;
2431         case ']':
2432             inBrackets = false;
2433             break;
2434         case '\\':
2435             lastWasEscape = true;
2436             break;
2437         }
2438     }
2439
2440     tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2441
2442     m_buffer16.shrink(0);
2443     charactersOredTogether = 0;
2444
2445     while (isIdentPart(m_current)) {
2446         record16(m_current);
2447         orCharacter<T>(charactersOredTogether, m_current);
2448         shift();
2449     }
2450
2451     tokenData->flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2452     m_buffer16.shrink(0);
2453
2454     // Since RegExp always ends with /, m_atLineStart always becomes false.
2455     m_atLineStart = false;
2456
2457     JSTokenType token = REGEXP;
2458     fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2459     return token;
2460 }
2461
2462 template <typename T>
2463 JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2464 {
2465     JSTokenData* tokenData = &tokenRecord->m_data;
2466     ASSERT(!m_error);
2467     ASSERT(m_buffer16.isEmpty());
2468
2469     // Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
2470     // So in this re-scan phase, shift() is not needed here.
2471     StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
2472     JSTokenType token = ERRORTOK;
2473     if (UNLIKELY(result != StringParsedSuccessfully)) {
2474         token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2475         m_error = true;
2476     } else
2477         token = TEMPLATE;
2478
2479     // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2480     m_atLineStart = false;
2481     fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2482     return token;
2483 }
2484
2485 template <typename T>
2486 void Lexer<T>::clear()
2487 {
2488     m_arena = 0;
2489
2490     Vector<LChar> newBuffer8;
2491     m_buffer8.swap(newBuffer8);
2492
2493     Vector<UChar> newBuffer16;
2494     m_buffer16.swap(newBuffer16);
2495
2496     Vector<UChar> newBufferForRawTemplateString16;
2497     m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2498
2499     m_isReparsingFunction = false;
2500 }
2501
2502 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
2503 template class Lexer<LChar>;
2504 template class Lexer<UChar>;
2505
2506 } // namespace JSC