[ES6] Implement ES6 template literals
[WebKit-https.git] / Source / JavaScriptCore / parser / Lexer.cpp
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
7  *
8  *  This library is free software; you can redistribute it and/or
9  *  modify it under the terms of the GNU Library General Public
10  *  License as published by the Free Software Foundation; either
11  *  version 2 of the License, or (at your option) any later version.
12  *
13  *  This library is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  *  Library General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Library General Public License
19  *  along with this library; see the file COPYING.LIB.  If not, write to
20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  *  Boston, MA 02110-1301, USA.
22  *
23  */
24
25 #include "config.h"
26 #include "Lexer.h"
27
28 #include "JSFunctionInlines.h"
29
30 #include "BuiltinNames.h"
31 #include "JSGlobalObjectFunctions.h"
32 #include "Identifier.h"
33 #include "Nodes.h"
34 #include "JSCInlines.h"
35 #include <wtf/dtoa.h>
36 #include <ctype.h>
37 #include <limits.h>
38 #include <string.h>
39 #include <wtf/Assertions.h>
40
41 #include "KeywordLookup.h"
42 #include "Lexer.lut.h"
43 #include "Parser.h"
44
45 namespace JSC {
46
47 Keywords::Keywords(VM& vm)
48     : m_vm(vm)
49     , m_keywordTable(JSC::mainTable)
50 {
51 }
52
// Lexical class of a Latin-1 character, as stored in typesOfLatin1Characters.
//
// The numeric order matters: isIdentStart() tests for equality with
// CharacterIdentifierStart and isIdentPart() tests for "<= CharacterNumber",
// so the first three enumerators must stay first and in this order.
enum CharacterType {
    // Identifier-capable characters (fixed positions, see above).
    CharacterIdentifierStart,
    CharacterZero,
    CharacterNumber,

    // Characters dispatched on by the main lexer switch.
    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterBackQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Operator characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Remaining types: white space and the '@' private-identifier prefix.
    CharacterWhiteSpace,
    CharacterPrivateIdentifierStart
};
97
98 // Lexical class (a CharacterType value) of each of the 256 Latin-1 codes, indexed by character code.
// isIdentStart() and isIdentPart() classify Latin-1 characters by reading this table.
// Entry 96 (backquote) is CharacterBackQuote only when ES6_TEMPLATE_LITERAL_SYNTAX is enabled;
// otherwise it is CharacterInvalid.
99 static const unsigned short typesOfLatin1Characters[256] = {
100 /*   0 - Null               */ CharacterInvalid,
101 /*   1 - Start of Heading   */ CharacterInvalid,
102 /*   2 - Start of Text      */ CharacterInvalid,
103 /*   3 - End of Text        */ CharacterInvalid,
104 /*   4 - End of Transm.     */ CharacterInvalid,
105 /*   5 - Enquiry            */ CharacterInvalid,
106 /*   6 - Acknowledgment     */ CharacterInvalid,
107 /*   7 - Bell               */ CharacterInvalid,
108 /*   8 - Back Space         */ CharacterInvalid,
109 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
110 /*  10 - Line Feed          */ CharacterLineTerminator,
111 /*  11 - Vertical Tab       */ CharacterWhiteSpace,
112 /*  12 - Form Feed          */ CharacterWhiteSpace,
113 /*  13 - Carriage Return    */ CharacterLineTerminator,
114 /*  14 - Shift Out          */ CharacterInvalid,
115 /*  15 - Shift In           */ CharacterInvalid,
116 /*  16 - Data Line Escape   */ CharacterInvalid,
117 /*  17 - Device Control 1   */ CharacterInvalid,
118 /*  18 - Device Control 2   */ CharacterInvalid,
119 /*  19 - Device Control 3   */ CharacterInvalid,
120 /*  20 - Device Control 4   */ CharacterInvalid,
121 /*  21 - Negative Ack.      */ CharacterInvalid,
122 /*  22 - Synchronous Idle   */ CharacterInvalid,
123 /*  23 - End of Transmit    */ CharacterInvalid,
124 /*  24 - Cancel             */ CharacterInvalid,
125 /*  25 - End of Medium      */ CharacterInvalid,
126 /*  26 - Substitute         */ CharacterInvalid,
127 /*  27 - Escape             */ CharacterInvalid,
128 /*  28 - File Separator     */ CharacterInvalid,
129 /*  29 - Group Separator    */ CharacterInvalid,
130 /*  30 - Record Separator   */ CharacterInvalid,
131 /*  31 - Unit Separator     */ CharacterInvalid,
132 /*  32 - Space              */ CharacterWhiteSpace,
133 /*  33 - !                  */ CharacterExclamationMark,
134 /*  34 - "                  */ CharacterQuote,
135 /*  35 - #                  */ CharacterInvalid,
136 /*  36 - $                  */ CharacterIdentifierStart,
137 /*  37 - %                  */ CharacterModulo,
138 /*  38 - &                  */ CharacterAnd,
139 /*  39 - '                  */ CharacterQuote,
140 /*  40 - (                  */ CharacterOpenParen,
141 /*  41 - )                  */ CharacterCloseParen,
142 /*  42 - *                  */ CharacterMultiply,
143 /*  43 - +                  */ CharacterAdd,
144 /*  44 - ,                  */ CharacterComma,
145 /*  45 - -                  */ CharacterSub,
146 /*  46 - .                  */ CharacterDot,
147 /*  47 - /                  */ CharacterSlash,
148 /*  48 - 0                  */ CharacterZero,
149 /*  49 - 1                  */ CharacterNumber,
150 /*  50 - 2                  */ CharacterNumber,
151 /*  51 - 3                  */ CharacterNumber,
152 /*  52 - 4                  */ CharacterNumber,
153 /*  53 - 5                  */ CharacterNumber,
154 /*  54 - 6                  */ CharacterNumber,
155 /*  55 - 7                  */ CharacterNumber,
156 /*  56 - 8                  */ CharacterNumber,
157 /*  57 - 9                  */ CharacterNumber,
158 /*  58 - :                  */ CharacterColon,
159 /*  59 - ;                  */ CharacterSemicolon,
160 /*  60 - <                  */ CharacterLess,
161 /*  61 - =                  */ CharacterEqual,
162 /*  62 - >                  */ CharacterGreater,
163 /*  63 - ?                  */ CharacterQuestion,
164 /*  64 - @                  */ CharacterPrivateIdentifierStart,
165 /*  65 - A                  */ CharacterIdentifierStart,
166 /*  66 - B                  */ CharacterIdentifierStart,
167 /*  67 - C                  */ CharacterIdentifierStart,
168 /*  68 - D                  */ CharacterIdentifierStart,
169 /*  69 - E                  */ CharacterIdentifierStart,
170 /*  70 - F                  */ CharacterIdentifierStart,
171 /*  71 - G                  */ CharacterIdentifierStart,
172 /*  72 - H                  */ CharacterIdentifierStart,
173 /*  73 - I                  */ CharacterIdentifierStart,
174 /*  74 - J                  */ CharacterIdentifierStart,
175 /*  75 - K                  */ CharacterIdentifierStart,
176 /*  76 - L                  */ CharacterIdentifierStart,
177 /*  77 - M                  */ CharacterIdentifierStart,
178 /*  78 - N                  */ CharacterIdentifierStart,
179 /*  79 - O                  */ CharacterIdentifierStart,
180 /*  80 - P                  */ CharacterIdentifierStart,
181 /*  81 - Q                  */ CharacterIdentifierStart,
182 /*  82 - R                  */ CharacterIdentifierStart,
183 /*  83 - S                  */ CharacterIdentifierStart,
184 /*  84 - T                  */ CharacterIdentifierStart,
185 /*  85 - U                  */ CharacterIdentifierStart,
186 /*  86 - V                  */ CharacterIdentifierStart,
187 /*  87 - W                  */ CharacterIdentifierStart,
188 /*  88 - X                  */ CharacterIdentifierStart,
189 /*  89 - Y                  */ CharacterIdentifierStart,
190 /*  90 - Z                  */ CharacterIdentifierStart,
191 /*  91 - [                  */ CharacterOpenBracket,
192 /*  92 - \                  */ CharacterBackSlash,
193 /*  93 - ]                  */ CharacterCloseBracket,
194 /*  94 - ^                  */ CharacterXor,
195 /*  95 - _                  */ CharacterIdentifierStart,
196 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
197 /*  96 - `                  */ CharacterBackQuote,
198 #else
199 /*  96 - `                  */ CharacterInvalid,
200 #endif
201 /*  97 - a                  */ CharacterIdentifierStart,
202 /*  98 - b                  */ CharacterIdentifierStart,
203 /*  99 - c                  */ CharacterIdentifierStart,
204 /* 100 - d                  */ CharacterIdentifierStart,
205 /* 101 - e                  */ CharacterIdentifierStart,
206 /* 102 - f                  */ CharacterIdentifierStart,
207 /* 103 - g                  */ CharacterIdentifierStart,
208 /* 104 - h                  */ CharacterIdentifierStart,
209 /* 105 - i                  */ CharacterIdentifierStart,
210 /* 106 - j                  */ CharacterIdentifierStart,
211 /* 107 - k                  */ CharacterIdentifierStart,
212 /* 108 - l                  */ CharacterIdentifierStart,
213 /* 109 - m                  */ CharacterIdentifierStart,
214 /* 110 - n                  */ CharacterIdentifierStart,
215 /* 111 - o                  */ CharacterIdentifierStart,
216 /* 112 - p                  */ CharacterIdentifierStart,
217 /* 113 - q                  */ CharacterIdentifierStart,
218 /* 114 - r                  */ CharacterIdentifierStart,
219 /* 115 - s                  */ CharacterIdentifierStart,
220 /* 116 - t                  */ CharacterIdentifierStart,
221 /* 117 - u                  */ CharacterIdentifierStart,
222 /* 118 - v                  */ CharacterIdentifierStart,
223 /* 119 - w                  */ CharacterIdentifierStart,
224 /* 120 - x                  */ CharacterIdentifierStart,
225 /* 121 - y                  */ CharacterIdentifierStart,
226 /* 122 - z                  */ CharacterIdentifierStart,
227 /* 123 - {                  */ CharacterOpenBrace,
228 /* 124 - |                  */ CharacterOr,
229 /* 125 - }                  */ CharacterCloseBrace,
230 /* 126 - ~                  */ CharacterTilde,
231 /* 127 - Delete             */ CharacterInvalid,
232 /* 128 - Cc category        */ CharacterInvalid,
233 /* 129 - Cc category        */ CharacterInvalid,
234 /* 130 - Cc category        */ CharacterInvalid,
235 /* 131 - Cc category        */ CharacterInvalid,
236 /* 132 - Cc category        */ CharacterInvalid,
237 /* 133 - Cc category        */ CharacterInvalid,
238 /* 134 - Cc category        */ CharacterInvalid,
239 /* 135 - Cc category        */ CharacterInvalid,
240 /* 136 - Cc category        */ CharacterInvalid,
241 /* 137 - Cc category        */ CharacterInvalid,
242 /* 138 - Cc category        */ CharacterInvalid,
243 /* 139 - Cc category        */ CharacterInvalid,
244 /* 140 - Cc category        */ CharacterInvalid,
245 /* 141 - Cc category        */ CharacterInvalid,
246 /* 142 - Cc category        */ CharacterInvalid,
247 /* 143 - Cc category        */ CharacterInvalid,
248 /* 144 - Cc category        */ CharacterInvalid,
249 /* 145 - Cc category        */ CharacterInvalid,
250 /* 146 - Cc category        */ CharacterInvalid,
251 /* 147 - Cc category        */ CharacterInvalid,
252 /* 148 - Cc category        */ CharacterInvalid,
253 /* 149 - Cc category        */ CharacterInvalid,
254 /* 150 - Cc category        */ CharacterInvalid,
255 /* 151 - Cc category        */ CharacterInvalid,
256 /* 152 - Cc category        */ CharacterInvalid,
257 /* 153 - Cc category        */ CharacterInvalid,
258 /* 154 - Cc category        */ CharacterInvalid,
259 /* 155 - Cc category        */ CharacterInvalid,
260 /* 156 - Cc category        */ CharacterInvalid,
261 /* 157 - Cc category        */ CharacterInvalid,
262 /* 158 - Cc category        */ CharacterInvalid,
263 /* 159 - Cc category        */ CharacterInvalid,
264 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
265 /* 161 - Po category        */ CharacterInvalid,
266 /* 162 - Sc category        */ CharacterInvalid,
267 /* 163 - Sc category        */ CharacterInvalid,
268 /* 164 - Sc category        */ CharacterInvalid,
269 /* 165 - Sc category        */ CharacterInvalid,
270 /* 166 - So category        */ CharacterInvalid,
271 /* 167 - So category        */ CharacterInvalid,
272 /* 168 - Sk category        */ CharacterInvalid,
273 /* 169 - So category        */ CharacterInvalid,
274 /* 170 - Ll category        */ CharacterIdentifierStart,
275 /* 171 - Pi category        */ CharacterInvalid,
276 /* 172 - Sm category        */ CharacterInvalid,
277 /* 173 - Cf category        */ CharacterInvalid,
278 /* 174 - So category        */ CharacterInvalid,
279 /* 175 - Sk category        */ CharacterInvalid,
280 /* 176 - So category        */ CharacterInvalid,
281 /* 177 - Sm category        */ CharacterInvalid,
282 /* 178 - No category        */ CharacterInvalid,
283 /* 179 - No category        */ CharacterInvalid,
284 /* 180 - Sk category        */ CharacterInvalid,
285 /* 181 - Ll category        */ CharacterIdentifierStart,
286 /* 182 - So category        */ CharacterInvalid,
287 /* 183 - Po category        */ CharacterInvalid,
288 /* 184 - Sk category        */ CharacterInvalid,
289 /* 185 - No category        */ CharacterInvalid,
290 /* 186 - Ll category        */ CharacterIdentifierStart,
291 /* 187 - Pf category        */ CharacterInvalid,
292 /* 188 - No category        */ CharacterInvalid,
293 /* 189 - No category        */ CharacterInvalid,
294 /* 190 - No category        */ CharacterInvalid,
295 /* 191 - Po category        */ CharacterInvalid,
296 /* 192 - Lu category        */ CharacterIdentifierStart,
297 /* 193 - Lu category        */ CharacterIdentifierStart,
298 /* 194 - Lu category        */ CharacterIdentifierStart,
299 /* 195 - Lu category        */ CharacterIdentifierStart,
300 /* 196 - Lu category        */ CharacterIdentifierStart,
301 /* 197 - Lu category        */ CharacterIdentifierStart,
302 /* 198 - Lu category        */ CharacterIdentifierStart,
303 /* 199 - Lu category        */ CharacterIdentifierStart,
304 /* 200 - Lu category        */ CharacterIdentifierStart,
305 /* 201 - Lu category        */ CharacterIdentifierStart,
306 /* 202 - Lu category        */ CharacterIdentifierStart,
307 /* 203 - Lu category        */ CharacterIdentifierStart,
308 /* 204 - Lu category        */ CharacterIdentifierStart,
309 /* 205 - Lu category        */ CharacterIdentifierStart,
310 /* 206 - Lu category        */ CharacterIdentifierStart,
311 /* 207 - Lu category        */ CharacterIdentifierStart,
312 /* 208 - Lu category        */ CharacterIdentifierStart,
313 /* 209 - Lu category        */ CharacterIdentifierStart,
314 /* 210 - Lu category        */ CharacterIdentifierStart,
315 /* 211 - Lu category        */ CharacterIdentifierStart,
316 /* 212 - Lu category        */ CharacterIdentifierStart,
317 /* 213 - Lu category        */ CharacterIdentifierStart,
318 /* 214 - Lu category        */ CharacterIdentifierStart,
319 /* 215 - Sm category        */ CharacterInvalid,
320 /* 216 - Lu category        */ CharacterIdentifierStart,
321 /* 217 - Lu category        */ CharacterIdentifierStart,
322 /* 218 - Lu category        */ CharacterIdentifierStart,
323 /* 219 - Lu category        */ CharacterIdentifierStart,
324 /* 220 - Lu category        */ CharacterIdentifierStart,
325 /* 221 - Lu category        */ CharacterIdentifierStart,
326 /* 222 - Lu category        */ CharacterIdentifierStart,
327 /* 223 - Ll category        */ CharacterIdentifierStart,
328 /* 224 - Ll category        */ CharacterIdentifierStart,
329 /* 225 - Ll category        */ CharacterIdentifierStart,
330 /* 226 - Ll category        */ CharacterIdentifierStart,
331 /* 227 - Ll category        */ CharacterIdentifierStart,
332 /* 228 - Ll category        */ CharacterIdentifierStart,
333 /* 229 - Ll category        */ CharacterIdentifierStart,
334 /* 230 - Ll category        */ CharacterIdentifierStart,
335 /* 231 - Ll category        */ CharacterIdentifierStart,
336 /* 232 - Ll category        */ CharacterIdentifierStart,
337 /* 233 - Ll category        */ CharacterIdentifierStart,
338 /* 234 - Ll category        */ CharacterIdentifierStart,
339 /* 235 - Ll category        */ CharacterIdentifierStart,
340 /* 236 - Ll category        */ CharacterIdentifierStart,
341 /* 237 - Ll category        */ CharacterIdentifierStart,
342 /* 238 - Ll category        */ CharacterIdentifierStart,
343 /* 239 - Ll category        */ CharacterIdentifierStart,
344 /* 240 - Ll category        */ CharacterIdentifierStart,
345 /* 241 - Ll category        */ CharacterIdentifierStart,
346 /* 242 - Ll category        */ CharacterIdentifierStart,
347 /* 243 - Ll category        */ CharacterIdentifierStart,
348 /* 244 - Ll category        */ CharacterIdentifierStart,
349 /* 245 - Ll category        */ CharacterIdentifierStart,
350 /* 246 - Ll category        */ CharacterIdentifierStart,
351 /* 247 - Sm category        */ CharacterInvalid,
352 /* 248 - Ll category        */ CharacterIdentifierStart,
353 /* 249 - Ll category        */ CharacterIdentifierStart,
354 /* 250 - Ll category        */ CharacterIdentifierStart,
355 /* 251 - Ll category        */ CharacterIdentifierStart,
356 /* 252 - Ll category        */ CharacterIdentifierStart,
357 /* 253 - Ll category        */ CharacterIdentifierStart,
358 /* 254 - Ll category        */ CharacterIdentifierStart,
359 /* 255 - Ll category        */ CharacterIdentifierStart
360 };
361
362 // This table provides the character that results from the escape sequence \X, where X is an ASCII
363 // character whose code (0-127) is used directly as the table index. A table value of 0 means that more processing needs to be done.
364 static const LChar singleCharacterEscapeValuesForASCII[128] = {
365 /*   0 - Null               */ 0,
366 /*   1 - Start of Heading   */ 0,
367 /*   2 - Start of Text      */ 0,
368 /*   3 - End of Text        */ 0,
369 /*   4 - End of Transm.     */ 0,
370 /*   5 - Enquiry            */ 0,
371 /*   6 - Acknowledgment     */ 0,
372 /*   7 - Bell               */ 0,
373 /*   8 - Back Space         */ 0,
374 /*   9 - Horizontal Tab     */ 0,
375 /*  10 - Line Feed          */ 0,
376 /*  11 - Vertical Tab       */ 0,
377 /*  12 - Form Feed          */ 0,
378 /*  13 - Carriage Return    */ 0,
379 /*  14 - Shift Out          */ 0,
380 /*  15 - Shift In           */ 0,
381 /*  16 - Data Line Escape   */ 0,
382 /*  17 - Device Control 1   */ 0,
383 /*  18 - Device Control 2   */ 0,
384 /*  19 - Device Control 3   */ 0,
385 /*  20 - Device Control 4   */ 0,
386 /*  21 - Negative Ack.      */ 0,
387 /*  22 - Synchronous Idle   */ 0,
388 /*  23 - End of Transmit    */ 0,
389 /*  24 - Cancel             */ 0,
390 /*  25 - End of Medium      */ 0,
391 /*  26 - Substitute         */ 0,
392 /*  27 - Escape             */ 0,
393 /*  28 - File Separator     */ 0,
394 /*  29 - Group Separator    */ 0,
395 /*  30 - Record Separator   */ 0,
396 /*  31 - Unit Separator     */ 0,
397 /*  32 - Space              */ ' ',
398 /*  33 - !                  */ '!',
399 /*  34 - "                  */ '"',
400 /*  35 - #                  */ '#',
401 /*  36 - $                  */ '$',
402 /*  37 - %                  */ '%',
403 /*  38 - &                  */ '&',
404 /*  39 - '                  */ '\'',
405 /*  40 - (                  */ '(',
406 /*  41 - )                  */ ')',
407 /*  42 - *                  */ '*',
408 /*  43 - +                  */ '+',
409 /*  44 - ,                  */ ',',
410 /*  45 - -                  */ '-',
411 /*  46 - .                  */ '.',
412 /*  47 - /                  */ '/',
413 /*  48 - 0                  */ 0,
414 /*  49 - 1                  */ 0,
415 /*  50 - 2                  */ 0,
416 /*  51 - 3                  */ 0,
417 /*  52 - 4                  */ 0,
418 /*  53 - 5                  */ 0,
419 /*  54 - 6                  */ 0,
420 /*  55 - 7                  */ 0,
421 /*  56 - 8                  */ 0,
422 /*  57 - 9                  */ 0,
423 /*  58 - :                  */ ':',
424 /*  59 - ;                  */ ';',
425 /*  60 - <                  */ '<',
426 /*  61 - =                  */ '=',
427 /*  62 - >                  */ '>',
428 /*  63 - ?                  */ '?',
429 /*  64 - @                  */ '@',
430 /*  65 - A                  */ 'A',
431 /*  66 - B                  */ 'B',
432 /*  67 - C                  */ 'C',
433 /*  68 - D                  */ 'D',
434 /*  69 - E                  */ 'E',
435 /*  70 - F                  */ 'F',
436 /*  71 - G                  */ 'G',
437 /*  72 - H                  */ 'H',
438 /*  73 - I                  */ 'I',
439 /*  74 - J                  */ 'J',
440 /*  75 - K                  */ 'K',
441 /*  76 - L                  */ 'L',
442 /*  77 - M                  */ 'M',
443 /*  78 - N                  */ 'N',
444 /*  79 - O                  */ 'O',
445 /*  80 - P                  */ 'P',
446 /*  81 - Q                  */ 'Q',
447 /*  82 - R                  */ 'R',
448 /*  83 - S                  */ 'S',
449 /*  84 - T                  */ 'T',
450 /*  85 - U                  */ 'U',
451 /*  86 - V                  */ 'V',
452 /*  87 - W                  */ 'W',
453 /*  88 - X                  */ 'X',
454 /*  89 - Y                  */ 'Y',
455 /*  90 - Z                  */ 'Z',
456 /*  91 - [                  */ '[',
457 /*  92 - \                  */ '\\',
458 /*  93 - ]                  */ ']',
459 /*  94 - ^                  */ '^',
460 /*  95 - _                  */ '_',
461 /*  96 - `                  */ '`',
462 /*  97 - a                  */ 'a',
463 /*  98 - b                  */ 0x08,
464 /*  99 - c                  */ 'c',
465 /* 100 - d                  */ 'd',
466 /* 101 - e                  */ 'e',
467 /* 102 - f                  */ 0x0C,
468 /* 103 - g                  */ 'g',
469 /* 104 - h                  */ 'h',
470 /* 105 - i                  */ 'i',
471 /* 106 - j                  */ 'j',
472 /* 107 - k                  */ 'k',
473 /* 108 - l                  */ 'l',
474 /* 109 - m                  */ 'm',
475 /* 110 - n                  */ 0x0A,
476 /* 111 - o                  */ 'o',
477 /* 112 - p                  */ 'p',
478 /* 113 - q                  */ 'q',
479 /* 114 - r                  */ 0x0D,
480 /* 115 - s                  */ 's',
481 /* 116 - t                  */ 0x09,
482 /* 117 - u                  */ 0,
483 /* 118 - v                  */ 0x0B,
484 /* 119 - w                  */ 'w',
485 /* 120 - x                  */ 0,
486 /* 121 - y                  */ 'y',
487 /* 122 - z                  */ 'z',
488 /* 123 - {                  */ '{',
489 /* 124 - |                  */ '|',
490 /* 125 - }                  */ '}',
491 /* 126 - ~                  */ '~',
492 /* 127 - Delete             */ 0
493 };
494
495 template <typename T>
496 Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode)
497     : m_isReparsing(false)
498     , m_vm(vm)
499     , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
500 {
501 }
502
503 static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
504 {
505     if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
506         return INTEGER;
507     return DOUBLE;
508 }
509
510 template <typename T>
511 Lexer<T>::~Lexer()
512 {
513 }
514
515 template <typename T>
516 String Lexer<T>::invalidCharacterMessage() const
517 {
518     switch (m_current) {
519     case 0:
520         return ASCIILiteral("Invalid character: '\\0'");
521     case 10:
522         return ASCIILiteral("Invalid character: '\\n'");
523     case 11:
524         return ASCIILiteral("Invalid character: '\\v'");
525     case 13:
526         return ASCIILiteral("Invalid character: '\\r'");
527     case 35:
528         return ASCIILiteral("Invalid character: '#'");
529     case 64:
530         return ASCIILiteral("Invalid character: '@'");
531     case 96:
532         return ASCIILiteral("Invalid character: '`'");
533     default:
534         return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current));
535     }
536 }
537
538 template <typename T>
539 ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
540 {
541     ASSERT(m_code <= m_codeEnd);
542     return m_code;
543 }
544
545 template <typename T>
546 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
547 {
548     m_arena = &arena->identifierArena();
549     
550     m_lineNumber = source.firstLine();
551     m_lastToken = -1;
552     
553     const String& sourceString = source.provider()->source();
554
555     if (!sourceString.isNull())
556         setCodeStart(sourceString.impl());
557     else
558         m_codeStart = 0;
559
560     m_source = &source;
561     m_sourceOffset = source.startOffset();
562     m_codeStartPlusOffset = m_codeStart + source.startOffset();
563     m_code = m_codeStartPlusOffset;
564     m_codeEnd = m_codeStart + source.endOffset();
565     m_error = false;
566     m_atLineStart = true;
567     m_lineStart = m_code;
568     m_lexErrorMessage = String();
569     
570     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
571     m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
572     
573     if (LIKELY(m_code < m_codeEnd))
574         m_current = *m_code;
575     else
576         m_current = 0;
577     ASSERT(currentOffset() == source.startOffset());
578 }
579
580 template <typename T>
581 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
582 {
583     m_code += shiftAmount;
584     ASSERT(currentOffset() >= currentLineStartOffset());
585     m_current = *m_code;
586 }
587
588 template <typename T>
589 ALWAYS_INLINE void Lexer<T>::shift()
590 {
591     // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
592     m_current = 0;
593     ++m_code;
594     if (LIKELY(m_code < m_codeEnd))
595         m_current = *m_code;
596 }
597
598 template <typename T>
599 ALWAYS_INLINE bool Lexer<T>::atEnd() const
600 {
601     ASSERT(!m_current || m_code < m_codeEnd);
602     return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
603 }
604
605 template <typename T>
606 ALWAYS_INLINE T Lexer<T>::peek(int offset) const
607 {
608     ASSERT(offset > 0 && offset < 5);
609     const T* code = m_code + offset;
610     return (code < m_codeEnd) ? *code : 0;
611 }
612
613 template <typename T>
614 typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex()
615 {
616     T char1 = peek(1);
617     T char2 = peek(2);
618     T char3 = peek(3);
619
620     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
621         return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex);
622
623     int result = convertUnicode(m_current, char1, char2, char3);
624     shift();
625     shift();
626     shift();
627     shift();
628     return UnicodeHexValue(result);
629 }
630
631 template <typename T>
632 void Lexer<T>::shiftLineTerminator()
633 {
634     ASSERT(isLineTerminator(m_current));
635
636     m_positionBeforeLastNewline = currentPosition();
637     T prev = m_current;
638     shift();
639
640     // Allow both CRLF and LFCR.
641     if (prev + m_current == '\n' + '\r')
642         shift();
643
644     ++m_lineNumber;
645 }
646
647 template <typename T>
648 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
649 {
650     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
651 }
652
653 static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
654 {
655     return U_GET_GC_MASK(c) & U_GC_L_MASK;
656 }
657
658 static ALWAYS_INLINE bool isLatin1(LChar)
659 {
660     return true;
661 }
662
663 static ALWAYS_INLINE bool isLatin1(UChar c)
664 {
665     return c < 256;
666 }
667
668 static inline bool isIdentStart(LChar c)
669 {
670     return typesOfLatin1Characters[c] == CharacterIdentifierStart;
671 }
672
673 static inline bool isIdentStart(UChar c)
674 {
675     return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
676 }
677
678 static NEVER_INLINE bool isNonLatin1IdentPart(int c)
679 {
680     return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
681 }
682
683 static ALWAYS_INLINE bool isIdentPart(LChar c)
684 {
685     // Character types are divided into two groups depending on whether they can be part of an
686     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
687     // part of an identifier. (See the CharacterType definition for more details.)
688     return typesOfLatin1Characters[c] <= CharacterNumber;
689 }
690
691 static ALWAYS_INLINE bool isIdentPart(UChar c)
692 {
693     return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
694 }
695
696 template <typename T>
697 bool isUnicodeEscapeIdentPart(const T* code)
698 {
699     T char1 = code[0];
700     T char2 = code[1];
701     T char3 = code[2];
702     T char4 = code[3];
703     
704     if (!isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3) || !isASCIIHexDigit(char4))
705         return false;
706     
707     return isIdentPart(Lexer<T>::convertUnicode(char1, char2, char3, char4));
708 }
709
710 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
711 {
712     if (isIdentPart(*code))
713         return true;
714
715     return (*code == '\\' && ((codeEnd - code) >= 6) && code[1] == 'u' && isUnicodeEscapeIdentPart(code+2));
716 }
717
718 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
719 {
720     if (isIdentPart(*code))
721         return true;
722     
723     return (*code == '\\' && ((codeEnd - code) >= 6) && code[1] == 'u' && isUnicodeEscapeIdentPart(code+2));
724 }
725
726 static inline LChar singleEscape(int c)
727 {
728     if (c < 128) {
729         ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
730         return singleCharacterEscapeValuesForASCII[c];
731     }
732     return 0;
733 }
734
735 template <typename T>
736 inline void Lexer<T>::record8(int c)
737 {
738     ASSERT(c >= 0);
739     ASSERT(c <= 0xFF);
740     m_buffer8.append(static_cast<LChar>(c));
741 }
742
// Debug-only check that |c| fits in 8 bits (0..0xFF). Generic version, used
// for character types that have no dedicated specialization.
template <typename T>
inline void assertCharIsIn8BitRange(T c)
{
    UNUSED_PARAM(c);
    ASSERT(c >= 0);
    ASSERT(c <= 0xFF);
}
750
751 template <>
752 inline void assertCharIsIn8BitRange(UChar c)
753 {
754     UNUSED_PARAM(c);
755     ASSERT(c <= 0xFF);
756 }
757
758 template <>
759 inline void assertCharIsIn8BitRange(LChar)
760 {
761 }
762
763 template <typename T>
764 inline void Lexer<T>::append8(const T* p, size_t length)
765 {
766     size_t currentSize = m_buffer8.size();
767     m_buffer8.grow(currentSize + length);
768     LChar* rawBuffer = m_buffer8.data() + currentSize;
769
770     for (size_t i = 0; i < length; i++) {
771         T c = p[i];
772         assertCharIsIn8BitRange(c);
773         rawBuffer[i] = c;
774     }
775 }
776
777 template <typename T>
778 inline void Lexer<T>::append16(const LChar* p, size_t length)
779 {
780     size_t currentSize = m_buffer16.size();
781     m_buffer16.grow(currentSize + length);
782     UChar* rawBuffer = m_buffer16.data() + currentSize;
783
784     for (size_t i = 0; i < length; i++)
785         rawBuffer[i] = p[i];
786 }
787
788 template <typename T>
789 inline void Lexer<T>::record16(T c)
790 {
791     m_buffer16.append(c);
792 }
793
794 template <typename T>
795 inline void Lexer<T>::record16(int c)
796 {
797     ASSERT(c >= 0);
798     ASSERT(c <= static_cast<int>(USHRT_MAX));
799     m_buffer16.append(static_cast<UChar>(c));
800 }
801     
802 #if !ASSERT_DISABLED
803 bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
804 {
805     if (!ident)
806         return true;
807     /* Just block any use of suspicious identifiers.  This is intended to
808      * be used as a safety net while implementing builtins.
809      */
810     if (*ident == vm.propertyNames->builtinNames().callPublicName())
811         return false;
812     if (*ident == vm.propertyNames->builtinNames().applyPublicName())
813         return false;
814     if (*ident == vm.propertyNames->eval)
815         return false;
816     if (*ident == vm.propertyNames->Function)
817         return false;
818     return true;
819 }
820 #endif
821     
822 template <>
823 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
824 {
825     const ptrdiff_t remaining = m_codeEnd - m_code;
826     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
827         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
828         if (keyword != IDENT) {
829             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
830             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
831         }
832     }
833     
834     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
835     if (isPrivateName)
836         shift();
837     
838     const LChar* identifierStart = currentSourcePtr();
839     unsigned identifierLineStart = currentLineStartOffset();
840     
841     while (isIdentPart(m_current))
842         shift();
843     
844     if (UNLIKELY(m_current == '\\')) {
845         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
846         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
847     }
848
849     const Identifier* ident = 0;
850     
851     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
852         int identifierLength = currentSourcePtr() - identifierStart;
853         ident = makeIdentifier(identifierStart, identifierLength);
854         if (m_parsingBuiltinFunction) {
855             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
856                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
857                 return ERRORTOK;
858             }
859             if (isPrivateName)
860                 ident = m_vm->propertyNames->getPrivateName(*ident);
861             else if (*ident == m_vm->propertyNames->undefinedKeyword)
862                 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
863             if (!ident)
864                 return INVALID_PRIVATE_NAME_ERRORTOK;
865         }
866         tokenData->ident = ident;
867     } else
868         tokenData->ident = 0;
869
870     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
871         ASSERT(shouldCreateIdentifier);
872         if (remaining < maxTokenLength) {
873             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
874             ASSERT((remaining < maxTokenLength) || !entry);
875             if (!entry)
876                 return IDENT;
877             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
878             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
879         }
880         return IDENT;
881     }
882
883     return IDENT;
884 }
885
886 template <>
887 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
888 {
889     const ptrdiff_t remaining = m_codeEnd - m_code;
890     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
891         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
892         if (keyword != IDENT) {
893             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
894             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
895         }
896     }
897     
898     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
899     if (isPrivateName)
900         shift();
901
902     const UChar* identifierStart = currentSourcePtr();
903     int identifierLineStart = currentLineStartOffset();
904
905     UChar orAllChars = 0;
906     
907     while (isIdentPart(m_current)) {
908         orAllChars |= m_current;
909         shift();
910     }
911     
912     if (UNLIKELY(m_current == '\\')) {
913         ASSERT(!isPrivateName);
914         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
915         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
916     }
917
918     bool isAll8Bit = false;
919
920     if (!(orAllChars & ~0xff))
921         isAll8Bit = true;
922
923     const Identifier* ident = 0;
924     
925     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
926         int identifierLength = currentSourcePtr() - identifierStart;
927         if (isAll8Bit)
928             ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
929         else
930             ident = makeIdentifier(identifierStart, identifierLength);
931         if (m_parsingBuiltinFunction) {
932             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
933                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
934                 return ERRORTOK;
935             }
936             if (isPrivateName)
937                 ident = m_vm->propertyNames->getPrivateName(*ident);
938             else if (*ident == m_vm->propertyNames->undefinedKeyword)
939                 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
940             if (!ident)
941                 return INVALID_PRIVATE_NAME_ERRORTOK;
942         }
943         tokenData->ident = ident;
944     } else
945         tokenData->ident = 0;
946     
947     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
948         ASSERT(shouldCreateIdentifier);
949         if (remaining < maxTokenLength) {
950             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
951             ASSERT((remaining < maxTokenLength) || !entry);
952             if (!entry)
953                 return IDENT;
954             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
955             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
956         }
957         return IDENT;
958     }
959
960     return IDENT;
961 }
962
963 template <typename T>
964 template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
965 {
966     const ptrdiff_t remaining = m_codeEnd - m_code;
967     const T* identifierStart = currentSourcePtr();
968     bool bufferRequired = false;
969
970     while (true) {
971         if (LIKELY(isIdentPart(m_current))) {
972             shift();
973             continue;
974         }
975         if (LIKELY(m_current != '\\'))
976             break;
977
978         // \uXXXX unicode characters.
979         bufferRequired = true;
980         if (identifierStart != currentSourcePtr())
981             m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
982         shift();
983         if (UNLIKELY(m_current != 'u'))
984             return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
985         shift();
986         UnicodeHexValue character = parseFourDigitUnicodeHex();
987         if (UNLIKELY(!character.isValid()))
988             return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
989         UChar ucharacter = static_cast<UChar>(character.value());
990         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
991             return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
992         if (shouldCreateIdentifier)
993             record16(ucharacter);
994         identifierStart = currentSourcePtr();
995     }
996
997     int identifierLength;
998     const Identifier* ident = 0;
999     if (shouldCreateIdentifier) {
1000         if (!bufferRequired) {
1001             identifierLength = currentSourcePtr() - identifierStart;
1002             ident = makeIdentifier(identifierStart, identifierLength);
1003         } else {
1004             if (identifierStart != currentSourcePtr())
1005                 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1006             ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1007         }
1008
1009         tokenData->ident = ident;
1010     } else
1011         tokenData->ident = 0;
1012
1013     if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
1014         ASSERT(shouldCreateIdentifier);
1015         // Keywords must not be recognized if there was an \uXXXX in the identifier.
1016         if (remaining < maxTokenLength) {
1017             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
1018             ASSERT((remaining < maxTokenLength) || !entry);
1019             if (!entry)
1020                 return IDENT;
1021             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1022             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1023         }
1024         return IDENT;
1025     }
1026
1027     m_buffer16.shrink(0);
1028     return IDENT;
1029 }
1030
1031 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1032 {
1033     return character < 0xE;
1034 }
1035
1036 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1037 {
1038     return character < 0xE || character > 0xFF;
1039 }
1040
1041 template <typename T>
1042 template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1043 {
1044     int startingOffset = currentOffset();
1045     int startingLineStartOffset = currentLineStartOffset();
1046     int startingLineNumber = lineNumber();
1047     T stringQuoteCharacter = m_current;
1048     shift();
1049
1050     const T* stringStart = currentSourcePtr();
1051
1052     while (m_current != stringQuoteCharacter) {
1053         if (UNLIKELY(m_current == '\\')) {
1054             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1055                 append8(stringStart, currentSourcePtr() - stringStart);
1056             shift();
1057
1058             LChar escape = singleEscape(m_current);
1059
1060             // Most common escape sequences first.
1061             if (escape) {
1062                 if (shouldBuildStrings)
1063                     record8(escape);
1064                 shift();
1065             } else if (UNLIKELY(isLineTerminator(m_current)))
1066                 shiftLineTerminator();
1067             else if (m_current == 'x') {
1068                 shift();
1069                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1070                     m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1071                     return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1072                 }
1073                 T prev = m_current;
1074                 shift();
1075                 if (shouldBuildStrings)
1076                     record8(convertHex(prev, m_current));
1077                 shift();
1078             } else {
1079                 setOffset(startingOffset, startingLineStartOffset);
1080                 setLineNumber(startingLineNumber);
1081                 m_buffer8.shrink(0);
1082                 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1083             }
1084             stringStart = currentSourcePtr();
1085             continue;
1086         }
1087
1088         if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1089             setOffset(startingOffset, startingLineStartOffset);
1090             setLineNumber(startingLineNumber);
1091             m_buffer8.shrink(0);
1092             return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1093         }
1094
1095         shift();
1096     }
1097
1098     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1099         append8(stringStart, currentSourcePtr() - stringStart);
1100     if (shouldBuildStrings) {
1101         tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1102         m_buffer8.shrink(0);
1103     } else
1104         tokenData->ident = 0;
1105
1106     return StringParsedSuccessfully;
1107 }
1108
1109 template <typename T>
1110 template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(EscapeParseMode escapeParseMode, bool strictMode, T stringQuoteCharacter) -> StringParseResult
1111 {
1112     if (m_current == 'x') {
1113         shift();
1114         if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1115             m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1116             return StringCannotBeParsed;
1117         }
1118         T prev = m_current;
1119         shift();
1120         if (shouldBuildStrings)
1121             record16(convertHex(prev, m_current));
1122         shift();
1123         return StringParsedSuccessfully;
1124     }
1125
1126     if (m_current == 'u') {
1127         shift();
1128         UnicodeHexValue character = parseFourDigitUnicodeHex();
1129         if (character.isValid()) {
1130             if (shouldBuildStrings)
1131                 record16(character.value());
1132             return StringParsedSuccessfully;
1133         }
1134
1135         if (escapeParseMode == EscapeParseMode::String && m_current == stringQuoteCharacter) {
1136             if (shouldBuildStrings)
1137                 record16('u');
1138             return StringParsedSuccessfully;
1139         }
1140
1141         m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence");
1142         return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
1143     }
1144
1145     if (strictMode) {
1146         if (isASCIIDigit(m_current)) {
1147             // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1148             int character1 = m_current;
1149             shift();
1150             if (character1 != '0' || isASCIIDigit(m_current)) {
1151                 m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'");
1152                 return StringCannotBeParsed;
1153             }
1154             if (shouldBuildStrings)
1155                 record16(0);
1156             return StringParsedSuccessfully;
1157         }
1158     } else {
1159         if (isASCIIOctalDigit(m_current)) {
1160             // Octal character sequences
1161             T character1 = m_current;
1162             shift();
1163             if (isASCIIOctalDigit(m_current)) {
1164                 // Two octal characters
1165                 T character2 = m_current;
1166                 shift();
1167                 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1168                     if (shouldBuildStrings)
1169                         record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1170                     shift();
1171                 } else {
1172                     if (shouldBuildStrings)
1173                         record16((character1 - '0') * 8 + character2 - '0');
1174                 }
1175             } else {
1176                 if (shouldBuildStrings)
1177                     record16(character1 - '0');
1178             }
1179             return StringParsedSuccessfully;
1180         }
1181     }
1182
1183     if (!atEnd()) {
1184         if (shouldBuildStrings)
1185             record16(m_current);
1186         shift();
1187         return StringParsedSuccessfully;
1188     }
1189
1190     m_lexErrorMessage = ASCIILiteral("Unterminated string constant");
1191     return StringUnterminated;
1192 }
1193
1194 template <typename T>
1195 template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1196 {
1197     T stringQuoteCharacter = m_current;
1198     shift();
1199
1200     const T* stringStart = currentSourcePtr();
1201
1202     while (m_current != stringQuoteCharacter) {
1203         if (UNLIKELY(m_current == '\\')) {
1204             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1205                 append16(stringStart, currentSourcePtr() - stringStart);
1206             shift();
1207
1208             LChar escape = singleEscape(m_current);
1209
1210             // Most common escape sequences first
1211             if (escape) {
1212                 if (shouldBuildStrings)
1213                     record16(escape);
1214                 shift();
1215             } else if (UNLIKELY(isLineTerminator(m_current)))
1216                 shiftLineTerminator();
1217             else {
1218                 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::String, strictMode, stringQuoteCharacter);
1219                 if (result != StringParsedSuccessfully)
1220                     return result;
1221             }
1222
1223             stringStart = currentSourcePtr();
1224             continue;
1225         }
1226         // Fast check for characters that require special handling.
1227         // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1228         // as possible, and lets through all common ASCII characters.
1229         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1230             // New-line or end of input is not allowed
1231             if (atEnd() || isLineTerminator(m_current)) {
1232                 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
1233                 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1234             }
1235             // Anything else is just a normal character
1236         }
1237         shift();
1238     }
1239
1240     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1241         append16(stringStart, currentSourcePtr() - stringStart);
1242     if (shouldBuildStrings)
1243         tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1244     else
1245         tokenData->ident = 0;
1246
1247     m_buffer16.shrink(0);
1248     return StringParsedSuccessfully;
1249 }
1250
1251 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
1252 // While the lexer accepts <LF><CR> (not <CR><LF>) sequence
1253 // as one line terminator and increments one line number,
1254 // TemplateLiteral considers it as two line terminators <LF> and <CR>.
1255 //
1256 // TemplateLiteral normalizes line terminators as follows.
1257 //
1258 // <LF> => <LF>
1259 // <CR> => <LF>
1260 // <CR><LF> => <LF>
1261 // <\u2028> => <\u2028>
1262 // <\u2029> => <\u2029>
1263 //
1264 // So, <LF><CR> should be normalized to <LF><LF>.
1265 // However, the lexer should increment the line number only once for <LF><CR>.
1266 //
1267 // To achieve this, LineNumberAdder holds the current status of line terminator sequence.
1268 // When TemplateLiteral lexer encounters a line terminator, it notifies to LineNumberAdder.
1269 // LineNumberAdder maintains the status and increments the line number when it's necessary.
1270 // For example, LineNumberAdder increments the line number only once for <LF><CR> and <CR><LF>.
1271 template<typename CharacterType>
1272 class LineNumberAdder {
1273 public:
1274     LineNumberAdder(int& lineNumber)
1275         : m_lineNumber(lineNumber)
1276     {
1277     }
1278
1279     void clear()
1280     {
1281         m_previous = 0;
1282     }
1283
1284     void add(CharacterType character)
1285     {
1286         ASSERT(Lexer<CharacterType>::isLineTerminator(character));
1287         if ((character + m_previous) == ('\n' + '\r'))
1288             m_previous = 0;
1289         else {
1290             ++m_lineNumber;
1291             m_previous = character;
1292         }
1293     }
1294
1295 private:
1296     int& m_lineNumber;
1297     CharacterType m_previous { 0 };
1298 };
1299
1300 template <typename T>
1301 template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData)
1302 {
1303     const T* stringStart = currentSourcePtr();
1304     const T* rawStringStart = currentSourcePtr();
1305
1306     LineNumberAdder<T> lineNumberAdder(m_lineNumber);
1307
1308     while (m_current != '`') {
1309         if (UNLIKELY(m_current == '\\')) {
1310             lineNumberAdder.clear();
1311             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1312                 append16(stringStart, currentSourcePtr() - stringStart);
1313             shift();
1314
1315             LChar escape = singleEscape(m_current);
1316
1317             // Most common escape sequences first.
1318             if (escape) {
1319                 if (shouldBuildStrings)
1320                     record16(escape);
1321                 shift();
1322             } else if (UNLIKELY(isLineTerminator(m_current))) {
1323                 if (m_current == '\r') {
1324                     lineNumberAdder.add(m_current);
1325                     shift();
1326                     if (m_current == '\n') {
1327                         lineNumberAdder.add(m_current);
1328                         shift();
1329                     }
1330                 } else {
1331                     lineNumberAdder.add(m_current);
1332                     shift();
1333                 }
1334             } else {
1335                 bool strictMode = true;
1336                 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::Template, strictMode, '`');
1337                 if (result != StringParsedSuccessfully)
1338                     return result;
1339             }
1340
1341             stringStart = currentSourcePtr();
1342             continue;
1343         }
1344
1345         if (m_current == '$' && peek(1) == '{')
1346             break;
1347
1348         // Fast check for characters that require special handling.
1349         // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1350         // as possible, and lets through all common ASCII characters.
1351         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1352             // End of input is not allowed.
1353             // Unlike String, line terminator is allowed.
1354             if (atEnd()) {
1355                 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
1356                 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1357             }
1358
1359             if (isLineTerminator(m_current)) {
1360                 if (m_current == '\r') {
1361                     // Normalize <CR>, <CR><LF> to <LF>.
1362                     if (stringStart != currentSourcePtr() && shouldBuildStrings)
1363                         append16(stringStart, currentSourcePtr() - stringStart);
1364                     if (shouldBuildStrings)
1365                         record16('\n');
1366                     lineNumberAdder.add(m_current);
1367                     shift();
1368                     if (m_current == '\n') {
1369                         lineNumberAdder.add(m_current);
1370                         shift();
1371                     }
1372                     stringStart = currentSourcePtr();
1373                 } else {
1374                     lineNumberAdder.add(m_current);
1375                     shift();
1376                 }
1377                 continue;
1378             }
1379             // Anything else is just a normal character
1380         }
1381
1382         lineNumberAdder.clear();
1383         shift();
1384     }
1385
1386     bool isTail = m_current == '`';
1387
1388     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1389         append16(stringStart, currentSourcePtr() - stringStart);
1390
1391     if (shouldBuildStrings) {
1392         tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1393         // TODO: While line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations,
1394         // this raw implementation just slices the source string. As a result, line terminators appear in the raw representation without normalization.
1395         // For example, when parsing `<CR>`, <CR> appears in the raw representation.
1396         // While non-tagged template literals don't use the raw representation, tagged templates use the raw representation.
1397         // So line terminator normalization should be applied to the raw representation when implementing tagged templates.
1398         tokenData->raw = makeIdentifier(rawStringStart, currentSourcePtr() - rawStringStart);
1399     } else {
1400         tokenData->cooked = nullptr;
1401         tokenData->raw = nullptr;
1402     }
1403     tokenData->isTail = isTail;
1404
1405     m_buffer16.shrink(0);
1406
1407     if (isTail) {
1408         // Skip `
1409         shift();
1410     } else {
1411         // Skip $ and {
1412         shift();
1413         shift();
1414     }
1415
1416     return StringParsedSuccessfully;
1417 }
1418 #endif
1419
1420 template <typename T>
1421 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
1422 {
1423     // Optimization: most hexadecimal values fit into 4 bytes.
1424     uint32_t hexValue = 0;
1425     int maximumDigits = 7;
1426
1427     do {
1428         hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1429         shift();
1430         --maximumDigits;
1431     } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1432
1433     if (maximumDigits >= 0) {
1434         returnValue = hexValue;
1435         return;
1436     }
1437
1438     // No more place in the hexValue buffer.
1439     // The values are shifted out and placed into the m_buffer8 vector.
1440     for (int i = 0; i < 8; ++i) {
1441          int digit = hexValue >> 28;
1442          if (digit < 10)
1443              record8(digit + '0');
1444          else
1445              record8(digit - 10 + 'a');
1446          hexValue <<= 4;
1447     }
1448
1449     while (isASCIIHexDigit(m_current)) {
1450         record8(m_current);
1451         shift();
1452     }
1453
1454     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1455 }
1456
1457 template <typename T>
1458 ALWAYS_INLINE bool Lexer<T>::parseBinary(double& returnValue)
1459 {
1460     // Optimization: most binary values fit into 4 bytes.
1461     uint32_t binaryValue = 0;
1462     const unsigned maximumDigits = 32;
1463     int digit = maximumDigits - 1;
1464     // Temporary buffer for the digits. Makes easier
1465     // to reconstruct the input characters when needed.
1466     LChar digits[maximumDigits];
1467
1468     do {
1469         binaryValue = (binaryValue << 1) + (m_current - '0');
1470         digits[digit] = m_current;
1471         shift();
1472         --digit;
1473     } while (isASCIIBinaryDigit(m_current) && digit >= 0);
1474
1475     if (!isASCIIDigit(m_current) && digit >= 0) {
1476         returnValue = binaryValue;
1477         return true;
1478     }
1479
1480     for (int i = maximumDigits - 1; i > digit; --i)
1481         record8(digits[i]);
1482
1483     while (isASCIIBinaryDigit(m_current)) {
1484         record8(m_current);
1485         shift();
1486     }
1487
1488     if (isASCIIDigit(m_current))
1489         return false;
1490
1491     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2);
1492     return true;
1493 }
1494
1495 template <typename T>
1496 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
1497 {
1498     // Optimization: most octal values fit into 4 bytes.
1499     uint32_t octalValue = 0;
1500     const unsigned maximumDigits = 10;
1501     int digit = maximumDigits - 1;
1502     // Temporary buffer for the digits. Makes easier
1503     // to reconstruct the input characters when needed.
1504     LChar digits[maximumDigits];
1505
1506     do {
1507         octalValue = octalValue * 8 + (m_current - '0');
1508         digits[digit] = m_current;
1509         shift();
1510         --digit;
1511     } while (isASCIIOctalDigit(m_current) && digit >= 0);
1512
1513     if (!isASCIIDigit(m_current) && digit >= 0) {
1514         returnValue = octalValue;
1515         return true;
1516     }
1517
1518     for (int i = maximumDigits - 1; i > digit; --i)
1519          record8(digits[i]);
1520
1521     while (isASCIIOctalDigit(m_current)) {
1522         record8(m_current);
1523         shift();
1524     }
1525
1526     if (isASCIIDigit(m_current))
1527         return false;
1528
1529     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1530     return true;
1531 }
1532
1533 template <typename T>
1534 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
1535 {
1536     // Optimization: most decimal values fit into 4 bytes.
1537     uint32_t decimalValue = 0;
1538
1539     // Since parseOctal may be executed before parseDecimal,
1540     // the m_buffer8 may hold ascii digits.
1541     if (!m_buffer8.size()) {
1542         const unsigned maximumDigits = 10;
1543         int digit = maximumDigits - 1;
1544         // Temporary buffer for the digits. Makes easier
1545         // to reconstruct the input characters when needed.
1546         LChar digits[maximumDigits];
1547
1548         do {
1549             decimalValue = decimalValue * 10 + (m_current - '0');
1550             digits[digit] = m_current;
1551             shift();
1552             --digit;
1553         } while (isASCIIDigit(m_current) && digit >= 0);
1554
1555         if (digit >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1556             returnValue = decimalValue;
1557             return true;
1558         }
1559
1560         for (int i = maximumDigits - 1; i > digit; --i)
1561             record8(digits[i]);
1562     }
1563
1564     while (isASCIIDigit(m_current)) {
1565         record8(m_current);
1566         shift();
1567     }
1568
1569     return false;
1570 }
1571
1572 template <typename T>
1573 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1574 {
1575     record8('.');
1576     while (isASCIIDigit(m_current)) {
1577         record8(m_current);
1578         shift();
1579     }
1580 }
1581
1582 template <typename T>
1583 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1584 {
1585     record8('e');
1586     shift();
1587     if (m_current == '+' || m_current == '-') {
1588         record8(m_current);
1589         shift();
1590     }
1591
1592     if (!isASCIIDigit(m_current))
1593         return false;
1594
1595     do {
1596         record8(m_current);
1597         shift();
1598     } while (isASCIIDigit(m_current));
1599     return true;
1600 }
1601
1602 template <typename T>
1603 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1604 {
1605     while (true) {
1606         while (UNLIKELY(m_current == '*')) {
1607             shift();
1608             if (m_current == '/') {
1609                 shift();
1610                 return true;
1611             }
1612         }
1613
1614         if (atEnd())
1615             return false;
1616
1617         if (isLineTerminator(m_current)) {
1618             shiftLineTerminator();
1619             m_terminator = true;
1620         } else
1621             shift();
1622     }
1623 }
1624
1625 template <typename T>
1626 bool Lexer<T>::nextTokenIsColon()
1627 {
1628     const T* code = m_code;
1629     while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1630         code++;
1631     
1632     return code < m_codeEnd && *code == ':';
1633 }
1634
1635 template <typename T>
1636 JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
1637 {
1638     JSTokenData* tokenData = &tokenRecord->m_data;
1639     JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1640     ASSERT(!m_error);
1641     ASSERT(m_buffer8.isEmpty());
1642     ASSERT(m_buffer16.isEmpty());
1643
1644     JSTokenType token = ERRORTOK;
1645     m_terminator = false;
1646
1647 start:
1648     while (isWhiteSpace(m_current))
1649         shift();
1650
1651     if (atEnd())
1652         return EOFTOK;
1653     
1654     tokenLocation->startOffset = currentOffset();
1655     ASSERT(currentOffset() >= currentLineStartOffset());
1656     tokenRecord->m_startPosition = currentPosition();
1657
1658     CharacterType type;
1659     if (LIKELY(isLatin1(m_current)))
1660         type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1661     else if (isNonLatin1IdentStart(m_current))
1662         type = CharacterIdentifierStart;
1663     else if (isLineTerminator(m_current))
1664         type = CharacterLineTerminator;
1665     else
1666         type = CharacterInvalid;
1667
1668     switch (type) {
1669     case CharacterGreater:
1670         shift();
1671         if (m_current == '>') {
1672             shift();
1673             if (m_current == '>') {
1674                 shift();
1675                 if (m_current == '=') {
1676                     shift();
1677                     token = URSHIFTEQUAL;
1678                     break;
1679                 }
1680                 token = URSHIFT;
1681                 break;
1682             }
1683             if (m_current == '=') {
1684                 shift();
1685                 token = RSHIFTEQUAL;
1686                 break;
1687             }
1688             token = RSHIFT;
1689             break;
1690         }
1691         if (m_current == '=') {
1692             shift();
1693             token = GE;
1694             break;
1695         }
1696         token = GT;
1697         break;
1698     case CharacterEqual:
1699         shift();
1700         if (m_current == '=') {
1701             shift();
1702             if (m_current == '=') {
1703                 shift();
1704                 token = STREQ;
1705                 break;
1706             }
1707             token = EQEQ;
1708             break;
1709         }
1710         token = EQUAL;
1711         break;
1712     case CharacterLess:
1713         shift();
1714         if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1715             // <!-- marks the beginning of a line comment (for www usage)
1716             goto inSingleLineComment;
1717         }
1718         if (m_current == '<') {
1719             shift();
1720             if (m_current == '=') {
1721                 shift();
1722                 token = LSHIFTEQUAL;
1723                 break;
1724             }
1725             token = LSHIFT;
1726             break;
1727         }
1728         if (m_current == '=') {
1729             shift();
1730             token = LE;
1731             break;
1732         }
1733         token = LT;
1734         break;
1735     case CharacterExclamationMark:
1736         shift();
1737         if (m_current == '=') {
1738             shift();
1739             if (m_current == '=') {
1740                 shift();
1741                 token = STRNEQ;
1742                 break;
1743             }
1744             token = NE;
1745             break;
1746         }
1747         token = EXCLAMATION;
1748         break;
1749     case CharacterAdd:
1750         shift();
1751         if (m_current == '+') {
1752             shift();
1753             token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1754             break;
1755         }
1756         if (m_current == '=') {
1757             shift();
1758             token = PLUSEQUAL;
1759             break;
1760         }
1761         token = PLUS;
1762         break;
1763     case CharacterSub:
1764         shift();
1765         if (m_current == '-') {
1766             shift();
1767             if (m_atLineStart && m_current == '>') {
1768                 shift();
1769                 goto inSingleLineComment;
1770             }
1771             token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1772             break;
1773         }
1774         if (m_current == '=') {
1775             shift();
1776             token = MINUSEQUAL;
1777             break;
1778         }
1779         token = MINUS;
1780         break;
1781     case CharacterMultiply:
1782         shift();
1783         if (m_current == '=') {
1784             shift();
1785             token = MULTEQUAL;
1786             break;
1787         }
1788         token = TIMES;
1789         break;
1790     case CharacterSlash:
1791         shift();
1792         if (m_current == '/') {
1793             shift();
1794             goto inSingleLineComment;
1795         }
1796         if (m_current == '*') {
1797             shift();
1798             if (parseMultilineComment())
1799                 goto start;
1800             m_lexErrorMessage = ASCIILiteral("Multiline comment was not closed properly");
1801             token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
1802             goto returnError;
1803         }
1804         if (m_current == '=') {
1805             shift();
1806             token = DIVEQUAL;
1807             break;
1808         }
1809         token = DIVIDE;
1810         break;
1811     case CharacterAnd:
1812         shift();
1813         if (m_current == '&') {
1814             shift();
1815             token = AND;
1816             break;
1817         }
1818         if (m_current == '=') {
1819             shift();
1820             token = ANDEQUAL;
1821             break;
1822         }
1823         token = BITAND;
1824         break;
1825     case CharacterXor:
1826         shift();
1827         if (m_current == '=') {
1828             shift();
1829             token = XOREQUAL;
1830             break;
1831         }
1832         token = BITXOR;
1833         break;
1834     case CharacterModulo:
1835         shift();
1836         if (m_current == '=') {
1837             shift();
1838             token = MODEQUAL;
1839             break;
1840         }
1841         token = MOD;
1842         break;
1843     case CharacterOr:
1844         shift();
1845         if (m_current == '=') {
1846             shift();
1847             token = OREQUAL;
1848             break;
1849         }
1850         if (m_current == '|') {
1851             shift();
1852             token = OR;
1853             break;
1854         }
1855         token = BITOR;
1856         break;
1857     case CharacterOpenParen:
1858         token = OPENPAREN;
1859         shift();
1860         break;
1861     case CharacterCloseParen:
1862         token = CLOSEPAREN;
1863         shift();
1864         break;
1865     case CharacterOpenBracket:
1866         token = OPENBRACKET;
1867         shift();
1868         break;
1869     case CharacterCloseBracket:
1870         token = CLOSEBRACKET;
1871         shift();
1872         break;
1873     case CharacterComma:
1874         token = COMMA;
1875         shift();
1876         break;
1877     case CharacterColon:
1878         token = COLON;
1879         shift();
1880         break;
1881     case CharacterQuestion:
1882         token = QUESTION;
1883         shift();
1884         break;
1885     case CharacterTilde:
1886         token = TILDE;
1887         shift();
1888         break;
1889     case CharacterSemicolon:
1890         shift();
1891         token = SEMICOLON;
1892         break;
1893     case CharacterOpenBrace:
1894         tokenData->line = lineNumber();
1895         tokenData->offset = currentOffset();
1896         tokenData->lineStartOffset = currentLineStartOffset();
1897         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1898         shift();
1899         token = OPENBRACE;
1900         break;
1901     case CharacterCloseBrace:
1902         tokenData->line = lineNumber();
1903         tokenData->offset = currentOffset();
1904         tokenData->lineStartOffset = currentLineStartOffset();
1905         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1906         shift();
1907         token = CLOSEBRACE;
1908         break;
1909     case CharacterDot:
1910         shift();
1911         if (!isASCIIDigit(m_current)) {
1912             if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
1913                 shift();
1914                 shift();
1915                 token = DOTDOTDOT;
1916                 break;
1917             }
1918             token = DOT;
1919             break;
1920         }
1921         goto inNumberAfterDecimalPoint;
1922     case CharacterZero:
1923         shift();
1924         if ((m_current | 0x20) == 'x') {
1925             if (!isASCIIHexDigit(peek(1))) {
1926                 m_lexErrorMessage = ASCIILiteral("No hexadecimal digits after '0x'");
1927                 token = INVALID_HEX_NUMBER_ERRORTOK;
1928                 goto returnError;
1929             }
1930
1931             // Shift out the 'x' prefix.
1932             shift();
1933
1934             parseHex(tokenData->doubleValue);
1935             if (isIdentStart(m_current)) {
1936                 m_lexErrorMessage = ASCIILiteral("No space between hexadecimal literal and identifier");
1937                 token = INVALID_HEX_NUMBER_ERRORTOK;
1938                 goto returnError;
1939             }
1940             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1941             m_buffer8.shrink(0);
1942             break;
1943         }
1944         if ((m_current | 0x20) == 'b') {
1945             if (!isASCIIBinaryDigit(peek(1))) {
1946                 m_lexErrorMessage = ASCIILiteral("No binary digits after '0b'");
1947                 token = INVALID_BINARY_NUMBER_ERRORTOK;
1948                 goto returnError;
1949             }
1950
1951             // Shift out the 'b' prefix.
1952             shift();
1953
1954             parseBinary(tokenData->doubleValue);
1955             if (isIdentStart(m_current)) {
1956                 m_lexErrorMessage = ASCIILiteral("No space between binary literal and identifier");
1957                 token = INVALID_BINARY_NUMBER_ERRORTOK;
1958                 goto returnError;
1959             }
1960             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1961             m_buffer8.shrink(0);
1962             break;
1963         }
1964
1965         if ((m_current | 0x20) == 'o') {
1966             if (!isASCIIOctalDigit(peek(1))) {
1967                 m_lexErrorMessage = ASCIILiteral("No octal digits after '0o'");
1968                 token = INVALID_OCTAL_NUMBER_ERRORTOK;
1969                 goto returnError;
1970             }
1971
1972             // Shift out the 'o' prefix.
1973             shift();
1974
1975             parseOctal(tokenData->doubleValue);
1976             if (isIdentStart(m_current)) {
1977                 m_lexErrorMessage = ASCIILiteral("No space between octal literal and identifier");
1978                 token = INVALID_OCTAL_NUMBER_ERRORTOK;
1979                 goto returnError;
1980             }
1981             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1982             m_buffer8.shrink(0);
1983             break;
1984         }
1985
1986         record8('0');
1987         if (strictMode && isASCIIDigit(m_current)) {
1988             m_lexErrorMessage = ASCIILiteral("Decimal integer literals with a leading zero are forbidden in strict mode");
1989             token = INVALID_OCTAL_NUMBER_ERRORTOK;
1990             goto returnError;
1991         }
1992         if (isASCIIOctalDigit(m_current)) {
1993             if (parseOctal(tokenData->doubleValue)) {
1994                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1995             }
1996         }
1997         FALLTHROUGH;
1998     case CharacterNumber:
1999         if (LIKELY(token != INTEGER && token != DOUBLE)) {
2000             if (!parseDecimal(tokenData->doubleValue)) {
2001                 token = INTEGER;
2002                 if (m_current == '.') {
2003                     shift();
2004 inNumberAfterDecimalPoint:
2005                     parseNumberAfterDecimalPoint();
2006                     token = DOUBLE;
2007                 }
2008                 if ((m_current | 0x20) == 'e') {
2009                     if (!parseNumberAfterExponentIndicator()) {
2010                         m_lexErrorMessage = ASCIILiteral("Non-number found after exponent indicator");
2011                         token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2012                         goto returnError;
2013                     }
2014                 }
2015                 size_t parsedLength;
2016                 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2017                 if (token == INTEGER)
2018                     token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2019             } else
2020                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2021         }
2022
2023         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
2024         if (UNLIKELY(isIdentStart(m_current))) {
2025             m_lexErrorMessage = ASCIILiteral("At least one digit must occur after a decimal point");
2026             token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2027             goto returnError;
2028         }
2029         m_buffer8.shrink(0);
2030         break;
2031     case CharacterQuote: {
2032         StringParseResult result = StringCannotBeParsed;
2033         if (lexerFlags & LexerFlagsDontBuildStrings)
2034             result = parseString<false>(tokenData, strictMode);
2035         else
2036             result = parseString<true>(tokenData, strictMode);
2037
2038         if (UNLIKELY(result != StringParsedSuccessfully)) {
2039             token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2040             goto returnError;
2041         }
2042         shift();
2043         token = STRING;
2044         break;
2045         }
2046 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2047     case CharacterBackQuote: {
2048         // Skip backquote.
2049         shift();
2050         StringParseResult result = StringCannotBeParsed;
2051         if (lexerFlags & LexerFlagsDontBuildStrings)
2052             result = parseTemplateLiteral<false>(tokenData);
2053         else
2054             result = parseTemplateLiteral<true>(tokenData);
2055
2056         if (UNLIKELY(result != StringParsedSuccessfully)) {
2057             token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2058             goto returnError;
2059         }
2060         token = TEMPLATE;
2061         break;
2062         }
2063 #endif
2064     case CharacterIdentifierStart:
2065         ASSERT(isIdentStart(m_current));
2066         FALLTHROUGH;
2067     case CharacterBackSlash:
2068         parseIdent:
2069         if (lexerFlags & LexexFlagsDontBuildKeywords)
2070             token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2071         else
2072             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2073         break;
2074     case CharacterLineTerminator:
2075         ASSERT(isLineTerminator(m_current));
2076         shiftLineTerminator();
2077         m_atLineStart = true;
2078         m_terminator = true;
2079         m_lineStart = m_code;
2080         goto start;
2081     case CharacterPrivateIdentifierStart:
2082         if (m_parsingBuiltinFunction)
2083             goto parseIdent;
2084
2085         FALLTHROUGH;
2086     case CharacterInvalid:
2087         m_lexErrorMessage = invalidCharacterMessage();
2088         token = ERRORTOK;
2089         goto returnError;
2090     default:
2091         RELEASE_ASSERT_NOT_REACHED();
2092         m_lexErrorMessage = ASCIILiteral("Internal Error");
2093         token = ERRORTOK;
2094         goto returnError;
2095     }
2096
2097     m_atLineStart = false;
2098     goto returnToken;
2099
2100 inSingleLineComment:
2101     while (!isLineTerminator(m_current)) {
2102         if (atEnd())
2103             return EOFTOK;
2104         shift();
2105     }
2106     shiftLineTerminator();
2107     m_atLineStart = true;
2108     m_terminator = true;
2109     m_lineStart = m_code;
2110     if (!lastTokenWasRestrKeyword())
2111         goto start;
2112
2113     token = SEMICOLON;
2114     // Fall through into returnToken.
2115
2116 returnToken:
2117     tokenLocation->line = m_lineNumber;
2118     tokenLocation->endOffset = currentOffset();
2119     tokenLocation->lineStartOffset = currentLineStartOffset();
2120     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2121     tokenRecord->m_endPosition = currentPosition();
2122     m_lastToken = token;
2123     return token;
2124
2125 returnError:
2126     m_error = true;
2127     tokenLocation->line = m_lineNumber;
2128     tokenLocation->endOffset = currentOffset();
2129     tokenLocation->lineStartOffset = currentLineStartOffset();
2130     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2131     tokenRecord->m_endPosition = currentPosition();
2132     RELEASE_ASSERT(token & ErrorTokenFlag);
2133     return token;
2134 }
2135
2136 template <typename T>
2137 static inline void orCharacter(UChar&, UChar);
2138
2139 template <>
2140 inline void orCharacter<LChar>(UChar&, UChar) { }
2141
2142 template <>
2143 inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2144 {
2145     orAccumulator |= character;
2146 }
2147
2148 template <typename T>
2149 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
2150 {
2151     ASSERT(m_buffer16.isEmpty());
2152
2153     bool lastWasEscape = false;
2154     bool inBrackets = false;
2155     UChar charactersOredTogether = 0;
2156
2157     if (patternPrefix) {
2158         ASSERT(!isLineTerminator(patternPrefix));
2159         ASSERT(patternPrefix != '/');
2160         ASSERT(patternPrefix != '[');
2161         record16(patternPrefix);
2162     }
2163
2164     while (true) {
2165         if (isLineTerminator(m_current) || atEnd()) {
2166             m_buffer16.shrink(0);
2167             return false;
2168         }
2169
2170         T prev = m_current;
2171         
2172         shift();
2173
2174         if (prev == '/' && !lastWasEscape && !inBrackets)
2175             break;
2176
2177         record16(prev);
2178         orCharacter<T>(charactersOredTogether, prev);
2179
2180         if (lastWasEscape) {
2181             lastWasEscape = false;
2182             continue;
2183         }
2184
2185         switch (prev) {
2186         case '[':
2187             inBrackets = true;
2188             break;
2189         case ']':
2190             inBrackets = false;
2191             break;
2192         case '\\':
2193             lastWasEscape = true;
2194             break;
2195         }
2196     }
2197
2198     pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2199
2200     m_buffer16.shrink(0);
2201     charactersOredTogether = 0;
2202
2203     while (isIdentPart(m_current)) {
2204         record16(m_current);
2205         orCharacter<T>(charactersOredTogether, m_current);
2206         shift();
2207     }
2208
2209     flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2210     m_buffer16.shrink(0);
2211
2212     return true;
2213 }
2214
2215 template <typename T>
2216 bool Lexer<T>::skipRegExp()
2217 {
2218     bool lastWasEscape = false;
2219     bool inBrackets = false;
2220
2221     while (true) {
2222         if (isLineTerminator(m_current) || atEnd())
2223             return false;
2224
2225         T prev = m_current;
2226         
2227         shift();
2228
2229         if (prev == '/' && !lastWasEscape && !inBrackets)
2230             break;
2231
2232         if (lastWasEscape) {
2233             lastWasEscape = false;
2234             continue;
2235         }
2236
2237         switch (prev) {
2238         case '[':
2239             inBrackets = true;
2240             break;
2241         case ']':
2242             inBrackets = false;
2243             break;
2244         case '\\':
2245             lastWasEscape = true;
2246             break;
2247         }
2248     }
2249
2250     while (isIdentPart(m_current))
2251         shift();
2252
2253     return true;
2254 }
2255
2256 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2257 template <typename T>
2258 JSTokenType Lexer<T>::scanTrailingTemplateString(JSToken* tokenRecord)
2259 {
2260     JSTokenData* tokenData = &tokenRecord->m_data;
2261     JSTokenLocation* tokenLocation = &tokenRecord->m_location;
2262     ASSERT(!m_error);
2263     ASSERT(m_buffer16.isEmpty());
2264
2265     // Leading closing brace } is already shifted in the previous token scan.
2266     // So in this re-scan phase, shift() is not needed here.
2267     StringParseResult result = parseTemplateLiteral<true>(tokenData);
2268     JSTokenType token = ERRORTOK;
2269     if (UNLIKELY(result != StringParsedSuccessfully)) {
2270         token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2271         m_error = true;
2272     } else {
2273         token = TEMPLATE;
2274         m_lastToken = token;
2275     }
2276
2277     // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2278     m_atLineStart = false;
2279
2280     // Adjust current tokenLocation data for TemplateString.
2281     tokenLocation->line = m_lineNumber;
2282     tokenLocation->endOffset = currentOffset();
2283     tokenLocation->lineStartOffset = currentLineStartOffset();
2284     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2285     tokenRecord->m_endPosition = currentPosition();
2286     return token;
2287 }
2288 #endif
2289
2290 template <typename T>
2291 void Lexer<T>::clear()
2292 {
2293     m_arena = 0;
2294
2295     Vector<LChar> newBuffer8;
2296     m_buffer8.swap(newBuffer8);
2297
2298     Vector<UChar> newBuffer16;
2299     m_buffer16.swap(newBuffer16);
2300
2301     m_isReparsing = false;
2302 }
2303
2304 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
2305 template class Lexer<LChar>;
2306 template class Lexer<UChar>;
2307
2308 } // namespace JSC