7de14220bbda72738b573a4b9788ce0b32cc7ce3
[WebKit-https.git] / Source / JavaScriptCore / parser / Lexer.cpp
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
7  *
8  *  This library is free software; you can redistribute it and/or
9  *  modify it under the terms of the GNU Library General Public
10  *  License as published by the Free Software Foundation; either
11  *  version 2 of the License, or (at your option) any later version.
12  *
13  *  This library is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  *  Library General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Library General Public License
19  *  along with this library; see the file COPYING.LIB.  If not, write to
20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  *  Boston, MA 02110-1301, USA.
22  *
23  */
24
25 #include "config.h"
26 #include "Lexer.h"
27
28 #include "JSFunctionInlines.h"
29
30 #include "BuiltinNames.h"
31 #include "JSGlobalObjectFunctions.h"
32 #include "Identifier.h"
33 #include "Nodes.h"
34 #include "JSCInlines.h"
35 #include <wtf/dtoa.h>
36 #include <ctype.h>
37 #include <limits.h>
38 #include <string.h>
39 #include <wtf/Assertions.h>
40
41 #include "KeywordLookup.h"
42 #include "Lexer.lut.h"
43 #include "Parser.h"
44
45 namespace JSC {
46
47 Keywords::Keywords(VM& vm)
48     : m_vm(vm)
49     , m_keywordTable(JSC::mainTable)
50 {
51 }
52
// Classification assigned to each Latin-1 character by typesOfLatin1Characters.
// The enumerator order is significant: isIdentPart() relies on every type that may
// appear inside an identifier comparing less than or equal to CharacterNumber.
enum CharacterType {
    // --- Identifier-capable types. These three must stay first and in this order;
    // --- they double as the ASCII alpha / alphanumeric test (see isIdentStart and
    // --- isIdentPart).
    CharacterIdentifierStart,
    CharacterZero,
    CharacterNumber,

    // --- Types dispatched by the main switch.
    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // --- Operator characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // --- Other types: white space and the '@' prefix of private identifiers.
    CharacterWhiteSpace,
    CharacterPrivateIdentifierStart
};
96
97 // 256 Latin-1 codes
98 static const unsigned short typesOfLatin1Characters[256] = {
99 /*   0 - Null               */ CharacterInvalid,
100 /*   1 - Start of Heading   */ CharacterInvalid,
101 /*   2 - Start of Text      */ CharacterInvalid,
102 /*   3 - End of Text        */ CharacterInvalid,
103 /*   4 - End of Transm.     */ CharacterInvalid,
104 /*   5 - Enquiry            */ CharacterInvalid,
105 /*   6 - Acknowledgment     */ CharacterInvalid,
106 /*   7 - Bell               */ CharacterInvalid,
107 /*   8 - Back Space         */ CharacterInvalid,
108 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
109 /*  10 - Line Feed          */ CharacterLineTerminator,
110 /*  11 - Vertical Tab       */ CharacterWhiteSpace,
111 /*  12 - Form Feed          */ CharacterWhiteSpace,
112 /*  13 - Carriage Return    */ CharacterLineTerminator,
113 /*  14 - Shift Out          */ CharacterInvalid,
114 /*  15 - Shift In           */ CharacterInvalid,
115 /*  16 - Data Line Escape   */ CharacterInvalid,
116 /*  17 - Device Control 1   */ CharacterInvalid,
117 /*  18 - Device Control 2   */ CharacterInvalid,
118 /*  19 - Device Control 3   */ CharacterInvalid,
119 /*  20 - Device Control 4   */ CharacterInvalid,
120 /*  21 - Negative Ack.      */ CharacterInvalid,
121 /*  22 - Synchronous Idle   */ CharacterInvalid,
122 /*  23 - End of Transmit    */ CharacterInvalid,
123 /*  24 - Cancel             */ CharacterInvalid,
124 /*  25 - End of Medium      */ CharacterInvalid,
125 /*  26 - Substitute         */ CharacterInvalid,
126 /*  27 - Escape             */ CharacterInvalid,
127 /*  28 - File Separator     */ CharacterInvalid,
128 /*  29 - Group Separator    */ CharacterInvalid,
129 /*  30 - Record Separator   */ CharacterInvalid,
130 /*  31 - Unit Separator     */ CharacterInvalid,
131 /*  32 - Space              */ CharacterWhiteSpace,
132 /*  33 - !                  */ CharacterExclamationMark,
133 /*  34 - "                  */ CharacterQuote,
134 /*  35 - #                  */ CharacterInvalid,
135 /*  36 - $                  */ CharacterIdentifierStart,
136 /*  37 - %                  */ CharacterModulo,
137 /*  38 - &                  */ CharacterAnd,
138 /*  39 - '                  */ CharacterQuote,
139 /*  40 - (                  */ CharacterOpenParen,
140 /*  41 - )                  */ CharacterCloseParen,
141 /*  42 - *                  */ CharacterMultiply,
142 /*  43 - +                  */ CharacterAdd,
143 /*  44 - ,                  */ CharacterComma,
144 /*  45 - -                  */ CharacterSub,
145 /*  46 - .                  */ CharacterDot,
146 /*  47 - /                  */ CharacterSlash,
147 /*  48 - 0                  */ CharacterZero,
148 /*  49 - 1                  */ CharacterNumber,
149 /*  50 - 2                  */ CharacterNumber,
150 /*  51 - 3                  */ CharacterNumber,
151 /*  52 - 4                  */ CharacterNumber,
152 /*  53 - 5                  */ CharacterNumber,
153 /*  54 - 6                  */ CharacterNumber,
154 /*  55 - 7                  */ CharacterNumber,
155 /*  56 - 8                  */ CharacterNumber,
156 /*  57 - 9                  */ CharacterNumber,
157 /*  58 - :                  */ CharacterColon,
158 /*  59 - ;                  */ CharacterSemicolon,
159 /*  60 - <                  */ CharacterLess,
160 /*  61 - =                  */ CharacterEqual,
161 /*  62 - >                  */ CharacterGreater,
162 /*  63 - ?                  */ CharacterQuestion,
163 /*  64 - @                  */ CharacterPrivateIdentifierStart,
164 /*  65 - A                  */ CharacterIdentifierStart,
165 /*  66 - B                  */ CharacterIdentifierStart,
166 /*  67 - C                  */ CharacterIdentifierStart,
167 /*  68 - D                  */ CharacterIdentifierStart,
168 /*  69 - E                  */ CharacterIdentifierStart,
169 /*  70 - F                  */ CharacterIdentifierStart,
170 /*  71 - G                  */ CharacterIdentifierStart,
171 /*  72 - H                  */ CharacterIdentifierStart,
172 /*  73 - I                  */ CharacterIdentifierStart,
173 /*  74 - J                  */ CharacterIdentifierStart,
174 /*  75 - K                  */ CharacterIdentifierStart,
175 /*  76 - L                  */ CharacterIdentifierStart,
176 /*  77 - M                  */ CharacterIdentifierStart,
177 /*  78 - N                  */ CharacterIdentifierStart,
178 /*  79 - O                  */ CharacterIdentifierStart,
179 /*  80 - P                  */ CharacterIdentifierStart,
180 /*  81 - Q                  */ CharacterIdentifierStart,
181 /*  82 - R                  */ CharacterIdentifierStart,
182 /*  83 - S                  */ CharacterIdentifierStart,
183 /*  84 - T                  */ CharacterIdentifierStart,
184 /*  85 - U                  */ CharacterIdentifierStart,
185 /*  86 - V                  */ CharacterIdentifierStart,
186 /*  87 - W                  */ CharacterIdentifierStart,
187 /*  88 - X                  */ CharacterIdentifierStart,
188 /*  89 - Y                  */ CharacterIdentifierStart,
189 /*  90 - Z                  */ CharacterIdentifierStart,
190 /*  91 - [                  */ CharacterOpenBracket,
191 /*  92 - \                  */ CharacterBackSlash,
192 /*  93 - ]                  */ CharacterCloseBracket,
193 /*  94 - ^                  */ CharacterXor,
194 /*  95 - _                  */ CharacterIdentifierStart,
195 /*  96 - `                  */ CharacterInvalid,
196 /*  97 - a                  */ CharacterIdentifierStart,
197 /*  98 - b                  */ CharacterIdentifierStart,
198 /*  99 - c                  */ CharacterIdentifierStart,
199 /* 100 - d                  */ CharacterIdentifierStart,
200 /* 101 - e                  */ CharacterIdentifierStart,
201 /* 102 - f                  */ CharacterIdentifierStart,
202 /* 103 - g                  */ CharacterIdentifierStart,
203 /* 104 - h                  */ CharacterIdentifierStart,
204 /* 105 - i                  */ CharacterIdentifierStart,
205 /* 106 - j                  */ CharacterIdentifierStart,
206 /* 107 - k                  */ CharacterIdentifierStart,
207 /* 108 - l                  */ CharacterIdentifierStart,
208 /* 109 - m                  */ CharacterIdentifierStart,
209 /* 110 - n                  */ CharacterIdentifierStart,
210 /* 111 - o                  */ CharacterIdentifierStart,
211 /* 112 - p                  */ CharacterIdentifierStart,
212 /* 113 - q                  */ CharacterIdentifierStart,
213 /* 114 - r                  */ CharacterIdentifierStart,
214 /* 115 - s                  */ CharacterIdentifierStart,
215 /* 116 - t                  */ CharacterIdentifierStart,
216 /* 117 - u                  */ CharacterIdentifierStart,
217 /* 118 - v                  */ CharacterIdentifierStart,
218 /* 119 - w                  */ CharacterIdentifierStart,
219 /* 120 - x                  */ CharacterIdentifierStart,
220 /* 121 - y                  */ CharacterIdentifierStart,
221 /* 122 - z                  */ CharacterIdentifierStart,
222 /* 123 - {                  */ CharacterOpenBrace,
223 /* 124 - |                  */ CharacterOr,
224 /* 125 - }                  */ CharacterCloseBrace,
225 /* 126 - ~                  */ CharacterTilde,
226 /* 127 - Delete             */ CharacterInvalid,
227 /* 128 - Cc category        */ CharacterInvalid,
228 /* 129 - Cc category        */ CharacterInvalid,
229 /* 130 - Cc category        */ CharacterInvalid,
230 /* 131 - Cc category        */ CharacterInvalid,
231 /* 132 - Cc category        */ CharacterInvalid,
232 /* 133 - Cc category        */ CharacterInvalid,
233 /* 134 - Cc category        */ CharacterInvalid,
234 /* 135 - Cc category        */ CharacterInvalid,
235 /* 136 - Cc category        */ CharacterInvalid,
236 /* 137 - Cc category        */ CharacterInvalid,
237 /* 138 - Cc category        */ CharacterInvalid,
238 /* 139 - Cc category        */ CharacterInvalid,
239 /* 140 - Cc category        */ CharacterInvalid,
240 /* 141 - Cc category        */ CharacterInvalid,
241 /* 142 - Cc category        */ CharacterInvalid,
242 /* 143 - Cc category        */ CharacterInvalid,
243 /* 144 - Cc category        */ CharacterInvalid,
244 /* 145 - Cc category        */ CharacterInvalid,
245 /* 146 - Cc category        */ CharacterInvalid,
246 /* 147 - Cc category        */ CharacterInvalid,
247 /* 148 - Cc category        */ CharacterInvalid,
248 /* 149 - Cc category        */ CharacterInvalid,
249 /* 150 - Cc category        */ CharacterInvalid,
250 /* 151 - Cc category        */ CharacterInvalid,
251 /* 152 - Cc category        */ CharacterInvalid,
252 /* 153 - Cc category        */ CharacterInvalid,
253 /* 154 - Cc category        */ CharacterInvalid,
254 /* 155 - Cc category        */ CharacterInvalid,
255 /* 156 - Cc category        */ CharacterInvalid,
256 /* 157 - Cc category        */ CharacterInvalid,
257 /* 158 - Cc category        */ CharacterInvalid,
258 /* 159 - Cc category        */ CharacterInvalid,
259 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
260 /* 161 - Po category        */ CharacterInvalid,
261 /* 162 - Sc category        */ CharacterInvalid,
262 /* 163 - Sc category        */ CharacterInvalid,
263 /* 164 - Sc category        */ CharacterInvalid,
264 /* 165 - Sc category        */ CharacterInvalid,
265 /* 166 - So category        */ CharacterInvalid,
266 /* 167 - So category        */ CharacterInvalid,
267 /* 168 - Sk category        */ CharacterInvalid,
268 /* 169 - So category        */ CharacterInvalid,
269 /* 170 - Ll category        */ CharacterIdentifierStart,
270 /* 171 - Pi category        */ CharacterInvalid,
271 /* 172 - Sm category        */ CharacterInvalid,
272 /* 173 - Cf category        */ CharacterInvalid,
273 /* 174 - So category        */ CharacterInvalid,
274 /* 175 - Sk category        */ CharacterInvalid,
275 /* 176 - So category        */ CharacterInvalid,
276 /* 177 - Sm category        */ CharacterInvalid,
277 /* 178 - No category        */ CharacterInvalid,
278 /* 179 - No category        */ CharacterInvalid,
279 /* 180 - Sk category        */ CharacterInvalid,
280 /* 181 - Ll category        */ CharacterIdentifierStart,
281 /* 182 - So category        */ CharacterInvalid,
282 /* 183 - Po category        */ CharacterInvalid,
283 /* 184 - Sk category        */ CharacterInvalid,
284 /* 185 - No category        */ CharacterInvalid,
285 /* 186 - Ll category        */ CharacterIdentifierStart,
286 /* 187 - Pf category        */ CharacterInvalid,
287 /* 188 - No category        */ CharacterInvalid,
288 /* 189 - No category        */ CharacterInvalid,
289 /* 190 - No category        */ CharacterInvalid,
290 /* 191 - Po category        */ CharacterInvalid,
291 /* 192 - Lu category        */ CharacterIdentifierStart,
292 /* 193 - Lu category        */ CharacterIdentifierStart,
293 /* 194 - Lu category        */ CharacterIdentifierStart,
294 /* 195 - Lu category        */ CharacterIdentifierStart,
295 /* 196 - Lu category        */ CharacterIdentifierStart,
296 /* 197 - Lu category        */ CharacterIdentifierStart,
297 /* 198 - Lu category        */ CharacterIdentifierStart,
298 /* 199 - Lu category        */ CharacterIdentifierStart,
299 /* 200 - Lu category        */ CharacterIdentifierStart,
300 /* 201 - Lu category        */ CharacterIdentifierStart,
301 /* 202 - Lu category        */ CharacterIdentifierStart,
302 /* 203 - Lu category        */ CharacterIdentifierStart,
303 /* 204 - Lu category        */ CharacterIdentifierStart,
304 /* 205 - Lu category        */ CharacterIdentifierStart,
305 /* 206 - Lu category        */ CharacterIdentifierStart,
306 /* 207 - Lu category        */ CharacterIdentifierStart,
307 /* 208 - Lu category        */ CharacterIdentifierStart,
308 /* 209 - Lu category        */ CharacterIdentifierStart,
309 /* 210 - Lu category        */ CharacterIdentifierStart,
310 /* 211 - Lu category        */ CharacterIdentifierStart,
311 /* 212 - Lu category        */ CharacterIdentifierStart,
312 /* 213 - Lu category        */ CharacterIdentifierStart,
313 /* 214 - Lu category        */ CharacterIdentifierStart,
314 /* 215 - Sm category        */ CharacterInvalid,
315 /* 216 - Lu category        */ CharacterIdentifierStart,
316 /* 217 - Lu category        */ CharacterIdentifierStart,
317 /* 218 - Lu category        */ CharacterIdentifierStart,
318 /* 219 - Lu category        */ CharacterIdentifierStart,
319 /* 220 - Lu category        */ CharacterIdentifierStart,
320 /* 221 - Lu category        */ CharacterIdentifierStart,
321 /* 222 - Lu category        */ CharacterIdentifierStart,
322 /* 223 - Ll category        */ CharacterIdentifierStart,
323 /* 224 - Ll category        */ CharacterIdentifierStart,
324 /* 225 - Ll category        */ CharacterIdentifierStart,
325 /* 226 - Ll category        */ CharacterIdentifierStart,
326 /* 227 - Ll category        */ CharacterIdentifierStart,
327 /* 228 - Ll category        */ CharacterIdentifierStart,
328 /* 229 - Ll category        */ CharacterIdentifierStart,
329 /* 230 - Ll category        */ CharacterIdentifierStart,
330 /* 231 - Ll category        */ CharacterIdentifierStart,
331 /* 232 - Ll category        */ CharacterIdentifierStart,
332 /* 233 - Ll category        */ CharacterIdentifierStart,
333 /* 234 - Ll category        */ CharacterIdentifierStart,
334 /* 235 - Ll category        */ CharacterIdentifierStart,
335 /* 236 - Ll category        */ CharacterIdentifierStart,
336 /* 237 - Ll category        */ CharacterIdentifierStart,
337 /* 238 - Ll category        */ CharacterIdentifierStart,
338 /* 239 - Ll category        */ CharacterIdentifierStart,
339 /* 240 - Ll category        */ CharacterIdentifierStart,
340 /* 241 - Ll category        */ CharacterIdentifierStart,
341 /* 242 - Ll category        */ CharacterIdentifierStart,
342 /* 243 - Ll category        */ CharacterIdentifierStart,
343 /* 244 - Ll category        */ CharacterIdentifierStart,
344 /* 245 - Ll category        */ CharacterIdentifierStart,
345 /* 246 - Ll category        */ CharacterIdentifierStart,
346 /* 247 - Sm category        */ CharacterInvalid,
347 /* 248 - Ll category        */ CharacterIdentifierStart,
348 /* 249 - Ll category        */ CharacterIdentifierStart,
349 /* 250 - Ll category        */ CharacterIdentifierStart,
350 /* 251 - Ll category        */ CharacterIdentifierStart,
351 /* 252 - Ll category        */ CharacterIdentifierStart,
352 /* 253 - Ll category        */ CharacterIdentifierStart,
353 /* 254 - Ll category        */ CharacterIdentifierStart,
354 /* 255 - Ll category        */ CharacterIdentifierStart
355 };
356
357 // This table provides the character that results from \X, where the ASCII code of X (0-127) is used
358 // directly as the index into the table. A table value of 0 means that more processing needs to be done.
359 static const LChar singleCharacterEscapeValuesForASCII[128] = {
360 /*   0 - Null               */ 0,
361 /*   1 - Start of Heading   */ 0,
362 /*   2 - Start of Text      */ 0,
363 /*   3 - End of Text        */ 0,
364 /*   4 - End of Transm.     */ 0,
365 /*   5 - Enquiry            */ 0,
366 /*   6 - Acknowledgment     */ 0,
367 /*   7 - Bell               */ 0,
368 /*   8 - Back Space         */ 0,
369 /*   9 - Horizontal Tab     */ 0,
370 /*  10 - Line Feed          */ 0,
371 /*  11 - Vertical Tab       */ 0,
372 /*  12 - Form Feed          */ 0,
373 /*  13 - Carriage Return    */ 0,
374 /*  14 - Shift Out          */ 0,
375 /*  15 - Shift In           */ 0,
376 /*  16 - Data Line Escape   */ 0,
377 /*  17 - Device Control 1   */ 0,
378 /*  18 - Device Control 2   */ 0,
379 /*  19 - Device Control 3   */ 0,
380 /*  20 - Device Control 4   */ 0,
381 /*  21 - Negative Ack.      */ 0,
382 /*  22 - Synchronous Idle   */ 0,
383 /*  23 - End of Transmit    */ 0,
384 /*  24 - Cancel             */ 0,
385 /*  25 - End of Medium      */ 0,
386 /*  26 - Substitute         */ 0,
387 /*  27 - Escape             */ 0,
388 /*  28 - File Separator     */ 0,
389 /*  29 - Group Separator    */ 0,
390 /*  30 - Record Separator   */ 0,
391 /*  31 - Unit Separator     */ 0,
392 /*  32 - Space              */ ' ',
393 /*  33 - !                  */ '!',
394 /*  34 - "                  */ '"',
395 /*  35 - #                  */ '#',
396 /*  36 - $                  */ '$',
397 /*  37 - %                  */ '%',
398 /*  38 - &                  */ '&',
399 /*  39 - '                  */ '\'',
400 /*  40 - (                  */ '(',
401 /*  41 - )                  */ ')',
402 /*  42 - *                  */ '*',
403 /*  43 - +                  */ '+',
404 /*  44 - ,                  */ ',',
405 /*  45 - -                  */ '-',
406 /*  46 - .                  */ '.',
407 /*  47 - /                  */ '/',
408 /*  48 - 0                  */ 0,
409 /*  49 - 1                  */ 0,
410 /*  50 - 2                  */ 0,
411 /*  51 - 3                  */ 0,
412 /*  52 - 4                  */ 0,
413 /*  53 - 5                  */ 0,
414 /*  54 - 6                  */ 0,
415 /*  55 - 7                  */ 0,
416 /*  56 - 8                  */ 0,
417 /*  57 - 9                  */ 0,
418 /*  58 - :                  */ ':',
419 /*  59 - ;                  */ ';',
420 /*  60 - <                  */ '<',
421 /*  61 - =                  */ '=',
422 /*  62 - >                  */ '>',
423 /*  63 - ?                  */ '?',
424 /*  64 - @                  */ '@',
425 /*  65 - A                  */ 'A',
426 /*  66 - B                  */ 'B',
427 /*  67 - C                  */ 'C',
428 /*  68 - D                  */ 'D',
429 /*  69 - E                  */ 'E',
430 /*  70 - F                  */ 'F',
431 /*  71 - G                  */ 'G',
432 /*  72 - H                  */ 'H',
433 /*  73 - I                  */ 'I',
434 /*  74 - J                  */ 'J',
435 /*  75 - K                  */ 'K',
436 /*  76 - L                  */ 'L',
437 /*  77 - M                  */ 'M',
438 /*  78 - N                  */ 'N',
439 /*  79 - O                  */ 'O',
440 /*  80 - P                  */ 'P',
441 /*  81 - Q                  */ 'Q',
442 /*  82 - R                  */ 'R',
443 /*  83 - S                  */ 'S',
444 /*  84 - T                  */ 'T',
445 /*  85 - U                  */ 'U',
446 /*  86 - V                  */ 'V',
447 /*  87 - W                  */ 'W',
448 /*  88 - X                  */ 'X',
449 /*  89 - Y                  */ 'Y',
450 /*  90 - Z                  */ 'Z',
451 /*  91 - [                  */ '[',
452 /*  92 - \                  */ '\\',
453 /*  93 - ]                  */ ']',
454 /*  94 - ^                  */ '^',
455 /*  95 - _                  */ '_',
456 /*  96 - `                  */ '`',
457 /*  97 - a                  */ 'a',
458 /*  98 - b                  */ 0x08,
459 /*  99 - c                  */ 'c',
460 /* 100 - d                  */ 'd',
461 /* 101 - e                  */ 'e',
462 /* 102 - f                  */ 0x0C,
463 /* 103 - g                  */ 'g',
464 /* 104 - h                  */ 'h',
465 /* 105 - i                  */ 'i',
466 /* 106 - j                  */ 'j',
467 /* 107 - k                  */ 'k',
468 /* 108 - l                  */ 'l',
469 /* 109 - m                  */ 'm',
470 /* 110 - n                  */ 0x0A,
471 /* 111 - o                  */ 'o',
472 /* 112 - p                  */ 'p',
473 /* 113 - q                  */ 'q',
474 /* 114 - r                  */ 0x0D,
475 /* 115 - s                  */ 's',
476 /* 116 - t                  */ 0x09,
477 /* 117 - u                  */ 0,
478 /* 118 - v                  */ 0x0B,
479 /* 119 - w                  */ 'w',
480 /* 120 - x                  */ 0,
481 /* 121 - y                  */ 'y',
482 /* 122 - z                  */ 'z',
483 /* 123 - {                  */ '{',
484 /* 124 - |                  */ '|',
485 /* 125 - }                  */ '}',
486 /* 126 - ~                  */ '~',
487 /* 127 - Delete             */ 0
488 };
489
490 template <typename T>
491 Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode)
492     : m_isReparsing(false)
493     , m_vm(vm)
494     , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
495 {
496 }
497
498 static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
499 {
500     if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
501         return INTEGER;
502     return DOUBLE;
503 }
504
505 template <typename T>
506 Lexer<T>::~Lexer()
507 {
508 }
509
510 template <typename T>
511 String Lexer<T>::invalidCharacterMessage() const
512 {
513     switch (m_current) {
514     case 0:
515         return ASCIILiteral("Invalid character: '\\0'");
516     case 10:
517         return ASCIILiteral("Invalid character: '\\n'");
518     case 11:
519         return ASCIILiteral("Invalid character: '\\v'");
520     case 13:
521         return ASCIILiteral("Invalid character: '\\r'");
522     case 35:
523         return ASCIILiteral("Invalid character: '#'");
524     case 64:
525         return ASCIILiteral("Invalid character: '@'");
526     case 96:
527         return ASCIILiteral("Invalid character: '`'");
528     default:
529         return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current));
530     }
531 }
532
533 template <typename T>
534 ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
535 {
536     ASSERT(m_code <= m_codeEnd);
537     return m_code;
538 }
539
540 template <typename T>
541 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
542 {
543     m_arena = &arena->identifierArena();
544     
545     m_lineNumber = source.firstLine();
546     m_lastToken = -1;
547     
548     const String& sourceString = source.provider()->source();
549
550     if (!sourceString.isNull())
551         setCodeStart(sourceString.impl());
552     else
553         m_codeStart = 0;
554
555     m_source = &source;
556     m_sourceOffset = source.startOffset();
557     m_codeStartPlusOffset = m_codeStart + source.startOffset();
558     m_code = m_codeStartPlusOffset;
559     m_codeEnd = m_codeStart + source.endOffset();
560     m_error = false;
561     m_atLineStart = true;
562     m_lineStart = m_code;
563     m_lexErrorMessage = String();
564     
565     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
566     m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
567     
568     if (LIKELY(m_code < m_codeEnd))
569         m_current = *m_code;
570     else
571         m_current = 0;
572     ASSERT(currentOffset() == source.startOffset());
573 }
574
575 template <typename T>
576 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
577 {
578     m_code += shiftAmount;
579     ASSERT(currentOffset() >= currentLineStartOffset());
580     m_current = *m_code;
581 }
582
583 template <typename T>
584 ALWAYS_INLINE void Lexer<T>::shift()
585 {
586     // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
587     m_current = 0;
588     ++m_code;
589     if (LIKELY(m_code < m_codeEnd))
590         m_current = *m_code;
591 }
592
593 template <typename T>
594 ALWAYS_INLINE bool Lexer<T>::atEnd() const
595 {
596     ASSERT(!m_current || m_code < m_codeEnd);
597     return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
598 }
599
600 template <typename T>
601 ALWAYS_INLINE T Lexer<T>::peek(int offset) const
602 {
603     ASSERT(offset > 0 && offset < 5);
604     const T* code = m_code + offset;
605     return (code < m_codeEnd) ? *code : 0;
606 }
607
608 template <typename T>
609 typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex()
610 {
611     T char1 = peek(1);
612     T char2 = peek(2);
613     T char3 = peek(3);
614
615     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
616         return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex);
617
618     int result = convertUnicode(m_current, char1, char2, char3);
619     shift();
620     shift();
621     shift();
622     shift();
623     return UnicodeHexValue(result);
624 }
625
626 template <typename T>
627 void Lexer<T>::shiftLineTerminator()
628 {
629     ASSERT(isLineTerminator(m_current));
630
631     m_positionBeforeLastNewline = currentPosition();
632     T prev = m_current;
633     shift();
634
635     // Allow both CRLF and LFCR.
636     if (prev + m_current == '\n' + '\r')
637         shift();
638
639     ++m_lineNumber;
640 }
641
642 template <typename T>
643 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
644 {
645     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
646 }
647
648 static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
649 {
650     return U_GET_GC_MASK(c) & U_GC_L_MASK;
651 }
652
653 static ALWAYS_INLINE bool isLatin1(LChar)
654 {
655     return true;
656 }
657
658 static ALWAYS_INLINE bool isLatin1(UChar c)
659 {
660     return c < 256;
661 }
662
663 static inline bool isIdentStart(LChar c)
664 {
665     return typesOfLatin1Characters[c] == CharacterIdentifierStart;
666 }
667
668 static inline bool isIdentStart(UChar c)
669 {
670     return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
671 }
672
673 static NEVER_INLINE bool isNonLatin1IdentPart(int c)
674 {
675     return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
676 }
677
678 static ALWAYS_INLINE bool isIdentPart(LChar c)
679 {
680     // Character types are divided into two groups depending on whether they can be part of an
681     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
682     // part of an identifier. (See the CharacterType definition for more details.)
683     return typesOfLatin1Characters[c] <= CharacterNumber;
684 }
685
686 static ALWAYS_INLINE bool isIdentPart(UChar c)
687 {
688     return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
689 }
690
691 template <typename T>
692 bool isUnicodeEscapeIdentPart(const T* code)
693 {
694     T char1 = code[0];
695     T char2 = code[1];
696     T char3 = code[2];
697     T char4 = code[3];
698     
699     if (!isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3) || !isASCIIHexDigit(char4))
700         return false;
701     
702     return isIdentPart(Lexer<T>::convertUnicode(char1, char2, char3, char4));
703 }
704
705 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
706 {
707     if (isIdentPart(*code))
708         return true;
709
710     return (*code == '\\' && ((codeEnd - code) >= 6) && code[1] == 'u' && isUnicodeEscapeIdentPart(code+2));
711 }
712
713 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
714 {
715     if (isIdentPart(*code))
716         return true;
717     
718     return (*code == '\\' && ((codeEnd - code) >= 6) && code[1] == 'u' && isUnicodeEscapeIdentPart(code+2));
719 }
720
721 static inline LChar singleEscape(int c)
722 {
723     if (c < 128) {
724         ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
725         return singleCharacterEscapeValuesForASCII[c];
726     }
727     return 0;
728 }
729
730 template <typename T>
731 inline void Lexer<T>::record8(int c)
732 {
733     ASSERT(c >= 0);
734     ASSERT(c <= 0xFF);
735     m_buffer8.append(static_cast<LChar>(c));
736 }
737
// Generic form: asserts that c lies within 0..0xFF.
template <typename T>
inline void assertCharIsIn8BitRange(T c)
{
    // Presumably keeps the parameter "used" in builds where ASSERT expands to nothing.
    UNUSED_PARAM(c);
    ASSERT(c <= 0xFF);
    ASSERT(c >= 0);
}
745
746 template <>
747 inline void assertCharIsIn8BitRange(UChar c)
748 {
749     UNUSED_PARAM(c);
750     ASSERT(c <= 0xFF);
751 }
752
753 template <>
754 inline void assertCharIsIn8BitRange(LChar)
755 {
756 }
757
758 template <typename T>
759 inline void Lexer<T>::append8(const T* p, size_t length)
760 {
761     size_t currentSize = m_buffer8.size();
762     m_buffer8.grow(currentSize + length);
763     LChar* rawBuffer = m_buffer8.data() + currentSize;
764
765     for (size_t i = 0; i < length; i++) {
766         T c = p[i];
767         assertCharIsIn8BitRange(c);
768         rawBuffer[i] = c;
769     }
770 }
771
772 template <typename T>
773 inline void Lexer<T>::append16(const LChar* p, size_t length)
774 {
775     size_t currentSize = m_buffer16.size();
776     m_buffer16.grow(currentSize + length);
777     UChar* rawBuffer = m_buffer16.data() + currentSize;
778
779     for (size_t i = 0; i < length; i++)
780         rawBuffer[i] = p[i];
781 }
782
783 template <typename T>
784 inline void Lexer<T>::record16(T c)
785 {
786     m_buffer16.append(c);
787 }
788
789 template <typename T>
790 inline void Lexer<T>::record16(int c)
791 {
792     ASSERT(c >= 0);
793     ASSERT(c <= static_cast<int>(USHRT_MAX));
794     m_buffer16.append(static_cast<UChar>(c));
795 }
796     
797 #if !ASSERT_DISABLED
798 bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
799 {
800     if (!ident)
801         return true;
802     /* Just block any use of suspicious identifiers.  This is intended to
803      * be used as a safety net while implementing builtins.
804      */
805     if (*ident == vm.propertyNames->builtinNames().callPublicName())
806         return false;
807     if (*ident == vm.propertyNames->builtinNames().applyPublicName())
808         return false;
809     if (*ident == vm.propertyNames->eval)
810         return false;
811     if (*ident == vm.propertyNames->Function)
812         return false;
813     return true;
814 }
815 #endif
816     
817 template <>
818 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
819 {
820     const ptrdiff_t remaining = m_codeEnd - m_code;
821     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
822         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
823         if (keyword != IDENT) {
824             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
825             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
826         }
827     }
828     
829     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
830     if (isPrivateName)
831         shift();
832     
833     const LChar* identifierStart = currentSourcePtr();
834     unsigned identifierLineStart = currentLineStartOffset();
835     
836     while (isIdentPart(m_current))
837         shift();
838     
839     if (UNLIKELY(m_current == '\\')) {
840         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
841         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
842     }
843
844     const Identifier* ident = 0;
845     
846     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
847         int identifierLength = currentSourcePtr() - identifierStart;
848         ident = makeIdentifier(identifierStart, identifierLength);
849         if (m_parsingBuiltinFunction) {
850             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
851                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
852                 return ERRORTOK;
853             }
854             if (isPrivateName)
855                 ident = m_vm->propertyNames->getPrivateName(*ident);
856             else if (*ident == m_vm->propertyNames->undefinedKeyword)
857                 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
858             if (!ident)
859                 return INVALID_PRIVATE_NAME_ERRORTOK;
860         }
861         tokenData->ident = ident;
862     } else
863         tokenData->ident = 0;
864
865     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
866         ASSERT(shouldCreateIdentifier);
867         if (remaining < maxTokenLength) {
868             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
869             ASSERT((remaining < maxTokenLength) || !entry);
870             if (!entry)
871                 return IDENT;
872             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
873             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
874         }
875         return IDENT;
876     }
877
878     return IDENT;
879 }
880
881 template <>
882 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
883 {
884     const ptrdiff_t remaining = m_codeEnd - m_code;
885     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
886         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
887         if (keyword != IDENT) {
888             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
889             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
890         }
891     }
892     
893     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
894     if (isPrivateName)
895         shift();
896
897     const UChar* identifierStart = currentSourcePtr();
898     int identifierLineStart = currentLineStartOffset();
899
900     UChar orAllChars = 0;
901     
902     while (isIdentPart(m_current)) {
903         orAllChars |= m_current;
904         shift();
905     }
906     
907     if (UNLIKELY(m_current == '\\')) {
908         ASSERT(!isPrivateName);
909         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
910         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
911     }
912
913     bool isAll8Bit = false;
914
915     if (!(orAllChars & ~0xff))
916         isAll8Bit = true;
917
918     const Identifier* ident = 0;
919     
920     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
921         int identifierLength = currentSourcePtr() - identifierStart;
922         if (isAll8Bit)
923             ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
924         else
925             ident = makeIdentifier(identifierStart, identifierLength);
926         if (m_parsingBuiltinFunction) {
927             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
928                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
929                 return ERRORTOK;
930             }
931             if (isPrivateName)
932                 ident = m_vm->propertyNames->getPrivateName(*ident);
933             else if (*ident == m_vm->propertyNames->undefinedKeyword)
934                 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
935             if (!ident)
936                 return INVALID_PRIVATE_NAME_ERRORTOK;
937         }
938         tokenData->ident = ident;
939     } else
940         tokenData->ident = 0;
941     
942     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
943         ASSERT(shouldCreateIdentifier);
944         if (remaining < maxTokenLength) {
945             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
946             ASSERT((remaining < maxTokenLength) || !entry);
947             if (!entry)
948                 return IDENT;
949             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
950             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
951         }
952         return IDENT;
953     }
954
955     return IDENT;
956 }
957
958 template <typename T>
959 template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
960 {
961     const ptrdiff_t remaining = m_codeEnd - m_code;
962     const T* identifierStart = currentSourcePtr();
963     bool bufferRequired = false;
964
965     while (true) {
966         if (LIKELY(isIdentPart(m_current))) {
967             shift();
968             continue;
969         }
970         if (LIKELY(m_current != '\\'))
971             break;
972
973         // \uXXXX unicode characters.
974         bufferRequired = true;
975         if (identifierStart != currentSourcePtr())
976             m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
977         shift();
978         if (UNLIKELY(m_current != 'u'))
979             return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
980         shift();
981         UnicodeHexValue character = parseFourDigitUnicodeHex();
982         if (UNLIKELY(!character.isValid()))
983             return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
984         UChar ucharacter = static_cast<UChar>(character.value());
985         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
986             return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
987         if (shouldCreateIdentifier)
988             record16(ucharacter);
989         identifierStart = currentSourcePtr();
990     }
991
992     int identifierLength;
993     const Identifier* ident = 0;
994     if (shouldCreateIdentifier) {
995         if (!bufferRequired) {
996             identifierLength = currentSourcePtr() - identifierStart;
997             ident = makeIdentifier(identifierStart, identifierLength);
998         } else {
999             if (identifierStart != currentSourcePtr())
1000                 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1001             ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1002         }
1003
1004         tokenData->ident = ident;
1005     } else
1006         tokenData->ident = 0;
1007
1008     if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
1009         ASSERT(shouldCreateIdentifier);
1010         // Keywords must not be recognized if there was an \uXXXX in the identifier.
1011         if (remaining < maxTokenLength) {
1012             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
1013             ASSERT((remaining < maxTokenLength) || !entry);
1014             if (!entry)
1015                 return IDENT;
1016             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1017             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1018         }
1019         return IDENT;
1020     }
1021
1022     m_buffer16.resize(0);
1023     return IDENT;
1024 }
1025
1026 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1027 {
1028     return character < 0xE;
1029 }
1030
1031 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1032 {
1033     return character < 0xE || character > 0xFF;
1034 }
1035
1036 template <typename T>
1037 template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1038 {
1039     int startingOffset = currentOffset();
1040     int startingLineStartOffset = currentLineStartOffset();
1041     int startingLineNumber = lineNumber();
1042     T stringQuoteCharacter = m_current;
1043     shift();
1044
1045     const T* stringStart = currentSourcePtr();
1046
1047     while (m_current != stringQuoteCharacter) {
1048         if (UNLIKELY(m_current == '\\')) {
1049             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1050                 append8(stringStart, currentSourcePtr() - stringStart);
1051             shift();
1052
1053             LChar escape = singleEscape(m_current);
1054
1055             // Most common escape sequences first
1056             if (escape) {
1057                 if (shouldBuildStrings)
1058                     record8(escape);
1059                 shift();
1060             } else if (UNLIKELY(isLineTerminator(m_current)))
1061                 shiftLineTerminator();
1062             else if (m_current == 'x') {
1063                 shift();
1064                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1065                     m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1066                     return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1067                 }
1068                 T prev = m_current;
1069                 shift();
1070                 if (shouldBuildStrings)
1071                     record8(convertHex(prev, m_current));
1072                 shift();
1073             } else {
1074                 setOffset(startingOffset, startingLineStartOffset);
1075                 setLineNumber(startingLineNumber);
1076                 m_buffer8.resize(0);
1077                 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1078             }
1079             stringStart = currentSourcePtr();
1080             continue;
1081         }
1082
1083         if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1084             setOffset(startingOffset, startingLineStartOffset);
1085             setLineNumber(startingLineNumber);
1086             m_buffer8.resize(0);
1087             return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1088         }
1089
1090         shift();
1091     }
1092
1093     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1094         append8(stringStart, currentSourcePtr() - stringStart);
1095     if (shouldBuildStrings) {
1096         tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1097         m_buffer8.resize(0);
1098     } else
1099         tokenData->ident = 0;
1100
1101     return StringParsedSuccessfully;
1102 }
1103
1104 template <typename T>
1105 template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
1106 {
1107     T stringQuoteCharacter = m_current;
1108     shift();
1109
1110     const T* stringStart = currentSourcePtr();
1111
1112     while (m_current != stringQuoteCharacter) {
1113         if (UNLIKELY(m_current == '\\')) {
1114             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1115                 append16(stringStart, currentSourcePtr() - stringStart);
1116             shift();
1117
1118             LChar escape = singleEscape(m_current);
1119
1120             // Most common escape sequences first
1121             if (escape) {
1122                 if (shouldBuildStrings)
1123                     record16(escape);
1124                 shift();
1125             } else if (UNLIKELY(isLineTerminator(m_current)))
1126                 shiftLineTerminator();
1127             else if (m_current == 'x') {
1128                 shift();
1129                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1130                     m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1131                     return StringCannotBeParsed;
1132                 }
1133                 T prev = m_current;
1134                 shift();
1135                 if (shouldBuildStrings)
1136                     record16(convertHex(prev, m_current));
1137                 shift();
1138             } else if (m_current == 'u') {
1139                 shift();
1140                 UnicodeHexValue character = parseFourDigitUnicodeHex();
1141                 if (character.isValid()) {
1142                     if (shouldBuildStrings)
1143                         record16(character.value());
1144                 } else if (m_current == stringQuoteCharacter) {
1145                     if (shouldBuildStrings)
1146                         record16('u');
1147                 } else {
1148                     m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence");
1149                     return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
1150                 }
1151             } else if (strictMode && isASCIIDigit(m_current)) {
1152                 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1153                 int character1 = m_current;
1154                 shift();
1155                 if (character1 != '0' || isASCIIDigit(m_current)) {
1156                     m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'");
1157                     return StringCannotBeParsed;
1158                 }
1159                 if (shouldBuildStrings)
1160                     record16(0);
1161             } else if (!strictMode && isASCIIOctalDigit(m_current)) {
1162                 // Octal character sequences
1163                 T character1 = m_current;
1164                 shift();
1165                 if (isASCIIOctalDigit(m_current)) {
1166                     // Two octal characters
1167                     T character2 = m_current;
1168                     shift();
1169                     if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1170                         if (shouldBuildStrings)
1171                             record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1172                         shift();
1173                     } else {
1174                         if (shouldBuildStrings)
1175                             record16((character1 - '0') * 8 + character2 - '0');
1176                     }
1177                 } else {
1178                     if (shouldBuildStrings)
1179                         record16(character1 - '0');
1180                 }
1181             } else if (!atEnd()) {
1182                 if (shouldBuildStrings)
1183                     record16(m_current);
1184                 shift();
1185             } else {
1186                 m_lexErrorMessage = ASCIILiteral("Unterminated string constant");
1187                 return StringUnterminated;
1188             }
1189
1190             stringStart = currentSourcePtr();
1191             continue;
1192         }
1193         // Fast check for characters that require special handling.
1194         // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1195         // as possible, and lets through all common ASCII characters.
1196         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1197             // New-line or end of input is not allowed
1198             if (atEnd() || isLineTerminator(m_current)) {
1199                 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
1200                 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1201             }
1202             // Anything else is just a normal character
1203         }
1204         shift();
1205     }
1206
1207     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1208         append16(stringStart, currentSourcePtr() - stringStart);
1209     if (shouldBuildStrings)
1210         tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1211     else
1212         tokenData->ident = 0;
1213
1214     m_buffer16.resize(0);
1215     return StringParsedSuccessfully;
1216 }
1217
1218 template <typename T>
1219 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
1220 {
1221     // Optimization: most hexadecimal values fit into 4 bytes.
1222     uint32_t hexValue = 0;
1223     int maximumDigits = 7;
1224
1225     do {
1226         hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1227         shift();
1228         --maximumDigits;
1229     } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1230
1231     if (maximumDigits >= 0) {
1232         returnValue = hexValue;
1233         return;
1234     }
1235
1236     // No more place in the hexValue buffer.
1237     // The values are shifted out and placed into the m_buffer8 vector.
1238     for (int i = 0; i < 8; ++i) {
1239          int digit = hexValue >> 28;
1240          if (digit < 10)
1241              record8(digit + '0');
1242          else
1243              record8(digit - 10 + 'a');
1244          hexValue <<= 4;
1245     }
1246
1247     while (isASCIIHexDigit(m_current)) {
1248         record8(m_current);
1249         shift();
1250     }
1251
1252     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1253 }
1254
1255 template <typename T>
1256 ALWAYS_INLINE bool Lexer<T>::parseBinary(double& returnValue)
1257 {
1258     // Optimization: most binary values fit into 4 bytes.
1259     uint32_t binaryValue = 0;
1260     const unsigned maximumDigits = 32;
1261     int digit = maximumDigits - 1;
1262     // Temporary buffer for the digits. Makes easier
1263     // to reconstruct the input characters when needed.
1264     LChar digits[maximumDigits];
1265
1266     do {
1267         binaryValue = (binaryValue << 1) + (m_current - '0');
1268         digits[digit] = m_current;
1269         shift();
1270         --digit;
1271     } while (isASCIIBinaryDigit(m_current) && digit >= 0);
1272
1273     if (!isASCIIDigit(m_current) && digit >= 0) {
1274         returnValue = binaryValue;
1275         return true;
1276     }
1277
1278     for (int i = maximumDigits - 1; i > digit; --i)
1279         record8(digits[i]);
1280
1281     while (isASCIIBinaryDigit(m_current)) {
1282         record8(m_current);
1283         shift();
1284     }
1285
1286     if (isASCIIDigit(m_current))
1287         return false;
1288
1289     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2);
1290     return true;
1291 }
1292
1293 template <typename T>
1294 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
1295 {
1296     // Optimization: most octal values fit into 4 bytes.
1297     uint32_t octalValue = 0;
1298     const unsigned maximumDigits = 10;
1299     int digit = maximumDigits - 1;
1300     // Temporary buffer for the digits. Makes easier
1301     // to reconstruct the input characters when needed.
1302     LChar digits[maximumDigits];
1303
1304     do {
1305         octalValue = octalValue * 8 + (m_current - '0');
1306         digits[digit] = m_current;
1307         shift();
1308         --digit;
1309     } while (isASCIIOctalDigit(m_current) && digit >= 0);
1310
1311     if (!isASCIIDigit(m_current) && digit >= 0) {
1312         returnValue = octalValue;
1313         return true;
1314     }
1315
1316     for (int i = maximumDigits - 1; i > digit; --i)
1317          record8(digits[i]);
1318
1319     while (isASCIIOctalDigit(m_current)) {
1320         record8(m_current);
1321         shift();
1322     }
1323
1324     if (isASCIIDigit(m_current))
1325         return false;
1326
1327     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1328     return true;
1329 }
1330
1331 template <typename T>
1332 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
1333 {
1334     // Optimization: most decimal values fit into 4 bytes.
1335     uint32_t decimalValue = 0;
1336
1337     // Since parseOctal may be executed before parseDecimal,
1338     // the m_buffer8 may hold ascii digits.
1339     if (!m_buffer8.size()) {
1340         const unsigned maximumDigits = 10;
1341         int digit = maximumDigits - 1;
1342         // Temporary buffer for the digits. Makes easier
1343         // to reconstruct the input characters when needed.
1344         LChar digits[maximumDigits];
1345
1346         do {
1347             decimalValue = decimalValue * 10 + (m_current - '0');
1348             digits[digit] = m_current;
1349             shift();
1350             --digit;
1351         } while (isASCIIDigit(m_current) && digit >= 0);
1352
1353         if (digit >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1354             returnValue = decimalValue;
1355             return true;
1356         }
1357
1358         for (int i = maximumDigits - 1; i > digit; --i)
1359             record8(digits[i]);
1360     }
1361
1362     while (isASCIIDigit(m_current)) {
1363         record8(m_current);
1364         shift();
1365     }
1366
1367     return false;
1368 }
1369
1370 template <typename T>
1371 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1372 {
1373     record8('.');
1374     while (isASCIIDigit(m_current)) {
1375         record8(m_current);
1376         shift();
1377     }
1378 }
1379
1380 template <typename T>
1381 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1382 {
1383     record8('e');
1384     shift();
1385     if (m_current == '+' || m_current == '-') {
1386         record8(m_current);
1387         shift();
1388     }
1389
1390     if (!isASCIIDigit(m_current))
1391         return false;
1392
1393     do {
1394         record8(m_current);
1395         shift();
1396     } while (isASCIIDigit(m_current));
1397     return true;
1398 }
1399
1400 template <typename T>
1401 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1402 {
1403     while (true) {
1404         while (UNLIKELY(m_current == '*')) {
1405             shift();
1406             if (m_current == '/') {
1407                 shift();
1408                 return true;
1409             }
1410         }
1411
1412         if (atEnd())
1413             return false;
1414
1415         if (isLineTerminator(m_current)) {
1416             shiftLineTerminator();
1417             m_terminator = true;
1418         } else
1419             shift();
1420     }
1421 }
1422
1423 template <typename T>
1424 bool Lexer<T>::nextTokenIsColon()
1425 {
1426     const T* code = m_code;
1427     while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1428         code++;
1429     
1430     return code < m_codeEnd && *code == ':';
1431 }
1432
1433 template <typename T>
1434 JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
1435 {
1436     JSTokenData* tokenData = &tokenRecord->m_data;
1437     JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1438     ASSERT(!m_error);
1439     ASSERT(m_buffer8.isEmpty());
1440     ASSERT(m_buffer16.isEmpty());
1441
1442     JSTokenType token = ERRORTOK;
1443     m_terminator = false;
1444
1445 start:
1446     while (isWhiteSpace(m_current))
1447         shift();
1448
1449     if (atEnd())
1450         return EOFTOK;
1451     
1452     tokenLocation->startOffset = currentOffset();
1453     ASSERT(currentOffset() >= currentLineStartOffset());
1454     tokenRecord->m_startPosition = currentPosition();
1455
1456     CharacterType type;
1457     if (LIKELY(isLatin1(m_current)))
1458         type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1459     else if (isNonLatin1IdentStart(m_current))
1460         type = CharacterIdentifierStart;
1461     else if (isLineTerminator(m_current))
1462         type = CharacterLineTerminator;
1463     else
1464         type = CharacterInvalid;
1465
1466     switch (type) {
1467     case CharacterGreater:
1468         shift();
1469         if (m_current == '>') {
1470             shift();
1471             if (m_current == '>') {
1472                 shift();
1473                 if (m_current == '=') {
1474                     shift();
1475                     token = URSHIFTEQUAL;
1476                     break;
1477                 }
1478                 token = URSHIFT;
1479                 break;
1480             }
1481             if (m_current == '=') {
1482                 shift();
1483                 token = RSHIFTEQUAL;
1484                 break;
1485             }
1486             token = RSHIFT;
1487             break;
1488         }
1489         if (m_current == '=') {
1490             shift();
1491             token = GE;
1492             break;
1493         }
1494         token = GT;
1495         break;
1496     case CharacterEqual:
1497         shift();
1498         if (m_current == '=') {
1499             shift();
1500             if (m_current == '=') {
1501                 shift();
1502                 token = STREQ;
1503                 break;
1504             }
1505             token = EQEQ;
1506             break;
1507         }
1508         token = EQUAL;
1509         break;
1510     case CharacterLess:
1511         shift();
1512         if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1513             // <!-- marks the beginning of a line comment (for www usage)
1514             goto inSingleLineComment;
1515         }
1516         if (m_current == '<') {
1517             shift();
1518             if (m_current == '=') {
1519                 shift();
1520                 token = LSHIFTEQUAL;
1521                 break;
1522             }
1523             token = LSHIFT;
1524             break;
1525         }
1526         if (m_current == '=') {
1527             shift();
1528             token = LE;
1529             break;
1530         }
1531         token = LT;
1532         break;
1533     case CharacterExclamationMark:
1534         shift();
1535         if (m_current == '=') {
1536             shift();
1537             if (m_current == '=') {
1538                 shift();
1539                 token = STRNEQ;
1540                 break;
1541             }
1542             token = NE;
1543             break;
1544         }
1545         token = EXCLAMATION;
1546         break;
1547     case CharacterAdd:
1548         shift();
1549         if (m_current == '+') {
1550             shift();
1551             token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1552             break;
1553         }
1554         if (m_current == '=') {
1555             shift();
1556             token = PLUSEQUAL;
1557             break;
1558         }
1559         token = PLUS;
1560         break;
1561     case CharacterSub:
1562         shift();
1563         if (m_current == '-') {
1564             shift();
1565             if (m_atLineStart && m_current == '>') {
1566                 shift();
1567                 goto inSingleLineComment;
1568             }
1569             token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1570             break;
1571         }
1572         if (m_current == '=') {
1573             shift();
1574             token = MINUSEQUAL;
1575             break;
1576         }
1577         token = MINUS;
1578         break;
1579     case CharacterMultiply:
1580         shift();
1581         if (m_current == '=') {
1582             shift();
1583             token = MULTEQUAL;
1584             break;
1585         }
1586         token = TIMES;
1587         break;
1588     case CharacterSlash:
1589         shift();
1590         if (m_current == '/') {
1591             shift();
1592             goto inSingleLineComment;
1593         }
1594         if (m_current == '*') {
1595             shift();
1596             if (parseMultilineComment())
1597                 goto start;
1598             m_lexErrorMessage = ASCIILiteral("Multiline comment was not closed properly");
1599             token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
1600             goto returnError;
1601         }
1602         if (m_current == '=') {
1603             shift();
1604             token = DIVEQUAL;
1605             break;
1606         }
1607         token = DIVIDE;
1608         break;
1609     case CharacterAnd:
1610         shift();
1611         if (m_current == '&') {
1612             shift();
1613             token = AND;
1614             break;
1615         }
1616         if (m_current == '=') {
1617             shift();
1618             token = ANDEQUAL;
1619             break;
1620         }
1621         token = BITAND;
1622         break;
1623     case CharacterXor:
1624         shift();
1625         if (m_current == '=') {
1626             shift();
1627             token = XOREQUAL;
1628             break;
1629         }
1630         token = BITXOR;
1631         break;
1632     case CharacterModulo:
1633         shift();
1634         if (m_current == '=') {
1635             shift();
1636             token = MODEQUAL;
1637             break;
1638         }
1639         token = MOD;
1640         break;
1641     case CharacterOr:
1642         shift();
1643         if (m_current == '=') {
1644             shift();
1645             token = OREQUAL;
1646             break;
1647         }
1648         if (m_current == '|') {
1649             shift();
1650             token = OR;
1651             break;
1652         }
1653         token = BITOR;
1654         break;
1655     case CharacterOpenParen:
1656         token = OPENPAREN;
1657         shift();
1658         break;
1659     case CharacterCloseParen:
1660         token = CLOSEPAREN;
1661         shift();
1662         break;
1663     case CharacterOpenBracket:
1664         token = OPENBRACKET;
1665         shift();
1666         break;
1667     case CharacterCloseBracket:
1668         token = CLOSEBRACKET;
1669         shift();
1670         break;
1671     case CharacterComma:
1672         token = COMMA;
1673         shift();
1674         break;
1675     case CharacterColon:
1676         token = COLON;
1677         shift();
1678         break;
1679     case CharacterQuestion:
1680         token = QUESTION;
1681         shift();
1682         break;
1683     case CharacterTilde:
1684         token = TILDE;
1685         shift();
1686         break;
1687     case CharacterSemicolon:
1688         shift();
1689         token = SEMICOLON;
1690         break;
1691     case CharacterOpenBrace:
1692         tokenData->line = lineNumber();
1693         tokenData->offset = currentOffset();
1694         tokenData->lineStartOffset = currentLineStartOffset();
1695         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1696         shift();
1697         token = OPENBRACE;
1698         break;
1699     case CharacterCloseBrace:
1700         tokenData->line = lineNumber();
1701         tokenData->offset = currentOffset();
1702         tokenData->lineStartOffset = currentLineStartOffset();
1703         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1704         shift();
1705         token = CLOSEBRACE;
1706         break;
1707     case CharacterDot:
1708         shift();
1709         if (!isASCIIDigit(m_current)) {
1710             if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
1711                 shift();
1712                 shift();
1713                 token = DOTDOTDOT;
1714                 break;
1715             }
1716             token = DOT;
1717             break;
1718         }
1719         goto inNumberAfterDecimalPoint;
1720     case CharacterZero:
1721         shift();
1722         if ((m_current | 0x20) == 'x') {
1723             if (!isASCIIHexDigit(peek(1))) {
1724                 m_lexErrorMessage = ASCIILiteral("No hexadecimal digits after '0x'");
1725                 token = INVALID_HEX_NUMBER_ERRORTOK;
1726                 goto returnError;
1727             }
1728
1729             // Shift out the 'x' prefix.
1730             shift();
1731
1732             parseHex(tokenData->doubleValue);
1733             if (isIdentStart(m_current)) {
1734                 m_lexErrorMessage = ASCIILiteral("No space between hexadecimal literal and identifier");
1735                 token = INVALID_HEX_NUMBER_ERRORTOK;
1736                 goto returnError;
1737             }
1738             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1739             m_buffer8.resize(0);
1740             break;
1741         }
1742         if ((m_current | 0x20) == 'b') {
1743             if (!isASCIIBinaryDigit(peek(1))) {
1744                 m_lexErrorMessage = ASCIILiteral("No binary digits after '0b'");
1745                 token = INVALID_BINARY_NUMBER_ERRORTOK;
1746                 goto returnError;
1747             }
1748
1749             // Shift out the 'b' prefix.
1750             shift();
1751
1752             parseBinary(tokenData->doubleValue);
1753             if (isIdentStart(m_current)) {
1754                 m_lexErrorMessage = ASCIILiteral("No space between binary literal and identifier");
1755                 token = INVALID_BINARY_NUMBER_ERRORTOK;
1756                 goto returnError;
1757             }
1758             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1759             m_buffer8.resize(0);
1760             break;
1761         }
1762
1763         if ((m_current | 0x20) == 'o') {
1764             if (!isASCIIOctalDigit(peek(1))) {
1765                 m_lexErrorMessage = ASCIILiteral("No octal digits after '0o'");
1766                 token = INVALID_OCTAL_NUMBER_ERRORTOK;
1767                 goto returnError;
1768             }
1769
1770             // Shift out the 'o' prefix.
1771             shift();
1772
1773             parseOctal(tokenData->doubleValue);
1774             if (isIdentStart(m_current)) {
1775                 m_lexErrorMessage = ASCIILiteral("No space between octal literal and identifier");
1776                 token = INVALID_OCTAL_NUMBER_ERRORTOK;
1777                 goto returnError;
1778             }
1779             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1780             m_buffer8.resize(0);
1781             break;
1782         }
1783
1784         record8('0');
1785         if (strictMode && isASCIIDigit(m_current)) {
1786             m_lexErrorMessage = ASCIILiteral("Decimal integer literals with a leading zero are forbidden in strict mode");
1787             token = INVALID_OCTAL_NUMBER_ERRORTOK;
1788             goto returnError;
1789         }
1790         if (isASCIIOctalDigit(m_current)) {
1791             if (parseOctal(tokenData->doubleValue)) {
1792                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1793             }
1794         }
1795         FALLTHROUGH;
1796     case CharacterNumber:
1797         if (LIKELY(token != INTEGER && token != DOUBLE)) {
1798             if (!parseDecimal(tokenData->doubleValue)) {
1799                 token = INTEGER;
1800                 if (m_current == '.') {
1801                     shift();
1802 inNumberAfterDecimalPoint:
1803                     parseNumberAfterDecimalPoint();
1804                     token = DOUBLE;
1805                 }
1806                 if ((m_current | 0x20) == 'e') {
1807                     if (!parseNumberAfterExponentIndicator()) {
1808                         m_lexErrorMessage = ASCIILiteral("Non-number found after exponent indicator");
1809                         token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
1810                         goto returnError;
1811                     }
1812                 }
1813                 size_t parsedLength;
1814                 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
1815                 if (token == INTEGER)
1816                     token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1817             } else
1818                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
1819         }
1820
1821         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
1822         if (UNLIKELY(isIdentStart(m_current))) {
1823             m_lexErrorMessage = ASCIILiteral("At least one digit must occur after a decimal point");
1824             token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
1825             goto returnError;
1826         }
1827         m_buffer8.resize(0);
1828         break;
1829     case CharacterQuote:
1830         if (lexerFlags & LexerFlagsDontBuildStrings) {
1831             StringParseResult result = parseString<false>(tokenData, strictMode);
1832             if (UNLIKELY(result != StringParsedSuccessfully)) {
1833                 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
1834                 goto returnError;
1835             }
1836         } else {
1837             StringParseResult result = parseString<true>(tokenData, strictMode);
1838             if (UNLIKELY(result != StringParsedSuccessfully)) {
1839                 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
1840                 goto returnError;
1841             }
1842         }
1843         shift();
1844         token = STRING;
1845         break;
1846     case CharacterIdentifierStart:
1847         ASSERT(isIdentStart(m_current));
1848         FALLTHROUGH;
1849     case CharacterBackSlash:
1850         parseIdent:
1851         if (lexerFlags & LexexFlagsDontBuildKeywords)
1852             token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
1853         else
1854             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
1855         break;
1856     case CharacterLineTerminator:
1857         ASSERT(isLineTerminator(m_current));
1858         shiftLineTerminator();
1859         m_atLineStart = true;
1860         m_terminator = true;
1861         m_lineStart = m_code;
1862         goto start;
1863     case CharacterPrivateIdentifierStart:
1864         if (m_parsingBuiltinFunction)
1865             goto parseIdent;
1866
1867         FALLTHROUGH;
1868     case CharacterInvalid:
1869         m_lexErrorMessage = invalidCharacterMessage();
1870         token = ERRORTOK;
1871         goto returnError;
1872     default:
1873         RELEASE_ASSERT_NOT_REACHED();
1874         m_lexErrorMessage = ASCIILiteral("Internal Error");
1875         token = ERRORTOK;
1876         goto returnError;
1877     }
1878
1879     m_atLineStart = false;
1880     goto returnToken;
1881
1882 inSingleLineComment:
1883     while (!isLineTerminator(m_current)) {
1884         if (atEnd())
1885             return EOFTOK;
1886         shift();
1887     }
1888     shiftLineTerminator();
1889     m_atLineStart = true;
1890     m_terminator = true;
1891     m_lineStart = m_code;
1892     if (!lastTokenWasRestrKeyword())
1893         goto start;
1894
1895     token = SEMICOLON;
1896     // Fall through into returnToken.
1897
1898 returnToken:
1899     tokenLocation->line = m_lineNumber;
1900     tokenLocation->endOffset = currentOffset();
1901     tokenLocation->lineStartOffset = currentLineStartOffset();
1902     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1903     tokenRecord->m_endPosition = currentPosition();
1904     m_lastToken = token;
1905     return token;
1906
1907 returnError:
1908     m_error = true;
1909     tokenLocation->line = m_lineNumber;
1910     tokenLocation->endOffset = currentOffset();
1911     tokenLocation->lineStartOffset = currentLineStartOffset();
1912     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1913     tokenRecord->m_endPosition = currentPosition();
1914     RELEASE_ASSERT(token & ErrorTokenFlag);
1915     return token;
1916 }
1917
1918 template <typename T>
1919 static inline void orCharacter(UChar&, UChar);
1920
1921 template <>
1922 inline void orCharacter<LChar>(UChar&, UChar) { }
1923
1924 template <>
1925 inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
1926 {
1927     orAccumulator |= character;
1928 }
1929
1930 template <typename T>
1931 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
1932 {
1933     ASSERT(m_buffer16.isEmpty());
1934
1935     bool lastWasEscape = false;
1936     bool inBrackets = false;
1937     UChar charactersOredTogether = 0;
1938
1939     if (patternPrefix) {
1940         ASSERT(!isLineTerminator(patternPrefix));
1941         ASSERT(patternPrefix != '/');
1942         ASSERT(patternPrefix != '[');
1943         record16(patternPrefix);
1944     }
1945
1946     while (true) {
1947         if (isLineTerminator(m_current) || atEnd()) {
1948             m_buffer16.resize(0);
1949             return false;
1950         }
1951
1952         T prev = m_current;
1953         
1954         shift();
1955
1956         if (prev == '/' && !lastWasEscape && !inBrackets)
1957             break;
1958
1959         record16(prev);
1960         orCharacter<T>(charactersOredTogether, prev);
1961
1962         if (lastWasEscape) {
1963             lastWasEscape = false;
1964             continue;
1965         }
1966
1967         switch (prev) {
1968         case '[':
1969             inBrackets = true;
1970             break;
1971         case ']':
1972             inBrackets = false;
1973             break;
1974         case '\\':
1975             lastWasEscape = true;
1976             break;
1977         }
1978     }
1979
1980     pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
1981
1982     m_buffer16.resize(0);
1983     charactersOredTogether = 0;
1984
1985     while (isIdentPart(m_current)) {
1986         record16(m_current);
1987         orCharacter<T>(charactersOredTogether, m_current);
1988         shift();
1989     }
1990
1991     flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
1992     m_buffer16.resize(0);
1993
1994     return true;
1995 }
1996
1997 template <typename T>
1998 bool Lexer<T>::skipRegExp()
1999 {
2000     bool lastWasEscape = false;
2001     bool inBrackets = false;
2002
2003     while (true) {
2004         if (isLineTerminator(m_current) || atEnd())
2005             return false;
2006
2007         T prev = m_current;
2008         
2009         shift();
2010
2011         if (prev == '/' && !lastWasEscape && !inBrackets)
2012             break;
2013
2014         if (lastWasEscape) {
2015             lastWasEscape = false;
2016             continue;
2017         }
2018
2019         switch (prev) {
2020         case '[':
2021             inBrackets = true;
2022             break;
2023         case ']':
2024             inBrackets = false;
2025             break;
2026         case '\\':
2027             lastWasEscape = true;
2028             break;
2029         }
2030     }
2031
2032     while (isIdentPart(m_current))
2033         shift();
2034
2035     return true;
2036 }
2037
2038 template <typename T>
2039 void Lexer<T>::clear()
2040 {
2041     m_arena = 0;
2042
2043     Vector<LChar> newBuffer8;
2044     m_buffer8.swap(newBuffer8);
2045
2046     Vector<UChar> newBuffer16;
2047     m_buffer16.swap(newBuffer16);
2048
2049     m_isReparsing = false;
2050 }
2051
// Explicitly instantiate the two flavors of Lexer we need (one per source
// character type, LChar and UChar) here, instead of putting most of this file
// in Lexer.h.
template class Lexer<LChar>;
template class Lexer<UChar>;
2055
2056 } // namespace JSC