[CMake] Enable USE_FOLDERS property
[WebKit-https.git] / Source / JavaScriptCore / parser / Lexer.cpp
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
7  *
8  *  This library is free software; you can redistribute it and/or
9  *  modify it under the terms of the GNU Library General Public
10  *  License as published by the Free Software Foundation; either
11  *  version 2 of the License, or (at your option) any later version.
12  *
13  *  This library is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  *  Library General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Library General Public License
19  *  along with this library; see the file COPYING.LIB.  If not, write to
20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  *  Boston, MA 02110-1301, USA.
22  *
23  */
24
25 #include "config.h"
26 #include "Lexer.h"
27
28 #include "JSFunction.h"
29
30 #include "JSGlobalObjectFunctions.h"
31 #include "Identifier.h"
32 #include "NodeInfo.h"
33 #include "Nodes.h"
34 #include <wtf/dtoa.h>
35 #include <ctype.h>
36 #include <limits.h>
37 #include <string.h>
38 #include <wtf/Assertions.h>
39
40 using namespace WTF;
41 using namespace Unicode;
42
43 #include "KeywordLookup.h"
44 #include "Lexer.lut.h"
45 #include "Parser.h"
46
47 namespace JSC {
48
49 Keywords::Keywords(JSGlobalData* globalData)
50     : m_globalData(globalData)
51     , m_keywordTable(JSC::mainTable)
52 {
53 }
54
// Lexical class of a character; typesOfLatin1Characters maps every Latin-1
// code to one of these values.
enum CharacterType {
    // Types for the main switch

    // The first three values are pinned. isIdentStart compares against
    // CharacterIdentifierStart and isIdentPart tests "<= CharacterNumber",
    // so no other type may be placed at or before CharacterNumber.
    CharacterIdentifierStart = 0,
    CharacterZero = 1,
    CharacterNumber = 2,

    // Punctuation and structural characters.
    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Operator characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Other types (only one so far)
    CharacterWhiteSpace
};
97
98 // 256 Latin-1 codes
99 static const unsigned short typesOfLatin1Characters[256] = {
100 /*   0 - Null               */ CharacterInvalid,
101 /*   1 - Start of Heading   */ CharacterInvalid,
102 /*   2 - Start of Text      */ CharacterInvalid,
103 /*   3 - End of Text        */ CharacterInvalid,
104 /*   4 - End of Transm.     */ CharacterInvalid,
105 /*   5 - Enquiry            */ CharacterInvalid,
106 /*   6 - Acknowledgment     */ CharacterInvalid,
107 /*   7 - Bell               */ CharacterInvalid,
108 /*   8 - Back Space         */ CharacterInvalid,
109 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
110 /*  10 - Line Feed          */ CharacterLineTerminator,
111 /*  11 - Vertical Tab       */ CharacterWhiteSpace,
112 /*  12 - Form Feed          */ CharacterWhiteSpace,
113 /*  13 - Carriage Return    */ CharacterLineTerminator,
114 /*  14 - Shift Out          */ CharacterInvalid,
115 /*  15 - Shift In           */ CharacterInvalid,
116 /*  16 - Data Line Escape   */ CharacterInvalid,
117 /*  17 - Device Control 1   */ CharacterInvalid,
118 /*  18 - Device Control 2   */ CharacterInvalid,
119 /*  19 - Device Control 3   */ CharacterInvalid,
120 /*  20 - Device Control 4   */ CharacterInvalid,
121 /*  21 - Negative Ack.      */ CharacterInvalid,
122 /*  22 - Synchronous Idle   */ CharacterInvalid,
123 /*  23 - End of Transmit    */ CharacterInvalid,
124 /*  24 - Cancel             */ CharacterInvalid,
125 /*  25 - End of Medium      */ CharacterInvalid,
126 /*  26 - Substitute         */ CharacterInvalid,
127 /*  27 - Escape             */ CharacterInvalid,
128 /*  28 - File Separator     */ CharacterInvalid,
129 /*  29 - Group Separator    */ CharacterInvalid,
130 /*  30 - Record Separator   */ CharacterInvalid,
131 /*  31 - Unit Separator     */ CharacterInvalid,
132 /*  32 - Space              */ CharacterWhiteSpace,
133 /*  33 - !                  */ CharacterExclamationMark,
134 /*  34 - "                  */ CharacterQuote,
135 /*  35 - #                  */ CharacterInvalid,
136 /*  36 - $                  */ CharacterIdentifierStart,
137 /*  37 - %                  */ CharacterModulo,
138 /*  38 - &                  */ CharacterAnd,
139 /*  39 - '                  */ CharacterQuote,
140 /*  40 - (                  */ CharacterOpenParen,
141 /*  41 - )                  */ CharacterCloseParen,
142 /*  42 - *                  */ CharacterMultiply,
143 /*  43 - +                  */ CharacterAdd,
144 /*  44 - ,                  */ CharacterComma,
145 /*  45 - -                  */ CharacterSub,
146 /*  46 - .                  */ CharacterDot,
147 /*  47 - /                  */ CharacterSlash,
148 /*  48 - 0                  */ CharacterZero,
149 /*  49 - 1                  */ CharacterNumber,
150 /*  50 - 2                  */ CharacterNumber,
151 /*  51 - 3                  */ CharacterNumber,
152 /*  52 - 4                  */ CharacterNumber,
153 /*  53 - 5                  */ CharacterNumber,
154 /*  54 - 6                  */ CharacterNumber,
155 /*  55 - 7                  */ CharacterNumber,
156 /*  56 - 8                  */ CharacterNumber,
157 /*  57 - 9                  */ CharacterNumber,
158 /*  58 - :                  */ CharacterColon,
159 /*  59 - ;                  */ CharacterSemicolon,
160 /*  60 - <                  */ CharacterLess,
161 /*  61 - =                  */ CharacterEqual,
162 /*  62 - >                  */ CharacterGreater,
163 /*  63 - ?                  */ CharacterQuestion,
164 /*  64 - @                  */ CharacterInvalid,
165 /*  65 - A                  */ CharacterIdentifierStart,
166 /*  66 - B                  */ CharacterIdentifierStart,
167 /*  67 - C                  */ CharacterIdentifierStart,
168 /*  68 - D                  */ CharacterIdentifierStart,
169 /*  69 - E                  */ CharacterIdentifierStart,
170 /*  70 - F                  */ CharacterIdentifierStart,
171 /*  71 - G                  */ CharacterIdentifierStart,
172 /*  72 - H                  */ CharacterIdentifierStart,
173 /*  73 - I                  */ CharacterIdentifierStart,
174 /*  74 - J                  */ CharacterIdentifierStart,
175 /*  75 - K                  */ CharacterIdentifierStart,
176 /*  76 - L                  */ CharacterIdentifierStart,
177 /*  77 - M                  */ CharacterIdentifierStart,
178 /*  78 - N                  */ CharacterIdentifierStart,
179 /*  79 - O                  */ CharacterIdentifierStart,
180 /*  80 - P                  */ CharacterIdentifierStart,
181 /*  81 - Q                  */ CharacterIdentifierStart,
182 /*  82 - R                  */ CharacterIdentifierStart,
183 /*  83 - S                  */ CharacterIdentifierStart,
184 /*  84 - T                  */ CharacterIdentifierStart,
185 /*  85 - U                  */ CharacterIdentifierStart,
186 /*  86 - V                  */ CharacterIdentifierStart,
187 /*  87 - W                  */ CharacterIdentifierStart,
188 /*  88 - X                  */ CharacterIdentifierStart,
189 /*  89 - Y                  */ CharacterIdentifierStart,
190 /*  90 - Z                  */ CharacterIdentifierStart,
191 /*  91 - [                  */ CharacterOpenBracket,
192 /*  92 - \                  */ CharacterBackSlash,
193 /*  93 - ]                  */ CharacterCloseBracket,
194 /*  94 - ^                  */ CharacterXor,
195 /*  95 - _                  */ CharacterIdentifierStart,
196 /*  96 - `                  */ CharacterInvalid,
197 /*  97 - a                  */ CharacterIdentifierStart,
198 /*  98 - b                  */ CharacterIdentifierStart,
199 /*  99 - c                  */ CharacterIdentifierStart,
200 /* 100 - d                  */ CharacterIdentifierStart,
201 /* 101 - e                  */ CharacterIdentifierStart,
202 /* 102 - f                  */ CharacterIdentifierStart,
203 /* 103 - g                  */ CharacterIdentifierStart,
204 /* 104 - h                  */ CharacterIdentifierStart,
205 /* 105 - i                  */ CharacterIdentifierStart,
206 /* 106 - j                  */ CharacterIdentifierStart,
207 /* 107 - k                  */ CharacterIdentifierStart,
208 /* 108 - l                  */ CharacterIdentifierStart,
209 /* 109 - m                  */ CharacterIdentifierStart,
210 /* 110 - n                  */ CharacterIdentifierStart,
211 /* 111 - o                  */ CharacterIdentifierStart,
212 /* 112 - p                  */ CharacterIdentifierStart,
213 /* 113 - q                  */ CharacterIdentifierStart,
214 /* 114 - r                  */ CharacterIdentifierStart,
215 /* 115 - s                  */ CharacterIdentifierStart,
216 /* 116 - t                  */ CharacterIdentifierStart,
217 /* 117 - u                  */ CharacterIdentifierStart,
218 /* 118 - v                  */ CharacterIdentifierStart,
219 /* 119 - w                  */ CharacterIdentifierStart,
220 /* 120 - x                  */ CharacterIdentifierStart,
221 /* 121 - y                  */ CharacterIdentifierStart,
222 /* 122 - z                  */ CharacterIdentifierStart,
223 /* 123 - {                  */ CharacterOpenBrace,
224 /* 124 - |                  */ CharacterOr,
225 /* 125 - }                  */ CharacterCloseBrace,
226 /* 126 - ~                  */ CharacterTilde,
227 /* 127 - Delete             */ CharacterInvalid,
228 /* 128 - Cc category        */ CharacterInvalid,
229 /* 129 - Cc category        */ CharacterInvalid,
230 /* 130 - Cc category        */ CharacterInvalid,
231 /* 131 - Cc category        */ CharacterInvalid,
232 /* 132 - Cc category        */ CharacterInvalid,
233 /* 133 - Cc category        */ CharacterInvalid,
234 /* 134 - Cc category        */ CharacterInvalid,
235 /* 135 - Cc category        */ CharacterInvalid,
236 /* 136 - Cc category        */ CharacterInvalid,
237 /* 137 - Cc category        */ CharacterInvalid,
238 /* 138 - Cc category        */ CharacterInvalid,
239 /* 139 - Cc category        */ CharacterInvalid,
240 /* 140 - Cc category        */ CharacterInvalid,
241 /* 141 - Cc category        */ CharacterInvalid,
242 /* 142 - Cc category        */ CharacterInvalid,
243 /* 143 - Cc category        */ CharacterInvalid,
244 /* 144 - Cc category        */ CharacterInvalid,
245 /* 145 - Cc category        */ CharacterInvalid,
246 /* 146 - Cc category        */ CharacterInvalid,
247 /* 147 - Cc category        */ CharacterInvalid,
248 /* 148 - Cc category        */ CharacterInvalid,
249 /* 149 - Cc category        */ CharacterInvalid,
250 /* 150 - Cc category        */ CharacterInvalid,
251 /* 151 - Cc category        */ CharacterInvalid,
252 /* 152 - Cc category        */ CharacterInvalid,
253 /* 153 - Cc category        */ CharacterInvalid,
254 /* 154 - Cc category        */ CharacterInvalid,
255 /* 155 - Cc category        */ CharacterInvalid,
256 /* 156 - Cc category        */ CharacterInvalid,
257 /* 157 - Cc category        */ CharacterInvalid,
258 /* 158 - Cc category        */ CharacterInvalid,
259 /* 159 - Cc category        */ CharacterInvalid,
260 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
261 /* 161 - Po category        */ CharacterInvalid,
262 /* 162 - Sc category        */ CharacterInvalid,
263 /* 163 - Sc category        */ CharacterInvalid,
264 /* 164 - Sc category        */ CharacterInvalid,
265 /* 165 - Sc category        */ CharacterInvalid,
266 /* 166 - So category        */ CharacterInvalid,
267 /* 167 - So category        */ CharacterInvalid,
268 /* 168 - Sk category        */ CharacterInvalid,
269 /* 169 - So category        */ CharacterInvalid,
270 /* 170 - Ll category        */ CharacterIdentifierStart,
271 /* 171 - Pi category        */ CharacterInvalid,
272 /* 172 - Sm category        */ CharacterInvalid,
273 /* 173 - Cf category        */ CharacterInvalid,
274 /* 174 - So category        */ CharacterInvalid,
275 /* 175 - Sk category        */ CharacterInvalid,
276 /* 176 - So category        */ CharacterInvalid,
277 /* 177 - Sm category        */ CharacterInvalid,
278 /* 178 - No category        */ CharacterInvalid,
279 /* 179 - No category        */ CharacterInvalid,
280 /* 180 - Sk category        */ CharacterInvalid,
281 /* 181 - Ll category        */ CharacterIdentifierStart,
282 /* 182 - So category        */ CharacterInvalid,
283 /* 183 - Po category        */ CharacterInvalid,
284 /* 184 - Sk category        */ CharacterInvalid,
285 /* 185 - No category        */ CharacterInvalid,
286 /* 186 - Ll category        */ CharacterIdentifierStart,
287 /* 187 - Pf category        */ CharacterInvalid,
288 /* 188 - No category        */ CharacterInvalid,
289 /* 189 - No category        */ CharacterInvalid,
290 /* 190 - No category        */ CharacterInvalid,
291 /* 191 - Po category        */ CharacterInvalid,
292 /* 192 - Lu category        */ CharacterIdentifierStart,
293 /* 193 - Lu category        */ CharacterIdentifierStart,
294 /* 194 - Lu category        */ CharacterIdentifierStart,
295 /* 195 - Lu category        */ CharacterIdentifierStart,
296 /* 196 - Lu category        */ CharacterIdentifierStart,
297 /* 197 - Lu category        */ CharacterIdentifierStart,
298 /* 198 - Lu category        */ CharacterIdentifierStart,
299 /* 199 - Lu category        */ CharacterIdentifierStart,
300 /* 200 - Lu category        */ CharacterIdentifierStart,
301 /* 201 - Lu category        */ CharacterIdentifierStart,
302 /* 202 - Lu category        */ CharacterIdentifierStart,
303 /* 203 - Lu category        */ CharacterIdentifierStart,
304 /* 204 - Lu category        */ CharacterIdentifierStart,
305 /* 205 - Lu category        */ CharacterIdentifierStart,
306 /* 206 - Lu category        */ CharacterIdentifierStart,
307 /* 207 - Lu category        */ CharacterIdentifierStart,
308 /* 208 - Lu category        */ CharacterIdentifierStart,
309 /* 209 - Lu category        */ CharacterIdentifierStart,
310 /* 210 - Lu category        */ CharacterIdentifierStart,
311 /* 211 - Lu category        */ CharacterIdentifierStart,
312 /* 212 - Lu category        */ CharacterIdentifierStart,
313 /* 213 - Lu category        */ CharacterIdentifierStart,
314 /* 214 - Lu category        */ CharacterIdentifierStart,
315 /* 215 - Sm category        */ CharacterInvalid,
316 /* 216 - Lu category        */ CharacterIdentifierStart,
317 /* 217 - Lu category        */ CharacterIdentifierStart,
318 /* 218 - Lu category        */ CharacterIdentifierStart,
319 /* 219 - Lu category        */ CharacterIdentifierStart,
320 /* 220 - Lu category        */ CharacterIdentifierStart,
321 /* 221 - Lu category        */ CharacterIdentifierStart,
322 /* 222 - Lu category        */ CharacterIdentifierStart,
323 /* 223 - Ll category        */ CharacterIdentifierStart,
324 /* 224 - Ll category        */ CharacterIdentifierStart,
325 /* 225 - Ll category        */ CharacterIdentifierStart,
326 /* 226 - Ll category        */ CharacterIdentifierStart,
327 /* 227 - Ll category        */ CharacterIdentifierStart,
328 /* 228 - Ll category        */ CharacterIdentifierStart,
329 /* 229 - Ll category        */ CharacterIdentifierStart,
330 /* 230 - Ll category        */ CharacterIdentifierStart,
331 /* 231 - Ll category        */ CharacterIdentifierStart,
332 /* 232 - Ll category        */ CharacterIdentifierStart,
333 /* 233 - Ll category        */ CharacterIdentifierStart,
334 /* 234 - Ll category        */ CharacterIdentifierStart,
335 /* 235 - Ll category        */ CharacterIdentifierStart,
336 /* 236 - Ll category        */ CharacterIdentifierStart,
337 /* 237 - Ll category        */ CharacterIdentifierStart,
338 /* 238 - Ll category        */ CharacterIdentifierStart,
339 /* 239 - Ll category        */ CharacterIdentifierStart,
340 /* 240 - Ll category        */ CharacterIdentifierStart,
341 /* 241 - Ll category        */ CharacterIdentifierStart,
342 /* 242 - Ll category        */ CharacterIdentifierStart,
343 /* 243 - Ll category        */ CharacterIdentifierStart,
344 /* 244 - Ll category        */ CharacterIdentifierStart,
345 /* 245 - Ll category        */ CharacterIdentifierStart,
346 /* 246 - Ll category        */ CharacterIdentifierStart,
347 /* 247 - Sm category        */ CharacterInvalid,
348 /* 248 - Ll category        */ CharacterIdentifierStart,
349 /* 249 - Ll category        */ CharacterIdentifierStart,
350 /* 250 - Ll category        */ CharacterIdentifierStart,
351 /* 251 - Ll category        */ CharacterIdentifierStart,
352 /* 252 - Ll category        */ CharacterIdentifierStart,
353 /* 253 - Ll category        */ CharacterIdentifierStart,
354 /* 254 - Ll category        */ CharacterIdentifierStart,
355 /* 255 - Ll category        */ CharacterIdentifierStart
356 };
357
358 template <typename T>
359 Lexer<T>::Lexer(JSGlobalData* globalData)
360     : m_isReparsing(false)
361     , m_globalData(globalData)
362 {
363 }
364
365 template <typename T>
366 Lexer<T>::~Lexer()
367 {
368 }
369
370 template <typename T>
371 UString Lexer<T>::getInvalidCharMessage()
372 {
373     switch (m_current) {
374     case 0:
375         return "Invalid character: '\\0'";
376     case 10:
377         return "Invalid character: '\\n'";
378     case 11:
379         return "Invalid character: '\\v'";
380     case 13:
381         return "Invalid character: '\\r'";
382     case 35:
383         return "Invalid character: '#'";
384     case 64:
385         return "Invalid character: '@'";
386     case 96:
387         return "Invalid character: '`'";
388     default:
389         return String::format("Invalid character '\\u%04u'", m_current).impl();
390     }
391 }
392
393 template <typename T>
394 ALWAYS_INLINE const T* Lexer<T>::currentCharacter() const
395 {
396     ASSERT(m_code <= m_codeEnd);
397     return m_code;
398 }
399
400 template <typename T>
401 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
402 {
403     m_arena = &arena->identifierArena();
404     
405     m_lineNumber = source.firstLine();
406     m_delimited = false;
407     m_lastToken = -1;
408     
409     const StringImpl* sourceString = source.provider()->data();
410
411     if (sourceString)
412         setCodeStart(sourceString);
413     else
414         m_codeStart = 0;
415
416     m_source = &source;
417     m_code = m_codeStart + source.startOffset();
418     m_codeEnd = m_codeStart + source.endOffset();
419     m_error = false;
420     m_atLineStart = true;
421     m_lexErrorMessage = UString();
422     
423     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
424     m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
425     
426     if (LIKELY(m_code < m_codeEnd))
427         m_current = *m_code;
428     else
429         m_current = -1;
430     ASSERT(currentOffset() == source.startOffset());
431 }
432
433 template <typename T>
434 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
435 {
436     m_code += shiftAmount;
437     m_current = *m_code;
438 }
439
440 template <typename T>
441 ALWAYS_INLINE void Lexer<T>::shift()
442 {
443     // Faster than an if-else sequence
444     ASSERT(m_current != -1);
445     m_current = -1;
446     m_code++;
447     if (LIKELY(m_code < m_codeEnd))
448         m_current = *m_code;
449 }
450
451 template <typename T>
452 ALWAYS_INLINE int Lexer<T>::peek(int offset)
453 {
454     // Only use if necessary
455     ASSERT(offset > 0 && offset < 5);
456     const T* code = m_code + offset;
457     return (code < m_codeEnd) ? *code : -1;
458 }
459
460 template <typename T>
461 int Lexer<T>::getUnicodeCharacter()
462 {
463     int char1 = peek(1);
464     int char2 = peek(2);
465     int char3 = peek(3);
466
467     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
468         return -1;
469
470     int result = convertUnicode(m_current, char1, char2, char3);
471     shift();
472     shift();
473     shift();
474     shift();
475     return result;
476 }
477
478 template <typename T>
479 void Lexer<T>::shiftLineTerminator()
480 {
481     ASSERT(isLineTerminator(static_cast<T>(m_current)));
482
483     int m_prev = m_current;
484     shift();
485
486     // Allow both CRLF and LFCR.
487     if (m_prev + m_current == '\n' + '\r')
488         shift();
489
490     ++m_lineNumber;
491 }
492
493 template <typename T>
494 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
495 {
496     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
497 }
498
499 static NEVER_INLINE bool isNonLatin1IdentStart(int c)
500 {
501     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
502 }
503
504 static ALWAYS_INLINE bool isLatin1(LChar)
505 {
506     return true;
507 }
508
509 static ALWAYS_INLINE bool isLatin1(UChar c)
510 {
511     return c < 256;
512 }
513
514 static inline bool isIdentStart(LChar c)
515 {
516     return typesOfLatin1Characters[c] == CharacterIdentifierStart;
517 }
518
519 static inline bool isIdentStart(UChar c)
520 {
521     return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
522 }
523
524 static NEVER_INLINE bool isNonLatin1IdentPart(int c)
525 {
526     return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
527         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) || c == 0x200C || c == 0x200D;
528 }
529
530 static ALWAYS_INLINE bool isIdentPart(LChar c)
531 {
532     // Character types are divided into two groups depending on whether they can be part of an
533     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
534     // part of an identifier. (See the CharacterType definition for more details.)
535     return typesOfLatin1Characters[c] <= CharacterNumber;
536 }
537
538 static ALWAYS_INLINE bool isIdentPart(UChar c)
539 {
540     return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
541 }
542
// Maps the character following a backslash to the character the escape stands
// for. Returns 0 when |c| is not one of the single-character escapes.
static inline int singleEscape(int c)
{
    switch (c) {
    // Control-character escapes.
    case 'b':
        return '\b';
    case 't':
        return '\t';
    case 'n':
        return '\n';
    case 'v':
        return '\v';
    case 'f':
        return '\f';
    case 'r':
        return '\r';
    // Escapes that stand for the escaped character itself.
    case '\\':
    case '\'':
    case '"':
        return c;
    default:
        return 0;
    }
}
568
569 template <typename T>
570 inline void Lexer<T>::record8(int c)
571 {
572     ASSERT(c >= 0);
573     ASSERT(c <= 0xFF);
574     m_buffer8.append(static_cast<LChar>(c));
575 }
576
577 template <typename T>
578 inline void assertCharIsIn8BitRange(T c)
579 {
580     UNUSED_PARAM(c);
581     ASSERT(c >= 0);
582     ASSERT(c <= 0xFF);
583 }
584
585 template <>
586 inline void assertCharIsIn8BitRange(UChar c)
587 {
588     UNUSED_PARAM(c);
589     ASSERT(c <= 0xFF);
590 }
591
592 template <>
593 inline void assertCharIsIn8BitRange(LChar)
594 {
595 }
596
597 template <typename T>
598 inline void Lexer<T>::append8(const T* p, size_t length)
599 {
600     size_t currentSize = m_buffer8.size();
601     m_buffer8.grow(currentSize + length);
602     LChar* rawBuffer = m_buffer8.data() + currentSize;
603
604     for (size_t i = 0; i < length; i++) {
605         T c = p[i];
606         assertCharIsIn8BitRange(c);
607         rawBuffer[i] = c;
608     }
609 }
610
611 template <typename T>
612 inline void Lexer<T>::append16(const LChar* p, size_t length)
613 {
614     size_t currentSize = m_buffer16.size();
615     m_buffer16.grow(currentSize + length);
616     UChar* rawBuffer = m_buffer16.data() + currentSize;
617
618     for (size_t i = 0; i < length; i++)
619         rawBuffer[i] = p[i];
620 }
621
622 template <typename T>
623 inline void Lexer<T>::record16(T c)
624 {
625     m_buffer16.append(c);
626 }
627
628 template <typename T>
629 inline void Lexer<T>::record16(int c)
630 {
631     ASSERT(c >= 0);
632     ASSERT(c <= USHRT_MAX);
633     m_buffer16.append(static_cast<UChar>(c));
634 }
635
636 template <>
637     template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
638 {
639     const ptrdiff_t remaining = m_codeEnd - m_code;
640     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
641         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
642         if (keyword != IDENT) {
643             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
644             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
645         }
646     }
647
648     const LChar* identifierStart = currentCharacter();
649     
650     while (m_current != -1 && isIdentPart(static_cast<LChar>(m_current)))
651         shift();
652     
653     if (UNLIKELY(m_current == '\\')) {
654         setOffsetFromCharOffset(identifierStart);
655         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
656     }
657
658     const Identifier* ident = 0;
659     
660     if (shouldCreateIdentifier) {
661         int identifierLength = currentCharacter() - identifierStart;
662         ident = makeIdentifier(identifierStart, identifierLength);
663
664         tokenData->ident = ident;
665     } else
666         tokenData->ident = 0;
667
668     m_delimited = false;
669
670     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
671         ASSERT(shouldCreateIdentifier);
672         if (remaining < maxTokenLength) {
673             const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
674             ASSERT((remaining < maxTokenLength) || !entry);
675             if (!entry)
676                 return IDENT;
677             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
678             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
679         }
680         return IDENT;
681     }
682
683     return IDENT;
684 }
685
686 template <>
687 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
688 {
689     const ptrdiff_t remaining = m_codeEnd - m_code;
690     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
691         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
692         if (keyword != IDENT) {
693             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
694             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
695         }
696     }
697     const UChar* identifierStart = currentCharacter();
698
699     UChar orAllChars = 0;
700     
701     while (m_current != -1 && isIdentPart(static_cast<UChar>(m_current))) {
702         orAllChars |= m_current;
703         shift();
704     }
705     
706     if (UNLIKELY(m_current == '\\')) {
707         setOffsetFromCharOffset(identifierStart);
708         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
709     }
710
711     bool isAll8Bit = false;
712
713     if (!(orAllChars & ~0xff))
714         isAll8Bit = true;
715
716     const Identifier* ident = 0;
717     
718     if (shouldCreateIdentifier) {
719         int identifierLength = currentCharacter() - identifierStart;
720         if (isAll8Bit)
721             ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
722         else
723             ident = makeIdentifier(identifierStart, identifierLength);
724         
725         tokenData->ident = ident;
726     } else
727         tokenData->ident = 0;
728     
729     m_delimited = false;
730     
731     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
732         ASSERT(shouldCreateIdentifier);
733         if (remaining < maxTokenLength) {
734             const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
735             ASSERT((remaining < maxTokenLength) || !entry);
736             if (!entry)
737                 return IDENT;
738             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
739             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
740         }
741         return IDENT;
742     }
743
744     return IDENT;
745 }
746
747 template <typename T>
748 template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
749 {
750     const ptrdiff_t remaining = m_codeEnd - m_code;
751     const T* identifierStart = currentCharacter();
752     bool bufferRequired = false;
753
754     while (true) {
755         if (LIKELY(m_current != -1 && isIdentPart(static_cast<T>(m_current)))) {
756             shift();
757             continue;
758         }
759         if (LIKELY(m_current != '\\'))
760             break;
761
762         // \uXXXX unicode characters.
763         bufferRequired = true;
764         if (identifierStart != currentCharacter())
765             m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
766         shift();
767         if (UNLIKELY(m_current != 'u'))
768             return ERRORTOK;
769         shift();
770         int character = getUnicodeCharacter();
771         if (UNLIKELY(character == -1))
772             return ERRORTOK;
773         UChar ucharacter = static_cast<UChar>(character);
774         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
775             return ERRORTOK;
776         if (shouldCreateIdentifier)
777             record16(ucharacter);
778         identifierStart = currentCharacter();
779     }
780
781     int identifierLength;
782     const Identifier* ident = 0;
783     if (shouldCreateIdentifier) {
784         if (!bufferRequired) {
785             identifierLength = currentCharacter() - identifierStart;
786             ident = makeIdentifier(identifierStart, identifierLength);
787         } else {
788             if (identifierStart != currentCharacter())
789                 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
790             ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
791         }
792
793         tokenData->ident = ident;
794     } else
795         tokenData->ident = 0;
796
797     m_delimited = false;
798
799     if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
800         ASSERT(shouldCreateIdentifier);
801         // Keywords must not be recognized if there was an \uXXXX in the identifier.
802         if (remaining < maxTokenLength) {
803             const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
804             ASSERT((remaining < maxTokenLength) || !entry);
805             if (!entry)
806                 return IDENT;
807             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
808             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
809         }
810         return IDENT;
811     }
812
813     m_buffer16.resize(0);
814     return IDENT;
815 }
816
817 template <typename T>
818 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
819 {
820     int startingOffset = currentOffset();
821     int startingLineNumber = lineNumber();
822     int stringQuoteCharacter = m_current;
823     shift();
824
825     const T* stringStart = currentCharacter();
826
827     while (m_current != stringQuoteCharacter) {
828         if (UNLIKELY((m_current == '\\'))) {
829             if (stringStart != currentCharacter() && shouldBuildStrings)
830                 append8(stringStart, currentCharacter() - stringStart);
831             shift();
832
833             int escape = singleEscape(m_current);
834
835             // Most common escape sequences first
836             if (escape) {
837                 if (shouldBuildStrings)
838                     record8(escape);
839                 shift();
840             } else if (UNLIKELY(isLineTerminator(m_current)))
841                 shiftLineTerminator();
842             else if (m_current == 'x') {
843                 shift();
844                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
845                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
846                     return false;
847                 }
848                 int prev = m_current;
849                 shift();
850                 if (shouldBuildStrings)
851                     record8(convertHex(prev, m_current));
852                 shift();
853             } else {
854                 setOffset(startingOffset);
855                 setLineNumber(startingLineNumber);
856                 m_buffer8.resize(0);
857                 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
858             }
859             stringStart = currentCharacter();
860             continue;
861         }
862
863         if (UNLIKELY(((m_current > 0xff) || (m_current < 0xe)))) {
864             setOffset(startingOffset);
865             setLineNumber(startingLineNumber);
866             m_buffer8.resize(0);
867             return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
868         }
869
870         shift();
871     }
872
873     if (currentCharacter() != stringStart && shouldBuildStrings)
874         append8(stringStart, currentCharacter() - stringStart);
875     if (shouldBuildStrings) {
876         tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
877         m_buffer8.resize(0);
878     } else
879         tokenData->ident = 0;
880
881     return true;
882 }
883
884 template <typename T>
885 template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
886 {
887     int stringQuoteCharacter = m_current;
888     shift();
889
890     const T* stringStart = currentCharacter();
891
892     while (m_current != stringQuoteCharacter) {
893         if (UNLIKELY(m_current == '\\')) {
894             if (stringStart != currentCharacter() && shouldBuildStrings)
895                 append16(stringStart, currentCharacter() - stringStart);
896             shift();
897
898             int escape = singleEscape(m_current);
899
900             // Most common escape sequences first
901             if (escape) {
902                 if (shouldBuildStrings)
903                     record16(escape);
904                 shift();
905             } else if (UNLIKELY(isLineTerminator(static_cast<T>(m_current))))
906                 shiftLineTerminator();
907             else if (m_current == 'x') {
908                 shift();
909                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
910                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
911                     return false;
912                 }
913                 int prev = m_current;
914                 shift();
915                 if (shouldBuildStrings)
916                     record16(convertHex(prev, m_current));
917                 shift();
918             } else if (m_current == 'u') {
919                 shift();
920                 int character = getUnicodeCharacter();
921                 if (character != -1) {
922                     if (shouldBuildStrings)
923                         record16(character);
924                 } else if (m_current == stringQuoteCharacter) {
925                     if (shouldBuildStrings)
926                         record16('u');
927                 } else {
928                     m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
929                     return false;
930                 }
931             } else if (strictMode && isASCIIDigit(m_current)) {
932                 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
933                 int character1 = m_current;
934                 shift();
935                 if (character1 != '0' || isASCIIDigit(m_current)) {
936                     m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
937                     return false;
938                 }
939                 if (shouldBuildStrings)
940                     record16(0);
941             } else if (!strictMode && isASCIIOctalDigit(m_current)) {
942                 // Octal character sequences
943                 int character1 = m_current;
944                 shift();
945                 if (isASCIIOctalDigit(m_current)) {
946                     // Two octal characters
947                     int character2 = m_current;
948                     shift();
949                     if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
950                         if (shouldBuildStrings)
951                             record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
952                         shift();
953                     } else {
954                         if (shouldBuildStrings)
955                             record16((character1 - '0') * 8 + character2 - '0');
956                     }
957                 } else {
958                     if (shouldBuildStrings)
959                         record16(character1 - '0');
960                 }
961             } else if (m_current != -1) {
962                 if (shouldBuildStrings)
963                     record16(m_current);
964                 shift();
965             } else {
966                 m_lexErrorMessage = "Unterminated string constant";
967                 return false;
968             }
969
970             stringStart = currentCharacter();
971             continue;
972         }
973         // Fast check for characters that require special handling.
974         // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
975         // as possible, and lets through all common ASCII characters.
976         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
977             // New-line or end of input is not allowed
978             if (UNLIKELY(m_current == -1) || UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) {
979                 m_lexErrorMessage = "Unexpected EOF";
980                 return false;
981             }
982             // Anything else is just a normal character
983         }
984         shift();
985     }
986
987     if (currentCharacter() != stringStart && shouldBuildStrings)
988         append16(stringStart, currentCharacter() - stringStart);
989     if (shouldBuildStrings)
990         tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
991     else
992         tokenData->ident = 0;
993
994     m_buffer16.resize(0);
995     return true;
996 }
997
998 template <typename T>
999 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
1000 {
1001     // Optimization: most hexadecimal values fit into 4 bytes.
1002     uint32_t hexValue = 0;
1003     int maximumDigits = 7;
1004
1005     // Shift out the 'x' prefix.
1006     shift();
1007
1008     do {
1009         hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1010         shift();
1011         --maximumDigits;
1012     } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1013
1014     if (maximumDigits >= 0) {
1015         returnValue = hexValue;
1016         return;
1017     }
1018
1019     // No more place in the hexValue buffer.
1020     // The values are shifted out and placed into the m_buffer8 vector.
1021     for (int i = 0; i < 8; ++i) {
1022          int digit = hexValue >> 28;
1023          if (digit < 10)
1024              record8(digit + '0');
1025          else
1026              record8(digit - 10 + 'a');
1027          hexValue <<= 4;
1028     }
1029
1030     while (isASCIIHexDigit(m_current)) {
1031         record8(m_current);
1032         shift();
1033     }
1034
1035     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1036 }
1037
1038 template <typename T>
1039 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
1040 {
1041     // Optimization: most octal values fit into 4 bytes.
1042     uint32_t octalValue = 0;
1043     int maximumDigits = 9;
1044     // Temporary buffer for the digits. Makes easier
1045     // to reconstruct the input characters when needed.
1046     LChar digits[10];
1047
1048     do {
1049         octalValue = octalValue * 8 + (m_current - '0');
1050         digits[maximumDigits] = m_current;
1051         shift();
1052         --maximumDigits;
1053     } while (isASCIIOctalDigit(m_current) && maximumDigits >= 0);
1054
1055     if (!isASCIIDigit(m_current) && maximumDigits >= 0) {
1056         returnValue = octalValue;
1057         return true;
1058     }
1059
1060     for (int i = 9; i > maximumDigits; --i)
1061          record8(digits[i]);
1062
1063     while (isASCIIOctalDigit(m_current)) {
1064         record8(m_current);
1065         shift();
1066     }
1067
1068     if (isASCIIDigit(m_current))
1069         return false;
1070
1071     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1072     return true;
1073 }
1074
1075 template <typename T>
1076 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
1077 {
1078     // Optimization: most decimal values fit into 4 bytes.
1079     uint32_t decimalValue = 0;
1080
1081     // Since parseOctal may be executed before parseDecimal,
1082     // the m_buffer8 may hold ascii digits.
1083     if (!m_buffer8.size()) {
1084         int maximumDigits = 9;
1085         // Temporary buffer for the digits. Makes easier
1086         // to reconstruct the input characters when needed.
1087         LChar digits[10];
1088
1089         do {
1090             decimalValue = decimalValue * 10 + (m_current - '0');
1091             digits[maximumDigits] = m_current;
1092             shift();
1093             --maximumDigits;
1094         } while (isASCIIDigit(m_current) && maximumDigits >= 0);
1095
1096         if (maximumDigits >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1097             returnValue = decimalValue;
1098             return true;
1099         }
1100
1101         for (int i = 9; i > maximumDigits; --i)
1102             record8(digits[i]);
1103     }
1104
1105     while (isASCIIDigit(m_current)) {
1106         record8(m_current);
1107         shift();
1108     }
1109
1110     return false;
1111 }
1112
1113 template <typename T>
1114 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1115 {
1116     record8('.');
1117     while (isASCIIDigit(m_current)) {
1118         record8(m_current);
1119         shift();
1120     }
1121 }
1122
1123 template <typename T>
1124 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1125 {
1126     record8('e');
1127     shift();
1128     if (m_current == '+' || m_current == '-') {
1129         record8(m_current);
1130         shift();
1131     }
1132
1133     if (!isASCIIDigit(m_current))
1134         return false;
1135
1136     do {
1137         record8(m_current);
1138         shift();
1139     } while (isASCIIDigit(m_current));
1140     return true;
1141 }
1142
1143 template <typename T>
1144 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1145 {
1146     while (true) {
1147         while (UNLIKELY(m_current == '*')) {
1148             shift();
1149             if (m_current == '/') {
1150                 shift();
1151                 return true;
1152             }
1153         }
1154
1155         if (UNLIKELY(m_current == -1))
1156             return false;
1157
1158         if (isLineTerminator(static_cast<T>(m_current))) {
1159             shiftLineTerminator();
1160             m_terminator = true;
1161         } else
1162             shift();
1163     }
1164 }
1165
1166 template <typename T>
1167 bool Lexer<T>::nextTokenIsColon()
1168 {
1169     const T* code = m_code;
1170     while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1171         code++;
1172     
1173     return code < m_codeEnd && *code == ':';
1174 }
1175
// Scans the next token starting at m_current and returns its type.
//
// White space, line terminators and comments are skipped first. m_terminator records
// whether a line terminator was crossed during this call; it selects AUTOPLUSPLUS /
// AUTOMINUSMINUS over PLUSPLUS / MINUSMINUS below.
//
// tokenData receives the token payload: ident for strings and identifiers, doubleValue
// for numbers, intValue (the brace offset) for '{' and '}'.
// tokenInfo receives the line number and the start/end offsets on the returnToken and
// returnError paths. The two direct "return EOFTOK" statements leave tokenInfo untouched.
//
// Paths that jump to returnError set m_error and return ERRORTOK; most of them store a
// description in m_lexErrorMessage first.
1176 template <typename T>
1177 JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexerFlags, bool strictMode)
1178 {
1179     ASSERT(!m_error);
1180     ASSERT(m_buffer8.isEmpty());
1181     ASSERT(m_buffer16.isEmpty());
1182
1183     JSTokenType token = ERRORTOK;
1184     m_terminator = false;
1185
     // Scanning restarts here after a comment or a line terminator has been skipped.
1186 start:
1187     while (m_current != -1 && isWhiteSpace(static_cast<T>(m_current)))
1188         shift();
1189
1190     int startOffset = currentOffset();
1191
1192     if (UNLIKELY(m_current == -1))
1193         return EOFTOK;
1194
1195     m_delimited = false;
1196
     // Classify the first character: Latin-1 characters by table lookup, anything else as
     // an identifier start, a line terminator, or invalid.
1197     CharacterType type;
1198     if (LIKELY(isLatin1(static_cast<T>(m_current))))
1199         type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1200     else if (isNonLatin1IdentStart(m_current))
1201         type = CharacterIdentifierStart;
1202     else if (isLineTerminator(static_cast<T>(m_current)))
1203         type = CharacterLineTerminator;
1204     else
1205         type = CharacterInvalid;
1206
1207     switch (type) {
         // '>' family: >  >=  >>  >>=  >>>  >>>=
1208     case CharacterGreater:
1209         shift();
1210         if (m_current == '>') {
1211             shift();
1212             if (m_current == '>') {
1213                 shift();
1214                 if (m_current == '=') {
1215                     shift();
1216                     token = URSHIFTEQUAL;
1217                     break;
1218                 }
1219                 token = URSHIFT;
1220                 break;
1221             }
1222             if (m_current == '=') {
1223                 shift();
1224                 token = RSHIFTEQUAL;
1225                 break;
1226             }
1227             token = RSHIFT;
1228             break;
1229         }
1230         if (m_current == '=') {
1231             shift();
1232             token = GE;
1233             break;
1234         }
1235         token = GT;
1236         break;
         // '=' family: =  ==  ===
1237     case CharacterEqual:
1238         shift();
1239         if (m_current == '=') {
1240             shift();
1241             if (m_current == '=') {
1242                 shift();
1243                 token = STREQ;
1244                 break;
1245             }
1246             token = EQEQ;
1247             break;
1248         }
1249         token = EQUAL;
1250         break;
         // '<' family: <  <=  <<  <<=  and the "<!--" comment opener
1251     case CharacterLess:
1252         shift();
1253         if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1254             // <!-- marks the beginning of a line comment (for www usage)
1255             goto inSingleLineComment;
1256         }
1257         if (m_current == '<') {
1258             shift();
1259             if (m_current == '=') {
1260                 shift();
1261                 token = LSHIFTEQUAL;
1262                 break;
1263             }
1264             token = LSHIFT;
1265             break;
1266         }
1267         if (m_current == '=') {
1268             shift();
1269             token = LE;
1270             break;
1271         }
1272         token = LT;
1273         break;
         // '!' family: !  !=  !==
1274     case CharacterExclamationMark:
1275         shift();
1276         if (m_current == '=') {
1277             shift();
1278             if (m_current == '=') {
1279                 shift();
1280                 token = STRNEQ;
1281                 break;
1282             }
1283             token = NE;
1284             break;
1285         }
1286         token = EXCLAMATION;
1287         break;
         // '+' family: +  +=  ++ (AUTOPLUSPLUS when a line terminator preceded it in this call)
1288     case CharacterAdd:
1289         shift();
1290         if (m_current == '+') {
1291             shift();
1292             token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1293             break;
1294         }
1295         if (m_current == '=') {
1296             shift();
1297             token = PLUSEQUAL;
1298             break;
1299         }
1300         token = PLUS;
1301         break;
         // '-' family: -  -=  -- (AUTOMINUSMINUS after a line terminator), and "-->" which
         // is treated as the start of a single-line comment when m_atLineStart is set.
1302     case CharacterSub:
1303         shift();
1304         if (m_current == '-') {
1305             shift();
1306             if (m_atLineStart && m_current == '>') {
1307                 shift();
1308                 goto inSingleLineComment;
1309             }
1310             token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1311             break;
1312         }
1313         if (m_current == '=') {
1314             shift();
1315             token = MINUSEQUAL;
1316             break;
1317         }
1318         token = MINUS;
1319         break;
1320     case CharacterMultiply:
1321         shift();
1322         if (m_current == '=') {
1323             shift();
1324             token = MULTEQUAL;
1325             break;
1326         }
1327         token = TIMES;
1328         break;
         // '/' family: single-line comment, multi-line comment, /=, /
1329     case CharacterSlash:
1330         shift();
1331         if (m_current == '/') {
1332             shift();
1333             goto inSingleLineComment;
1334         }
1335         if (m_current == '*') {
1336             shift();
1337             if (parseMultilineComment())
1338                 goto start;
1339             m_lexErrorMessage = "Multiline comment was not closed properly";
1340             goto returnError;
1341         }
1342         if (m_current == '=') {
1343             shift();
1344             token = DIVEQUAL;
1345             break;
1346         }
1347         token = DIVIDE;
1348         break;
1349     case CharacterAnd:
1350         shift();
1351         if (m_current == '&') {
1352             shift();
1353             token = AND;
1354             break;
1355         }
1356         if (m_current == '=') {
1357             shift();
1358             token = ANDEQUAL;
1359             break;
1360         }
1361         token = BITAND;
1362         break;
1363     case CharacterXor:
1364         shift();
1365         if (m_current == '=') {
1366             shift();
1367             token = XOREQUAL;
1368             break;
1369         }
1370         token = BITXOR;
1371         break;
1372     case CharacterModulo:
1373         shift();
1374         if (m_current == '=') {
1375             shift();
1376             token = MODEQUAL;
1377             break;
1378         }
1379         token = MOD;
1380         break;
1381     case CharacterOr:
1382         shift();
1383         if (m_current == '=') {
1384             shift();
1385             token = OREQUAL;
1386             break;
1387         }
1388         if (m_current == '|') {
1389             shift();
1390             token = OR;
1391             break;
1392         }
1393         token = BITOR;
1394         break;
         // Single-character punctuators.
1395     case CharacterOpenParen:
1396         token = OPENPAREN;
1397         shift();
1398         break;
1399     case CharacterCloseParen:
1400         token = CLOSEPAREN;
1401         shift();
1402         break;
1403     case CharacterOpenBracket:
1404         token = OPENBRACKET;
1405         shift();
1406         break;
1407     case CharacterCloseBracket:
1408         token = CLOSEBRACKET;
1409         shift();
1410         break;
1411     case CharacterComma:
1412         token = COMMA;
1413         shift();
1414         break;
1415     case CharacterColon:
1416         token = COLON;
1417         shift();
1418         break;
1419     case CharacterQuestion:
1420         token = QUESTION;
1421         shift();
1422         break;
1423     case CharacterTilde:
1424         token = TILDE;
1425         shift();
1426         break;
1427     case CharacterSemicolon:
1428         m_delimited = true;
1429         shift();
1430         token = SEMICOLON;
1431         break;
         // Braces store their own source offset in the token data before being consumed.
1432     case CharacterOpenBrace:
1433         tokenData->intValue = currentOffset();
1434         shift();
1435         token = OPENBRACE;
1436         break;
1437     case CharacterCloseBrace:
1438         tokenData->intValue = currentOffset();
1439         m_delimited = true;
1440         shift();
1441         token = CLOSEBRACE;
1442         break;
         // A '.' followed by a digit starts a number (e.g. ".5"): jump into the fraction
         // handling of the CharacterNumber case. Otherwise it is the DOT token.
1443     case CharacterDot:
1444         shift();
1445         if (!isASCIIDigit(m_current)) {
1446             token = DOT;
1447             break;
1448         }
1449         goto inNumberAfterDecimalPoint;
         // A leading '0': "0x"/"0X" followed by a hex digit is a hex literal. Otherwise the
         // '0' is buffered and, if an octal digit follows, parseOctal() is tried. When
         // neither path sets token to NUMBER, the CharacterNumber code below continues
         // with the digits already in m_buffer8.
1450     case CharacterZero:
1451         shift();
1452         if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
1453             parseHex(tokenData->doubleValue);
1454             token = NUMBER;
1455         } else {
1456             record8('0');
1457             if (isASCIIOctalDigit(m_current)) {
1458                 if (parseOctal(tokenData->doubleValue)) {
1459                     if (strictMode) {
1460                         m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
1461                         goto returnError;
1462                     }
1463                     token = NUMBER;
1464                 }
1465             }
1466         }
1467         // Fall through into CharacterNumber
1468     case CharacterNumber:
             // token is already NUMBER when the hex or octal path above produced the value.
1469         if (LIKELY(token != NUMBER)) {
                 // parseDecimal() returning false means the value is not final yet: the
                 // digits are in m_buffer8 and an optional fraction and exponent are
                 // appended before parseDouble() converts the buffer.
1470             if (!parseDecimal(tokenData->doubleValue)) {
1471                 if (m_current == '.') {
1472                     shift();
1473 inNumberAfterDecimalPoint:
1474                     parseNumberAfterDecimalPoint();
1475                 }
1476                 if ((m_current | 0x20) == 'e') {
1477                     if (!parseNumberAfterExponentIndicator()) {
1478                         m_lexErrorMessage = "Non-number found after exponent indicator";
1479                         goto returnError;
1480                     }
1481                 }
1482                 size_t parsedLength;
1483                 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
1484             }
1485             token = NUMBER;
1486         }
1487
1488         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
1489         if (UNLIKELY(m_current != -1 && isIdentStart(static_cast<T>(m_current)))) {
1490             m_lexErrorMessage = "At least one digit must occur after a decimal point";
1491             goto returnError;
1492         }
1493         m_buffer8.resize(0);
1494         m_delimited = false;
1495         break;
         // String literal: LexerFlagsDontBuildStrings selects the parseString variant that
         // does not build the string. The shift() after a successful parse consumes the
         // closing quote.
1496     case CharacterQuote:
1497         if (lexerFlags & LexerFlagsDontBuildStrings) {
1498             if (UNLIKELY(!parseString<false>(tokenData, strictMode)))
1499                 goto returnError;
1500         } else {
1501             if (UNLIKELY(!parseString<true>(tokenData, strictMode)))
1502                 goto returnError;
1503         }
1504         shift();
1505         m_delimited = false;
1506         token = STRING;
1507         break;
1508     case CharacterIdentifierStart:
1509         ASSERT(isIdentStart(static_cast<T>(m_current)));
1510         // Fall through into CharacterBackSlash.
         // Identifiers and backslashes share the identifier parser; the flag selects
         // whether an Identifier object is created. The returned token is used as is.
1511     case CharacterBackSlash:
1512         if (lexerFlags & LexexFlagsDontBuildKeywords)
1513             token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
1514         else
1515             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
1516         break;
         // A line terminator produces no token: note it and scan again.
1517     case CharacterLineTerminator:
1518         ASSERT(isLineTerminator(static_cast<T>(m_current)));
1519         shiftLineTerminator();
1520         m_atLineStart = true;
1521         m_terminator = true;
1522         goto start;
1523     case CharacterInvalid:
1524         m_lexErrorMessage = getInvalidCharMessage();
1525         goto returnError;
1526     default:
1527         ASSERT_NOT_REACHED();
1528         m_lexErrorMessage = "Internal Error";
1529         goto returnError;
1530     }
1531
     // Every case that leaves the switch with "break" has produced a token, so the lexer
     // is no longer at the start of a line.
1532     m_atLineStart = false;
1533     goto returnToken;
1534
     // Skips the rest of the current line. Reaching the end of input returns EOFTOK
     // directly. After the line terminator, scanning restarts unless
     // lastTokenWasRestrKeyword() is true, in which case a SEMICOLON token is returned.
1535 inSingleLineComment:
1536     while (!isLineTerminator(static_cast<T>(m_current))) {
1537         if (UNLIKELY(m_current == -1))
1538             return EOFTOK;
1539         shift();
1540     }
1541     shiftLineTerminator();
1542     m_atLineStart = true;
1543     m_terminator = true;
1544     if (!lastTokenWasRestrKeyword())
1545         goto start;
1546
1547     token = SEMICOLON;
1548     m_delimited = true;
1549     // Fall through into returnToken.
1550
     // Common exit for tokens: record the position and remember the token in m_lastToken.
1551 returnToken:
1552     tokenInfo->line = m_lineNumber;
1553     tokenInfo->startOffset = startOffset;
1554     tokenInfo->endOffset = currentOffset();
1555     m_lastToken = token;
1556     return token;
1557
     // Common exit for errors: mark the lexer as failed and record where the error was
     // detected. m_lastToken is not updated on this path.
1558 returnError:
1559     m_error = true;
1560     tokenInfo->line = m_lineNumber;
1561     tokenInfo->startOffset = startOffset;
1562     tokenInfo->endOffset = currentOffset();
1563     return ERRORTOK;
1564 }
1565
1566 template <typename T>
1567 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
1568 {
1569     ASSERT(m_buffer16.isEmpty());
1570
1571     bool lastWasEscape = false;
1572     bool inBrackets = false;
1573
1574     if (patternPrefix) {
1575         ASSERT(!isLineTerminator(patternPrefix));
1576         ASSERT(patternPrefix != '/');
1577         ASSERT(patternPrefix != '[');
1578         record16(patternPrefix);
1579     }
1580
1581     while (true) {
1582         int current = m_current;
1583
1584         if (isLineTerminator(static_cast<T>(current)) || current == -1) {
1585             m_buffer16.resize(0);
1586             return false;
1587         }
1588
1589         shift();
1590
1591         if (current == '/' && !lastWasEscape && !inBrackets)
1592             break;
1593
1594         record16(current);
1595
1596         if (lastWasEscape) {
1597             lastWasEscape = false;
1598             continue;
1599         }
1600
1601         switch (current) {
1602         case '[':
1603             inBrackets = true;
1604             break;
1605         case ']':
1606             inBrackets = false;
1607             break;
1608         case '\\':
1609             lastWasEscape = true;
1610             break;
1611         }
1612     }
1613
1614     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1615     m_buffer16.resize(0);
1616
1617     while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) {
1618         record16(m_current);
1619         shift();
1620     }
1621
1622     flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1623     m_buffer16.resize(0);
1624
1625     return true;
1626 }
1627
1628 template <typename T>
1629 bool Lexer<T>::skipRegExp()
1630 {
1631     bool lastWasEscape = false;
1632     bool inBrackets = false;
1633
1634     while (true) {
1635         int current = m_current;
1636
1637         if (isLineTerminator(static_cast<T>(current)) || current == -1)
1638             return false;
1639
1640         shift();
1641
1642         if (current == '/' && !lastWasEscape && !inBrackets)
1643             break;
1644
1645         if (lastWasEscape) {
1646             lastWasEscape = false;
1647             continue;
1648         }
1649
1650         switch (current) {
1651         case '[':
1652             inBrackets = true;
1653             break;
1654         case ']':
1655             inBrackets = false;
1656             break;
1657         case '\\':
1658             lastWasEscape = true;
1659             break;
1660         }
1661     }
1662
1663     while (m_current != -1 && isIdentPart(static_cast<T>(m_current)))
1664         shift();
1665
1666     return true;
1667 }
1668
1669 template <typename T>
1670 void Lexer<T>::clear()
1671 {
1672     m_arena = 0;
1673
1674     Vector<LChar> newBuffer8;
1675     m_buffer8.swap(newBuffer8);
1676
1677     Vector<UChar> newBuffer16;
1678     m_buffer16.swap(newBuffer16);
1679
1680     m_isReparsing = false;
1681 }
1682
1683 template <typename T>
1684 SourceCode Lexer<T>::sourceCode(int openBrace, int closeBrace, int firstLine)
1685 {
1686     ASSERT((*m_source->provider()->data())[openBrace] == '{');
1687     ASSERT((*m_source->provider()->data())[closeBrace] == '}');
1688     return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1689 }
1690
1691 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
1692 template class Lexer<LChar>;
1693 template class Lexer<UChar>;
1694
1695 } // namespace JSC