Remove unused data member from Lexer class
[WebKit-https.git] / Source / JavaScriptCore / parser / Lexer.cpp
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
7  *
8  *  This library is free software; you can redistribute it and/or
9  *  modify it under the terms of the GNU Library General Public
10  *  License as published by the Free Software Foundation; either
11  *  version 2 of the License, or (at your option) any later version.
12  *
13  *  This library is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  *  Library General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Library General Public License
19  *  along with this library; see the file COPYING.LIB.  If not, write to
20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  *  Boston, MA 02110-1301, USA.
22  *
23  */
24
25 #include "config.h"
26 #include "Lexer.h"
27
28 #include "JSFunction.h"
29
30 #include "JSGlobalObjectFunctions.h"
31 #include "Identifier.h"
32 #include "NodeInfo.h"
33 #include "Nodes.h"
34 #include <wtf/dtoa.h>
35 #include <ctype.h>
36 #include <limits.h>
37 #include <string.h>
38 #include <wtf/Assertions.h>
39
40 using namespace WTF;
41 using namespace Unicode;
42
43 #include "KeywordLookup.h"
44 #include "Lexer.lut.h"
45 #include "Parser.h"
46
47 namespace JSC {
48
// Constructs the keyword lookup helper: remembers the owning JSGlobalData and
// initializes m_keywordTable from JSC::mainTable.
49 Keywords::Keywords(JSGlobalData* globalData)
50     : m_globalData(globalData)
51     , m_keywordTable(JSC::mainTable)
52 {
53 }
54
// Classification of a Latin-1 character; these are the values stored in
// typesOfLatin1Characters.
55 enum CharacterType {
56     // Types for the main switch
57
58     // The first three types are fixed, and also used for identifying
59     // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
    // isIdentPart tests "type <= CharacterNumber", so no other enumerator may be
    // placed before CharacterNumber.
60     CharacterIdentifierStart,
61     CharacterZero,
62     CharacterNumber,
63
64     CharacterInvalid,
65     CharacterLineTerminator,
66     CharacterExclamationMark,
67     CharacterOpenParen,
68     CharacterCloseParen,
69     CharacterOpenBracket,
70     CharacterCloseBracket,
71     CharacterComma,
72     CharacterColon,
73     CharacterQuestion,
74     CharacterTilde,
75     CharacterQuote,
76     CharacterDot,
77     CharacterSlash,
78     CharacterBackSlash,
79     CharacterSemicolon,
80     CharacterOpenBrace,
81     CharacterCloseBrace,
82
83     CharacterAdd,
84     CharacterSub,
85     CharacterMultiply,
86     CharacterModulo,
87     CharacterAnd,
88     CharacterXor,
89     CharacterOr,
90     CharacterLess,
91     CharacterGreater,
92     CharacterEqual,
93
94     // Other types (only one so far)
95     CharacterWhiteSpace,
96 };
97
98 // 256 Latin-1 codes: entry i holds the CharacterType of code point i.
// isIdentStart(LChar) and isIdentPart(LChar) index this table directly, so it
// must keep exactly one entry for every value an LChar index can take.
99 static const unsigned short typesOfLatin1Characters[256] = {
100 /*   0 - Null               */ CharacterInvalid,
101 /*   1 - Start of Heading   */ CharacterInvalid,
102 /*   2 - Start of Text      */ CharacterInvalid,
103 /*   3 - End of Text        */ CharacterInvalid,
104 /*   4 - End of Transm.     */ CharacterInvalid,
105 /*   5 - Enquiry            */ CharacterInvalid,
106 /*   6 - Acknowledgment     */ CharacterInvalid,
107 /*   7 - Bell               */ CharacterInvalid,
108 /*   8 - Back Space         */ CharacterInvalid,
109 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
110 /*  10 - Line Feed          */ CharacterLineTerminator,
111 /*  11 - Vertical Tab       */ CharacterWhiteSpace,
112 /*  12 - Form Feed          */ CharacterWhiteSpace,
113 /*  13 - Carriage Return    */ CharacterLineTerminator,
114 /*  14 - Shift Out          */ CharacterInvalid,
115 /*  15 - Shift In           */ CharacterInvalid,
116 /*  16 - Data Line Escape   */ CharacterInvalid,
117 /*  17 - Device Control 1   */ CharacterInvalid,
118 /*  18 - Device Control 2   */ CharacterInvalid,
119 /*  19 - Device Control 3   */ CharacterInvalid,
120 /*  20 - Device Control 4   */ CharacterInvalid,
121 /*  21 - Negative Ack.      */ CharacterInvalid,
122 /*  22 - Synchronous Idle   */ CharacterInvalid,
123 /*  23 - End of Transmit    */ CharacterInvalid,
124 /*  24 - Cancel             */ CharacterInvalid,
125 /*  25 - End of Medium      */ CharacterInvalid,
126 /*  26 - Substitute         */ CharacterInvalid,
127 /*  27 - Escape             */ CharacterInvalid,
128 /*  28 - File Separator     */ CharacterInvalid,
129 /*  29 - Group Separator    */ CharacterInvalid,
130 /*  30 - Record Separator   */ CharacterInvalid,
131 /*  31 - Unit Separator     */ CharacterInvalid,
132 /*  32 - Space              */ CharacterWhiteSpace,
133 /*  33 - !                  */ CharacterExclamationMark,
134 /*  34 - "                  */ CharacterQuote,
135 /*  35 - #                  */ CharacterInvalid,
136 /*  36 - $                  */ CharacterIdentifierStart,
137 /*  37 - %                  */ CharacterModulo,
138 /*  38 - &                  */ CharacterAnd,
139 /*  39 - '                  */ CharacterQuote,
140 /*  40 - (                  */ CharacterOpenParen,
141 /*  41 - )                  */ CharacterCloseParen,
142 /*  42 - *                  */ CharacterMultiply,
143 /*  43 - +                  */ CharacterAdd,
144 /*  44 - ,                  */ CharacterComma,
145 /*  45 - -                  */ CharacterSub,
146 /*  46 - .                  */ CharacterDot,
147 /*  47 - /                  */ CharacterSlash,
148 /*  48 - 0                  */ CharacterZero,
149 /*  49 - 1                  */ CharacterNumber,
150 /*  50 - 2                  */ CharacterNumber,
151 /*  51 - 3                  */ CharacterNumber,
152 /*  52 - 4                  */ CharacterNumber,
153 /*  53 - 5                  */ CharacterNumber,
154 /*  54 - 6                  */ CharacterNumber,
155 /*  55 - 7                  */ CharacterNumber,
156 /*  56 - 8                  */ CharacterNumber,
157 /*  57 - 9                  */ CharacterNumber,
158 /*  58 - :                  */ CharacterColon,
159 /*  59 - ;                  */ CharacterSemicolon,
160 /*  60 - <                  */ CharacterLess,
161 /*  61 - =                  */ CharacterEqual,
162 /*  62 - >                  */ CharacterGreater,
163 /*  63 - ?                  */ CharacterQuestion,
164 /*  64 - @                  */ CharacterInvalid,
165 /*  65 - A                  */ CharacterIdentifierStart,
166 /*  66 - B                  */ CharacterIdentifierStart,
167 /*  67 - C                  */ CharacterIdentifierStart,
168 /*  68 - D                  */ CharacterIdentifierStart,
169 /*  69 - E                  */ CharacterIdentifierStart,
170 /*  70 - F                  */ CharacterIdentifierStart,
171 /*  71 - G                  */ CharacterIdentifierStart,
172 /*  72 - H                  */ CharacterIdentifierStart,
173 /*  73 - I                  */ CharacterIdentifierStart,
174 /*  74 - J                  */ CharacterIdentifierStart,
175 /*  75 - K                  */ CharacterIdentifierStart,
176 /*  76 - L                  */ CharacterIdentifierStart,
177 /*  77 - M                  */ CharacterIdentifierStart,
178 /*  78 - N                  */ CharacterIdentifierStart,
179 /*  79 - O                  */ CharacterIdentifierStart,
180 /*  80 - P                  */ CharacterIdentifierStart,
181 /*  81 - Q                  */ CharacterIdentifierStart,
182 /*  82 - R                  */ CharacterIdentifierStart,
183 /*  83 - S                  */ CharacterIdentifierStart,
184 /*  84 - T                  */ CharacterIdentifierStart,
185 /*  85 - U                  */ CharacterIdentifierStart,
186 /*  86 - V                  */ CharacterIdentifierStart,
187 /*  87 - W                  */ CharacterIdentifierStart,
188 /*  88 - X                  */ CharacterIdentifierStart,
189 /*  89 - Y                  */ CharacterIdentifierStart,
190 /*  90 - Z                  */ CharacterIdentifierStart,
191 /*  91 - [                  */ CharacterOpenBracket,
192 /*  92 - \                  */ CharacterBackSlash,
193 /*  93 - ]                  */ CharacterCloseBracket,
194 /*  94 - ^                  */ CharacterXor,
195 /*  95 - _                  */ CharacterIdentifierStart,
196 /*  96 - `                  */ CharacterInvalid,
197 /*  97 - a                  */ CharacterIdentifierStart,
198 /*  98 - b                  */ CharacterIdentifierStart,
199 /*  99 - c                  */ CharacterIdentifierStart,
200 /* 100 - d                  */ CharacterIdentifierStart,
201 /* 101 - e                  */ CharacterIdentifierStart,
202 /* 102 - f                  */ CharacterIdentifierStart,
203 /* 103 - g                  */ CharacterIdentifierStart,
204 /* 104 - h                  */ CharacterIdentifierStart,
205 /* 105 - i                  */ CharacterIdentifierStart,
206 /* 106 - j                  */ CharacterIdentifierStart,
207 /* 107 - k                  */ CharacterIdentifierStart,
208 /* 108 - l                  */ CharacterIdentifierStart,
209 /* 109 - m                  */ CharacterIdentifierStart,
210 /* 110 - n                  */ CharacterIdentifierStart,
211 /* 111 - o                  */ CharacterIdentifierStart,
212 /* 112 - p                  */ CharacterIdentifierStart,
213 /* 113 - q                  */ CharacterIdentifierStart,
214 /* 114 - r                  */ CharacterIdentifierStart,
215 /* 115 - s                  */ CharacterIdentifierStart,
216 /* 116 - t                  */ CharacterIdentifierStart,
217 /* 117 - u                  */ CharacterIdentifierStart,
218 /* 118 - v                  */ CharacterIdentifierStart,
219 /* 119 - w                  */ CharacterIdentifierStart,
220 /* 120 - x                  */ CharacterIdentifierStart,
221 /* 121 - y                  */ CharacterIdentifierStart,
222 /* 122 - z                  */ CharacterIdentifierStart,
223 /* 123 - {                  */ CharacterOpenBrace,
224 /* 124 - |                  */ CharacterOr,
225 /* 125 - }                  */ CharacterCloseBrace,
226 /* 126 - ~                  */ CharacterTilde,
227 /* 127 - Delete             */ CharacterInvalid,
228 /* 128 - Cc category        */ CharacterInvalid,
229 /* 129 - Cc category        */ CharacterInvalid,
230 /* 130 - Cc category        */ CharacterInvalid,
231 /* 131 - Cc category        */ CharacterInvalid,
232 /* 132 - Cc category        */ CharacterInvalid,
233 /* 133 - Cc category        */ CharacterInvalid,
234 /* 134 - Cc category        */ CharacterInvalid,
235 /* 135 - Cc category        */ CharacterInvalid,
236 /* 136 - Cc category        */ CharacterInvalid,
237 /* 137 - Cc category        */ CharacterInvalid,
238 /* 138 - Cc category        */ CharacterInvalid,
239 /* 139 - Cc category        */ CharacterInvalid,
240 /* 140 - Cc category        */ CharacterInvalid,
241 /* 141 - Cc category        */ CharacterInvalid,
242 /* 142 - Cc category        */ CharacterInvalid,
243 /* 143 - Cc category        */ CharacterInvalid,
244 /* 144 - Cc category        */ CharacterInvalid,
245 /* 145 - Cc category        */ CharacterInvalid,
246 /* 146 - Cc category        */ CharacterInvalid,
247 /* 147 - Cc category        */ CharacterInvalid,
248 /* 148 - Cc category        */ CharacterInvalid,
249 /* 149 - Cc category        */ CharacterInvalid,
250 /* 150 - Cc category        */ CharacterInvalid,
251 /* 151 - Cc category        */ CharacterInvalid,
252 /* 152 - Cc category        */ CharacterInvalid,
253 /* 153 - Cc category        */ CharacterInvalid,
254 /* 154 - Cc category        */ CharacterInvalid,
255 /* 155 - Cc category        */ CharacterInvalid,
256 /* 156 - Cc category        */ CharacterInvalid,
257 /* 157 - Cc category        */ CharacterInvalid,
258 /* 158 - Cc category        */ CharacterInvalid,
259 /* 159 - Cc category        */ CharacterInvalid,
260 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
261 /* 161 - Po category        */ CharacterInvalid,
262 /* 162 - Sc category        */ CharacterInvalid,
263 /* 163 - Sc category        */ CharacterInvalid,
264 /* 164 - Sc category        */ CharacterInvalid,
265 /* 165 - Sc category        */ CharacterInvalid,
266 /* 166 - So category        */ CharacterInvalid,
267 /* 167 - So category        */ CharacterInvalid,
268 /* 168 - Sk category        */ CharacterInvalid,
269 /* 169 - So category        */ CharacterInvalid,
270 /* 170 - Ll category        */ CharacterIdentifierStart,
271 /* 171 - Pi category        */ CharacterInvalid,
272 /* 172 - Sm category        */ CharacterInvalid,
273 /* 173 - Cf category        */ CharacterInvalid,
274 /* 174 - So category        */ CharacterInvalid,
275 /* 175 - Sk category        */ CharacterInvalid,
276 /* 176 - So category        */ CharacterInvalid,
277 /* 177 - Sm category        */ CharacterInvalid,
278 /* 178 - No category        */ CharacterInvalid,
279 /* 179 - No category        */ CharacterInvalid,
280 /* 180 - Sk category        */ CharacterInvalid,
281 /* 181 - Ll category        */ CharacterIdentifierStart,
282 /* 182 - So category        */ CharacterInvalid,
283 /* 183 - Po category        */ CharacterInvalid,
284 /* 184 - Sk category        */ CharacterInvalid,
285 /* 185 - No category        */ CharacterInvalid,
286 /* 186 - Ll category        */ CharacterIdentifierStart,
287 /* 187 - Pf category        */ CharacterInvalid,
288 /* 188 - No category        */ CharacterInvalid,
289 /* 189 - No category        */ CharacterInvalid,
290 /* 190 - No category        */ CharacterInvalid,
291 /* 191 - Po category        */ CharacterInvalid,
292 /* 192 - Lu category        */ CharacterIdentifierStart,
293 /* 193 - Lu category        */ CharacterIdentifierStart,
294 /* 194 - Lu category        */ CharacterIdentifierStart,
295 /* 195 - Lu category        */ CharacterIdentifierStart,
296 /* 196 - Lu category        */ CharacterIdentifierStart,
297 /* 197 - Lu category        */ CharacterIdentifierStart,
298 /* 198 - Lu category        */ CharacterIdentifierStart,
299 /* 199 - Lu category        */ CharacterIdentifierStart,
300 /* 200 - Lu category        */ CharacterIdentifierStart,
301 /* 201 - Lu category        */ CharacterIdentifierStart,
302 /* 202 - Lu category        */ CharacterIdentifierStart,
303 /* 203 - Lu category        */ CharacterIdentifierStart,
304 /* 204 - Lu category        */ CharacterIdentifierStart,
305 /* 205 - Lu category        */ CharacterIdentifierStart,
306 /* 206 - Lu category        */ CharacterIdentifierStart,
307 /* 207 - Lu category        */ CharacterIdentifierStart,
308 /* 208 - Lu category        */ CharacterIdentifierStart,
309 /* 209 - Lu category        */ CharacterIdentifierStart,
310 /* 210 - Lu category        */ CharacterIdentifierStart,
311 /* 211 - Lu category        */ CharacterIdentifierStart,
312 /* 212 - Lu category        */ CharacterIdentifierStart,
313 /* 213 - Lu category        */ CharacterIdentifierStart,
314 /* 214 - Lu category        */ CharacterIdentifierStart,
315 /* 215 - Sm category        */ CharacterInvalid,
316 /* 216 - Lu category        */ CharacterIdentifierStart,
317 /* 217 - Lu category        */ CharacterIdentifierStart,
318 /* 218 - Lu category        */ CharacterIdentifierStart,
319 /* 219 - Lu category        */ CharacterIdentifierStart,
320 /* 220 - Lu category        */ CharacterIdentifierStart,
321 /* 221 - Lu category        */ CharacterIdentifierStart,
322 /* 222 - Lu category        */ CharacterIdentifierStart,
323 /* 223 - Ll category        */ CharacterIdentifierStart,
324 /* 224 - Ll category        */ CharacterIdentifierStart,
325 /* 225 - Ll category        */ CharacterIdentifierStart,
326 /* 226 - Ll category        */ CharacterIdentifierStart,
327 /* 227 - Ll category        */ CharacterIdentifierStart,
328 /* 228 - Ll category        */ CharacterIdentifierStart,
329 /* 229 - Ll category        */ CharacterIdentifierStart,
330 /* 230 - Ll category        */ CharacterIdentifierStart,
331 /* 231 - Ll category        */ CharacterIdentifierStart,
332 /* 232 - Ll category        */ CharacterIdentifierStart,
333 /* 233 - Ll category        */ CharacterIdentifierStart,
334 /* 234 - Ll category        */ CharacterIdentifierStart,
335 /* 235 - Ll category        */ CharacterIdentifierStart,
336 /* 236 - Ll category        */ CharacterIdentifierStart,
337 /* 237 - Ll category        */ CharacterIdentifierStart,
338 /* 238 - Ll category        */ CharacterIdentifierStart,
339 /* 239 - Ll category        */ CharacterIdentifierStart,
340 /* 240 - Ll category        */ CharacterIdentifierStart,
341 /* 241 - Ll category        */ CharacterIdentifierStart,
342 /* 242 - Ll category        */ CharacterIdentifierStart,
343 /* 243 - Ll category        */ CharacterIdentifierStart,
344 /* 244 - Ll category        */ CharacterIdentifierStart,
345 /* 245 - Ll category        */ CharacterIdentifierStart,
346 /* 246 - Ll category        */ CharacterIdentifierStart,
347 /* 247 - Sm category        */ CharacterInvalid,
348 /* 248 - Ll category        */ CharacterIdentifierStart,
349 /* 249 - Ll category        */ CharacterIdentifierStart,
350 /* 250 - Ll category        */ CharacterIdentifierStart,
351 /* 251 - Ll category        */ CharacterIdentifierStart,
352 /* 252 - Ll category        */ CharacterIdentifierStart,
353 /* 253 - Ll category        */ CharacterIdentifierStart,
354 /* 254 - Ll category        */ CharacterIdentifierStart,
355 /* 255 - Ll category        */ CharacterIdentifierStart
356 };
357
// Creates a lexer bound to globalData with m_isReparsing cleared. No source is
// attached here; setCode() supplies the text to scan.
358 template <typename T>
359 Lexer<T>::Lexer(JSGlobalData* globalData)
360     : m_isReparsing(false)
361     , m_globalData(globalData)
362 {
363 }
364
// Intentionally empty: no explicit cleanup is performed here.
365 template <typename T>
366 Lexer<T>::~Lexer()
367 {
368 }
369
370 template <typename T>
371 UString Lexer<T>::getInvalidCharMessage()
372 {
373     switch (m_current) {
374     case 0:
375         return "Invalid character: '\\0'";
376     case 10:
377         return "Invalid character: '\\n'";
378     case 11:
379         return "Invalid character: '\\v'";
380     case 13:
381         return "Invalid character: '\\r'";
382     case 35:
383         return "Invalid character: '#'";
384     case 64:
385         return "Invalid character: '@'";
386     case 96:
387         return "Invalid character: '`'";
388     default:
389         return String::format("Invalid character '\\u%04u'", m_current).impl();
390     }
391 }
392
// Returns the read cursor: a pointer to the code unit the lexer is currently
// positioned on. The cursor may equal m_codeEnd but must never be beyond it.
393 template <typename T>
394 ALWAYS_INLINE const T* Lexer<T>::currentCharacter() const
395 {
396     ASSERT(m_code <= m_codeEnd);
397     return m_code;
398 }
399
400 template <typename T>
401 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
402 {
403     m_arena = &arena->identifierArena();
404     
405     m_lineNumber = source.firstLine();
406     m_lastToken = -1;
407     
408     const StringImpl* sourceString = source.provider()->data();
409
410     if (sourceString)
411         setCodeStart(sourceString);
412     else
413         m_codeStart = 0;
414
415     m_source = &source;
416     m_code = m_codeStart + source.startOffset();
417     m_codeEnd = m_codeStart + source.endOffset();
418     m_error = false;
419     m_atLineStart = true;
420     m_lexErrorMessage = UString();
421     
422     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
423     m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
424     
425     if (LIKELY(m_code < m_codeEnd))
426         m_current = *m_code;
427     else
428         m_current = -1;
429     ASSERT(currentOffset() == source.startOffset());
430 }
431
// Advances the cursor by shiftAmount code units and loads the code unit found
// there into m_current. Unlike shift(), this does not compare against m_codeEnd
// and never stores the -1 end-of-input marker.
// NOTE(review): callers presumably guarantee m_code + shiftAmount is readable — confirm.
432 template <typename T>
433 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
434 {
435     m_code += shiftAmount;
436     m_current = *m_code;
437 }
438
// Advances the cursor by one code unit. Afterwards m_current holds the code unit
// at the new position, or -1 when the cursor has reached m_codeEnd. Must not be
// called when m_current is already -1.
439 template <typename T>
440 ALWAYS_INLINE void Lexer<T>::shift()
441 {
442     // Faster than an if-else sequence: store the end-of-input marker first, then
    // overwrite it only when a code unit is actually available.
443     ASSERT(m_current != -1);
444     m_current = -1;
445     m_code++;
446     if (LIKELY(m_code < m_codeEnd))
447         m_current = *m_code;
448 }
449
450 template <typename T>
451 ALWAYS_INLINE int Lexer<T>::peek(int offset)
452 {
453     // Only use if necessary
454     ASSERT(offset > 0 && offset < 5);
455     const T* code = m_code + offset;
456     return (code < m_codeEnd) ? *code : -1;
457 }
458
459 template <typename T>
460 int Lexer<T>::getUnicodeCharacter()
461 {
462     int char1 = peek(1);
463     int char2 = peek(2);
464     int char3 = peek(3);
465
466     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
467         return -1;
468
469     int result = convertUnicode(m_current, char1, char2, char3);
470     shift();
471     shift();
472     shift();
473     shift();
474     return result;
475 }
476
477 template <typename T>
478 void Lexer<T>::shiftLineTerminator()
479 {
480     ASSERT(isLineTerminator(static_cast<T>(m_current)));
481
482     int m_prev = m_current;
483     shift();
484
485     // Allow both CRLF and LFCR.
486     if (m_prev + m_current == '\n' + '\r')
487         shift();
488
489     ++m_lineNumber;
490 }
491
492 template <typename T>
493 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
494 {
495     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
496 }
497
498 static NEVER_INLINE bool isNonLatin1IdentStart(int c)
499 {
500     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
501 }
502
503 static ALWAYS_INLINE bool isLatin1(LChar)
504 {
505     return true;
506 }
507
508 static ALWAYS_INLINE bool isLatin1(UChar c)
509 {
510     return c < 256;
511 }
512
513 static inline bool isIdentStart(LChar c)
514 {
515     return typesOfLatin1Characters[c] == CharacterIdentifierStart;
516 }
517
518 static inline bool isIdentStart(UChar c)
519 {
520     return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
521 }
522
523 static NEVER_INLINE bool isNonLatin1IdentPart(int c)
524 {
525     return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
526         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) || c == 0x200C || c == 0x200D;
527 }
528
529 static ALWAYS_INLINE bool isIdentPart(LChar c)
530 {
531     // Character types are divided into two groups depending on whether they can be part of an
532     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
533     // part of an identifier. (See the CharacterType definition for more details.)
534     return typesOfLatin1Characters[c] <= CharacterNumber;
535 }
536
537 static ALWAYS_INLINE bool isIdentPart(UChar c)
538 {
539     return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
540 }
541
// Maps the character following a backslash to the character it denotes for the
// single-character escapes (\b \t \n \v \f \r \\ \' \"). Returns 0 when |c| is
// not one of those escapes.
static inline int singleEscape(int c)
{
    switch (c) {
    // Escapes that stand for the character itself.
    case '\\':
    case '\'':
    case '"':
        return c;
    // Control-character escapes.
    case 'b':
        return '\b';
    case 't':
        return '\t';
    case 'n':
        return '\n';
    case 'v':
        return '\v';
    case 'f':
        return '\f';
    case 'r':
        return '\r';
    default:
        return 0;
    }
}
567
568 template <typename T>
569 inline void Lexer<T>::record8(int c)
570 {
571     ASSERT(c >= 0);
572     ASSERT(c <= 0xFF);
573     m_buffer8.append(static_cast<LChar>(c));
574 }
575
576 template <typename T>
577 inline void assertCharIsIn8BitRange(T c)
578 {
579     UNUSED_PARAM(c);
580     ASSERT(c >= 0);
581     ASSERT(c <= 0xFF);
582 }
583
584 template <>
585 inline void assertCharIsIn8BitRange(UChar c)
586 {
587     UNUSED_PARAM(c);
588     ASSERT(c <= 0xFF);
589 }
590
591 template <>
592 inline void assertCharIsIn8BitRange(LChar)
593 {
594 }
595
596 template <typename T>
597 inline void Lexer<T>::append8(const T* p, size_t length)
598 {
599     size_t currentSize = m_buffer8.size();
600     m_buffer8.grow(currentSize + length);
601     LChar* rawBuffer = m_buffer8.data() + currentSize;
602
603     for (size_t i = 0; i < length; i++) {
604         T c = p[i];
605         assertCharIsIn8BitRange(c);
606         rawBuffer[i] = c;
607     }
608 }
609
610 template <typename T>
611 inline void Lexer<T>::append16(const LChar* p, size_t length)
612 {
613     size_t currentSize = m_buffer16.size();
614     m_buffer16.grow(currentSize + length);
615     UChar* rawBuffer = m_buffer16.data() + currentSize;
616
617     for (size_t i = 0; i < length; i++)
618         rawBuffer[i] = p[i];
619 }
620
621 template <typename T>
622 inline void Lexer<T>::record16(T c)
623 {
624     m_buffer16.append(c);
625 }
626
627 template <typename T>
628 inline void Lexer<T>::record16(int c)
629 {
630     ASSERT(c >= 0);
631     ASSERT(c <= USHRT_MAX);
632     m_buffer16.append(static_cast<UChar>(c));
633 }
634
635 template <>
636     template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
637 {
638     const ptrdiff_t remaining = m_codeEnd - m_code;
639     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
640         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
641         if (keyword != IDENT) {
642             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
643             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
644         }
645     }
646
647     const LChar* identifierStart = currentCharacter();
648     
649     while (m_current != -1 && isIdentPart(static_cast<LChar>(m_current)))
650         shift();
651     
652     if (UNLIKELY(m_current == '\\')) {
653         setOffsetFromCharOffset(identifierStart);
654         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
655     }
656
657     const Identifier* ident = 0;
658     
659     if (shouldCreateIdentifier) {
660         int identifierLength = currentCharacter() - identifierStart;
661         ident = makeIdentifier(identifierStart, identifierLength);
662
663         tokenData->ident = ident;
664     } else
665         tokenData->ident = 0;
666
667     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
668         ASSERT(shouldCreateIdentifier);
669         if (remaining < maxTokenLength) {
670             const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
671             ASSERT((remaining < maxTokenLength) || !entry);
672             if (!entry)
673                 return IDENT;
674             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
675             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
676         }
677         return IDENT;
678     }
679
680     return IDENT;
681 }
682
683 template <>
684 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
685 {
686     const ptrdiff_t remaining = m_codeEnd - m_code;
687     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
688         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
689         if (keyword != IDENT) {
690             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
691             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
692         }
693     }
694     const UChar* identifierStart = currentCharacter();
695
696     UChar orAllChars = 0;
697     
698     while (m_current != -1 && isIdentPart(static_cast<UChar>(m_current))) {
699         orAllChars |= m_current;
700         shift();
701     }
702     
703     if (UNLIKELY(m_current == '\\')) {
704         setOffsetFromCharOffset(identifierStart);
705         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
706     }
707
708     bool isAll8Bit = false;
709
710     if (!(orAllChars & ~0xff))
711         isAll8Bit = true;
712
713     const Identifier* ident = 0;
714     
715     if (shouldCreateIdentifier) {
716         int identifierLength = currentCharacter() - identifierStart;
717         if (isAll8Bit)
718             ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
719         else
720             ident = makeIdentifier(identifierStart, identifierLength);
721         
722         tokenData->ident = ident;
723     } else
724         tokenData->ident = 0;
725     
726     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
727         ASSERT(shouldCreateIdentifier);
728         if (remaining < maxTokenLength) {
729             const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
730             ASSERT((remaining < maxTokenLength) || !entry);
731             if (!entry)
732                 return IDENT;
733             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
734             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
735         }
736         return IDENT;
737     }
738
739     return IDENT;
740 }
741
742 template <typename T>
743 template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
744 {
745     const ptrdiff_t remaining = m_codeEnd - m_code;
746     const T* identifierStart = currentCharacter();
747     bool bufferRequired = false;
748
749     while (true) {
750         if (LIKELY(m_current != -1 && isIdentPart(static_cast<T>(m_current)))) {
751             shift();
752             continue;
753         }
754         if (LIKELY(m_current != '\\'))
755             break;
756
757         // \uXXXX unicode characters.
758         bufferRequired = true;
759         if (identifierStart != currentCharacter())
760             m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
761         shift();
762         if (UNLIKELY(m_current != 'u'))
763             return ERRORTOK;
764         shift();
765         int character = getUnicodeCharacter();
766         if (UNLIKELY(character == -1))
767             return ERRORTOK;
768         UChar ucharacter = static_cast<UChar>(character);
769         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
770             return ERRORTOK;
771         if (shouldCreateIdentifier)
772             record16(ucharacter);
773         identifierStart = currentCharacter();
774     }
775
776     int identifierLength;
777     const Identifier* ident = 0;
778     if (shouldCreateIdentifier) {
779         if (!bufferRequired) {
780             identifierLength = currentCharacter() - identifierStart;
781             ident = makeIdentifier(identifierStart, identifierLength);
782         } else {
783             if (identifierStart != currentCharacter())
784                 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
785             ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
786         }
787
788         tokenData->ident = ident;
789     } else
790         tokenData->ident = 0;
791
792     if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
793         ASSERT(shouldCreateIdentifier);
794         // Keywords must not be recognized if there was an \uXXXX in the identifier.
795         if (remaining < maxTokenLength) {
796             const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
797             ASSERT((remaining < maxTokenLength) || !entry);
798             if (!entry)
799                 return IDENT;
800             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
801             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
802         }
803         return IDENT;
804     }
805
806     m_buffer16.resize(0);
807     return IDENT;
808 }
809
810 template <typename T>
811 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
812 {
813     int startingOffset = currentOffset();
814     int startingLineNumber = lineNumber();
815     int stringQuoteCharacter = m_current;
816     shift();
817
818     const T* stringStart = currentCharacter();
819
820     while (m_current != stringQuoteCharacter) {
821         if (UNLIKELY((m_current == '\\'))) {
822             if (stringStart != currentCharacter() && shouldBuildStrings)
823                 append8(stringStart, currentCharacter() - stringStart);
824             shift();
825
826             int escape = singleEscape(m_current);
827
828             // Most common escape sequences first
829             if (escape) {
830                 if (shouldBuildStrings)
831                     record8(escape);
832                 shift();
833             } else if (UNLIKELY(isLineTerminator(m_current)))
834                 shiftLineTerminator();
835             else if (m_current == 'x') {
836                 shift();
837                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
838                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
839                     return false;
840                 }
841                 int prev = m_current;
842                 shift();
843                 if (shouldBuildStrings)
844                     record8(convertHex(prev, m_current));
845                 shift();
846             } else {
847                 setOffset(startingOffset);
848                 setLineNumber(startingLineNumber);
849                 m_buffer8.resize(0);
850                 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
851             }
852             stringStart = currentCharacter();
853             continue;
854         }
855
856         if (UNLIKELY(((m_current > 0xff) || (m_current < 0xe)))) {
857             setOffset(startingOffset);
858             setLineNumber(startingLineNumber);
859             m_buffer8.resize(0);
860             return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
861         }
862
863         shift();
864     }
865
866     if (currentCharacter() != stringStart && shouldBuildStrings)
867         append8(stringStart, currentCharacter() - stringStart);
868     if (shouldBuildStrings) {
869         tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
870         m_buffer8.resize(0);
871     } else
872         tokenData->ident = 0;
873
874     return true;
875 }
876
877 template <typename T>
878 template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
879 {
880     int stringQuoteCharacter = m_current;
881     shift();
882
883     const T* stringStart = currentCharacter();
884
885     while (m_current != stringQuoteCharacter) {
886         if (UNLIKELY(m_current == '\\')) {
887             if (stringStart != currentCharacter() && shouldBuildStrings)
888                 append16(stringStart, currentCharacter() - stringStart);
889             shift();
890
891             int escape = singleEscape(m_current);
892
893             // Most common escape sequences first
894             if (escape) {
895                 if (shouldBuildStrings)
896                     record16(escape);
897                 shift();
898             } else if (UNLIKELY(isLineTerminator(static_cast<T>(m_current))))
899                 shiftLineTerminator();
900             else if (m_current == 'x') {
901                 shift();
902                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
903                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
904                     return false;
905                 }
906                 int prev = m_current;
907                 shift();
908                 if (shouldBuildStrings)
909                     record16(convertHex(prev, m_current));
910                 shift();
911             } else if (m_current == 'u') {
912                 shift();
913                 int character = getUnicodeCharacter();
914                 if (character != -1) {
915                     if (shouldBuildStrings)
916                         record16(character);
917                 } else if (m_current == stringQuoteCharacter) {
918                     if (shouldBuildStrings)
919                         record16('u');
920                 } else {
921                     m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
922                     return false;
923                 }
924             } else if (strictMode && isASCIIDigit(m_current)) {
925                 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
926                 int character1 = m_current;
927                 shift();
928                 if (character1 != '0' || isASCIIDigit(m_current)) {
929                     m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
930                     return false;
931                 }
932                 if (shouldBuildStrings)
933                     record16(0);
934             } else if (!strictMode && isASCIIOctalDigit(m_current)) {
935                 // Octal character sequences
936                 int character1 = m_current;
937                 shift();
938                 if (isASCIIOctalDigit(m_current)) {
939                     // Two octal characters
940                     int character2 = m_current;
941                     shift();
942                     if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
943                         if (shouldBuildStrings)
944                             record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
945                         shift();
946                     } else {
947                         if (shouldBuildStrings)
948                             record16((character1 - '0') * 8 + character2 - '0');
949                     }
950                 } else {
951                     if (shouldBuildStrings)
952                         record16(character1 - '0');
953                 }
954             } else if (m_current != -1) {
955                 if (shouldBuildStrings)
956                     record16(m_current);
957                 shift();
958             } else {
959                 m_lexErrorMessage = "Unterminated string constant";
960                 return false;
961             }
962
963             stringStart = currentCharacter();
964             continue;
965         }
966         // Fast check for characters that require special handling.
967         // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
968         // as possible, and lets through all common ASCII characters.
969         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
970             // New-line or end of input is not allowed
971             if (UNLIKELY(m_current == -1) || UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) {
972                 m_lexErrorMessage = "Unexpected EOF";
973                 return false;
974             }
975             // Anything else is just a normal character
976         }
977         shift();
978     }
979
980     if (currentCharacter() != stringStart && shouldBuildStrings)
981         append16(stringStart, currentCharacter() - stringStart);
982     if (shouldBuildStrings)
983         tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
984     else
985         tokenData->ident = 0;
986
987     m_buffer16.resize(0);
988     return true;
989 }
990
991 template <typename T>
992 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
993 {
994     // Optimization: most hexadecimal values fit into 4 bytes.
995     uint32_t hexValue = 0;
996     int maximumDigits = 7;
997
998     // Shift out the 'x' prefix.
999     shift();
1000
1001     do {
1002         hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1003         shift();
1004         --maximumDigits;
1005     } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1006
1007     if (maximumDigits >= 0) {
1008         returnValue = hexValue;
1009         return;
1010     }
1011
1012     // No more place in the hexValue buffer.
1013     // The values are shifted out and placed into the m_buffer8 vector.
1014     for (int i = 0; i < 8; ++i) {
1015          int digit = hexValue >> 28;
1016          if (digit < 10)
1017              record8(digit + '0');
1018          else
1019              record8(digit - 10 + 'a');
1020          hexValue <<= 4;
1021     }
1022
1023     while (isASCIIHexDigit(m_current)) {
1024         record8(m_current);
1025         shift();
1026     }
1027
1028     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1029 }
1030
1031 template <typename T>
1032 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
1033 {
1034     // Optimization: most octal values fit into 4 bytes.
1035     uint32_t octalValue = 0;
1036     int maximumDigits = 9;
1037     // Temporary buffer for the digits. Makes easier
1038     // to reconstruct the input characters when needed.
1039     LChar digits[10];
1040
1041     do {
1042         octalValue = octalValue * 8 + (m_current - '0');
1043         digits[maximumDigits] = m_current;
1044         shift();
1045         --maximumDigits;
1046     } while (isASCIIOctalDigit(m_current) && maximumDigits >= 0);
1047
1048     if (!isASCIIDigit(m_current) && maximumDigits >= 0) {
1049         returnValue = octalValue;
1050         return true;
1051     }
1052
1053     for (int i = 9; i > maximumDigits; --i)
1054          record8(digits[i]);
1055
1056     while (isASCIIOctalDigit(m_current)) {
1057         record8(m_current);
1058         shift();
1059     }
1060
1061     if (isASCIIDigit(m_current))
1062         return false;
1063
1064     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1065     return true;
1066 }
1067
1068 template <typename T>
1069 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
1070 {
1071     // Optimization: most decimal values fit into 4 bytes.
1072     uint32_t decimalValue = 0;
1073
1074     // Since parseOctal may be executed before parseDecimal,
1075     // the m_buffer8 may hold ascii digits.
1076     if (!m_buffer8.size()) {
1077         int maximumDigits = 9;
1078         // Temporary buffer for the digits. Makes easier
1079         // to reconstruct the input characters when needed.
1080         LChar digits[10];
1081
1082         do {
1083             decimalValue = decimalValue * 10 + (m_current - '0');
1084             digits[maximumDigits] = m_current;
1085             shift();
1086             --maximumDigits;
1087         } while (isASCIIDigit(m_current) && maximumDigits >= 0);
1088
1089         if (maximumDigits >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1090             returnValue = decimalValue;
1091             return true;
1092         }
1093
1094         for (int i = 9; i > maximumDigits; --i)
1095             record8(digits[i]);
1096     }
1097
1098     while (isASCIIDigit(m_current)) {
1099         record8(m_current);
1100         shift();
1101     }
1102
1103     return false;
1104 }
1105
1106 template <typename T>
1107 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1108 {
1109     record8('.');
1110     while (isASCIIDigit(m_current)) {
1111         record8(m_current);
1112         shift();
1113     }
1114 }
1115
1116 template <typename T>
1117 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1118 {
1119     record8('e');
1120     shift();
1121     if (m_current == '+' || m_current == '-') {
1122         record8(m_current);
1123         shift();
1124     }
1125
1126     if (!isASCIIDigit(m_current))
1127         return false;
1128
1129     do {
1130         record8(m_current);
1131         shift();
1132     } while (isASCIIDigit(m_current));
1133     return true;
1134 }
1135
1136 template <typename T>
1137 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1138 {
1139     while (true) {
1140         while (UNLIKELY(m_current == '*')) {
1141             shift();
1142             if (m_current == '/') {
1143                 shift();
1144                 return true;
1145             }
1146         }
1147
1148         if (UNLIKELY(m_current == -1))
1149             return false;
1150
1151         if (isLineTerminator(static_cast<T>(m_current))) {
1152             shiftLineTerminator();
1153             m_terminator = true;
1154         } else
1155             shift();
1156     }
1157 }
1158
1159 template <typename T>
1160 bool Lexer<T>::nextTokenIsColon()
1161 {
1162     const T* code = m_code;
1163     while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1164         code++;
1165     
1166     return code < m_codeEnd && *code == ':';
1167 }
1168
1169 template <typename T>
1170 JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexerFlags, bool strictMode)
1171 {
1172     ASSERT(!m_error);
1173     ASSERT(m_buffer8.isEmpty());
1174     ASSERT(m_buffer16.isEmpty());
1175
1176     JSTokenType token = ERRORTOK;
1177     m_terminator = false;
1178
1179 start:
1180     while (m_current != -1 && isWhiteSpace(static_cast<T>(m_current)))
1181         shift();
1182
1183     int startOffset = currentOffset();
1184
1185     if (UNLIKELY(m_current == -1))
1186         return EOFTOK;
1187
1188     CharacterType type;
1189     if (LIKELY(isLatin1(static_cast<T>(m_current))))
1190         type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1191     else if (isNonLatin1IdentStart(m_current))
1192         type = CharacterIdentifierStart;
1193     else if (isLineTerminator(static_cast<T>(m_current)))
1194         type = CharacterLineTerminator;
1195     else
1196         type = CharacterInvalid;
1197
1198     switch (type) {
1199     case CharacterGreater:
1200         shift();
1201         if (m_current == '>') {
1202             shift();
1203             if (m_current == '>') {
1204                 shift();
1205                 if (m_current == '=') {
1206                     shift();
1207                     token = URSHIFTEQUAL;
1208                     break;
1209                 }
1210                 token = URSHIFT;
1211                 break;
1212             }
1213             if (m_current == '=') {
1214                 shift();
1215                 token = RSHIFTEQUAL;
1216                 break;
1217             }
1218             token = RSHIFT;
1219             break;
1220         }
1221         if (m_current == '=') {
1222             shift();
1223             token = GE;
1224             break;
1225         }
1226         token = GT;
1227         break;
1228     case CharacterEqual:
1229         shift();
1230         if (m_current == '=') {
1231             shift();
1232             if (m_current == '=') {
1233                 shift();
1234                 token = STREQ;
1235                 break;
1236             }
1237             token = EQEQ;
1238             break;
1239         }
1240         token = EQUAL;
1241         break;
1242     case CharacterLess:
1243         shift();
1244         if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1245             // <!-- marks the beginning of a line comment (for www usage)
1246             goto inSingleLineComment;
1247         }
1248         if (m_current == '<') {
1249             shift();
1250             if (m_current == '=') {
1251                 shift();
1252                 token = LSHIFTEQUAL;
1253                 break;
1254             }
1255             token = LSHIFT;
1256             break;
1257         }
1258         if (m_current == '=') {
1259             shift();
1260             token = LE;
1261             break;
1262         }
1263         token = LT;
1264         break;
1265     case CharacterExclamationMark:
1266         shift();
1267         if (m_current == '=') {
1268             shift();
1269             if (m_current == '=') {
1270                 shift();
1271                 token = STRNEQ;
1272                 break;
1273             }
1274             token = NE;
1275             break;
1276         }
1277         token = EXCLAMATION;
1278         break;
1279     case CharacterAdd:
1280         shift();
1281         if (m_current == '+') {
1282             shift();
1283             token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1284             break;
1285         }
1286         if (m_current == '=') {
1287             shift();
1288             token = PLUSEQUAL;
1289             break;
1290         }
1291         token = PLUS;
1292         break;
1293     case CharacterSub:
1294         shift();
1295         if (m_current == '-') {
1296             shift();
1297             if (m_atLineStart && m_current == '>') {
1298                 shift();
1299                 goto inSingleLineComment;
1300             }
1301             token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1302             break;
1303         }
1304         if (m_current == '=') {
1305             shift();
1306             token = MINUSEQUAL;
1307             break;
1308         }
1309         token = MINUS;
1310         break;
1311     case CharacterMultiply:
1312         shift();
1313         if (m_current == '=') {
1314             shift();
1315             token = MULTEQUAL;
1316             break;
1317         }
1318         token = TIMES;
1319         break;
1320     case CharacterSlash:
1321         shift();
1322         if (m_current == '/') {
1323             shift();
1324             goto inSingleLineComment;
1325         }
1326         if (m_current == '*') {
1327             shift();
1328             if (parseMultilineComment())
1329                 goto start;
1330             m_lexErrorMessage = "Multiline comment was not closed properly";
1331             goto returnError;
1332         }
1333         if (m_current == '=') {
1334             shift();
1335             token = DIVEQUAL;
1336             break;
1337         }
1338         token = DIVIDE;
1339         break;
1340     case CharacterAnd:
1341         shift();
1342         if (m_current == '&') {
1343             shift();
1344             token = AND;
1345             break;
1346         }
1347         if (m_current == '=') {
1348             shift();
1349             token = ANDEQUAL;
1350             break;
1351         }
1352         token = BITAND;
1353         break;
1354     case CharacterXor:
1355         shift();
1356         if (m_current == '=') {
1357             shift();
1358             token = XOREQUAL;
1359             break;
1360         }
1361         token = BITXOR;
1362         break;
1363     case CharacterModulo:
1364         shift();
1365         if (m_current == '=') {
1366             shift();
1367             token = MODEQUAL;
1368             break;
1369         }
1370         token = MOD;
1371         break;
1372     case CharacterOr:
1373         shift();
1374         if (m_current == '=') {
1375             shift();
1376             token = OREQUAL;
1377             break;
1378         }
1379         if (m_current == '|') {
1380             shift();
1381             token = OR;
1382             break;
1383         }
1384         token = BITOR;
1385         break;
1386     case CharacterOpenParen:
1387         token = OPENPAREN;
1388         shift();
1389         break;
1390     case CharacterCloseParen:
1391         token = CLOSEPAREN;
1392         shift();
1393         break;
1394     case CharacterOpenBracket:
1395         token = OPENBRACKET;
1396         shift();
1397         break;
1398     case CharacterCloseBracket:
1399         token = CLOSEBRACKET;
1400         shift();
1401         break;
1402     case CharacterComma:
1403         token = COMMA;
1404         shift();
1405         break;
1406     case CharacterColon:
1407         token = COLON;
1408         shift();
1409         break;
1410     case CharacterQuestion:
1411         token = QUESTION;
1412         shift();
1413         break;
1414     case CharacterTilde:
1415         token = TILDE;
1416         shift();
1417         break;
1418     case CharacterSemicolon:
1419         shift();
1420         token = SEMICOLON;
1421         break;
1422     case CharacterOpenBrace:
1423         tokenData->intValue = currentOffset();
1424         shift();
1425         token = OPENBRACE;
1426         break;
1427     case CharacterCloseBrace:
1428         tokenData->intValue = currentOffset();
1429         shift();
1430         token = CLOSEBRACE;
1431         break;
1432     case CharacterDot:
1433         shift();
1434         if (!isASCIIDigit(m_current)) {
1435             token = DOT;
1436             break;
1437         }
1438         goto inNumberAfterDecimalPoint;
1439     case CharacterZero:
1440         shift();
1441         if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
1442             parseHex(tokenData->doubleValue);
1443             token = NUMBER;
1444         } else {
1445             record8('0');
1446             if (isASCIIOctalDigit(m_current)) {
1447                 if (parseOctal(tokenData->doubleValue)) {
1448                     if (strictMode) {
1449                         m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
1450                         goto returnError;
1451                     }
1452                     token = NUMBER;
1453                 }
1454             }
1455         }
1456         // Fall through into CharacterNumber
1457     case CharacterNumber:
1458         if (LIKELY(token != NUMBER)) {
1459             if (!parseDecimal(tokenData->doubleValue)) {
1460                 if (m_current == '.') {
1461                     shift();
1462 inNumberAfterDecimalPoint:
1463                     parseNumberAfterDecimalPoint();
1464                 }
1465                 if ((m_current | 0x20) == 'e') {
1466                     if (!parseNumberAfterExponentIndicator()) {
1467                         m_lexErrorMessage = "Non-number found after exponent indicator";
1468                         goto returnError;
1469                     }
1470                 }
1471                 size_t parsedLength;
1472                 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
1473             }
1474             token = NUMBER;
1475         }
1476
1477         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
1478         if (UNLIKELY(m_current != -1 && isIdentStart(static_cast<T>(m_current)))) {
1479             m_lexErrorMessage = "At least one digit must occur after a decimal point";
1480             goto returnError;
1481         }
1482         m_buffer8.resize(0);
1483         break;
1484     case CharacterQuote:
1485         if (lexerFlags & LexerFlagsDontBuildStrings) {
1486             if (UNLIKELY(!parseString<false>(tokenData, strictMode)))
1487                 goto returnError;
1488         } else {
1489             if (UNLIKELY(!parseString<true>(tokenData, strictMode)))
1490                 goto returnError;
1491         }
1492         shift();
1493         token = STRING;
1494         break;
1495     case CharacterIdentifierStart:
1496         ASSERT(isIdentStart(static_cast<T>(m_current)));
1497         // Fall through into CharacterBackSlash.
1498     case CharacterBackSlash:
1499         if (lexerFlags & LexexFlagsDontBuildKeywords)
1500             token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
1501         else
1502             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
1503         break;
1504     case CharacterLineTerminator:
1505         ASSERT(isLineTerminator(static_cast<T>(m_current)));
1506         shiftLineTerminator();
1507         m_atLineStart = true;
1508         m_terminator = true;
1509         goto start;
1510     case CharacterInvalid:
1511         m_lexErrorMessage = getInvalidCharMessage();
1512         goto returnError;
1513     default:
1514         ASSERT_NOT_REACHED();
1515         m_lexErrorMessage = "Internal Error";
1516         goto returnError;
1517     }
1518
1519     m_atLineStart = false;
1520     goto returnToken;
1521
1522 inSingleLineComment:
1523     while (!isLineTerminator(static_cast<T>(m_current))) {
1524         if (UNLIKELY(m_current == -1))
1525             return EOFTOK;
1526         shift();
1527     }
1528     shiftLineTerminator();
1529     m_atLineStart = true;
1530     m_terminator = true;
1531     if (!lastTokenWasRestrKeyword())
1532         goto start;
1533
1534     token = SEMICOLON;
1535     // Fall through into returnToken.
1536
1537 returnToken:
1538     tokenInfo->line = m_lineNumber;
1539     tokenInfo->startOffset = startOffset;
1540     tokenInfo->endOffset = currentOffset();
1541     m_lastToken = token;
1542     return token;
1543
1544 returnError:
1545     m_error = true;
1546     tokenInfo->line = m_lineNumber;
1547     tokenInfo->startOffset = startOffset;
1548     tokenInfo->endOffset = currentOffset();
1549     return ERRORTOK;
1550 }
1551
1552 template <typename T>
1553 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
1554 {
1555     ASSERT(m_buffer16.isEmpty());
1556
1557     bool lastWasEscape = false;
1558     bool inBrackets = false;
1559
1560     if (patternPrefix) {
1561         ASSERT(!isLineTerminator(patternPrefix));
1562         ASSERT(patternPrefix != '/');
1563         ASSERT(patternPrefix != '[');
1564         record16(patternPrefix);
1565     }
1566
1567     while (true) {
1568         int current = m_current;
1569
1570         if (isLineTerminator(static_cast<T>(current)) || current == -1) {
1571             m_buffer16.resize(0);
1572             return false;
1573         }
1574
1575         shift();
1576
1577         if (current == '/' && !lastWasEscape && !inBrackets)
1578             break;
1579
1580         record16(current);
1581
1582         if (lastWasEscape) {
1583             lastWasEscape = false;
1584             continue;
1585         }
1586
1587         switch (current) {
1588         case '[':
1589             inBrackets = true;
1590             break;
1591         case ']':
1592             inBrackets = false;
1593             break;
1594         case '\\':
1595             lastWasEscape = true;
1596             break;
1597         }
1598     }
1599
1600     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1601     m_buffer16.resize(0);
1602
1603     while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) {
1604         record16(m_current);
1605         shift();
1606     }
1607
1608     flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1609     m_buffer16.resize(0);
1610
1611     return true;
1612 }
1613
1614 template <typename T>
1615 bool Lexer<T>::skipRegExp()
1616 {
1617     bool lastWasEscape = false;
1618     bool inBrackets = false;
1619
1620     while (true) {
1621         int current = m_current;
1622
1623         if (isLineTerminator(static_cast<T>(current)) || current == -1)
1624             return false;
1625
1626         shift();
1627
1628         if (current == '/' && !lastWasEscape && !inBrackets)
1629             break;
1630
1631         if (lastWasEscape) {
1632             lastWasEscape = false;
1633             continue;
1634         }
1635
1636         switch (current) {
1637         case '[':
1638             inBrackets = true;
1639             break;
1640         case ']':
1641             inBrackets = false;
1642             break;
1643         case '\\':
1644             lastWasEscape = true;
1645             break;
1646         }
1647     }
1648
1649     while (m_current != -1 && isIdentPart(static_cast<T>(m_current)))
1650         shift();
1651
1652     return true;
1653 }
1654
1655 template <typename T>
1656 void Lexer<T>::clear()
1657 {
1658     m_arena = 0;
1659
1660     Vector<LChar> newBuffer8;
1661     m_buffer8.swap(newBuffer8);
1662
1663     Vector<UChar> newBuffer16;
1664     m_buffer16.swap(newBuffer16);
1665
1666     m_isReparsing = false;
1667 }
1668
1669 template <typename T>
1670 SourceCode Lexer<T>::sourceCode(int openBrace, int closeBrace, int firstLine)
1671 {
1672     ASSERT((*m_source->provider()->data())[openBrace] == '{');
1673     ASSERT((*m_source->provider()->data())[closeBrace] == '}');
1674     return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1675 }
1676
1677 // Explicitly instantiate the two flavors of Lexer we need (LChar and UChar) here instead of putting most of this file in Lexer.h.
1678 template class Lexer<LChar>;
1679 template class Lexer<UChar>;
1680
1681 } // namespace JSC