Unreviewed, rolling out r234489.
[WebKit-https.git] / Source / JavaScriptCore / runtime / LiteralParser.cpp
1 /*
2  * Copyright (C) 2009-2017 Apple Inc. All rights reserved.
3  * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25  */
26
#include "config.h"
#include "LiteralParser.h"

#include "ButterflyInlines.h"
#include "CodeBlock.h"
#include "JSArray.h"
#include "JSString.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include "JSCInlines.h"
#include "StrongInlines.h"
#include <limits>
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
40
41 namespace JSC {
42
43 template <typename CharType>
44 static ALWAYS_INLINE bool isJSONWhiteSpace(const CharType& c)
45 {
46     // The JSON RFC 4627 defines a list of allowed characters to be considered
47     // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
48     return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
49 }
50
51 template <typename CharType>
52 bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
53 {
54     VM& vm = m_exec->vm();
55     auto scope = DECLARE_THROW_SCOPE(vm);
56     if (m_lexer.next() != TokIdentifier)
57         return false;
58     do {
59         Vector<JSONPPathEntry> path;
60         // Unguarded next to start off the lexer
61         Identifier name = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
62         JSONPPathEntry entry;
63         if (name == vm.propertyNames->varKeyword) {
64             if (m_lexer.next() != TokIdentifier)
65                 return false;
66             entry.m_type = JSONPPathEntryTypeDeclareVar;
67             entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
68             path.append(entry);
69         } else {
70             entry.m_type = JSONPPathEntryTypeDot;
71             entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
72             path.append(entry);
73         }
74         if (isLexerKeyword(entry.m_pathEntryName))
75             return false;
76         TokenType tokenType = m_lexer.next();
77         if (entry.m_type == JSONPPathEntryTypeDeclareVar && tokenType != TokAssign)
78             return false;
79         while (tokenType != TokAssign) {
80             switch (tokenType) {
81             case TokLBracket: {
82                 entry.m_type = JSONPPathEntryTypeLookup;
83                 if (m_lexer.next() != TokNumber)
84                     return false;
85                 double doubleIndex = m_lexer.currentToken()->numberToken;
86                 int index = (int)doubleIndex;
87                 if (index != doubleIndex || index < 0)
88                     return false;
89                 entry.m_pathIndex = index;
90                 if (m_lexer.next() != TokRBracket)
91                     return false;
92                 break;
93             }
94             case TokDot: {
95                 entry.m_type = JSONPPathEntryTypeDot;
96                 if (m_lexer.next() != TokIdentifier)
97                     return false;
98                 entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
99                 break;
100             }
101             case TokLParen: {
102                 if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
103                     return false;
104                 path.last().m_type = JSONPPathEntryTypeCall;
105                 entry = path.last();
106                 goto startJSON;
107             }
108             default:
109                 return false;
110             }
111             path.append(entry);
112             tokenType = m_lexer.next();
113         }
114     startJSON:
115         m_lexer.next();
116         results.append(JSONPData());
117         JSValue startParseExpressionValue = parse(StartParseExpression);
118         RETURN_IF_EXCEPTION(scope, false);
119         results.last().m_value.set(vm, startParseExpressionValue);
120         if (!results.last().m_value)
121             return false;
122         results.last().m_path.swap(path);
123         if (entry.m_type == JSONPPathEntryTypeCall) {
124             if (m_lexer.currentToken()->type != TokRParen)
125                 return false;
126             m_lexer.next();
127         }
128         if (m_lexer.currentToken()->type != TokSemi)
129             break;
130         m_lexer.next();
131     } while (m_lexer.currentToken()->type == TokIdentifier);
132     return m_lexer.currentToken()->type == TokEnd;
133 }
134     
135 template <typename CharType>
136 ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length)
137 {
138     if (!length)
139         return m_exec->vm().propertyNames->emptyIdentifier;
140     if (characters[0] >= MaximumCachableCharacter)
141         return Identifier::fromString(&m_exec->vm(), characters, length);
142
143     if (length == 1) {
144         if (!m_shortIdentifiers[characters[0]].isNull())
145             return m_shortIdentifiers[characters[0]];
146         m_shortIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
147         return m_shortIdentifiers[characters[0]];
148     }
149     if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
150         return m_recentIdentifiers[characters[0]];
151     m_recentIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
152     return m_recentIdentifiers[characters[0]];
153 }
154
155 template <typename CharType>
156 ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length)
157 {
158     if (!length)
159         return m_exec->vm().propertyNames->emptyIdentifier;
160     if (characters[0] >= MaximumCachableCharacter)
161         return Identifier::fromString(&m_exec->vm(), characters, length);
162
163     if (length == 1) {
164         if (!m_shortIdentifiers[characters[0]].isNull())
165             return m_shortIdentifiers[characters[0]];
166         m_shortIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
167         return m_shortIdentifiers[characters[0]];
168     }
169     if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
170         return m_recentIdentifiers[characters[0]];
171     m_recentIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
172     return m_recentIdentifiers[characters[0]];
173 }
174
175 // 256 Latin-1 codes
176 static constexpr const TokenType TokenTypesOfLatin1Characters[256] = {
177 /*   0 - Null               */ TokError,
178 /*   1 - Start of Heading   */ TokError,
179 /*   2 - Start of Text      */ TokError,
180 /*   3 - End of Text        */ TokError,
181 /*   4 - End of Transm.     */ TokError,
182 /*   5 - Enquiry            */ TokError,
183 /*   6 - Acknowledgment     */ TokError,
184 /*   7 - Bell               */ TokError,
185 /*   8 - Back Space         */ TokError,
186 /*   9 - Horizontal Tab     */ TokError,
187 /*  10 - Line Feed          */ TokError,
188 /*  11 - Vertical Tab       */ TokError,
189 /*  12 - Form Feed          */ TokError,
190 /*  13 - Carriage Return    */ TokError,
191 /*  14 - Shift Out          */ TokError,
192 /*  15 - Shift In           */ TokError,
193 /*  16 - Data Line Escape   */ TokError,
194 /*  17 - Device Control 1   */ TokError,
195 /*  18 - Device Control 2   */ TokError,
196 /*  19 - Device Control 3   */ TokError,
197 /*  20 - Device Control 4   */ TokError,
198 /*  21 - Negative Ack.      */ TokError,
199 /*  22 - Synchronous Idle   */ TokError,
200 /*  23 - End of Transmit    */ TokError,
201 /*  24 - Cancel             */ TokError,
202 /*  25 - End of Medium      */ TokError,
203 /*  26 - Substitute         */ TokError,
204 /*  27 - Escape             */ TokError,
205 /*  28 - File Separator     */ TokError,
206 /*  29 - Group Separator    */ TokError,
207 /*  30 - Record Separator   */ TokError,
208 /*  31 - Unit Separator     */ TokError,
209 /*  32 - Space              */ TokError,
210 /*  33 - !                  */ TokError,
211 /*  34 - "                  */ TokString,
212 /*  35 - #                  */ TokError,
213 /*  36 - $                  */ TokIdentifier,
214 /*  37 - %                  */ TokError,
215 /*  38 - &                  */ TokError,
216 /*  39 - '                  */ TokString,
217 /*  40 - (                  */ TokLParen,
218 /*  41 - )                  */ TokRParen,
219 /*  42 - *                  */ TokError,
220 /*  43 - +                  */ TokError,
221 /*  44 - ,                  */ TokComma,
222 /*  45 - -                  */ TokNumber,
223 /*  46 - .                  */ TokDot,
224 /*  47 - /                  */ TokError,
225 /*  48 - 0                  */ TokNumber,
226 /*  49 - 1                  */ TokNumber,
227 /*  50 - 2                  */ TokNumber,
228 /*  51 - 3                  */ TokNumber,
229 /*  52 - 4                  */ TokNumber,
230 /*  53 - 5                  */ TokNumber,
231 /*  54 - 6                  */ TokNumber,
232 /*  55 - 7                  */ TokNumber,
233 /*  56 - 8                  */ TokNumber,
234 /*  57 - 9                  */ TokNumber,
235 /*  58 - :                  */ TokColon,
236 /*  59 - ;                  */ TokSemi,
237 /*  60 - <                  */ TokError,
238 /*  61 - =                  */ TokAssign,
239 /*  62 - >                  */ TokError,
240 /*  63 - ?                  */ TokError,
241 /*  64 - @                  */ TokError,
242 /*  65 - A                  */ TokIdentifier,
243 /*  66 - B                  */ TokIdentifier,
244 /*  67 - C                  */ TokIdentifier,
245 /*  68 - D                  */ TokIdentifier,
246 /*  69 - E                  */ TokIdentifier,
247 /*  70 - F                  */ TokIdentifier,
248 /*  71 - G                  */ TokIdentifier,
249 /*  72 - H                  */ TokIdentifier,
250 /*  73 - I                  */ TokIdentifier,
251 /*  74 - J                  */ TokIdentifier,
252 /*  75 - K                  */ TokIdentifier,
253 /*  76 - L                  */ TokIdentifier,
254 /*  77 - M                  */ TokIdentifier,
255 /*  78 - N                  */ TokIdentifier,
256 /*  79 - O                  */ TokIdentifier,
257 /*  80 - P                  */ TokIdentifier,
258 /*  81 - Q                  */ TokIdentifier,
259 /*  82 - R                  */ TokIdentifier,
260 /*  83 - S                  */ TokIdentifier,
261 /*  84 - T                  */ TokIdentifier,
262 /*  85 - U                  */ TokIdentifier,
263 /*  86 - V                  */ TokIdentifier,
264 /*  87 - W                  */ TokIdentifier,
265 /*  88 - X                  */ TokIdentifier,
266 /*  89 - Y                  */ TokIdentifier,
267 /*  90 - Z                  */ TokIdentifier,
268 /*  91 - [                  */ TokLBracket,
269 /*  92 - \                  */ TokError,
270 /*  93 - ]                  */ TokRBracket,
271 /*  94 - ^                  */ TokError,
272 /*  95 - _                  */ TokIdentifier,
273 /*  96 - `                  */ TokError,
274 /*  97 - a                  */ TokIdentifier,
275 /*  98 - b                  */ TokIdentifier,
276 /*  99 - c                  */ TokIdentifier,
277 /* 100 - d                  */ TokIdentifier,
278 /* 101 - e                  */ TokIdentifier,
279 /* 102 - f                  */ TokIdentifier,
280 /* 103 - g                  */ TokIdentifier,
281 /* 104 - h                  */ TokIdentifier,
282 /* 105 - i                  */ TokIdentifier,
283 /* 106 - j                  */ TokIdentifier,
284 /* 107 - k                  */ TokIdentifier,
285 /* 108 - l                  */ TokIdentifier,
286 /* 109 - m                  */ TokIdentifier,
287 /* 110 - n                  */ TokIdentifier,
288 /* 111 - o                  */ TokIdentifier,
289 /* 112 - p                  */ TokIdentifier,
290 /* 113 - q                  */ TokIdentifier,
291 /* 114 - r                  */ TokIdentifier,
292 /* 115 - s                  */ TokIdentifier,
293 /* 116 - t                  */ TokIdentifier,
294 /* 117 - u                  */ TokIdentifier,
295 /* 118 - v                  */ TokIdentifier,
296 /* 119 - w                  */ TokIdentifier,
297 /* 120 - x                  */ TokIdentifier,
298 /* 121 - y                  */ TokIdentifier,
299 /* 122 - z                  */ TokIdentifier,
300 /* 123 - {                  */ TokLBrace,
301 /* 124 - |                  */ TokError,
302 /* 125 - }                  */ TokRBrace,
303 /* 126 - ~                  */ TokError,
304 /* 127 - Delete             */ TokError,
305 /* 128 - Cc category        */ TokError,
306 /* 129 - Cc category        */ TokError,
307 /* 130 - Cc category        */ TokError,
308 /* 131 - Cc category        */ TokError,
309 /* 132 - Cc category        */ TokError,
310 /* 133 - Cc category        */ TokError,
311 /* 134 - Cc category        */ TokError,
312 /* 135 - Cc category        */ TokError,
313 /* 136 - Cc category        */ TokError,
314 /* 137 - Cc category        */ TokError,
315 /* 138 - Cc category        */ TokError,
316 /* 139 - Cc category        */ TokError,
317 /* 140 - Cc category        */ TokError,
318 /* 141 - Cc category        */ TokError,
319 /* 142 - Cc category        */ TokError,
320 /* 143 - Cc category        */ TokError,
321 /* 144 - Cc category        */ TokError,
322 /* 145 - Cc category        */ TokError,
323 /* 146 - Cc category        */ TokError,
324 /* 147 - Cc category        */ TokError,
325 /* 148 - Cc category        */ TokError,
326 /* 149 - Cc category        */ TokError,
327 /* 150 - Cc category        */ TokError,
328 /* 151 - Cc category        */ TokError,
329 /* 152 - Cc category        */ TokError,
330 /* 153 - Cc category        */ TokError,
331 /* 154 - Cc category        */ TokError,
332 /* 155 - Cc category        */ TokError,
333 /* 156 - Cc category        */ TokError,
334 /* 157 - Cc category        */ TokError,
335 /* 158 - Cc category        */ TokError,
336 /* 159 - Cc category        */ TokError,
337 /* 160 - Zs category (nbsp) */ TokError,
338 /* 161 - Po category        */ TokError,
339 /* 162 - Sc category        */ TokError,
340 /* 163 - Sc category        */ TokError,
341 /* 164 - Sc category        */ TokError,
342 /* 165 - Sc category        */ TokError,
343 /* 166 - So category        */ TokError,
344 /* 167 - So category        */ TokError,
345 /* 168 - Sk category        */ TokError,
346 /* 169 - So category        */ TokError,
347 /* 170 - Ll category        */ TokError,
348 /* 171 - Pi category        */ TokError,
349 /* 172 - Sm category        */ TokError,
350 /* 173 - Cf category        */ TokError,
351 /* 174 - So category        */ TokError,
352 /* 175 - Sk category        */ TokError,
353 /* 176 - So category        */ TokError,
354 /* 177 - Sm category        */ TokError,
355 /* 178 - No category        */ TokError,
356 /* 179 - No category        */ TokError,
357 /* 180 - Sk category        */ TokError,
358 /* 181 - Ll category        */ TokError,
359 /* 182 - So category        */ TokError,
360 /* 183 - Po category        */ TokError,
361 /* 184 - Sk category        */ TokError,
362 /* 185 - No category        */ TokError,
363 /* 186 - Ll category        */ TokError,
364 /* 187 - Pf category        */ TokError,
365 /* 188 - No category        */ TokError,
366 /* 189 - No category        */ TokError,
367 /* 190 - No category        */ TokError,
368 /* 191 - Po category        */ TokError,
369 /* 192 - Lu category        */ TokError,
370 /* 193 - Lu category        */ TokError,
371 /* 194 - Lu category        */ TokError,
372 /* 195 - Lu category        */ TokError,
373 /* 196 - Lu category        */ TokError,
374 /* 197 - Lu category        */ TokError,
375 /* 198 - Lu category        */ TokError,
376 /* 199 - Lu category        */ TokError,
377 /* 200 - Lu category        */ TokError,
378 /* 201 - Lu category        */ TokError,
379 /* 202 - Lu category        */ TokError,
380 /* 203 - Lu category        */ TokError,
381 /* 204 - Lu category        */ TokError,
382 /* 205 - Lu category        */ TokError,
383 /* 206 - Lu category        */ TokError,
384 /* 207 - Lu category        */ TokError,
385 /* 208 - Lu category        */ TokError,
386 /* 209 - Lu category        */ TokError,
387 /* 210 - Lu category        */ TokError,
388 /* 211 - Lu category        */ TokError,
389 /* 212 - Lu category        */ TokError,
390 /* 213 - Lu category        */ TokError,
391 /* 214 - Lu category        */ TokError,
392 /* 215 - Sm category        */ TokError,
393 /* 216 - Lu category        */ TokError,
394 /* 217 - Lu category        */ TokError,
395 /* 218 - Lu category        */ TokError,
396 /* 219 - Lu category        */ TokError,
397 /* 220 - Lu category        */ TokError,
398 /* 221 - Lu category        */ TokError,
399 /* 222 - Lu category        */ TokError,
400 /* 223 - Ll category        */ TokError,
401 /* 224 - Ll category        */ TokError,
402 /* 225 - Ll category        */ TokError,
403 /* 226 - Ll category        */ TokError,
404 /* 227 - Ll category        */ TokError,
405 /* 228 - Ll category        */ TokError,
406 /* 229 - Ll category        */ TokError,
407 /* 230 - Ll category        */ TokError,
408 /* 231 - Ll category        */ TokError,
409 /* 232 - Ll category        */ TokError,
410 /* 233 - Ll category        */ TokError,
411 /* 234 - Ll category        */ TokError,
412 /* 235 - Ll category        */ TokError,
413 /* 236 - Ll category        */ TokError,
414 /* 237 - Ll category        */ TokError,
415 /* 238 - Ll category        */ TokError,
416 /* 239 - Ll category        */ TokError,
417 /* 240 - Ll category        */ TokError,
418 /* 241 - Ll category        */ TokError,
419 /* 242 - Ll category        */ TokError,
420 /* 243 - Ll category        */ TokError,
421 /* 244 - Ll category        */ TokError,
422 /* 245 - Ll category        */ TokError,
423 /* 246 - Ll category        */ TokError,
424 /* 247 - Sm category        */ TokError,
425 /* 248 - Ll category        */ TokError,
426 /* 249 - Ll category        */ TokError,
427 /* 250 - Ll category        */ TokError,
428 /* 251 - Ll category        */ TokError,
429 /* 252 - Ll category        */ TokError,
430 /* 253 - Ll category        */ TokError,
431 /* 254 - Ll category        */ TokError,
432 /* 255 - Ll category        */ TokError
433 };
434
435 template <typename CharType>
436 ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
437 {
438 #if !ASSERT_DISABLED
439     m_currentTokenID++;
440 #endif
441
442     while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
443         ++m_ptr;
444
445     ASSERT(m_ptr <= m_end);
446     if (m_ptr == m_end) {
447         token.type = TokEnd;
448         token.start = token.end = m_ptr;
449         return TokEnd;
450     }
451     ASSERT(m_ptr < m_end);
452     token.type = TokError;
453     token.start = m_ptr;
454     CharType character = *m_ptr;
455     if (LIKELY(character < 256)) {
456         TokenType tokenType = TokenTypesOfLatin1Characters[character];
457         switch (tokenType) {
458         case TokString:
459             if (character == '\'' && m_mode == StrictJSON) {
460                 m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s;
461                 return TokError;
462             }
463             return lexString(token, character);
464
465         case TokIdentifier: {
466             switch (character) {
467             case 't':
468                 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
469                     m_ptr += 4;
470                     token.type = TokTrue;
471                     token.end = m_ptr;
472                     return TokTrue;
473                 }
474                 break;
475             case 'f':
476                 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
477                     m_ptr += 5;
478                     token.type = TokFalse;
479                     token.end = m_ptr;
480                     return TokFalse;
481                 }
482                 break;
483             case 'n':
484                 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
485                     m_ptr += 4;
486                     token.type = TokNull;
487                     token.end = m_ptr;
488                     return TokNull;
489                 }
490                 break;
491             }
492             return lexIdentifier(token);
493         }
494
495         case TokNumber:
496             return lexNumber(token);
497
498         case TokError:
499             break;
500
501         default:
502             ASSERT(tokenType == TokLBracket
503                 || tokenType == TokRBracket
504                 || tokenType == TokLBrace
505                 || tokenType == TokRBrace
506                 || tokenType == TokColon
507                 || tokenType == TokLParen
508                 || tokenType == TokRParen
509                 || tokenType == TokComma
510                 || tokenType == TokDot
511                 || tokenType == TokAssign
512                 || tokenType == TokSemi);
513             token.type = tokenType;
514             token.end = ++m_ptr;
515             return tokenType;
516         }
517     }
518     m_lexErrorMessage = String::format("Unrecognized token '%c'", *m_ptr);
519     return TokError;
520 }
521
522 template <>
523 ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token)
524 {
525     while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$'))
526         m_ptr++;
527     token.stringIs8Bit = 1;
528     token.stringToken8 = token.start;
529     token.stringLength = m_ptr - token.start;
530     token.type = TokIdentifier;
531     token.end = m_ptr;
532     return TokIdentifier;
533 }
534
535 template <>
536 ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token)
537 {
538     while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$' || *m_ptr == 0x200C || *m_ptr == 0x200D))
539         m_ptr++;
540     token.stringIs8Bit = 0;
541     token.stringToken16 = token.start;
542     token.stringLength = m_ptr - token.start;
543     token.type = TokIdentifier;
544     token.end = m_ptr;
545     return TokIdentifier;
546 }
547
548 template <typename CharType>
549 TokenType LiteralParser<CharType>::Lexer::next()
550 {
551     TokenType result = lex(m_currentToken);
552     ASSERT(m_currentToken.type == result);
553     return result;
554 }
555
556 template <>
557 ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string)
558 {
559     token.stringIs8Bit = 1;
560     token.stringToken8 = string;
561 }
562
563 template <>
564 ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string)
565 {
566     token.stringIs8Bit = 0;
567     token.stringToken16 = string;
568 }
569
// Template selector for isSafeStringCharacter().
enum class SafeStringCharacterSet {
    Strict,
    NonStrict
};
571
572 template <SafeStringCharacterSet set>
573 static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator)
574 {
575     return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
576 }
577
578 template <SafeStringCharacterSet set>
579 static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator)
580 {
581     return (c >= ' ' && (set == SafeStringCharacterSet::Strict || c <= 0xff) && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
582 }
583
584 template <typename CharType>
585 ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token, CharType terminator)
586 {
587     ++m_ptr;
588     const CharType* runStart = m_ptr;
589
590     if (m_mode == StrictJSON) {
591         while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
592             ++m_ptr;
593     } else {
594         while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
595             ++m_ptr;
596     }
597
598     if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) {
599         setParserTokenString<CharType>(token, runStart);
600         token.stringLength = m_ptr - runStart;
601         token.type = TokString;
602         token.end = ++m_ptr;
603         return TokString;
604     }
605     return lexStringSlow(token, runStart, terminator);
606 }
607
608 template <typename CharType>
609 TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart, CharType terminator)
610 {
611     m_builder.clear();
612     goto slowPathBegin;
613     do {
614         runStart = m_ptr;
615         if (m_mode == StrictJSON) {
616             while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
617                 ++m_ptr;
618         } else {
619             while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
620                 ++m_ptr;
621         }
622
623         if (!m_builder.isEmpty())
624             m_builder.append(runStart, m_ptr - runStart);
625
626 slowPathBegin:
627         if ((m_mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
628             if (m_builder.isEmpty() && runStart < m_ptr)
629                 m_builder.append(runStart, m_ptr - runStart);
630             ++m_ptr;
631             if (m_ptr >= m_end) {
632                 m_lexErrorMessage = "Unterminated string"_s;
633                 return TokError;
634             }
635             switch (*m_ptr) {
636                 case '"':
637                     m_builder.append('"');
638                     m_ptr++;
639                     break;
640                 case '\\':
641                     m_builder.append('\\');
642                     m_ptr++;
643                     break;
644                 case '/':
645                     m_builder.append('/');
646                     m_ptr++;
647                     break;
648                 case 'b':
649                     m_builder.append('\b');
650                     m_ptr++;
651                     break;
652                 case 'f':
653                     m_builder.append('\f');
654                     m_ptr++;
655                     break;
656                 case 'n':
657                     m_builder.append('\n');
658                     m_ptr++;
659                     break;
660                 case 'r':
661                     m_builder.append('\r');
662                     m_ptr++;
663                     break;
664                 case 't':
665                     m_builder.append('\t');
666                     m_ptr++;
667                     break;
668
669                 case 'u':
670                     if ((m_end - m_ptr) < 5) { 
671                         m_lexErrorMessage = "\\u must be followed by 4 hex digits"_s;
672                         return TokError;
673                     } // uNNNN == 5 characters
674                     for (int i = 1; i < 5; i++) {
675                         if (!isASCIIHexDigit(m_ptr[i])) {
676                             m_lexErrorMessage = String::format("\"\\%s\" is not a valid unicode escape", String(m_ptr, 5).ascii().data());
677                             return TokError;
678                         }
679                     }
680                     m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
681                     m_ptr += 5;
682                     break;
683
684                 default:
685                     if (*m_ptr == '\'' && m_mode != StrictJSON) {
686                         m_builder.append('\'');
687                         m_ptr++;
688                         break;
689                     }
690                     m_lexErrorMessage = String::format("Invalid escape character %c", *m_ptr);
691                     return TokError;
692             }
693         }
694     } while ((m_mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);
695
696     if (m_ptr >= m_end || *m_ptr != terminator) {
697         m_lexErrorMessage = "Unterminated string"_s;
698         return TokError;
699     }
700
701     if (m_builder.isEmpty()) {
702         setParserTokenString<CharType>(token, runStart);
703         token.stringLength = m_ptr - runStart;
704     } else {
705         if (m_builder.is8Bit()) {
706             token.stringIs8Bit = 1;
707             token.stringToken8 = m_builder.characters8();
708         } else {
709             token.stringIs8Bit = 0;
710             token.stringToken16 = m_builder.characters16();
711         }
712         token.stringLength = m_builder.length();
713     }
714     token.type = TokString;
715     token.end = ++m_ptr;
716     return TokString;
717 }
718
719 template <typename CharType>
720 TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token)
721 {
722     // ES5 and json.org define numbers as
723     // number
724     //     int
725     //     int frac? exp?
726     //
727     // int
728     //     -? 0
729     //     -? digit1-9 digits?
730     //
731     // digits
732     //     digit digits?
733     //
734     // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
735
736     if (m_ptr < m_end && *m_ptr == '-') // -?
737         ++m_ptr;
738     
739     // (0 | [1-9][0-9]*)
740     if (m_ptr < m_end && *m_ptr == '0') // 0
741         ++m_ptr;
742     else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
743         ++m_ptr;
744         // [0-9]*
745         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
746             ++m_ptr;
747     } else {
748         m_lexErrorMessage = "Invalid number"_s;
749         return TokError;
750     }
751
752     // ('.' [0-9]+)?
753     const int NumberOfDigitsForSafeInt32 = 9;  // The numbers from -99999999 to 999999999 are always in range of Int32.
754     if (m_ptr < m_end && *m_ptr == '.') {
755         ++m_ptr;
756         // [0-9]+
757         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
758             m_lexErrorMessage = "Invalid digits after decimal point"_s;
759             return TokError;
760         }
761
762         ++m_ptr;
763         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
764             ++m_ptr;
765     } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) <= NumberOfDigitsForSafeInt32) {
766         int32_t result = 0;
767         token.type = TokNumber;
768         token.end = m_ptr;
769         const CharType* digit = token.start;
770         bool negative = false;
771         if (*digit == '-') {
772             negative = true;
773             digit++;
774         }
775         
776         ASSERT((m_ptr - digit) <= NumberOfDigitsForSafeInt32);
777         while (digit < m_ptr)
778             result = result * 10 + (*digit++) - '0';
779
780         if (!negative)
781             token.numberToken = result;
782         else {
783             if (!result)
784                 token.numberToken = -0.0;
785             else
786                 token.numberToken = -result;
787         }
788         return TokNumber;
789     }
790
791     //  ([eE][+-]? [0-9]+)?
792     if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
793         ++m_ptr;
794
795         // [-+]?
796         if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
797             ++m_ptr;
798
799         // [0-9]+
800         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
801             m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"_s;
802             return TokError;
803         }
804         
805         ++m_ptr;
806         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
807             ++m_ptr;
808     }
809     
810     token.type = TokNumber;
811     token.end = m_ptr;
812     size_t parsedLength;
813     token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength);
814     return TokNumber;
815 }
816
817 template <typename CharType>
818 JSValue LiteralParser<CharType>::parse(ParserState initialState)
819 {
820     VM& vm = m_exec->vm();
821     auto scope = DECLARE_THROW_SCOPE(vm);
822     ParserState state = initialState;
823     MarkedArgumentBuffer objectStack;
824     JSValue lastValue;
825     Vector<ParserState, 16, UnsafeVectorOverflow> stateStack;
826     Vector<Identifier, 16, UnsafeVectorOverflow> identifierStack;
827     HashSet<JSObject*> visitedUnderscoreProto;
828     while (1) {
829         switch(state) {
830             startParseArray:
831             case StartParseArray: {
832                 JSArray* array = constructEmptyArray(m_exec, 0);
833                 RETURN_IF_EXCEPTION(scope, JSValue());
834                 objectStack.appendWithCrashOnOverflow(array);
835             }
836             doParseArrayStartExpression:
837             FALLTHROUGH;
838             case DoParseArrayStartExpression: {
839                 TokenType lastToken = m_lexer.currentToken()->type;
840                 if (m_lexer.next() == TokRBracket) {
841                     if (lastToken == TokComma) {
842                         m_parseErrorMessage = "Unexpected comma at the end of array expression"_s;
843                         return JSValue();
844                     }
845                     m_lexer.next();
846                     lastValue = objectStack.takeLast();
847                     break;
848                 }
849
850                 stateStack.append(DoParseArrayEndExpression);
851                 goto startParseExpression;
852             }
853             case DoParseArrayEndExpression: {
854                 JSArray* array = asArray(objectStack.last());
855                 array->putDirectIndex(m_exec, array->length(), lastValue);
856                 RETURN_IF_EXCEPTION(scope, JSValue());
857
858                 if (m_lexer.currentToken()->type == TokComma)
859                     goto doParseArrayStartExpression;
860
861                 if (m_lexer.currentToken()->type != TokRBracket) {
862                     m_parseErrorMessage = "Expected ']'"_s;
863                     return JSValue();
864                 }
865                 
866                 m_lexer.next();
867                 lastValue = objectStack.takeLast();
868                 break;
869             }
870             startParseObject:
871             case StartParseObject: {
872                 JSObject* object = constructEmptyObject(m_exec);
873                 objectStack.appendWithCrashOnOverflow(object);
874
875                 TokenType type = m_lexer.next();
876                 if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
877                     typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
878                     if (identifierToken->stringIs8Bit)
879                         identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
880                     else
881                         identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));
882
883                     // Check for colon
884                     if (m_lexer.next() != TokColon) {
885                         m_parseErrorMessage = "Expected ':' before value in object property definition"_s;
886                         return JSValue();
887                     }
888                     
889                     m_lexer.next();
890                     stateStack.append(DoParseObjectEndExpression);
891                     goto startParseExpression;
892                 }
893                 if (type != TokRBrace)  {
894                     m_parseErrorMessage = "Expected '}'"_s;
895                     return JSValue();
896                 }
897                 m_lexer.next();
898                 lastValue = objectStack.takeLast();
899                 break;
900             }
901             doParseObjectStartExpression:
902             case DoParseObjectStartExpression: {
903                 TokenType type = m_lexer.next();
904                 if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) {
905                     m_parseErrorMessage = "Property name must be a string literal"_s;
906                     return JSValue();
907                 }
908                 typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
909                 if (identifierToken->stringIs8Bit)
910                     identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
911                 else
912                     identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));
913
914                 // Check for colon
915                 if (m_lexer.next() != TokColon) {
916                     m_parseErrorMessage = "Expected ':'"_s;
917                     return JSValue();
918                 }
919
920                 m_lexer.next();
921                 stateStack.append(DoParseObjectEndExpression);
922                 goto startParseExpression;
923             }
924             case DoParseObjectEndExpression:
925             {
926                 JSObject* object = asObject(objectStack.last());
927                 Identifier ident = identifierStack.takeLast();
928                 if (m_mode != StrictJSON && ident == vm.propertyNames->underscoreProto) {
929                     if (!visitedUnderscoreProto.add(object).isNewEntry) {
930                         m_parseErrorMessage = "Attempted to redefine __proto__ property"_s;
931                         return JSValue();
932                     }
933                     CodeBlock* codeBlock = m_exec->codeBlock();
934                     PutPropertySlot slot(object, codeBlock ? codeBlock->isStrictMode() : false);
935                     objectStack.last().put(m_exec, ident, lastValue, slot);
936                 } else {
937                     if (std::optional<uint32_t> index = parseIndex(ident))
938                         object->putDirectIndex(m_exec, index.value(), lastValue);
939                     else
940                         object->putDirect(vm, ident, lastValue);
941                 }
942                 RETURN_IF_EXCEPTION(scope, JSValue());
943                 if (m_lexer.currentToken()->type == TokComma)
944                     goto doParseObjectStartExpression;
945                 if (m_lexer.currentToken()->type != TokRBrace) {
946                     m_parseErrorMessage = "Expected '}'"_s;
947                     return JSValue();
948                 }
949                 m_lexer.next();
950                 lastValue = objectStack.takeLast();
951                 break;
952             }
953             startParseExpression:
954             case StartParseExpression: {
955                 switch (m_lexer.currentToken()->type) {
956                     case TokLBracket:
957                         goto startParseArray;
958                     case TokLBrace:
959                         goto startParseObject;
960                     case TokString: {
961                         typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken();
962                         if (stringToken->stringIs8Bit)
963                             lastValue = jsString(m_exec, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string());
964                         else
965                             lastValue = jsString(m_exec, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string());
966                         m_lexer.next();
967                         break;
968                     }
969                     case TokNumber: {
970                         typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken();
971                         lastValue = jsNumber(numberToken->numberToken);
972                         m_lexer.next();
973                         break;
974                     }
975                     case TokNull:
976                         m_lexer.next();
977                         lastValue = jsNull();
978                         break;
979
980                     case TokTrue:
981                         m_lexer.next();
982                         lastValue = jsBoolean(true);
983                         break;
984
985                     case TokFalse:
986                         m_lexer.next();
987                         lastValue = jsBoolean(false);
988                         break;
989                     case TokRBracket:
990                         m_parseErrorMessage = "Unexpected token ']'"_s;
991                         return JSValue();
992                     case TokRBrace:
993                         m_parseErrorMessage = "Unexpected token '}'"_s;
994                         return JSValue();
995                     case TokIdentifier: {
996                         typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken();
997                         if (token->stringIs8Bit)
998                             m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", String(token->stringToken8, token->stringLength).ascii().data());
999                         else
1000                             m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", String(token->stringToken16, token->stringLength).ascii().data());
1001                         return JSValue();
1002                     }
1003                     case TokColon:
1004                         m_parseErrorMessage = "Unexpected token ':'"_s;
1005                         return JSValue();
1006                     case TokLParen:
1007                         m_parseErrorMessage = "Unexpected token '('"_s;
1008                         return JSValue();
1009                     case TokRParen:
1010                         m_parseErrorMessage = "Unexpected token ')'"_s;
1011                         return JSValue();
1012                     case TokComma:
1013                         m_parseErrorMessage = "Unexpected token ','"_s;
1014                         return JSValue();
1015                     case TokDot:
1016                         m_parseErrorMessage = "Unexpected token '.'"_s;
1017                         return JSValue();
1018                     case TokAssign:
1019                         m_parseErrorMessage = "Unexpected token '='"_s;
1020                         return JSValue();
1021                     case TokSemi:
1022                         m_parseErrorMessage = "Unexpected token ';'"_s;
1023                         return JSValue();
1024                     case TokEnd:
1025                         m_parseErrorMessage = "Unexpected EOF"_s;
1026                         return JSValue();
1027                     case TokError:
1028                     default:
1029                         // Error
1030                         m_parseErrorMessage = "Could not parse value expression"_s;
1031                         return JSValue();
1032                 }
1033                 break;
1034             }
1035             case StartParseStatement: {
1036                 switch (m_lexer.currentToken()->type) {
1037                     case TokLBracket:
1038                     case TokNumber:
1039                     case TokString:
1040                         goto startParseExpression;
1041
1042                     case TokLParen: {
1043                         m_lexer.next();
1044                         stateStack.append(StartParseStatementEndStatement);
1045                         goto startParseExpression;
1046                     }
1047                     case TokRBracket:
1048                         m_parseErrorMessage = "Unexpected token ']'"_s;
1049                         return JSValue();
1050                     case TokLBrace:
1051                         m_parseErrorMessage = "Unexpected token '{'"_s;
1052                         return JSValue();
1053                     case TokRBrace:
1054                         m_parseErrorMessage = "Unexpected token '}'"_s;
1055                         return JSValue();
1056                     case TokIdentifier:
1057                         m_parseErrorMessage = "Unexpected identifier"_s;
1058                         return JSValue();
1059                     case TokColon:
1060                         m_parseErrorMessage = "Unexpected token ':'"_s;
1061                         return JSValue();
1062                     case TokRParen:
1063                         m_parseErrorMessage = "Unexpected token ')'"_s;
1064                         return JSValue();
1065                     case TokComma:
1066                         m_parseErrorMessage = "Unexpected token ','"_s;
1067                         return JSValue();
1068                     case TokTrue:
1069                         m_parseErrorMessage = "Unexpected token 'true'"_s;
1070                         return JSValue();
1071                     case TokFalse:
1072                         m_parseErrorMessage = "Unexpected token 'false'"_s;
1073                         return JSValue();
1074                     case TokNull:
1075                         m_parseErrorMessage = "Unexpected token 'null'"_s;
1076                         return JSValue();
1077                     case TokEnd:
1078                         m_parseErrorMessage = "Unexpected EOF"_s;
1079                         return JSValue();
1080                     case TokDot:
1081                         m_parseErrorMessage = "Unexpected token '.'"_s;
1082                         return JSValue();
1083                     case TokAssign:
1084                         m_parseErrorMessage = "Unexpected token '='"_s;
1085                         return JSValue();
1086                     case TokSemi:
1087                         m_parseErrorMessage = "Unexpected token ';'"_s;
1088                         return JSValue();
1089                     case TokError:
1090                     default:
1091                         m_parseErrorMessage = "Could not parse statement"_s;
1092                         return JSValue();
1093                 }
1094             }
1095             case StartParseStatementEndStatement: {
1096                 ASSERT(stateStack.isEmpty());
1097                 if (m_lexer.currentToken()->type != TokRParen)
1098                     return JSValue();
1099                 if (m_lexer.next() == TokEnd)
1100                     return lastValue;
1101                 m_parseErrorMessage = "Unexpected content at end of JSON literal"_s;
1102                 return JSValue();
1103             }
1104             default:
1105                 RELEASE_ASSERT_NOT_REACHED();
1106         }
1107         if (stateStack.isEmpty())
1108             return lastValue;
1109         state = stateStack.takeLast();
1110         continue;
1111     }
1112 }
1113
1114 // Explicitly instantiate the two flavors of LiteralParser we need (LChar and UChar) so the template member definitions can stay in this file instead of putting most of it in LiteralParser.h
1115 template class LiteralParser<LChar>;
1116 template class LiteralParser<UChar>;
1117
1118 }