2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
27 #include "ParserArena.h"
28 #include "ParserTokens.h"
29 #include "SourceCode.h"
30 #include <wtf/ASCIICType.h>
31 #include <wtf/AlwaysInline.h>
32 #include <wtf/SegmentedVector.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
40 bool isKeyword(const Identifier& ident) const
42 return m_keywordTable.entry(m_globalData, ident);
45 const HashEntry* getKeyword(const Identifier& ident) const
47 return m_keywordTable.entry(m_globalData, ident);
// Releases the keyword table's storage via HashTable::deleteTable().
// NOTE(review): the header of the enclosing function is not part of this
// excerpt (presumably the Keywords destructor) -- confirm against the full file.
52 m_keywordTable.deleteTable();
// JSGlobalData is a friend, and the constructor below takes a JSGlobalData*.
// NOTE(review): the access specifiers are not visible in this excerpt;
// presumably only JSGlobalData is meant to construct Keywords -- confirm.
56 friend class JSGlobalData;
58 Keywords(JSGlobalData*);
// Passed as the first argument of every m_keywordTable.entry() lookup.
60 JSGlobalData* m_globalData;
// Table queried by isKeyword() and getKeyword().
61 const HashTable m_keywordTable;
// Lexer flag values. Each one is a distinct bit (1, 2, 4); lexExpectIdentifier
// tests them with bitwise AND on an unsigned flags argument.
// lexExpectIdentifier asserts that this flag is set by its caller.
65 LexerFlagsIgnoreReservedWords = 1,
66 LexerFlagsDontBuildStrings = 2,
// When set, lexExpectIdentifier stores 0 in tokenData->ident instead of
// building an Identifier.
// NOTE(review): "Lexex" looks like a misspelling of "Lexer", but
// lexExpectIdentifier refers to the flag by this exact spelling, so any rename
// has to update every use at once.
67 LexexFlagsDontBuildKeywords = 4
// Class-level WTF macros applied to Lexer.
// NOTE(review): judging by their names they forbid copying and opt the class
// into WTF's fast allocator; the macro definitions are not part of this
// excerpt -- confirm in the WTF headers.
74 WTF_MAKE_NONCOPYABLE(Lexer);
75 WTF_MAKE_FAST_ALLOCATED;
81 // Character manipulation functions.
// All four are static, so they work without a Lexer instance.
// isWhiteSpace and isLineTerminator are defined separately for Lexer<LChar>
// and Lexer<UChar> later in this file.
82 static bool isWhiteSpace(T character);
83 static bool isLineTerminator(T character);
// Combines two hexadecimal digit characters into one byte; c1 supplies the
// high nibble and c2 the low nibble.
84 static unsigned char convertHex(int c1, int c2);
// Combines four hexadecimal digit characters into one UChar; c1/c2 form the
// high byte and c3/c4 the low byte.
85 static UChar convertUnicode(int c1, int c2, int c3, int c4);
87 // Functions to set up parsing.
// Defined outside this excerpt.
// NOTE(review): presumably binds the lexer to the given source and arena -- confirm.
88 void setCode(const SourceCode&, ParserArena*);
// Sets m_isReparsing to true; nothing visible in this excerpt resets it.
89 void setIsReparsing() { m_isReparsing = true; }
// Returns the current value of m_isReparsing.
90 bool isReparsing() const { return m_isReparsing; }
// General tokenizer entry point, defined outside this excerpt. The unsigned
// parameter receives the lexer flags (lexExpectIdentifier forwards its
// lexerFlags argument in that position).
92 JSTokenType lex(JSTokenData*, JSTokenInfo*, unsigned, bool strictMode);
93 bool nextTokenIsColon();
// Returns m_lineNumber, the value lexExpectIdentifier records as a token's line.
94 int lineNumber() const { return m_lineNumber; }
// Plain setter/getter pair for m_lastLineNumber.
95 void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
96 int lastLineNumber() const { return m_lastLineNumber; }
// Returns m_terminator.
// NOTE(review): presumably "a line terminator preceded the current token" -- confirm.
97 bool prevTerminator() const { return m_terminator; }
98 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
// pattern and flags are references to pointers, i.e. out-parameters;
// patternPrefix defaults to 0 when omitted.
99 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
102 // Functions for use after parsing.
// Returns the value of m_error (converted to bool).
103 bool sawError() const { return m_error; }
// Returns a copy of m_lexErrorMessage; setOffset() resets that member to an
// empty UString.
104 UString getErrorMessage() const { return m_lexErrorMessage; }
// Repositions the lexer `offset` characters past m_codeStart.
// NOTE(review): this excerpt omits several lines of the definition (braces and
// short statements), so only the visible statements are described here.
106 void setOffset(int offset)
// Discard any previously recorded error message.
109 m_lexErrorMessage = UString();
// Move the read position to the requested offset.
110 m_code = m_codeStart + offset;
// Empty the 16-bit scratch buffer.
112 m_buffer16.resize(0);
113 // Faster than an if-else sequence
// Guard for the case where the new position is still inside the source; the
// guarded statement is not part of this excerpt.
115 if (LIKELY(m_code < m_codeEnd))
// NOTE(review): the body is not part of this excerpt; presumably stores `line`
// in m_lineNumber -- confirm against the full file.
118 void setLineNumber(int line)
// Returns the provider of the SourceCode held in m_source. Dereferences
// m_source unconditionally, so m_source must be non-null when this is called.
123 SourceProvider* sourceProvider() const { return m_source->provider(); }
// Identifier-oriented variant of lex(), defined later in this file. It asserts
// that lexerFlags contains LexerFlagsIgnoreReservedWords and can hand off to
// lex() with the same arguments.
125 JSTokenType lexExpectIdentifier(JSTokenData*, JSTokenInfo*, unsigned, bool strictMode);
// Buffer append helpers. Only the UChar overload of append16 is defined in
// this excerpt.
// NOTE(review): append8 and the LChar overload of append16 presumably fill
// m_buffer8 / m_buffer16 respectively -- confirm.
129 void append8(const T*, size_t);
132 void append16(const LChar*, size_t);
// Appends `length` UChars starting at `characters` to m_buffer16.
133 void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
// Cursor and offset helpers. Those without a body here are defined outside
// this excerpt.
135 ALWAYS_INLINE void shift();
136 ALWAYS_INLINE int peek(int offset);
137 int getUnicodeCharacter();
138 void shiftLineTerminator();
140 UString getInvalidCharMessage();
141 ALWAYS_INLINE const T* currentCharacter() const;
// Distance, in characters of type T, from the start of the source to the
// current read position.
142 ALWAYS_INLINE int currentOffset() const { return m_code - m_codeStart; }
// Converts a pointer into the source to an offset and forwards it to setOffset().
143 ALWAYS_INLINE void setOffsetFromCharOffset(const T* charOffset) { setOffset(charOffset - m_codeStart); }
// Specialized for LChar and UChar later in this file: stores the string's
// 8-bit or 16-bit character pointer in m_codeStart.
145 ALWAYS_INLINE void setCodeStart(const StringImpl*);
// Identifier factories. Each one forwards to the same-named member of m_arena,
// passing m_globalData, and returns the address of the resulting Identifier.
147 ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
148 ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
149 ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
// Defined outside this excerpt.
151 ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
// Token-parsing helpers; all are defined outside this excerpt.
// Several come as an ALWAYS_INLINE function paired with a NEVER_INLINE
// "SlowCase" counterpart that takes the same arguments.
// NOTE(review): the bool template parameters presumably select at compile time
// whether identifier/string objects are built -- confirm in the definitions.
153 template <int shiftAmount> void internalShift();
154 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
155 template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
156 template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
157 template <bool shouldBuildStrings> ALWAYS_INLINE bool parseString(JSTokenData*, bool strictMode);
158 template <bool shouldBuildStrings> NEVER_INLINE bool parseStringSlowCase(JSTokenData*, bool strictMode);
// Numeric-literal helpers; those taking double& report the parsed value
// through that reference.
159 ALWAYS_INLINE void parseHex(double& returnValue);
160 ALWAYS_INLINE bool parseOctal(double& returnValue);
161 ALWAYS_INLINE bool parseDecimal(double& returnValue);
162 ALWAYS_INLINE void parseNumberAfterDecimalPoint();
163 ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
164 ALWAYS_INLINE bool parseMultilineComment();
// Constants and data members. NOTE(review): some members used elsewhere in
// this file (e.g. m_code, m_codeEnd, m_lineNumber) are not declared in this
// excerpt.
// NOTE(review): presumably the initial capacity reserved for the read buffers -- confirm.
166 static const size_t initialReadBufferCapacity = 32;
// Accessed through setLastLineNumber() / lastLineNumber().
169 int m_lastLineNumber;
// Scratch buffers of 8-bit and 16-bit characters. m_buffer16 is appended to by
// append16() and emptied by setOffset().
171 Vector<LChar> m_buffer8;
172 Vector<UChar> m_buffer16;
// Source being lexed; sourceProvider() returns its provider.
176 const SourceCode* m_source;
// First character of the source; set by setCodeStart() and used as the base
// for all offset computations.
178 const T* m_codeStart;
// Returned by getErrorMessage(); reset to an empty string by setOffset().
183 UString m_lexErrorMessage;
// NOTE(review): the declaration(s) that the following comment describes are
// not part of this excerpt.
185 // current and following unicode characters (int to allow for -1 for end-of-file marker)
// Arena whose makeIdentifier* members create the Identifier objects handed out
// by this lexer.
188 IdentifierArena* m_arena;
// Passed to m_arena whenever an identifier is created.
190 JSGlobalData* m_globalData;
194 ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
196 return ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC || ch == 0xA0;
200 ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
202 return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF);
206 ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
208 return ch == '\r' || ch == '\n';
212 ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
214 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
217 template <typename T>
218 inline unsigned char Lexer<T>::convertHex(int c1, int c2)
220 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
223 template <typename T>
224 inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
226 return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
229 template <typename T>
230 ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
232 return &m_arena->makeIdentifier(m_globalData, characters, length);
235 template <typename T>
236 ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
238 return &m_arena->makeIdentifier(m_globalData, characters, length);
242 ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
244 ASSERT(sourceString->is8Bit());
245 m_codeStart = sourceString->characters8();
249 ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
251 ASSERT(!sourceString->is8Bit());
252 m_codeStart = sourceString->characters16();
255 template <typename T>
256 ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
258 return &m_arena->makeIdentifierLCharFromUChar(m_globalData, characters, length);
// Identifier-oriented variant of lex(): scans a run of ASCII characters
// straight from the source pointers, fills in tokenData/tokenInfo for the
// identifier, and can hand off to lex() with the same arguments.
// NOTE(review): this excerpt omits the lines that carry the control flow
// (loop, branches, else, labels and the fast-path return), so the comments
// below describe only the visible statements.
261 template <typename T>
262 ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexerFlags, bool strictMode)
// Callers must pass LexerFlagsIgnoreReservedWords.
264 ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords));
// start marks the first character of the candidate identifier, ptr is the scan
// cursor and end is the end of the source.
265 const T* start = m_code;
266 const T* ptr = start;
267 const T* end = m_codeEnd;
// Test for a character that is not an ASCII letter.
272 if (!WTF::isASCIIAlpha(*ptr))
// Test for a character that is neither an ASCII letter nor an ASCII digit.
276 if (!WTF::isASCIIAlphanumeric(*ptr))
// Test for a non-ASCII character, a backslash, '_' or '$' at the cursor.
// NOTE(review): presumably these send the scan to the general lex() path
// because this routine does not handle them -- confirm.
283 if ((!WTF::isASCII(*ptr)) || (*ptr == '\\') || (*ptr == '_') || (*ptr == '$'))
291 // Create the identifier if needed
292 if (lexerFlags & LexexFlagsDontBuildKeywords)
293 tokenData->ident = 0;
// Otherwise (the `else` line is not part of this excerpt) build the identifier
// from the scanned range [start, ptr).
295 tokenData->ident = makeIdentifier(start, ptr - start);
// Record the token's line and its start/end offsets relative to the start of
// the source.
296 tokenInfo->line = m_lineNumber;
297 tokenInfo->startOffset = start - m_codeStart;
298 tokenInfo->endOffset = currentOffset();
// Hand-off to the general lexer with unchanged arguments.
303 return lex(tokenData, tokenInfo, lexerFlags, strictMode);