Whoops, fix last-minute bug.
[WebKit-https.git] / Source / JavaScriptCore / parser / Lexer.cpp
index ff7079f364333363b8c275306d5c484b6795e460..a9ce770dc7c31dd4d0549fca17c8f2e30c7acc03 100644 (file)
@@ -40,6 +40,7 @@ using namespace WTF;
 using namespace Unicode;
 
 #include "JSParser.h"
+#include "KeywordLookup.h"
 #include "Lookup.h"
 #include "Lexer.lut.h"
 
@@ -271,14 +272,24 @@ void Lexer::setCode(const SourceCode& source, ParserArena& arena)
     ASSERT(currentOffset() == source.startOffset());
 }
 
-ALWAYS_INLINE void Lexer::shift()
+template <int shiftAmount, Lexer::ShiftType shouldBoundsCheck> ALWAYS_INLINE void Lexer::internalShift() // Advances m_code by shiftAmount and reloads m_current from the new position.
 {
-    // Faster than an if-else sequence
-    ASSERT(m_current != -1);
-    m_current = -1;
-    ++m_code;
-    if (LIKELY(m_code < m_codeEnd))
+    if (shouldBoundsCheck == DoBoundsCheck) { // shouldBoundsCheck is a template argument, so this test is on a compile-time constant.
+        // Faster than an if-else sequence: preset m_current to -1, then overwrite it only while m_code is still before m_codeEnd.
+        ASSERT(m_current != -1); // Must not be called when m_current is already -1.
+        m_current = -1;
+        m_code += shiftAmount;
+        if (LIKELY(m_code < m_codeEnd))
+            m_current = *m_code;
+    } else { // Unchecked variant: m_code is dereferenced without comparing against m_codeEnd.
+        m_code += shiftAmount; // NOTE(review): presumably callers guarantee the new position is inside the buffer - confirm at call sites.
         m_current = *m_code;
+    }
+}
+
+ALWAYS_INLINE void Lexer::shift()
+{
+    internalShift<1, DoBoundsCheck>(); // Advance by one position using the bounds-checked variant; m_current becomes -1 once m_code reaches m_codeEnd.
 }
 
 ALWAYS_INLINE int Lexer::peek(int offset)
@@ -399,11 +410,16 @@ inline void Lexer::record16(int c)
     record16(UChar(static_cast<unsigned short>(c)));
 }
 
-ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, LexType lexType)
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, unsigned lexType)
 {
-    bool bufferRequired = false;
+    const ptrdiff_t remaining = m_codeEnd - m_code;
+    if ((remaining >= maxTokenLength) && !(lexType & IgnoreReservedWords)) {
+        JSTokenType keyword = parseKeyword();
+        if (keyword != IDENT)
+            return keyword;
+    }
     const UChar* identifierStart = currentCharacter();
-    int identifierLength;
+    bool bufferRequired = false;
 
     while (true) {
         if (LIKELY(isIdentPart(m_current))) {
@@ -426,34 +442,46 @@ ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, LexType lex
             return ERRORTOK;
         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character)))
             return ERRORTOK;
-        record16(character);
+        if  (shouldCreateIdentifier)
+            record16(character);
         identifierStart = currentCharacter();
     }
+    
+    int identifierLength;
+    const Identifier* ident = 0;
+    if (shouldCreateIdentifier) {
+        if (!bufferRequired)
+            identifierLength = currentCharacter() - identifierStart;
+        else {
+            if (identifierStart != currentCharacter())
+                m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
+            identifierStart = m_buffer16.data();
+            identifierLength = m_buffer16.size();
+        }
 
-    if (!bufferRequired)
-        identifierLength = currentCharacter() - identifierStart;
-    else {
-        if (identifierStart != currentCharacter())
-            m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
-        identifierStart = m_buffer16.data();
-        identifierLength = m_buffer16.size();
-    }
+        ident = makeIdentifier(identifierStart, identifierLength);
+        lvalp->ident = ident;
+    } else
+        lvalp->ident = 0;
 
-    const Identifier* ident = makeIdentifier(identifierStart, identifierLength);
-    lvalp->ident = ident;
     m_delimited = false;
 
-    if (LIKELY(!bufferRequired && lexType == IdentifyReservedWords)) {
+    if (LIKELY(!bufferRequired && !(lexType & IgnoreReservedWords))) {
+        ASSERT(shouldCreateIdentifier);
         // Keywords must not be recognized if there was an \uXXXX in the identifier.
-        const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident);
-        return entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
+        if (remaining < maxTokenLength) {
+            const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident);
+            ASSERT((remaining < maxTokenLength) || !entry);
+            return entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
+        }
+        return IDENT;
     }
 
     m_buffer16.resize(0);
     return IDENT;
 }
 
-ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
+template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
 {
     int stringQuoteCharacter = m_current;
     shift();
@@ -462,7 +490,7 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
 
     while (m_current != stringQuoteCharacter) {
         if (UNLIKELY(m_current == '\\')) {
-            if (stringStart != currentCharacter())
+            if (stringStart != currentCharacter() && shouldBuildStrings)
                 m_buffer16.append(stringStart, currentCharacter() - stringStart);
             shift();
 
@@ -470,7 +498,8 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
 
             // Most common escape sequences first
             if (escape) {
-                record16(escape);
+                 if (shouldBuildStrings)
+                     record16(escape);
                 shift();
             } else if (UNLIKELY(isLineTerminator(m_current)))
                 shiftLineTerminator();
@@ -479,18 +508,21 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
                 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
                     int prev = m_current;
                     shift();
-                    record16(convertHex(prev, m_current));
+                    if (shouldBuildStrings)
+                        record16(convertHex(prev, m_current));
                     shift();
-                } else
+                } else if (shouldBuildStrings)
                     record16('x');
             } else if (m_current == 'u') {
                 shift();
                 int character = getUnicodeCharacter();
-                if (character != -1)
-                    record16(character);
-                else if (m_current == stringQuoteCharacter)
-                    record16('u');
-                else // Only stringQuoteCharacter allowed after \u
+                if (character != -1) {
+                    if (shouldBuildStrings)
+                        record16(character);
+                } else if (m_current == stringQuoteCharacter) {
+                    if (shouldBuildStrings)
+                        record16('u');
+                } else // Only stringQuoteCharacter allowed after \u
                     return false;
             } else if (strictMode && isASCIIDigit(m_current)) {
                 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
@@ -498,7 +530,8 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
                 shift();
                 if (character1 != '0' || isASCIIDigit(m_current))
                     return false;
-                record16(0);
+                if (shouldBuildStrings)
+                    record16(0);
             } else if (!strictMode && isASCIIOctalDigit(m_current)) {
                 // Octal character sequences
                 int character1 = m_current;
@@ -508,14 +541,20 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
                     int character2 = m_current;
                     shift();
                     if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
-                        record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
+                        if (shouldBuildStrings)
+                            record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
                         shift();
-                    } else
-                        record16((character1 - '0') * 8 + character2 - '0');
-                } else
-                    record16(character1 - '0');
+                    } else {
+                        if (shouldBuildStrings)
+                            record16((character1 - '0') * 8 + character2 - '0');
+                    }
+                } else {
+                    if (shouldBuildStrings)
+                        record16(character1 - '0');
+                }
             } else if (m_current != -1) {
-                record16(m_current);
+                if (shouldBuildStrings)
+                    record16(m_current);
                 shift();
             } else
                 return false;
@@ -535,9 +574,13 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
         shift();
     }
 
-    if (currentCharacter() != stringStart)
+    if (currentCharacter() != stringStart && shouldBuildStrings)
         m_buffer16.append(stringStart, currentCharacter() - stringStart);
-    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+    if (shouldBuildStrings)
+        lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+    else
+        lvalp->ident = 0;
+
     m_buffer16.resize(0);
     return true;
 }
@@ -703,7 +746,16 @@ ALWAYS_INLINE bool Lexer::parseMultilineComment()
     }
 }
 
-JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType lexType, bool strictMode)
+bool Lexer::nextTokenIsColon() // Looks ahead from m_code and reports whether the first non-blank character is ':'.
+{
+    const UChar* code = m_code; // Scan with a local cursor; m_code itself is not modified by this function.
+    while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code))) // NOTE(review): only whitespace and line terminators are skipped; comments are not - confirm this is intended.
+        code++;
+        
+    return code < m_codeEnd && *code == ':'; // False when the scan ran to m_codeEnd without finding another character.
+}
+
+JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, unsigned lexType, bool strictMode)
 {
     ASSERT(!m_error);
     ASSERT(m_buffer8.isEmpty());
@@ -1016,8 +1068,13 @@ inNumberAfterDecimalPoint:
         m_delimited = false;
         break;
     case CharacterQuote:
-        if (UNLIKELY(!parseString(lvalp, strictMode)))
-            goto returnError;
+        if (lexType & DontBuildStrings) {
+            if (UNLIKELY(!parseString<false>(lvalp, strictMode)))
+                goto returnError;
+        } else {
+            if (UNLIKELY(!parseString<true>(lvalp, strictMode)))
+                goto returnError;
+        }
         shift();
         m_delimited = false;
         token = STRING;
@@ -1026,7 +1083,10 @@ inNumberAfterDecimalPoint:
         ASSERT(isIdentStart(m_current));
         // Fall through into CharacterBackSlash.
     case CharacterBackSlash:
-        token = parseIdentifier(lvalp, lexType);
+        if (lexType & DontBuildKeywords)
+            token = parseIdentifier<false>(lvalp, lexType);
+        else
+            token = parseIdentifier<true>(lvalp, lexType);
         break;
     case CharacterLineTerminator:
         ASSERT(isLineTerminator(m_current));