2011-06-03 Oliver Hunt <oliver@apple.com>
author oliver@apple.com <oliver@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 3 Jun 2011 23:30:22 +0000 (23:30 +0000)
committer oliver@apple.com <oliver@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 3 Jun 2011 23:30:22 +0000 (23:30 +0000)
        Reviewed by Geoffrey Garen.

        Improve keyword lookup
        https://bugs.webkit.org/show_bug.cgi?id=61913

        Rather than doing multiple hash lookups, as we currently
        do when trying to identify keywords, we now use an
        automatically generated decision tree (essentially a
        hard-coded Patricia trie).  We still use the regular
        lookup table for the last few characters of an input, as
        this allows us to completely skip all bounds checks.

        * CMakeLists.txt:
        * DerivedSources.make:
        * DerivedSources.pro:
        * GNUmakefile.am:
        * JavaScriptCore.gyp/JavaScriptCore.gyp:
        * JavaScriptCore.xcodeproj/project.pbxproj:
        * KeywordLookupGenerator.py: Added.
        * make-generated-sources.sh:
        * parser/Lexer.cpp:
        (JSC::Lexer::internalShift):
        (JSC::Lexer::shift):
        (JSC::Lexer::parseIdentifier):
        * parser/Lexer.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@88076 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/CMakeLists.txt
Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/DerivedSources.make
Source/JavaScriptCore/DerivedSources.pro
Source/JavaScriptCore/GNUmakefile.am
Source/JavaScriptCore/JavaScriptCore.gyp/JavaScriptCore.gyp
Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
Source/JavaScriptCore/KeywordLookupGenerator.py [new file with mode: 0644]
Source/JavaScriptCore/make-generated-sources.sh
Source/JavaScriptCore/parser/Lexer.cpp
Source/JavaScriptCore/parser/Lexer.h

index 754d21bc9c38ec0ce09cb27e6d1c95c1d35f258a..20d9270eab59dd84047ccdcb3684be4bb0b668a1 100644 (file)
@@ -219,6 +219,13 @@ ADD_CUSTOM_COMMAND(
 ADD_SOURCE_DEPENDENCIES(${JAVASCRIPTCORE_DIR}/yarr/YarrPattern.cpp ${DERIVED_SOURCES_JAVASCRIPTCORE_DIR}/RegExpJitTables.h)
 
 
+#GENERATOR: "KeywordLookup.h": keyword decision tree used by the lexer
+ADD_CUSTOM_COMMAND(
+    OUTPUT ${DERIVED_SOURCES_JAVASCRIPTCORE_DIR}/KeywordLookup.h
+    MAIN_DEPENDENCY ${JAVASCRIPTCORE_DIR}/KeywordLookupGenerator.py
+    COMMAND ${PYTHON_EXECUTABLE} ${JAVASCRIPTCORE_DIR}/KeywordLookupGenerator.py ${JAVASCRIPTCORE_DIR}/parser/Keywords.table > ${DERIVED_SOURCES_JAVASCRIPTCORE_DIR}/KeywordLookup.h
+    VERBATIM)
+ADD_SOURCE_DEPENDENCIES(${JAVASCRIPTCORE_DIR}/parser/Lexer.cpp ${DERIVED_SOURCES_JAVASCRIPTCORE_DIR}/KeywordLookup.h)
 
 IF (WTF_CPU_ARM)
     LIST(APPEND JavaScriptCore_SOURCES
index b14a3f19afbf687bdd8bd717df96aceb4b3d5949..39cbc56cfe25c145cd57e483217ded65a1429185 100755 (executable)
@@ -1,3 +1,31 @@
+2011-06-03  Oliver Hunt  <oliver@apple.com>
+
+        Reviewed by Geoffrey Garen.
+
+        Improve keyword lookup
+        https://bugs.webkit.org/show_bug.cgi?id=61913
+
+        Rather than doing multiple hash lookups, as we currently
+        do when trying to identify keywords, we now use an
+        automatically generated decision tree (essentially a
+        hard-coded Patricia trie).  We still use the regular
+        lookup table for the last few characters of an input, as
+        this allows us to completely skip all bounds checks.
+
+        * CMakeLists.txt:
+        * DerivedSources.make:
+        * DerivedSources.pro:
+        * GNUmakefile.am:
+        * JavaScriptCore.gyp/JavaScriptCore.gyp:
+        * JavaScriptCore.xcodeproj/project.pbxproj:
+        * KeywordLookupGenerator.py: Added.
+        * make-generated-sources.sh:
+        * parser/Lexer.cpp:
+        (JSC::Lexer::internalShift):
+        (JSC::Lexer::shift):
+        (JSC::Lexer::parseIdentifier):
+        * parser/Lexer.h:
+
 2011-06-03  Siddharth Mathur  <siddharth.mathur@nokia.com>
 
         Reviewed by Benjamin Poulain.
index d738289435b69a93f28a01ded53e718375eab9d7..80af4b6d506d551a096bc272b2ea3ece4716fae6 100644 (file)
@@ -46,6 +46,7 @@ all : \
     JavaScriptCore.JSVALUE32_64.exp \
     JavaScriptCore.JSVALUE64.exp \
     JSGlobalObject.lut.h \
+    KeywordLookup.h \
     Lexer.lut.h \
     MathObject.lut.h \
     NumberConstructor.lut.h \
@@ -76,6 +77,9 @@ docs/bytecode.html: make-bytecode-docs.pl Interpreter.cpp
 RegExpJitTables.h: create_regex_tables
        python $^ > $@
 
+KeywordLookup.h: KeywordLookupGenerator.py Keywords.table
+       python $^ > $@
+
 # export files
 
 JavaScriptCore.JSVALUE32_64.exp: JavaScriptCore.exp JavaScriptCore.JSVALUE32_64only.exp
index 04a7a5470093a00f9eec57a31cad1f597610a964..09896051498e4427a5b9ec2795458c2ac11f052b 100644 (file)
@@ -99,3 +99,10 @@ retgen.wkScript = $$PWD/create_regex_tables
 retgen.input = retgen.wkScript
 retgen.commands = python $$retgen.wkScript > ${QMAKE_FILE_OUT}
 addExtraCompiler(retgen)
+
+#GENERATOR: "KeywordLookup.h": decision tree used by the lexer
+klgen.output = $$JSC_GENERATED_SOURCES_DIR/KeywordLookup.h
+klgen.wkScript = $$PWD/KeywordLookupGenerator.py 
+klgen.input = KEYWORDLUT_FILES
+klgen.commands = python $$klgen.wkScript ${QMAKE_FILE_NAME} > ${QMAKE_FILE_OUT}
+addExtraCompiler(klgen)
index 7408504a5b92f0eb498551eae802e448e5d3ee83..c710d2aba427dee3ca4c429c426e3d89095f706f 100644 (file)
@@ -79,6 +79,9 @@ Source/JavaScriptCore/%.lut.h: $(srcdir)/Source/JavaScriptCore/create_hash_table
 Source/JavaScriptCore/RegExpJitTables.h: $(srcdir)/Source/JavaScriptCore/create_regex_tables
        $(AM_V_GEN)$(PYTHON) $^ > $@
 
+Source/JavaScriptCore/KeywordLookup.h: $(srcdir)/Source/JavaScriptCore/KeywordLookupGenerator.py $(srcdir)/Source/JavaScriptCore/parser/Keywords.table
+       $(AM_V_GEN)$(PYTHON) $^ > $@
+
 jsc: $(javascriptcore_built_nosources) Programs/jsc$(EXEEXT)
 
 bin_PROGRAMS += \
index b04c59ab60ea4c72afcf1b6a2ce49215cbc5b353..d886a62859d86f39998f1f954e8dde76283e88ab 100644 (file)
           ],
           'action': ['python', '<@(_inputs)', '<@(_arguments)', '<@(_outputs)'],
         },
+        {
+          'action_name': 'klgen',
+          'inputs': [
+            '../KeywordLookupGenerator.py',
+            '../parser/Keywords.table'
+          ],
+          'arguments': [
+          ],
+          'outputs': [
+            '<(INTERMEDIATE_DIR)/KeywordLookup.h',
+          ],
+          'action': ['python', '<@(_inputs)', '<@(_arguments)', '<@(_outputs)'],
+        },
       ],
       'include_dirs': [
         '<(INTERMEDIATE_DIR)',
index 5c5b844c609b53bee1c6b962d303f4c3d6503c2f..5504b048de7d93f20e18fb7966663dad76d702a2 100644 (file)
                A72701B90DADE94900E548D7 /* ExceptionHelpers.h in Headers */ = {isa = PBXBuildFile; fileRef = A72701B30DADE94900E548D7 /* ExceptionHelpers.h */; };
                A727FF6B0DA3092200E548D7 /* JSPropertyNameIterator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727FF660DA3053B00E548D7 /* JSPropertyNameIterator.cpp */; };
                A7280A2811557E3000D56957 /* JSObjectRefPrivate.h in Headers */ = {isa = PBXBuildFile; fileRef = A79EDB0811531CD60019E912 /* JSObjectRefPrivate.h */; settings = {ATTRIBUTES = (Private, ); }; };
+               A72FFD64139985A800E5365A /* KeywordLookup.h in Headers */ = {isa = PBXBuildFile; fileRef = A7C225CD1399849C00FF1662 /* KeywordLookup.h */; };
                A730B6121250068F009D25B1 /* StrictEvalActivation.h in Headers */ = {isa = PBXBuildFile; fileRef = A730B6101250068F009D25B1 /* StrictEvalActivation.h */; };
                A730B6131250068F009D25B1 /* StrictEvalActivation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A730B6111250068F009D25B1 /* StrictEvalActivation.cpp */; };
                A731B25A130093880040A7FA /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 51F0EB6105C86C6B00E6DF1B /* Foundation.framework */; };
                A7B48DB50EE74CFC00DCBDB6 /* ExecutableAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ExecutableAllocator.h; sourceTree = "<group>"; };
                A7B48DB60EE74CFC00DCBDB6 /* ExecutableAllocator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ExecutableAllocator.cpp; sourceTree = "<group>"; };
                A7C1E8C8112E701C00A37F98 /* JITPropertyAccess32_64.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JITPropertyAccess32_64.cpp; sourceTree = "<group>"; };
+               A7C225CC139981F100FF1662 /* KeywordLookupGenerator.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; path = KeywordLookupGenerator.py; sourceTree = "<group>"; };
+               A7C225CD1399849C00FF1662 /* KeywordLookup.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KeywordLookup.h; sourceTree = "<group>"; };
                A7C40C07130B057D00D002A1 /* BlockStack.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BlockStack.h; sourceTree = "<group>"; };
                A7C40C08130B057D00D002A1 /* SentinelLinkedList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SentinelLinkedList.h; sourceTree = "<group>"; };
                A7C40C09130B057D00D002A1 /* SinglyLinkedList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SinglyLinkedList.h; sourceTree = "<group>"; };
                        children = (
                                A718F8211178EB4B002465A7 /* create_regex_tables */,
                                937B63CC09E766D200A671DD /* DerivedSources.make */,
+                               A7C225CC139981F100FF1662 /* KeywordLookupGenerator.py */,
                                F692A8540255597D01FF60F7 /* create_hash_table */,
                                14B8ECA60A5653980062BE54 /* JavaScriptCore.exp */,
                                867FC35F11B763950025105E /* JavaScriptCore.JSVALUE32_64only.exp */,
                                BCD203E70E1718F4002C7E82 /* DatePrototype.lut.h */,
                                BC8149AF12F89F53007B2C32 /* HeaderDetection.h */,
                                BC87CDB810712ACA000614CF /* JSONObject.lut.h */,
+                               A7C225CD1399849C00FF1662 /* KeywordLookup.h */,
                                BC18C52D0E16FCE100B34460 /* Lexer.lut.h */,
                                BC18C5290E16FCC200B34460 /* MathObject.lut.h */,
                                BC2680E60E16D52300A06E92 /* NumberConstructor.lut.h */,
                                651DCA04136A6FEF00F74194 /* PassTraits.h in Headers */,
                                14F97447138C853E00DA1C67 /* HeapRootVisitor.h in Headers */,
                                86BB09C1138E381B0056702F /* DFGRepatch.h in Headers */,
+                               A72FFD64139985A800E5365A /* KeywordLookup.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
diff --git a/Source/JavaScriptCore/KeywordLookupGenerator.py b/Source/JavaScriptCore/KeywordLookupGenerator.py
new file mode 100644 (file)
index 0000000..b93d5d3
--- /dev/null
@@ -0,0 +1,228 @@
+# Copyright (C) 2010 Apple Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import string
+import operator
+
+# Read the whole keyword table named on the command line; the context
+# manager closes the file as soon as the text has been read.
+with open(sys.argv[1]) as keywordsFile:
+    keywordsText = keywordsFile.read()
+
+# Observed weights of the most common keywords, rounded to two decimal places
+keyWordWeights = {
+    "catch": 0.01,
+    "try": 0.01,
+    "while": 0.01,
+    "case": 0.01,
+    "break": 0.01,
+    "new": 0.01,
+    "in": 0.01,
+    "typeof": 0.02,
+    "true": 0.02,
+    "false": 0.02,
+    "for": 0.03,
+    "null": 0.03,
+    "else": 0.03,
+    "return": 0.13,
+    "var": 0.13,
+    "if": 0.16,
+    "function": 0.18,
+    "this": 0.18,
+}
+
+
+def allWhitespace(str):
+    # True when every character of str is in string.whitespace; an empty
+    # string therefore also counts as all-whitespace.
+    return all(ch in string.whitespace for ch in str)
+
+
+def parseKeywords(keywordsText):
+    """Split a keyword table description into per-line field lists.
+
+    Text after '#' on a line is dropped and whitespace-only lines are
+    skipped.  The first remaining line must start with @begin and the last
+    one must be @end; every line in between is split on whitespace and
+    returned as a list of fields.
+
+    Raises Exception when either marker is missing, which includes input
+    that has no content lines at all.
+    """
+    lines = keywordsText.split("\n")
+    lines = [line.split("#")[0] for line in lines]
+    lines = [line for line in lines if (not allWhitespace(line))]
+    if not lines:
+        raise Exception("expected description beginning with @begin")
+    name = lines[0].split()
+    # Strip the terminator so that "@end" followed by spaces, by a comment
+    # (whose removal leaves trailing blanks), or by the '\r' of a CRLF line
+    # ending is still recognized.
+    terminator = lines[-1].strip()
+    if not name[0] == "@begin":
+        raise Exception("expected description beginning with @begin")
+    if not terminator == "@end":
+        raise Exception("expected description ending with @end")
+
+    lines = lines[1:-1]  # trim off the @begin heading and the @end terminator
+    return [line.split() for line in lines]
+
+
+def makePadding(size):
+    # A run of `size` spaces, used to indent the generated C code.
+    return " " * size
+
+
+class Trie:
+    """Character trie over the keyword table that is printed as C code.
+
+    Each node stores `prefix` (the characters consumed on the edge leading
+    to it), `keys` (its children) and `value` (the token name when a
+    keyword ends at this node, otherwise None).
+
+    `keys` is a dict keyed by child prefix until fillOut() runs; fillOut()
+    replaces it with a list of (prefix, child) pairs sorted by descending
+    weight and also sets `fullPrefix`, `selfWeight` and `weight`.  The
+    printing methods and maxLength() rely on that later form, so fillOut()
+    has to be called before them.
+    """
+
+    def __init__(self, prefix):
+        self.prefix = prefix
+        self.keys = {}
+        self.value = None
+
+    def insert(self, key, value):
+        """Add keyword `key` with token `value`, one character per level."""
+        if len(key) == 0:
+            self.value = value
+            return
+        if not (key[0] in self.keys):
+            self.keys[key[0]] = Trie(key[0])
+        self.keys[key[0]].insert(key[1:], value)
+
+    def coalesce(self):
+        """Collapse chains of single-child nodes into multi-character edges.
+
+        The children of this node are always replaced in place by their
+        coalesced forms.  When this node has no value of its own and exactly
+        one child, the two are merged and the merged node is returned
+        instead of self; otherwise self is returned.
+        """
+        keys = {}
+        for k, v in self.keys.items():
+            t = v.coalesce()
+            keys[t.prefix] = t
+        self.keys = keys
+        if self.value is not None:
+            return self
+        if len(self.keys) != 1:
+            return self
+        # dict.items() is a non-indexable view on Python 3; going through
+        # list() picks the single entry on both Python 2 and Python 3.
+        (prefix, suffix) = list(self.keys.items())[0]
+        res = Trie(self.prefix + prefix)
+        res.value = suffix.value
+        res.keys = suffix.keys
+        return res
+
+    def fillOut(self, prefix=""):
+        """Compute full prefixes and weights, then order children by weight.
+
+        `selfWeight` is the entry for this node's full prefix in
+        keyWordWeights (0 when it has none); `weight` additionally sums the
+        weights of all children.  Children end up sorted heaviest first.
+        """
+        self.fullPrefix = prefix + self.prefix
+        weight = 0
+        if self.fullPrefix in keyWordWeights:
+            weight = weight + keyWordWeights[self.fullPrefix]
+        self.selfWeight = weight
+        for trie in self.keys.values():
+            trie.fillOut(self.fullPrefix)
+            weight = weight + trie.weight
+        self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)]
+        self.weight = weight
+
+    def printSubTreeAsC(self, indent):
+        """Print the C if / else-if chain for this subtree.
+
+        `indent` is the number of spaces placed in front of every line that
+        this node emits; children are printed four spaces deeper.
+        """
+        padding = makePadding(indent)
+
+        if self.value is not None:
+            # A keyword ends here; it is accepted only when the character
+            # after it cannot continue an identifier.
+            print(padding + "if (!isIdentPart(code[%d])) {" % (len(self.fullPrefix)))
+            print(padding + "    internalShift<%d, DoNotBoundsCheck>();" % len(self.fullPrefix))
+            print(padding + "    return " + self.value + ";")
+            print(padding + "}")
+        rootIndex = len(self.fullPrefix)
+        itemCount = 0
+        for k, trie in self.keys:
+            baseIndex = rootIndex
+            if (baseIndex > 0) and (len(k) == 3):
+                # Widen a 3-character edge to 4 characters by re-testing the
+                # previous, already matched character, so that
+                # COMPARE_CHARACTERS4 is emitted instead of COMPARE_CHARACTERS3.
+                baseIndex = baseIndex - 1
+                k = trie.fullPrefix[baseIndex] + k
+            test = [("'%s'" % c) for c in k]
+            if len(test) == 1:
+                comparison = "code[%d] == %s" % (baseIndex, test[0])
+            else:
+                base = "code"
+                if baseIndex > 0:
+                    base = "code + %d" % baseIndex
+                comparison = ("COMPARE_CHARACTERS%d(%s, " % (len(test), base)) + ", ".join(test) + ")"
+            if itemCount == 0:
+                print(padding + "if (" + comparison + ") {")
+            else:
+                print(padding + "else if (" + comparison + ") {")
+
+            trie.printSubTreeAsC(indent + 4)
+            itemCount = itemCount + 1
+            print(padding + "}")
+
+    def maxLength(self):
+        """Return the length of the longest full prefix in this subtree."""
+        longest = len(self.fullPrefix)
+        for (_, trie) in self.keys:
+            l = trie.maxLength()
+            if l > longest:
+                longest = l
+        return longest
+
+    def printAsC(self):
+        """Print the JSC namespace block that defines Lexer::parseKeyword()."""
+        print("namespace JSC {")
+        print("static inline bool isIdentPart(int c);")
+        # max length + 1 so we don't need to do any bounds checking at all
+        print("static const int maxTokenLength = %d;" % (self.maxLength() + 1))
+        print("ALWAYS_INLINE JSTokenType Lexer::parseKeyword() {")
+        print("    ASSERT(m_codeEnd - m_code >= maxTokenLength);")
+        print("    const UChar* code = m_code;")
+        self.printSubTreeAsC(4)
+        print("    return IDENT;")
+        print("}")
+        print("}")
+
+# Build the trie: each parsed line is expected to hold exactly two fields,
+# the keyword text and its token name.
+keywords = parseKeywords(keywordsText)
+trie = Trie("")
+for k, v in keywords:
+    trie.insert(k, v)
+# coalesce() rewrites the root's children in place.  Its return value only
+# differs from the root when the root has a single child and no value, and
+# it is ignored here.
+trie.coalesce()
+# Compute full prefixes and weights and order children by weight; the
+# printing step depends on this.
+trie.fillOut()
+print("// This file was generated by KeywordLookupGenerator.py.  Do not edit.")
+print("""
+
+#if CPU(NEEDS_ALIGNED_ACCESS)
+
+#define COMPARE_CHARACTERS2(address, char1, char2) \
+    (((address)[0] == char1) && ((address)[1] == char2))
+#define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \
+    (COMPARE_CHARACTERS2(address, char1, char2) && COMPARE_CHARACTERS2((address) + 2, char3, char4))
+
+#else
+
+#if CPU(BIG_ENDIAN)
+#define CHARPAIR_TOUINT32(a, b) ((((uint32_t)(a)) << 16) + (uint32_t)(b))
+#define CHARQUAD_TOUINT64(a, b, c, d) ((((uint64_t)(CHARPAIR_TOUINT32(a, b))) << 32) + CHARPAIR_TOUINT32(c, d))
+#else
+#define CHARPAIR_TOUINT32(a, b) ((((uint32_t)(b)) << 16) + (uint32_t)(a))
+#define CHARQUAD_TOUINT64(a, b, c, d) ((((uint64_t)(CHARPAIR_TOUINT32(c, d))) << 32) + CHARPAIR_TOUINT32(a, b))
+#endif
+
+#define COMPARE_CHARACTERS2(address, char1, char2) \
+    (((uint32_t*)(address))[0] == CHARPAIR_TOUINT32(char1, char2))
+#if CPU(X86_64)
+
+#define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \
+    (((uint64_t*)(address))[0] == CHARQUAD_TOUINT64(char1, char2, char3, char4))
+#else
+#define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \
+    (COMPARE_CHARACTERS2(address, char1, char2) && COMPARE_CHARACTERS2((address) + 2, char3, char4))
+#endif
+
+#endif
+
+#define COMPARE_CHARACTERS3(address, char1, char2, char3) \
+    (COMPARE_CHARACTERS2(address, char1, char2) && ((address)[2] == (char3)))
+#define COMPARE_CHARACTERS5(address, char1, char2, char3, char4, char5) \
+    (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
+#define COMPARE_CHARACTERS6(address, char1, char2, char3, char4, char5, char6) \
+    (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS2(address + 4, char5, char6))
+#define COMPARE_CHARACTERS7(address, char1, char2, char3, char4, char5, char6, char7) \
+    (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS4(address + 3, char4, char5, char6, char7))
+#define COMPARE_CHARACTERS8(address, char1, char2, char3, char4, char5, char6, char7, char8) \
+    (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS4(address + 4, char5, char6, char7, char8))
+""")
+
+trie.printAsC()
index a6b0f6369b19546f88940f6507efabd8548462dd..b9e19e79eb8cf3fce48c49ef81e12a76d7af2e46 100755 (executable)
@@ -4,6 +4,7 @@ export SRCROOT=$PWD
 export WebCore=$PWD
 export CREATE_HASH_TABLE="$SRCROOT/create_hash_table"
 export CREATE_REGEXP_TABLES="$SRCROOT/create_regex_tables"
+export CREATE_KEYWORD_LOOKUP="$SRCROOT/KeywordLookupGenerator.py"
 
 mkdir -p DerivedSources/JavaScriptCore
 cd DerivedSources/JavaScriptCore
index a7d8cb9f21a3e8153d2b122f33416ab5b9bd7b79..736e7bb4ce216667fdb2bf422228d3cf18c75574 100644 (file)
@@ -40,6 +40,7 @@ using namespace WTF;
 using namespace Unicode;
 
 #include "JSParser.h"
+#include "KeywordLookup.h"
 #include "Lookup.h"
 #include "Lexer.lut.h"
 
@@ -271,14 +272,24 @@ void Lexer::setCode(const SourceCode& source, ParserArena& arena)
     ASSERT(currentOffset() == source.startOffset());
 }
 
-ALWAYS_INLINE void Lexer::shift()
+// Advances m_code by shiftAmount characters and reloads m_current.
+// With DoBoundsCheck, m_current is left at -1 when the new position is at or
+// past m_codeEnd. With DoNotBoundsCheck the new position is dereferenced
+// unconditionally, so the caller must know it is still inside the input.
+template <int shiftAmount, Lexer::ShiftType shouldBoundsCheck> ALWAYS_INLINE void Lexer::internalShift()
 {
-    // Faster than an if-else sequence
-    ASSERT(m_current != -1);
-    m_current = -1;
-    ++m_code;
-    if (LIKELY(m_code < m_codeEnd))
+    if (shouldBoundsCheck == DoBoundsCheck) {
+        // Faster than an if-else sequence
+        ASSERT(m_current != -1);
+        m_current = -1;
+        m_code += shiftAmount;
+        if (LIKELY(m_code < m_codeEnd))
+            m_current = *m_code;
+    } else {
+        // Unchecked path: no comparison against m_codeEnd is made here.
+        m_code += shiftAmount;
         m_current = *m_code;
+    }
+}
+
+// Advances the lexer by one character, with bounds checking.
+ALWAYS_INLINE void Lexer::shift()
+{
+    internalShift<1, DoBoundsCheck>();
 }
 
 ALWAYS_INLINE int Lexer::peek(int offset)
@@ -401,9 +412,14 @@ inline void Lexer::record16(int c)
 
 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, unsigned lexType)
 {
-    bool bufferRequired = false;
+    static const ptrdiff_t remaining = m_codeEnd - m_code;
+    if ((remaining >= maxTokenLength) && !(lexType & IgnoreReservedWords)) {
+        JSTokenType keyword = parseKeyword();
+        if (keyword != IDENT)
+            return keyword;
+    }
     const UChar* identifierStart = currentCharacter();
-    int identifierLength;
+    bool bufferRequired = false;
 
     while (true) {
         if (LIKELY(isIdentPart(m_current))) {
@@ -430,7 +446,8 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIde
             record16(character);
         identifierStart = currentCharacter();
     }
-
+    
+    int identifierLength;
     const Identifier* ident = 0;
     if (shouldCreateIdentifier) {
         if (!bufferRequired)
@@ -452,8 +469,12 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIde
     if (LIKELY(!bufferRequired && !(lexType & IgnoreReservedWords))) {
         ASSERT(shouldCreateIdentifier);
         // Keywords must not be recognized if there was an \uXXXX in the identifier.
-        const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident);
-        return entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
+        if (remaining < maxTokenLength) {
+            const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident);
+            ASSERT((remaining < maxTokenLength) || !entry);
+            return entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
+        }
+        return IDENT;
     }
 
     m_buffer16.resize(0);
index 83c290623d19ba65af361844d2ba3e1f8fcd575f..e1335999c9c9474b7642a3a2dca649fd868cb1d5 100644 (file)
@@ -113,6 +113,9 @@ namespace JSC {
 
         ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
 
+        // Selects whether internalShift() compares the new position against m_codeEnd.
+        enum ShiftType { DoBoundsCheck, DoNotBoundsCheck };
+        // Advances the lexer by shiftAmount characters and reloads the current character.
+        template <int shiftAmount, ShiftType shouldBoundsCheck> void internalShift();
+        // Body is emitted into the generated KeywordLookup.h; yields IDENT when no keyword matches.
+        ALWAYS_INLINE JSTokenType parseKeyword();
         template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned);
         template <bool shouldBuildStrings> ALWAYS_INLINE bool parseString(JSTokenData* lvalp, bool strictMode);
         ALWAYS_INLINE void parseHex(double& returnValue);