[JSC] Optimize JSON.parse string fast path
author utatane.tea@gmail.com <utatane.tea@gmail.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 24 Apr 2016 17:07:22 +0000 (17:07 +0000)
committer utatane.tea@gmail.com <utatane.tea@gmail.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 24 Apr 2016 17:07:22 +0000 (17:07 +0000)
https://bugs.webkit.org/show_bug.cgi?id=156953

Reviewed by Mark Lam.

This patch further optimizes the string parsing fast path.
Previously, we created a WTF::String to hold ownership of the token's string,
and always copied the token on the LiteralParser side.
Instead, the StringBuilder in LiteralParser::Lexer now holds ownership of the token's string,
which removes that work from the string parsing fast path.
This patch gives us a stable 1 - 2.5% improvement in Kraken json-parse-financial.

                               Baseline                  Modified

json-parse-financial        41.383+-0.248      ^      40.894+-0.189         ^ definitely 1.0120x faster

* runtime/LiteralParser.cpp:
(JSC::LiteralParser<CharType>::tryJSONPParse):
(JSC::LiteralParser<CharType>::Lexer::lex):
(JSC::LiteralParser<CharType>::Lexer::lexStringSlow):
(JSC::LiteralParser<CharType>::parse):
(JSC::LiteralParser<CharType>::Lexer::lexString): Deleted.
* runtime/LiteralParser.h:
(JSC::LiteralParser::tryLiteralParse):
(JSC::LiteralParser::Lexer::currentToken):
(JSC::LiteralParser::Lexer::LiteralParserTokenPtr::LiteralParserTokenPtr):
(JSC::LiteralParser::Lexer::LiteralParserTokenPtr::operator->):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@199968 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/runtime/LiteralParser.cpp
Source/JavaScriptCore/runtime/LiteralParser.h

index a584bff..60291b0 100644 (file)
@@ -1,3 +1,33 @@
+2016-04-24  Yusuke Suzuki  <utatane.tea@gmail.com>
+
+        [JSC] Optimize JSON.parse string fast path
+        https://bugs.webkit.org/show_bug.cgi?id=156953
+
+        Reviewed by Mark Lam.
+
+        This patch further optimizes the string parsing fast path.
+        Previously, we generated the WTF::String to hold the ownership of the token's string.
+        And always copied the token in LiteralParser side.
+        Instead, we hold the ownership of the token String by the StringBuilder in LiteralParser::Lexer,
+        and remove the processing in the string parsing fast path.
+        This patch gives us stable 1 - 2.5% improvement in Kraken json-parse-financial.
+
+                                       Baseline                  Modified
+
+        json-parse-financial        41.383+-0.248      ^      40.894+-0.189         ^ definitely 1.0120x faster
+
+        * runtime/LiteralParser.cpp:
+        (JSC::LiteralParser<CharType>::tryJSONPParse):
+        (JSC::LiteralParser<CharType>::Lexer::lex):
+        (JSC::LiteralParser<CharType>::Lexer::lexStringSlow):
+        (JSC::LiteralParser<CharType>::parse):
+        (JSC::LiteralParser<CharType>::Lexer::lexString): Deleted.
+        * runtime/LiteralParser.h:
+        (JSC::LiteralParser::tryLiteralParse):
+        (JSC::LiteralParser::Lexer::currentToken):
+        (JSC::LiteralParser::Lexer::LiteralParserTokenPtr::LiteralParserTokenPtr):
+        (JSC::LiteralParser::Lexer::LiteralParserTokenPtr::operator->):
+
 2016-04-24  Filip Pizlo <fpizlo@apple.com> and Andy VanWagoner <thetalecrafter@gmail.com>
 
         [INTL] Implement String.prototype.localeCompare in ECMA-402
index 40eef7c..09474e2 100644 (file)
@@ -38,7 +38,6 @@
 #include "StrongInlines.h"
 #include <wtf/ASCIICType.h>
 #include <wtf/dtoa.h>
-#include <wtf/text/StringBuilder.h>
 
 namespace JSC {
 
@@ -58,17 +57,17 @@ bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool nee
     do {
         Vector<JSONPPathEntry> path;
         // Unguarded next to start off the lexer
-        Identifier name = Identifier::fromString(&m_exec->vm(), m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
+        Identifier name = Identifier::fromString(&m_exec->vm(), m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
         JSONPPathEntry entry;
         if (name == m_exec->vm().propertyNames->varKeyword) {
             if (m_lexer.next() != TokIdentifier)
                 return false;
             entry.m_type = JSONPPathEntryTypeDeclare;
-            entry.m_pathEntryName = Identifier::fromString(&m_exec->vm(), m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
+            entry.m_pathEntryName = Identifier::fromString(&m_exec->vm(), m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
             path.append(entry);
         } else {
             entry.m_type = JSONPPathEntryTypeDot;
-            entry.m_pathEntryName = Identifier::fromString(&m_exec->vm(), m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
+            entry.m_pathEntryName = Identifier::fromString(&m_exec->vm(), m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
             path.append(entry);
         }
         if (isLexerKeyword(entry.m_pathEntryName))
@@ -82,7 +81,7 @@ bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool nee
                 entry.m_type = JSONPPathEntryTypeLookup;
                 if (m_lexer.next() != TokNumber)
                     return false;
-                double doubleIndex = m_lexer.currentToken().numberToken;
+                double doubleIndex = m_lexer.currentToken()->numberToken;
                 int index = (int)doubleIndex;
                 if (index != doubleIndex || index < 0)
                     return false;
@@ -95,7 +94,7 @@ bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool nee
                 entry.m_type = JSONPPathEntryTypeDot;
                 if (m_lexer.next() != TokIdentifier)
                     return false;
-                entry.m_pathEntryName = Identifier::fromString(&m_exec->vm(), m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
+                entry.m_pathEntryName = Identifier::fromString(&m_exec->vm(), m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
                 break;
             }
             case TokLParen: {
@@ -119,15 +118,15 @@ bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool nee
             return false;
         results.last().m_path.swap(path);
         if (entry.m_type == JSONPPathEntryTypeCall) {
-            if (m_lexer.currentToken().type != TokRParen)
+            if (m_lexer.currentToken()->type != TokRParen)
                 return false;
             m_lexer.next();
         }
-        if (m_lexer.currentToken().type != TokSemi)
+        if (m_lexer.currentToken()->type != TokSemi)
             break;
         m_lexer.next();
-    } while (m_lexer.currentToken().type == TokIdentifier);
-    return m_lexer.currentToken().type == TokEnd;
+    } while (m_lexer.currentToken()->type == TokIdentifier);
+    return m_lexer.currentToken()->type == TokEnd;
 }
     
 template <typename CharType>
@@ -173,6 +172,10 @@ ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UCh
 template <typename CharType>
 template <ParserMode mode> TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
 {
+#if !ASSERT_DISABLED
+    m_currentTokenID++;
+#endif
+
     while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
         ++m_ptr;
 
@@ -354,7 +357,6 @@ template <ParserMode mode, char terminator> ALWAYS_INLINE TokenType LiteralParse
     while (m_ptr < m_end && isSafeStringCharacter<mode, CharType, terminator>(*m_ptr))
         ++m_ptr;
     if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) {
-        token.stringBuffer = String();
         setParserTokenString<CharType>(token, runStart);
         token.stringLength = m_ptr - runStart;
         token.type = TokString;
@@ -367,19 +369,19 @@ template <ParserMode mode, char terminator> ALWAYS_INLINE TokenType LiteralParse
 template <typename CharType>
 template <ParserMode mode, char terminator> TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart)
 {
-    StringBuilder builder;
+    m_builder.clear();
     goto slowPathBegin;
     do {
         runStart = m_ptr;
         while (m_ptr < m_end && isSafeStringCharacter<mode, CharType, terminator>(*m_ptr))
             ++m_ptr;
-        if (!builder.isEmpty())
-            builder.append(runStart, m_ptr - runStart);
+        if (!m_builder.isEmpty())
+            m_builder.append(runStart, m_ptr - runStart);
 
 slowPathBegin:
         if ((mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
-            if (builder.isEmpty() && runStart < m_ptr)
-                builder.append(runStart, m_ptr - runStart);
+            if (m_builder.isEmpty() && runStart < m_ptr)
+                m_builder.append(runStart, m_ptr - runStart);
             ++m_ptr;
             if (m_ptr >= m_end) {
                 m_lexErrorMessage = ASCIILiteral("Unterminated string");
@@ -387,35 +389,35 @@ slowPathBegin:
             }
             switch (*m_ptr) {
                 case '"':
-                    builder.append('"');
+                    m_builder.append('"');
                     m_ptr++;
                     break;
                 case '\\':
-                    builder.append('\\');
+                    m_builder.append('\\');
                     m_ptr++;
                     break;
                 case '/':
-                    builder.append('/');
+                    m_builder.append('/');
                     m_ptr++;
                     break;
                 case 'b':
-                    builder.append('\b');
+                    m_builder.append('\b');
                     m_ptr++;
                     break;
                 case 'f':
-                    builder.append('\f');
+                    m_builder.append('\f');
                     m_ptr++;
                     break;
                 case 'n':
-                    builder.append('\n');
+                    m_builder.append('\n');
                     m_ptr++;
                     break;
                 case 'r':
-                    builder.append('\r');
+                    m_builder.append('\r');
                     m_ptr++;
                     break;
                 case 't':
-                    builder.append('\t');
+                    m_builder.append('\t');
                     m_ptr++;
                     break;
 
@@ -430,13 +432,13 @@ slowPathBegin:
                             return TokError;
                         }
                     }
-                    builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
+                    m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                     m_ptr += 5;
                     break;
 
                 default:
                     if (*m_ptr == '\'' && mode != StrictJSON) {
-                        builder.append('\'');
+                        m_builder.append('\'');
                         m_ptr++;
                         break;
                     }
@@ -451,20 +453,18 @@ slowPathBegin:
         return TokError;
     }
 
-    if (builder.isEmpty()) {
-        token.stringBuffer = String();
+    if (m_builder.isEmpty()) {
         setParserTokenString<CharType>(token, runStart);
         token.stringLength = m_ptr - runStart;
     } else {
-        token.stringBuffer = builder.toString();
-        if (token.stringBuffer.is8Bit()) {
+        if (m_builder.is8Bit()) {
             token.stringIs8Bit = 1;
-            token.stringToken8 = token.stringBuffer.characters8();
+            token.stringToken8 = m_builder.characters8();
         } else {
             token.stringIs8Bit = 0;
-            token.stringToken16 = token.stringBuffer.characters16();
+            token.stringToken16 = m_builder.characters16();
         }
-        token.stringLength = token.stringBuffer.length();
+        token.stringLength = m_builder.length();
     }
     token.type = TokString;
     token.end = ++m_ptr;
@@ -588,7 +588,7 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
             doParseArrayStartExpression:
             FALLTHROUGH;
             case DoParseArrayStartExpression: {
-                TokenType lastToken = m_lexer.currentToken().type;
+                TokenType lastToken = m_lexer.currentToken()->type;
                 if (m_lexer.next() == TokRBracket) {
                     if (lastToken == TokComma) {
                         m_parseErrorMessage = ASCIILiteral("Unexpected comma at the end of array expression");
@@ -607,10 +607,10 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
                 JSArray* array = asArray(objectStack.last());
                 array->putDirectIndex(m_exec, array->length(), lastValue);
                 
-                if (m_lexer.currentToken().type == TokComma)
+                if (m_lexer.currentToken()->type == TokComma)
                     goto doParseArrayStartExpression;
 
-                if (m_lexer.currentToken().type != TokRBracket) {
+                if (m_lexer.currentToken()->type != TokRBracket) {
                     m_parseErrorMessage = ASCIILiteral("Expected ']'");
                     return JSValue();
                 }
@@ -627,7 +627,11 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
 
                 TokenType type = m_lexer.next();
                 if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
-                    LiteralParserToken<CharType> identifierToken = m_lexer.currentToken();
+                    typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
+                    if (identifierToken->stringIs8Bit)
+                        identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
+                    else
+                        identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));
 
                     // Check for colon
                     if (m_lexer.next() != TokColon) {
@@ -636,10 +640,6 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
                     }
                     
                     m_lexer.next();
-                    if (identifierToken.stringIs8Bit)
-                        identifierStack.append(makeIdentifier(identifierToken.stringToken8, identifierToken.stringLength));
-                    else
-                        identifierStack.append(makeIdentifier(identifierToken.stringToken16, identifierToken.stringLength));
                     stateStack.append(DoParseObjectEndExpression);
                     goto startParseExpression;
                 }
@@ -659,7 +659,11 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
                     m_parseErrorMessage = ASCIILiteral("Property name must be a string literal");
                     return JSValue();
                 }
-                LiteralParserToken<CharType> identifierToken = m_lexer.currentToken();
+                typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
+                if (identifierToken->stringIs8Bit)
+                    identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
+                else
+                    identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));
 
                 // Check for colon
                 if (m_lexer.next() != TokColon) {
@@ -668,10 +672,6 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
                 }
 
                 m_lexer.next();
-                if (identifierToken.stringIs8Bit)
-                    identifierStack.append(makeIdentifier(identifierToken.stringToken8, identifierToken.stringLength));
-                else
-                    identifierStack.append(makeIdentifier(identifierToken.stringToken16, identifierToken.stringLength));
                 stateStack.append(DoParseObjectEndExpression);
                 goto startParseExpression;
             }
@@ -694,9 +694,9 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
                         object->putDirect(m_exec->vm(), ident, lastValue);                    
                 }
                 identifierStack.removeLast();
-                if (m_lexer.currentToken().type == TokComma)
+                if (m_lexer.currentToken()->type == TokComma)
                     goto doParseObjectStartExpression;
-                if (m_lexer.currentToken().type != TokRBrace) {
+                if (m_lexer.currentToken()->type != TokRBrace) {
                     m_parseErrorMessage = ASCIILiteral("Expected '}'");
                     return JSValue();
                 }
@@ -707,24 +707,24 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
             }
             startParseExpression:
             case StartParseExpression: {
-                switch (m_lexer.currentToken().type) {
+                switch (m_lexer.currentToken()->type) {
                     case TokLBracket:
                         goto startParseArray;
                     case TokLBrace:
                         goto startParseObject;
                     case TokString: {
-                        LiteralParserToken<CharType> stringToken = m_lexer.currentToken();
-                        m_lexer.next();
-                        if (stringToken.stringIs8Bit)
-                            lastValue = jsString(m_exec, makeIdentifier(stringToken.stringToken8, stringToken.stringLength).string());
+                        typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken();
+                        if (stringToken->stringIs8Bit)
+                            lastValue = jsString(m_exec, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string());
                         else
-                            lastValue = jsString(m_exec, makeIdentifier(stringToken.stringToken16, stringToken.stringLength).string());
+                            lastValue = jsString(m_exec, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string());
+                        m_lexer.next();
                         break;
                     }
                     case TokNumber: {
-                        LiteralParserToken<CharType> numberToken = m_lexer.currentToken();
+                        typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken();
+                        lastValue = jsNumber(numberToken->numberToken);
                         m_lexer.next();
-                        lastValue = jsNumber(numberToken.numberToken);
                         break;
                     }
                     case TokNull:
@@ -748,11 +748,11 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
                         m_parseErrorMessage = ASCIILiteral("Unexpected token '}'");
                         return JSValue();
                     case TokIdentifier: {
-                        const LiteralParserToken<CharType>& token = m_lexer.currentToken();
-                        if (token.stringIs8Bit)
-                            m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", String(m_lexer.currentToken().stringToken8, m_lexer.currentToken().stringLength).ascii().data());
+                        typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken();
+                        if (token->stringIs8Bit)
+                            m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", String(token->stringToken8, token->stringLength).ascii().data());
                         else
-                            m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", String(m_lexer.currentToken().stringToken16, m_lexer.currentToken().stringLength).ascii().data());
+                            m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", String(token->stringToken16, token->stringLength).ascii().data());
                         return JSValue();
                     }
                     case TokColon:
@@ -788,7 +788,7 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
                 break;
             }
             case StartParseStatement: {
-                switch (m_lexer.currentToken().type) {
+                switch (m_lexer.currentToken()->type) {
                     case TokLBracket:
                     case TokNumber:
                     case TokString:
@@ -849,7 +849,7 @@ JSValue LiteralParser<CharType>::parse(ParserState initialState)
             }
             case StartParseStatementEndStatement: {
                 ASSERT(stateStack.isEmpty());
-                if (m_lexer.currentToken().type != TokRParen)
+                if (m_lexer.currentToken()->type != TokRParen)
                     return JSValue();
                 if (m_lexer.next() == TokEnd)
                     return lastValue;
index 9e22f38..d05ccac 100644 (file)
@@ -30,6 +30,7 @@
 #include "JSCJSValue.h"
 #include "JSGlobalObjectFunctions.h"
 #include <array>
+#include <wtf/text/StringBuilder.h>
 #include <wtf/text/WTFString.h>
 
 namespace JSC {
@@ -65,10 +66,15 @@ struct JSONPData {
 
 template <typename CharType>
 struct LiteralParserToken {
+private:
+WTF_MAKE_NONCOPYABLE(LiteralParserToken<CharType>);
+
+public:
+    LiteralParserToken() = default;
+
     TokenType type;
     const CharType* start;
     const CharType* end;
-    String stringBuffer;
     union {
         double numberToken;
         struct {
@@ -108,9 +114,9 @@ public:
     {
         m_lexer.next();
         JSValue result = parse(m_mode == StrictJSON ? StartParseExpression : StartParseStatement);
-        if (m_lexer.currentToken().type == TokSemi)
+        if (m_lexer.currentToken()->type == TokSemi)
             m_lexer.next();
-        if (m_lexer.currentToken().type != TokEnd)
+        if (m_lexer.currentToken()->type != TokEnd)
             return JSValue();
         return result;
     }
@@ -129,10 +135,40 @@ private:
         
         TokenType next();
         
-        const LiteralParserToken<CharType>& currentToken()
+#if ASSERT_DISABLED
+        typedef const LiteralParserToken<CharType>* LiteralParserTokenPtr;
+
+        LiteralParserTokenPtr currentToken()
         {
-            return m_currentToken;
+            return &m_currentToken;
         }
+#else
+        class LiteralParserTokenPtr;
+        friend class LiteralParserTokenPtr;
+        class LiteralParserTokenPtr {
+        public:
+            LiteralParserTokenPtr(Lexer& lexer)
+                : m_lexer(lexer)
+                , m_tokenID(lexer.m_currentTokenID)
+            {
+            }
+
+            ALWAYS_INLINE const LiteralParserToken<CharType>* operator->() const
+            {
+                ASSERT(m_tokenID == m_lexer.m_currentTokenID);
+                return &m_lexer.m_currentToken;
+            }
+
+        private:
+            Lexer& m_lexer;
+            unsigned m_tokenID;
+        };
+
+        LiteralParserTokenPtr currentToken()
+        {
+            return LiteralParserTokenPtr(*this);
+        }
+#endif
         
         String getErrorMessage() { return m_lexErrorMessage; }
         
@@ -147,6 +183,10 @@ private:
         ParserMode m_mode;
         const CharType* m_ptr;
         const CharType* m_end;
+        StringBuilder m_builder;
+#if !ASSERT_DISABLED
+        unsigned m_currentTokenID { 0 };
+#endif
     };
     
     class StackGuard;
@@ -161,7 +201,7 @@ private:
     std::array<Identifier, MaximumCachableCharacter> m_recentIdentifiers;
     ALWAYS_INLINE const Identifier makeIdentifier(const LChar* characters, size_t length);
     ALWAYS_INLINE const Identifier makeIdentifier(const UChar* characters, size_t length);
-    };
+};
 
 }