Elide unnecessary moves in Air O0
authorkeith_miller@apple.com <keith_miller@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 18 Sep 2019 01:06:46 +0000 (01:06 +0000)
committerkeith_miller@apple.com <keith_miller@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Wed, 18 Sep 2019 01:06:46 +0000 (01:06 +0000)
https://bugs.webkit.org/show_bug.cgi?id=201703

Reviewed by Saam Barati.

This patch also removes the code that would try to reuse temps in
WasmAirIRGenerator. That code makes it hard to accurately
determine where a temp dies as it could be reused again
later. Thus every temp, may appear to live for a long time in the
global ordering.

This appears to be a minor progression on the overall score of
wasm subtests in JS2 and a 10% wasm-JIT memory usage reduction.

This patch also fixes an issue where we didn't ask Patchpoints
for early clobber registers when determining what callee saves
were used by the program.

* b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:
(JSC::B3::Air::GenerateAndAllocateRegisters::generate):
* b3/air/AirBasicBlock.h:
* b3/air/AirCode.h:
* b3/air/AirHandleCalleeSaves.cpp:
(JSC::B3::Air::handleCalleeSaves):
* b3/air/testair.cpp:
* wasm/WasmAirIRGenerator.cpp:
(JSC::Wasm::AirIRGenerator::didKill): Deleted.
* wasm/WasmB3IRGenerator.cpp:
(JSC::Wasm::B3IRGenerator::didKill): Deleted.
* wasm/WasmFunctionParser.h:
(JSC::Wasm::FunctionParser<Context>::parseBody):
(JSC::Wasm::FunctionParser<Context>::parseExpression):
* wasm/WasmValidate.cpp:
(JSC::Wasm::Validate::didKill): Deleted.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@250009 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp
Source/JavaScriptCore/b3/air/AirBasicBlock.h
Source/JavaScriptCore/b3/air/AirCode.h
Source/JavaScriptCore/b3/air/AirHandleCalleeSaves.cpp
Source/JavaScriptCore/b3/air/testair.cpp
Source/JavaScriptCore/wasm/WasmAirIRGenerator.cpp
Source/JavaScriptCore/wasm/WasmB3IRGenerator.cpp
Source/JavaScriptCore/wasm/WasmFunctionParser.h
Source/JavaScriptCore/wasm/WasmValidate.cpp

index 927d01c..6b567e3 100644 (file)
@@ -1,3 +1,40 @@
+2019-09-17  Keith Miller  <keith_miller@apple.com>
+
+        Elide unnecessary moves in Air O0
+        https://bugs.webkit.org/show_bug.cgi?id=201703
+
+        Reviewed by Saam Barati.
+
+        This patch also removes the code that would try to reuse temps in
+        WasmAirIRGenerator. That code makes it hard to accurately
+        determine where a temp dies as it could be reused again
+        later. Thus every temp, may appear to live for a long time in the
+        global ordering.
+
+        This appears to be a minor progression on the overall score of
+        wasm subtests in JS2 and a 10% wasm-JIT memory usage reduction.
+
+        This patch also fixes an issue where we didn't ask Patchpoints
+        for early clobber registers when determining what callee saves
+        were used by the program.
+
+        * b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:
+        (JSC::B3::Air::GenerateAndAllocateRegisters::generate):
+        * b3/air/AirBasicBlock.h:
+        * b3/air/AirCode.h:
+        * b3/air/AirHandleCalleeSaves.cpp:
+        (JSC::B3::Air::handleCalleeSaves):
+        * b3/air/testair.cpp:
+        * wasm/WasmAirIRGenerator.cpp:
+        (JSC::Wasm::AirIRGenerator::didKill): Deleted.
+        * wasm/WasmB3IRGenerator.cpp:
+        (JSC::Wasm::B3IRGenerator::didKill): Deleted.
+        * wasm/WasmFunctionParser.h:
+        (JSC::Wasm::FunctionParser<Context>::parseBody):
+        (JSC::Wasm::FunctionParser<Context>::parseExpression):
+        * wasm/WasmValidate.cpp:
+        (JSC::Wasm::Validate::didKill): Deleted.
+
 2019-09-17  Mark Lam  <mark.lam@apple.com>
 
         Use constexpr instead of const in symbol definitions that are obviously constexpr.
index bb5cf6d..11513be 100644 (file)
@@ -421,6 +421,43 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
             m_namedUsedRegs = RegisterSet();
             m_namedDefdRegs = RegisterSet();
 
+            bool needsToGenerate = ([&] () -> bool {
+                // FIXME: We should consider trying to figure out if we can also elide Mov32s
+                if (!(inst.kind.opcode == Move || inst.kind.opcode == MoveDouble))
+                    return true;
+
+                ASSERT(inst.args.size() >= 2);
+                Arg source = inst.args[0];
+                Arg dest = inst.args[1];
+                if (!source.isTmp() || !dest.isTmp())
+                    return true;
+
+                // FIXME: We don't track where the last use of a reg is globally so we don't know where we can elide them.
+                ASSERT(source.isReg() || m_liveRangeEnd[source.tmp()] >= m_globalInstIndex);
+                if (source.isReg() || m_liveRangeEnd[source.tmp()] != m_globalInstIndex)
+                    return true;
+
+                Reg sourceReg = m_map[source.tmp()].reg;
+                // If the value is not already materialized into a register we may still move it into one so let the normal generation code run.
+                if (!sourceReg)
+                    return true;
+
+                ASSERT(m_currentAllocation->at(sourceReg) == source.tmp());
+
+                if (dest.isReg() && dest.reg() != sourceReg)
+                    return true;
+
+                if (Reg oldReg = m_map[dest.tmp()].reg) {
+                    ASSERT(m_currentAllocation->at(oldReg) == dest.tmp());
+                    m_currentAllocation->at(oldReg) = Tmp();
+                }
+
+                m_currentAllocation->at(sourceReg) = dest.tmp();
+                m_map[source.tmp()].reg = Reg();
+                m_map[dest.tmp()].reg = sourceReg;
+                return false;
+            })();
+
             inst.forEachArg([&] (Arg& arg, Arg::Role role, Bank, Width) {
                 if (!arg.isTmp())
                     return;
@@ -482,7 +519,7 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
             allocNamed(m_namedUsedRegs, false); // Must come before the defd registers since we may use and def the same register.
             allocNamed(m_namedDefdRegs, true);
 
-            {
+            if (needsToGenerate) {
                 auto tryAllocate = [&] {
                     Vector<Tmp*, 8> usesToAlloc;
                     Vector<Tmp*, 8> defsToAlloc;
@@ -605,7 +642,9 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
             }
 
             if (!inst.isTerminal()) {
-                CCallHelpers::Jump jump = inst.generate(*m_jit, context);
+                CCallHelpers::Jump jump;
+                if (needsToGenerate)
+                    jump = inst.generate(*m_jit, context);
                 ASSERT_UNUSED(jump, !jump.isSet());
 
                 for (Reg reg : clobberedRegisters) {
@@ -616,7 +655,7 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
                     m_map[tmp].reg = Reg();
                 }
             } else {
-                bool needsToGenerate = true;
+                ASSERT(needsToGenerate);
                 if (inst.kind.opcode == Jump && block->successorBlock(0) == m_code.findNextBlock(block))
                     needsToGenerate = false;
 
index 379c635..3f835e1 100644 (file)
@@ -104,8 +104,8 @@ public:
     const SuccessorList& successors() const { return m_successors; }
     SuccessorList& successors() { return m_successors; }
 
-    void setSuccessors(FrequentedBlock);
-    void setSuccessors(FrequentedBlock, FrequentedBlock);
+    JS_EXPORT_PRIVATE void setSuccessors(FrequentedBlock);
+    JS_EXPORT_PRIVATE void setSuccessors(FrequentedBlock, FrequentedBlock);
 
     BasicBlock* successorBlock(unsigned index) const { return successor(index).block(); }
     BasicBlock*& successorBlock(unsigned index) { return successor(index).block(); }
index 6149c9a..3c44aff 100644 (file)
@@ -224,7 +224,7 @@ public:
     RegisterSet calleeSaveRegisters() const { return m_calleeSaveRegisters; }
 
     // Recomputes predecessors and deletes unreachable blocks.
-    void resetReachability();
+    JS_EXPORT_PRIVATE void resetReachability();
     
     JS_EXPORT_PRIVATE void dump(PrintStream&) const;
 
index 547bb6e..11e84c4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2015-2019 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -46,8 +46,10 @@ void handleCalleeSaves(Code& code)
                     usedCalleeSaves.set(tmp.reg());
                 });
 
-            if (inst.kind.opcode == Patch)
+            if (inst.kind.opcode == Patch) {
                 usedCalleeSaves.merge(inst.extraClobberedRegs());
+                usedCalleeSaves.merge(inst.extraEarlyClobberedRegs());
+            }
         }
     }
 
index 3dca296..36486ed 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016-2018 Apple Inc. All rights reserved.
+ * Copyright (C) 2016-2019 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
 #include "InitializeThreading.h"
 #include "JSCInlines.h"
 #include "LinkBuffer.h"
+#include "ProbeContext.h"
 #include "PureNaN.h"
 #include <cmath>
+#include <regex>
 #include <string>
 #include <wtf/Lock.h>
 #include <wtf/NumberOfCores.h>
@@ -2063,6 +2065,163 @@ void testLea32()
     CHECK(r == a + b);
 }
 
+inline Vector<String> matchAll(const CString& source, std::regex regex)
+{
+    Vector<String> matches;
+    std::smatch match;
+    for (std::string str = source.data(); std::regex_search(str, match, regex);) {
+        ASSERT(match.size() == 1);
+        str = match.suffix();
+        matches.append(match[0].str().c_str());
+    }
+    return matches;
+}
+
+void testElideSimpleMove()
+{
+    for (unsigned tmpCount = 1; tmpCount < 100; tmpCount++) {
+        B3::Procedure proc;
+        Code& code = proc.code();
+
+        BasicBlock* root = code.addBlock();
+
+        Tmp tmp = code.newTmp(B3::GP);
+        root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), tmp);
+        for (unsigned i = 0; i < tmpCount; i++) {
+            Tmp newTmp = code.newTmp(B3::GP);
+            root->append(Move, nullptr, tmp, newTmp);
+            tmp = newTmp;
+        }
+        root->append(Move, nullptr, tmp, Tmp(GPRInfo::returnValueGPR));
+        root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
+
+        auto compilation = compile(proc);
+        CString disassembly = compilation->disassembly();
+        std::regex findRRMove(isARM64() ? "mov r\\d+, r\\d+\\n" : "mov %\\w+, %\\w+\\n");
+        auto result = matchAll(disassembly, findRRMove);
+        // sp -> fp; arg0 -> ret0; fp -> sp
+        // fp -> sp only happens in O0 because we don't actually need to move the stack in general.
+        CHECK(result.size() == 2 + !Options::defaultB3OptLevel());
+    }
+}
+
+void testElideHandlesEarlyClobber()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+
+    const unsigned tmpCount = RegisterSet::allGPRs().numberOfSetRegisters() * 2;
+    Vector<Tmp> tmps(tmpCount);
+    for (unsigned i = 0; i < tmpCount; ++i) {
+        tmps[i] = code.newTmp(B3::GP);
+        root->append(Move, nullptr, Arg::imm(i), tmps[i]);
+    }
+
+    RegisterSet registers = RegisterSet::allGPRs();
+    registers.exclude(RegisterSet::reservedHardwareRegisters());
+    registers.exclude(RegisterSet::stackRegisters());
+    Reg firstCalleeSave;
+    Reg lastCalleeSave;
+    auto* patch = proc.add<B3::PatchpointValue>(B3::Int32, B3::Origin());
+    patch->clobberEarly(registers);
+    for (Reg reg : registers) {
+        if (!firstCalleeSave)
+            firstCalleeSave = reg;
+        lastCalleeSave = reg;
+    }
+    ASSERT(firstCalleeSave != lastCalleeSave);
+    patch->earlyClobbered().clear(firstCalleeSave);
+    patch->resultConstraints.append({ B3::ValueRep::reg(firstCalleeSave) });
+    patch->earlyClobbered().clear(lastCalleeSave);
+    patch->clobber(RegisterSet(lastCalleeSave));
+
+    patch->setGenerator([=] (CCallHelpers& jit, const JSC::B3::StackmapGenerationParams&) {
+        jit.probe([=] (Probe::Context& context) {
+            for (Reg reg : registers)
+                context.gpr(reg.gpr()) = 0;
+        });
+    });
+
+    Inst inst(Patch, patch, Arg::special(code.addSpecial(WTF::makeUnique<JSC::B3::PatchpointSpecial>())));
+    inst.args.append(Tmp(firstCalleeSave));
+    root->appendInst(WTFMove(inst));
+
+    Tmp result = code.newTmp(B3::GP);
+    root->append(Move, nullptr, tmps[0], result);
+    for (Tmp tmp : tmps)
+        root->append(Add32, nullptr, tmp, result);
+
+    root->append(Move, nullptr, result, Tmp(GPRInfo::returnValueGPR));
+    root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
+
+    auto runResult = compileAndRun<uint32_t>(proc);
+    CHECK(runResult == (tmpCount * (tmpCount - 1)) / 2);
+}
+
+void testElideMoveThenRealloc()
+{
+    RegisterSet registers = RegisterSet::allGPRs();
+    registers.exclude(RegisterSet::stackRegisters());
+    registers.exclude(RegisterSet::reservedHardwareRegisters());
+
+    for (Reg reg : registers) {
+        B3::Procedure proc;
+        Code& code = proc.code();
+
+        BasicBlock* root = code.addBlock();
+        BasicBlock* taken = code.addBlock();
+        BasicBlock* notTaken = code.addBlock();
+        BasicBlock* notTakenReturn = code.addBlock();
+        BasicBlock* ret = code.addBlock();
+        BasicBlock* continuation = code.addBlock();
+
+        Tmp tmp = code.newTmp(B3::GP);
+        {
+            root->append(Move, nullptr, Arg::imm(1), Tmp(reg));
+
+            root->append(BranchTest32, nullptr, Arg::resCond(MacroAssembler::NonZero), Tmp(reg), Arg::bitImm(-1));
+            root->setSuccessors(taken, notTaken);
+        }
+
+        {
+            taken->append(Jump, nullptr);
+            taken->setSuccessors(continuation);
+        }
+
+        {
+            notTaken->append(BranchTest32, nullptr, Arg::resCond(MacroAssembler::NonZero), Tmp(reg), Arg::bitImm(-1));
+            notTaken->setSuccessors(continuation, notTakenReturn);
+        }
+
+        {
+            tmp = code.newTmp(B3::GP);
+            continuation->append(Move, nullptr, Arg::imm(42), tmp);
+            continuation->append(BranchTest32, nullptr, Arg::resCond(MacroAssembler::NonZero), tmp, Arg::bitImm(-1));
+            continuation->setSuccessors(ret, notTakenReturn);
+        }
+
+        {
+            tmp = code.newTmp(B3::GP);
+            ret->append(Move, nullptr, Arg::imm(42), tmp);
+            ret->append(Move, nullptr, tmp, Tmp(reg));
+            ret->append(Move, nullptr, Tmp(reg), Tmp(GPRInfo::returnValueGPR));
+            ret->append(Add32, nullptr, Tmp(reg), Tmp(reg));
+            ret->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
+        }
+
+        {
+            notTakenReturn->append(Move, nullptr, Tmp(reg), Tmp(GPRInfo::returnValueGPR));
+            notTakenReturn->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
+        }
+
+        code.resetReachability();
+        auto runResult = compileAndRun<uint32_t>(proc);
+        CHECK(runResult == 42 + (42 * (reg == GPRInfo::returnValueGPR)));
+    }
+}
+
 #define PREFIX "O", Options::defaultB3OptLevel(), ": "
 
 #define RUN(test) do {                                 \
@@ -2151,6 +2310,10 @@ void run(const char* filter)
     RUN(testLea32());
     RUN(testLea64());
 
+    RUN(testElideSimpleMove());
+    RUN(testElideHandlesEarlyClobber());
+    RUN(testElideMoveThenRealloc());
+
     if (tasks.isEmpty())
         usage();
 
index c671563..a1f0c5d 100644 (file)
@@ -303,17 +303,6 @@ public:
         return result;
     }
 
-    ALWAYS_INLINE void didKill(const ExpressionType& typedTmp)
-    {
-        Tmp tmp = typedTmp.tmp();
-        if (!tmp)
-            return;
-        if (tmp.isGP())
-            m_freeGPs.append(tmp);
-        else
-            m_freeFPs.append(tmp);
-    }
-
     const Bag<B3::PatchpointValue*>& patchpoints() const
     {
         return m_patchpoints;
index d04b2b1..e253c20 100644 (file)
@@ -246,8 +246,6 @@ public:
     Value* constant(B3::Type, uint64_t bits, Optional<Origin> = WTF::nullopt);
     void insertConstants();
 
-    ALWAYS_INLINE void didKill(ExpressionType) { }
-
 private:
     void emitExceptionCheck(CCallHelpers&, ExceptionType);
 
index b0c1cc6..7ae382f 100644 (file)
@@ -73,7 +73,6 @@ private:
 #define WASM_TRY_POP_EXPRESSION_STACK_INTO(result, what) do {                               \
         WASM_PARSER_FAIL_IF(m_expressionStack.isEmpty(), "can't pop empty stack in " what); \
         result = m_expressionStack.takeLast();                                              \
-        m_toKillAfterExpression.append(result);                                             \
     } while (0)
 
     template<OpType>
@@ -95,8 +94,6 @@ private:
     OpType m_currentOpcode;
     size_t m_currentOpcodeStartingOffset { 0 };
 
-    Vector<ExpressionType, 8> m_toKillAfterExpression;
-
     unsigned m_unreachableBlocks { 0 };
     unsigned m_loopIndex { 0 };
 };
@@ -145,8 +142,6 @@ auto FunctionParser<Context>::parseBody() -> PartialResult
     m_controlStack.append({ m_context.createStack(), m_context.addTopLevel(m_signature.returnType()) });
     uint8_t op;
     while (m_controlStack.size()) {
-        ASSERT(m_toKillAfterExpression.isEmpty());
-
         m_currentOpcodeStartingOffset = m_offset;
         WASM_PARSER_FAIL_IF(!parseUInt8(op), "can't decode opcode");
         WASM_PARSER_FAIL_IF(!isValidOpType(op), "invalid opcode ", op);
@@ -162,8 +157,6 @@ auto FunctionParser<Context>::parseBody() -> PartialResult
             WASM_FAIL_IF_HELPER_FAILS(parseUnreachableExpression());
         else {
             WASM_FAIL_IF_HELPER_FAILS(parseExpression());
-            while (m_toKillAfterExpression.size())
-                m_context.didKill(m_toKillAfterExpression.takeLast());
         }
     }
 
@@ -583,8 +576,7 @@ auto FunctionParser<Context>::parseExpression() -> PartialResult
 
     case Drop: {
         WASM_PARSER_FAIL_IF(!m_expressionStack.size(), "can't drop on empty stack");
-        auto expression = m_expressionStack.takeLast();
-        m_toKillAfterExpression.append(expression);
+        m_expressionStack.takeLast();
         return { };
     }
 
index 62e8c21..9525d94 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2016-2019 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -154,8 +154,6 @@ public:
     Result WARN_UNUSED_RETURN addCall(unsigned calleeIndex, const Signature&, const Vector<ExpressionType>& args, ExpressionType& result);
     Result WARN_UNUSED_RETURN addCallIndirect(unsigned tableIndex, const Signature&, const Vector<ExpressionType>& args, ExpressionType& result);
 
-    ALWAYS_INLINE void didKill(ExpressionType) { }
-
     bool hasMemory() const { return !!m_module.memory; }
 
     Validate(const ModuleInformation& module)