B3 CSE should be able to match a full redundancy even if none of the matches dominate...
authorfpizlo@apple.com <fpizlo@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 22 Jan 2016 03:31:32 +0000 (03:31 +0000)
committerfpizlo@apple.com <fpizlo@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 22 Jan 2016 03:31:32 +0000 (03:31 +0000)
https://bugs.webkit.org/show_bug.cgi?id=153321

Reviewed by Benjamin Poulain.

I once learned that LLVM's GVN can manufacture Phi functions. I don't know the details
but I'm presuming that it involves:

    if (p)
        tmp1 = *ptr
    else
        tmp2 = *ptr
    tmp3 = *ptr // Replace this with Phi(tmp1, tmp2).

This adds such an optimization to our CSE. The idea is that we search through basic
blocks until we find the value we want, a side effect, or the start of the procedure. If
we find a value that matches our search criteria, we record it and ignore the
predecessors. If we find a side effect or the start of the procedure, we give up the
whole search. This ensures that if we come out of the search without giving up, we'll
have a set of matches that are fully redundant.

CSE could then create a Phi graph by using SSACalculator. But the recent work on FixSSA
revealed a much more exciting option: create a stack slot! In case there is more than one
match, CSE now creates a stack slot that each match stores to, and replaces the redundant
instruction with a loadfrom the stack slot. The stack slot is anonymous, which ensures
that FixSSA will turn it into an optimal Phi graph or whatever.

This is a significant speed-up on Octane/richards.

* b3/B3DuplicateTails.cpp:
* b3/B3EliminateCommonSubexpressions.cpp:
* b3/B3FixSSA.cpp:
(JSC::B3::fixSSA):
* b3/B3Generate.cpp:
(JSC::B3::generateToAir):
* b3/B3Procedure.h:
(JSC::B3::Procedure::setFrontendData):
(JSC::B3::Procedure::frontendData):
* b3/testb3.cpp:
* ftl/FTLState.cpp:
(JSC::FTL::State::State):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@195434 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/b3/B3DuplicateTails.cpp
Source/JavaScriptCore/b3/B3EliminateCommonSubexpressions.cpp
Source/JavaScriptCore/b3/B3FixSSA.cpp
Source/JavaScriptCore/b3/B3Generate.cpp
Source/JavaScriptCore/b3/B3Procedure.h
Source/JavaScriptCore/b3/testb3.cpp
Source/JavaScriptCore/ftl/FTLState.cpp

index 5cf8fc9..6589c8e 100644 (file)
@@ -1,5 +1,49 @@
 2016-01-21  Filip Pizlo  <fpizlo@apple.com>
 
+        B3 CSE should be able to match a full redundancy even if none of the matches dominate the value in question
+        https://bugs.webkit.org/show_bug.cgi?id=153321
+
+        Reviewed by Benjamin Poulain.
+
+        I once learned that LLVM's GVN can manufacture Phi functions. I don't know the details
+        but I'm presuming that it involves:
+
+            if (p)
+                tmp1 = *ptr
+            else
+                tmp2 = *ptr
+            tmp3 = *ptr // Replace this with Phi(tmp1, tmp2).
+
+        This adds such an optimization to our CSE. The idea is that we search through basic
+        blocks until we find the value we want, a side effect, or the start of the procedure. If
+        we find a value that matches our search criteria, we record it and ignore the
+        predecessors. If we find a side effect or the start of the procedure, we give up the
+        whole search. This ensures that if we come out of the search without giving up, we'll
+        have a set of matches that are fully redundant.
+
+        CSE could then create a Phi graph by using SSACalculator. But the recent work on FixSSA
+        revealed a much more exciting option: create a stack slot! In case there is more than one
+        match, CSE now creates a stack slot that each match stores to, and replaces the redundant
+        instruction with a loadfrom the stack slot. The stack slot is anonymous, which ensures
+        that FixSSA will turn it into an optimal Phi graph or whatever.
+
+        This is a significant speed-up on Octane/richards.
+
+        * b3/B3DuplicateTails.cpp:
+        * b3/B3EliminateCommonSubexpressions.cpp:
+        * b3/B3FixSSA.cpp:
+        (JSC::B3::fixSSA):
+        * b3/B3Generate.cpp:
+        (JSC::B3::generateToAir):
+        * b3/B3Procedure.h:
+        (JSC::B3::Procedure::setFrontendData):
+        (JSC::B3::Procedure::frontendData):
+        * b3/testb3.cpp:
+        * ftl/FTLState.cpp:
+        (JSC::FTL::State::State):
+
+2016-01-21  Filip Pizlo  <fpizlo@apple.com>
+
         Air should know that CeilDouble has the partial register stall issue
         https://bugs.webkit.org/show_bug.cgi?id=153338
 
index ae891ad..56f929d 100644 (file)
@@ -123,12 +123,6 @@ public:
 
         m_proc.resetReachability();
         m_proc.invalidateCFG();
-
-        if (verbose) {
-            dataLog("Procedure just before SSA conversion:\n");
-            dataLog(m_proc);
-        }
-        fixSSA(m_proc);
     }
     
 private:
index 6e82823..eea45ae 100644 (file)
 #include "B3PhaseScope.h"
 #include "B3ProcedureInlines.h"
 #include "B3PureCSE.h"
+#include "B3StackSlotValue.h"
 #include "B3ValueKey.h"
 #include "B3ValueInlines.h"
+#include "DFGGraph.h"
+#include <wtf/CommaPrinter.h>
 #include <wtf/HashMap.h>
+#include <wtf/ListDump.h>
 #include <wtf/RangeSet.h>
 
 namespace JSC { namespace B3 {
@@ -60,11 +64,24 @@ const bool verbose = false;
 typedef Vector<MemoryValue*, 1> MemoryMatches;
 
 struct ImpureBlockData {
+    void dump(PrintStream& out) const
+    {
+        out.print("{writes = ", writes, ", memoryValues = {");
+
+        CommaPrinter comma;
+        for (auto& entry : memoryValues)
+            out.print(comma, pointerDump(entry.key), "=>", pointerListDump(entry.value));
+
+        out.print("}}");
+    }
+    
     RangeSet<HeapRange> writes;
     
     // Maps an address base to all of the MemoryValues that do things to it. After we're done
-    // processing a map, this tells us the values at tail.
-    HashMap<Value*, MemoryMatches> memoryValues;
+    // processing a map, this tells us the values at tail. Note that we use a Matches array
+    // because those MemoryValues could be turned into Identity's, and we need to check for this
+    // as we go.
+    HashMap<Value*, Matches> memoryValues;
 };
 
 class CSE {
@@ -85,22 +102,51 @@ public:
         m_proc.resetValueOwners();
 
         for (BasicBlock* block : m_proc) {
-            m_data = &m_impureBlockData[block];
-            for (Value* value : *block)
-                m_data->writes.add(value->effects().writes);
+            ImpureBlockData& data = m_impureBlockData[block];
+            for (Value* value : *block) {
+                if (HeapRange writes = value->effects().writes)
+                    clobber(data, writes);
+
+                if (MemoryValue* memory = value->as<MemoryValue>())
+                    addMemoryValue(data, memory);
+            }
+
+            if (verbose)
+                dataLog("Block ", *block, ": ", data, "\n");
         }
-        
-        for (BasicBlock* block : m_proc.blocksInPreOrder()) {
-            m_block = block;
-            m_data = &m_impureBlockData[block];
-            m_writes.clear();
-            for (m_index = 0; m_index < block->size(); ++m_index) {
-                m_value = block->at(m_index);
+
+        Vector<BasicBlock*> postOrder = m_proc.blocksInPostOrder();
+        for (unsigned i = postOrder.size(); i--;) {
+            m_block = postOrder[i];
+            if (verbose)
+                dataLog("Looking at ", *m_block, ":\n");
+
+            m_data = ImpureBlockData();
+            for (m_index = 0; m_index < m_block->size(); ++m_index) {
+                m_value = m_block->at(m_index);
                 process();
             }
+            m_insertionSet.execute(m_block);
+            m_impureBlockData[m_block] = m_data;
+        }
+
+        for (StackSlotValue* stack : m_stacks)
+            m_insertionSet.insertValue(0, stack);
+        for (BasicBlock* block : m_proc) {
+            for (unsigned valueIndex = 0; valueIndex < block->size(); ++valueIndex) {
+                auto iter = m_stores.find(block->at(valueIndex));
+                if (iter == m_stores.end())
+                    continue;
+
+                for (Value* value : iter->value)
+                    m_insertionSet.insertValue(valueIndex + 1, value);
+            }
             m_insertionSet.execute(block);
         }
 
+        if (verbose)
+            dataLog("B3 after CSE:\n", m_proc);
+
         return m_changed;
     }
     
@@ -116,21 +162,23 @@ private:
         }
 
         if (HeapRange writes = m_value->effects().writes)
-            clobber(writes);
+            clobber(m_data, writes);
         
         if (MemoryValue* memory = m_value->as<MemoryValue>())
             processMemory(memory);
     }
 
-    void clobber(HeapRange writes)
+    void clobber(ImpureBlockData& data, HeapRange writes)
     {
-        m_writes.add(writes);
+        data.writes.add(writes);
         
-        m_data->memoryValues.removeIf(
-            [&] (HashMap<Value*, MemoryMatches>::KeyValuePairType& entry) -> bool {
+        data.memoryValues.removeIf(
+            [&] (HashMap<Value*, Matches>::KeyValuePairType& entry) -> bool {
                 entry.value.removeAllMatching(
-                    [&] (MemoryValue* memory) -> bool {
-                        return memory->range().overlaps(writes);
+                    [&] (Value* value) -> bool {
+                        if (MemoryValue* memory = value->as<MemoryValue>())
+                            return memory->range().overlaps(writes);
+                        return true;
                     });
                 return entry.value.isEmpty();
             });
@@ -156,74 +204,88 @@ private:
         
         switch (memory->opcode()) {
         case Load8Z: {
-            MemoryValue* match = findMemoryValue(
-                ptr, range, [&] (MemoryValue* candidate) -> bool {
+            handleMemoryValue(
+                ptr, range,
+                [&] (MemoryValue* candidate) -> bool {
                     return candidate->offset() == offset
                         && (candidate->opcode() == Load8Z || candidate->opcode() == Store8);
+                },
+                [&] (MemoryValue* match, Vector<Value*>& fixups) -> Value* {
+                    if (match->opcode() == Store8) {
+                        Value* mask = m_proc.add<Const32Value>(m_value->origin(), 0xff);
+                        fixups.append(mask);
+                        Value* zext = m_proc.add<Value>(
+                            BitAnd, m_value->origin(), match->child(0), mask);
+                        fixups.append(sext);
+                        return sext;
+                    }
+                    return nullptr;
                 });
-            if (replace(match))
-                break;
-            addMemoryValue(memory);
             break;
         }
 
         case Load8S: {
-            MemoryValue* match = findMemoryValue(
-                ptr, range, [&] (MemoryValue* candidate) -> bool {
+            handleMemoryValue(
+                ptr, range,
+                [&] (MemoryValue* candidate) -> bool {
                     return candidate->offset() == offset
                         && (candidate->opcode() == Load8S || candidate->opcode() == Store8);
+                },
+                [&] (MemoryValue* match, Vector<Value*>& fixups) -> Value* {
+                    if (match->opcode() == Store8) {
+                        Value* sext = m_proc.add<Value>(
+                            SExt8, m_value->origin(), match->child(0));
+                        fixups.append(sext);
+                        return sext;
+                    }
+                    return nullptr;
                 });
-            if (match) {
-                if (match->opcode() == Store8) {
-                    m_value->replaceWithIdentity(
-                        m_insertionSet.insert<Value>(
-                            m_index, SExt8, m_value->origin(), match->child(0)));
-                    m_changed = true;
-                    break;
-                }
-                replace(match);
-                break;
-            }
-            addMemoryValue(memory);
             break;
         }
 
         case Load16Z: {
-            MemoryValue* match = findMemoryValue(
-                ptr, range, [&] (MemoryValue* candidate) -> bool {
+            handleMemoryValue(
+                ptr, range,
+                [&] (MemoryValue* candidate) -> bool {
                     return candidate->offset() == offset
                         && (candidate->opcode() == Load16Z || candidate->opcode() == Store16);
+                },
+                [&] (MemoryValue* match, Vector<Value*>& fixups) -> Value* {
+                    if (match->opcode() == Store16) {
+                        Value* mask = m_proc.add<Const32Value>(m_value->origin(), 0xffff);
+                        fixups.append(mask);
+                        Value* zext = m_proc.add<Value>(
+                            BitAnd, m_value->origin(), match->child(0), mask);
+                        fixups.append(sext);
+                        return sext;
+                    }
+                    return nullptr;
                 });
-            if (replace(match))
-                break;
-            addMemoryValue(memory);
             break;
         }
 
         case Load16S: {
-            MemoryValue* match = findMemoryValue(
+            handleMemoryValue(
                 ptr, range, [&] (MemoryValue* candidate) -> bool {
                     return candidate->offset() == offset
                         && (candidate->opcode() == Load16S || candidate->opcode() == Store16);
+                },
+                [&] (MemoryValue* match, Vector<Value*>& fixups) -> Value* {
+                    if (match->opcode() == Store16) {
+                        Value* sext = m_proc.add<Value>(
+                            SExt16, m_value->origin(), match->child(0));
+                        fixups.append(sext);
+                        return sext;
+                    }
+                    return nullptr;
                 });
-            if (match) {
-                if (match->opcode() == Store16) {
-                    m_value->replaceWithIdentity(
-                        m_insertionSet.insert<Value>(
-                            m_index, SExt16, m_value->origin(), match->child(0)));
-                    m_changed = true;
-                    break;
-                }
-                replace(match);
-                break;
-            }
-            addMemoryValue(memory);
             break;
         }
 
         case Load: {
-            MemoryValue* match = findMemoryValue(
-                ptr, range, [&] (MemoryValue* candidate) -> bool {
+            handleMemoryValue(
+                ptr, range,
+                [&] (MemoryValue* candidate) -> bool {
                     if (candidate->offset() != offset)
                         return false;
 
@@ -235,16 +297,13 @@ private:
 
                     return false;
                 });
-            if (replace(match))
-                break;
-            addMemoryValue(memory);
             break;
         }
 
         case Store8:
         case Store16:
         case Store: {
-            addMemoryValue(memory);
+            addMemoryValue(m_data, memory);
             break;
         }
 
@@ -255,31 +314,95 @@ private:
         }
     }
 
-    bool replace(MemoryValue* match)
+    template<typename Filter>
+    void handleMemoryValue(Value* ptr, HeapRange range, const Filter& filter)
+    {
+        handleMemoryValue(
+            ptr, range, filter,
+            [] (MemoryValue*, Vector<Value*>&) -> Value* {
+                return nullptr;
+            });
+    }
+
+    template<typename Filter, typename Replace>
+    void handleMemoryValue(
+        Value* ptr, HeapRange range, const Filter& filter, const Replace& replace)
+    {
+        MemoryMatches matches = findMemoryValue(ptr, range, filter);
+        if (replaceMemoryValue(matches, replace))
+            return;
+        addMemoryValue(m_data, m_value->as<MemoryValue>());
+    }
+
+    template<typename Replace>
+    bool replaceMemoryValue(const MemoryMatches& matches, const Replace& replace)
     {
-        if (!match)
+        if (matches.isEmpty())
             return false;
 
         if (verbose)
-            dataLog("Eliminating ", *m_value, " due to ", *match, "\n");
+            dataLog("Eliminating ", *m_value, " due to ", pointerListDump(matches), "\n");
         
-        if (match->isStore())
-            m_value->replaceWithIdentity(match->child(0));
-        else
-            m_value->replaceWithIdentity(match);
         m_changed = true;
-        return true;
-    }
 
-    void addMemoryValue(MemoryValue* memory)
-    {
-        addMemoryValue(*m_data, memory);
+        if (matches.size() == 1) {
+            MemoryValue* dominatingMatch = matches[0];
+            RELEASE_ASSERT(m_dominators.dominates(dominatingMatch->owner, m_block));
+            
+            if (verbose)
+                dataLog("    Eliminating using ", *dominatingMatch, "\n");
+            Vector<Value*> extraValues;
+            if (Value* value = replace(dominatingMatch, extraValues)) {
+                for (Value* extraValue : extraValues)
+                    m_insertionSet.insertValue(m_index, extraValue);
+                m_value->replaceWithIdentity(value);
+            } else {
+                if (dominatingMatch->isStore())
+                    m_value->replaceWithIdentity(dominatingMatch->child(0));
+                else
+                    m_value->replaceWithIdentity(dominatingMatch);
+            }
+            return true;
+        }
+
+        // FIXME: It would be way better if this phase just did SSA calculation directly.
+        // Right now we're relying on the fact that CSE's position in the phase order is
+        // almost right before SSA fixup.
+            
+        StackSlotValue* stack = m_proc.add<StackSlotValue>(
+            m_value->origin(), sizeofType(m_value->type()), StackSlotKind::Anonymous);
+        m_stacks.append(stack);
+
+        MemoryValue* load = m_insertionSet.insert<MemoryValue>(
+            m_index, Load, m_value->type(), m_value->origin(), stack);
+        if (verbose)
+            dataLog("    Inserting load of value: ", *load, "\n");
+        m_value->replaceWithIdentity(load);
+            
+        for (MemoryValue* match : matches) {
+            Vector<Value*>& stores = m_stores.add(match, Vector<Value*>()).iterator->value;
+
+            Value* value = replace(match, stores);
+            if (!value) {
+                if (match->isStore())
+                    value = match->child(0);
+                else
+                    value = match;
+            }
+                
+            Value* store = m_proc.add<MemoryValue>(Store, m_value->origin(), value, stack);
+            stores.append(store);
+        }
+
+        return true;
     }
 
     void addMemoryValue(ImpureBlockData& data, MemoryValue* memory)
     {
-        MemoryMatches& matches =
-            data.memoryValues.add(memory->lastChild(), MemoryMatches()).iterator->value;
+        if (verbose)
+            dataLog("Adding memory: ", *memory, "\n");
+        
+        Matches& matches = data.memoryValues.add(memory->lastChild(), Matches()).iterator->value;
 
         if (matches.contains(memory))
             return;
@@ -288,61 +411,80 @@ private:
     }
 
     template<typename Filter>
-    MemoryValue* findMemoryValue(Value* ptr, HeapRange range, const Filter& filter)
+    MemoryMatches findMemoryValue(Value* ptr, HeapRange range, const Filter& filter)
     {
+        if (verbose)
+            dataLog(*m_value, ": looking for ", *ptr, "...\n");
+        
         auto find = [&] (ImpureBlockData& data) -> MemoryValue* {
             auto iter = data.memoryValues.find(ptr);
             if (iter != data.memoryValues.end()) {
-                for (MemoryValue* candidate : iter->value) {
-                    if (filter(candidate))
-                        return candidate;
+                for (Value* candidateValue : iter->value) {
+                    if (MemoryValue* candidateMemory = candidateValue->as<MemoryValue>()) {
+                        if (filter(candidateMemory))
+                            return candidateMemory;
+                    }
                 }
             }
             return nullptr;
         };
 
-        if (MemoryValue* match = find(*m_data))
-            return match;
+        if (MemoryValue* match = find(m_data)) {
+            if (verbose)
+                dataLog("    Found ", *match, " locally.\n");
+            return { match };
+        }
 
-        if (m_writes.overlaps(range))
-            return nullptr;
+        if (m_data.writes.overlaps(range)) {
+            if (verbose)
+                dataLog("    Giving up because of writes.\n");
+            return { };
+        }
 
         BlockWorklist worklist;
         Vector<BasicBlock*, 8> seenList;
 
         worklist.pushAll(m_block->predecessors());
 
-        MemoryValue* match = nullptr;
+        MemoryMatches matches;
 
         while (BasicBlock* block = worklist.pop()) {
+            if (verbose)
+                dataLog("    Looking at ", *block, "\n");
             seenList.append(block);
 
             ImpureBlockData& data = m_impureBlockData[block];
 
-            if (m_dominators.strictlyDominates(block, m_block)) {
-                match = find(data);
-                if (match)
-                    continue;
+            MemoryValue* match = find(data);
+            if (match && match != m_value) {
+                if (verbose)
+                    dataLog("    Found match: ", *match, "\n");
+                matches.append(match);
+                continue;
             }
 
-            if (data.writes.overlaps(range))
-                return nullptr;
+            if (data.writes.overlaps(range)) {
+                if (verbose)
+                    dataLog("    Giving up because of writes.\n");
+                return { };
+            }
 
+            if (!block->numPredecessors()) {
+                if (verbose)
+                    dataLog("    Giving up because it's live at root.\n");
+                // This essentially proves that this is live at the prologue. That means that we
+                // cannot reliably optimize this case.
+                return { };
+            }
+            
             worklist.pushAll(block->predecessors());
         }
 
-        if (!match)
-            return nullptr;
-
-        for (BasicBlock* block : seenList)
-            addMemoryValue(m_impureBlockData[block], match);
-        addMemoryValue(match);
-
-        return match;
+        if (verbose)
+            dataLog("    Got matches: ", pointerListDump(matches), "\n");
+        return matches;
     }
 
-    typedef Vector<Value*, 1> Matches;
-
     Procedure& m_proc;
 
     Dominators& m_dominators;
@@ -350,13 +492,15 @@ private:
     
     IndexMap<BasicBlock, ImpureBlockData> m_impureBlockData;
 
-    ImpureBlockData* m_data;
-    RangeSet<HeapRange> m_writes;
+    ImpureBlockData m_data;
 
     BasicBlock* m_block;
     unsigned m_index;
     Value* m_value;
 
+    Vector<StackSlotValue*> m_stacks;
+    HashMap<Value*, Vector<Value*>> m_stores;
+
     InsertionSet m_insertionSet;
 
     bool m_changed { false };
index d547b73..967f920 100644 (file)
@@ -122,6 +122,8 @@ void demoteValues(Procedure& proc, const IndexSet<Value>& values)
 
 bool fixSSA(Procedure& proc)
 {
+    PhaseScope phaseScope(proc, "fixSSA");
+    
     // Collect the stack "variables". If there aren't any, then we don't have anything to do.
     // That's a fairly common case.
     HashMap<StackSlotValue*, Type> stackVariable;
index ec10e1d..e09d5fc 100644 (file)
@@ -34,6 +34,7 @@
 #include "B3Common.h"
 #include "B3DuplicateTails.h"
 #include "B3EliminateCommonSubexpressions.h"
+#include "B3FixSSA.h"
 #include "B3FoldPathConstants.h"
 #include "B3LegalizeMemoryOffsets.h"
 #include "B3LowerMacros.h"
@@ -81,6 +82,7 @@ void generateToAir(Procedure& procedure, unsigned optLevel)
         reduceStrength(procedure);
         eliminateCommonSubexpressions(procedure);
         duplicateTails(procedure);
+        fixSSA(procedure);
         foldPathConstants(procedure);
         
         // FIXME: Add more optimizations here.
index 0db7be8..4081787 100644 (file)
@@ -71,6 +71,12 @@ public:
     // Usually you use this via OriginDump, though it's cool to use it directly.
     void printOrigin(PrintStream& out, Origin origin) const;
 
+    // This is a debugging hack. Sometimes while debugging B3 you need to break the abstraction
+    // and get at the DFG Graph, or whatever data structure the frontend used to describe the
+    // program. The FTL passes the DFG Graph.
+    void setFrontendData(const void* value) { m_frontendData = value; }
+    const void* frontendData() const { return m_frontendData; }
+
     JS_EXPORT_PRIVATE BasicBlock* addBlock(double frequency = 1);
     
     template<typename ValueType, typename... Arguments>
@@ -284,6 +290,7 @@ private:
     std::unique_ptr<OpaqueByproducts> m_byproducts;
     std::unique_ptr<Air::Code> m_code;
     RefPtr<SharedTask<void(PrintStream&, Origin)>> m_originPrinter;
+    const void* m_frontendData;
 };
 
 } } // namespace JSC::B3
index d1f31d1..c3e0076 100644 (file)
@@ -9457,6 +9457,50 @@ void testBranch64EqualMemImm(int64_t left, int64_t right)
     CHECK(compileAndRun<bool>(proc, &left) == (left == right));
 }
 
+void testStore8Load8Z(int32_t value)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    
+    int8_t byte;
+    Value* ptr = root->appendNew<ConstPtrValue>(proc, Origin(), &byte);
+    
+    root->appendNew<MemoryValue>(
+        proc, Store8, Origin(),
+        root->appendNew<Value>(
+            proc, Trunc, Origin(),
+            root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0)),
+        ptr);
+
+    root->appendNew<ControlValue>(
+        proc, Return, Origin(),
+        root->appendNew<MemoryValue>(proc, Load8Z, Origin(), ptr));
+
+    CHECK(compileAndRun<int32_t>(proc, value) == static_cast<uint8_t>(value));
+}
+
+void testStore16Load16Z(int32_t value)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    
+    int16_t byte;
+    Value* ptr = root->appendNew<ConstPtrValue>(proc, Origin(), &byte);
+    
+    root->appendNew<MemoryValue>(
+        proc, Store16, Origin(),
+        root->appendNew<Value>(
+            proc, Trunc, Origin(),
+            root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0)),
+        ptr);
+
+    root->appendNew<ControlValue>(
+        proc, Return, Origin(),
+        root->appendNew<MemoryValue>(proc, Load16Z, Origin(), ptr));
+
+    CHECK(compileAndRun<int32_t>(proc, value) == static_cast<uint16_t>(value));
+}
+
 // Make sure the compiler does not try to optimize anything out.
 NEVER_INLINE double zero()
 {
@@ -10730,6 +10774,17 @@ void run(const char* filter)
     RUN(testBranch64EqualMemImm(1, -1));
     RUN(testBranch64EqualMemImm(-1, 1));
 
+    RUN(testStore8Load8Z(0));
+    RUN(testStore8Load8Z(123));
+    RUN(testStore8Load8Z(12345));
+    RUN(testStore8Load8Z(-123));
+
+    RUN(testStore16Load16Z(0));
+    RUN(testStore16Load16Z(123));
+    RUN(testStore16Load16Z(12345));
+    RUN(testStore16Load16Z(12345678));
+    RUN(testStore16Load16Z(-123));
+
     if (tasks.isEmpty())
         usage();
 
index 8176735..2eac409 100644 (file)
@@ -78,6 +78,8 @@ State::State(Graph& graph)
         [this] (PrintStream& out, B3::Origin origin) {
             out.print("DFG:", bitwise_cast<Node*>(origin.data()));
         });
+
+    proc->setFrontendData(&graph);
 #endif // FTL_USES_B3
 }