Enhance the MacroAssembler and LinkBuffer to support pointer profiling.
[WebKit-https.git] / Source / JavaScriptCore / b3 / air / testair.cpp
1 /*
2  * Copyright (C) 2016-2018 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
#include "config.h"

#include "AirCode.h"
#include "AirGenerate.h"
#include "AirInstInlines.h"
#include "AirSpecial.h"
#include "AllowMacroScratchRegisterUsage.h"
#include "B3BasicBlockInlines.h"
#include "B3Compilation.h"
#include "B3PatchpointSpecial.h"
#include "B3Procedure.h"
#include "CCallHelpers.h"
#include "InitializeThreading.h"
#include "JSCInlines.h"
#include "LinkBuffer.h"
#include "PureNaN.h"
#include <cmath>
#include <cstring>
#include <map>
#include <string>
#include <wtf/Lock.h>
#include <wtf/NumberOfCores.h>
#include <wtf/Threading.h>
48
// Always answers true, but only through a function call. usage() branches on this before
// calling exit(), so exit() is not the unconditional tail of that function; the author's
// stated aim is to get by without a NO_RETURN_DUE_TO_EXIT annotation.
static bool hiddenTruthBecauseNoReturnIsStupid()
{
    return true;
}
51
52 static void usage()
53 {
54     dataLog("Usage: testair [<filter>]\n");
55     if (hiddenTruthBecauseNoReturnIsStupid())
56         exit(1);
57 }
58
59 #if ENABLE(B3_JIT)
60
61 using namespace JSC;
62 using namespace JSC::B3::Air;
63
64 using JSC::B3::FP;
65 using JSC::B3::GP;
66 using JSC::B3::Width;
67 using JSC::B3::Width8;
68 using JSC::B3::Width16;
69 using JSC::B3::Width32;
70 using JSC::B3::Width64;
71
72 namespace {
73
74 StaticLock crashLock;
75
76 // Nothing fancy for now; we just use the existing WTF assertion machinery.
77 #define CHECK(x) do {                                                   \
78         if (!!(x))                                                      \
79             break;                                                      \
80         crashLock.lock();                                               \
81         WTFReportAssertionFailure(__FILE__, __LINE__, WTF_PRETTY_FUNCTION, #x); \
82         CRASH();                                                        \
83     } while (false)
84
85 std::unique_ptr<B3::Compilation> compile(B3::Procedure& proc)
86 {
87     prepareForGeneration(proc.code());
88     CCallHelpers jit;
89     generate(proc.code(), jit);
90     LinkBuffer linkBuffer(jit, nullptr);
91
92     return std::make_unique<B3::Compilation>(
93         FINALIZE_CODE(linkBuffer, JITCodePtrTag, "testair compilation"), proc.releaseByproducts());
94 }
95
96 template<typename T, typename... Arguments>
97 T invoke(const B3::Compilation& code, Arguments... arguments)
98 {
99     void* executableAddress = untagCFunctionPtr(code.code().executableAddress(), JITCodePtrTag);
100     T (*function)(Arguments...) = bitwise_cast<T(*)(Arguments...)>(executableAddress);
101     return function(arguments...);
102 }
103
104 template<typename T, typename... Arguments>
105 T compileAndRun(B3::Procedure& procedure, Arguments... arguments)
106 {
107     return invoke<T>(*compile(procedure), arguments...);
108 }
109
110 void testSimple()
111 {
112     B3::Procedure proc;
113     Code& code = proc.code();
114
115     BasicBlock* root = code.addBlock();
116     root->append(Move, nullptr, Arg::imm(42), Tmp(GPRInfo::returnValueGPR));
117     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
118
119     CHECK(compileAndRun<int>(proc) == 42);
120 }
121
122 // Use this to put a constant into a register without Air being able to see the constant.
123 template<typename T>
124 void loadConstantImpl(BasicBlock* block, T value, B3::Air::Opcode move, Tmp tmp, Tmp scratch)
125 {
126     static StaticLock lock;
127     static std::map<T, T*>* map; // I'm not messing with HashMap's problems with integers.
128
129     LockHolder locker(lock);
130     if (!map)
131         map = new std::map<T, T*>();
132
133     if (!map->count(value))
134         (*map)[value] = new T(value);
135
136     T* ptr = (*map)[value];
137     block->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(ptr)), scratch);
138     block->append(move, nullptr, Arg::addr(scratch), tmp);
139 }
140
141 void loadConstant(BasicBlock* block, intptr_t value, Tmp tmp)
142 {
143     loadConstantImpl<intptr_t>(block, value, Move, tmp, tmp);
144 }
145
146 void loadDoubleConstant(BasicBlock* block, double value, Tmp tmp, Tmp scratch)
147 {
148     loadConstantImpl<double>(block, value, MoveDouble, tmp, scratch);
149 }
150
151 void testShuffleSimpleSwap()
152 {
153     B3::Procedure proc;
154     Code& code = proc.code();
155
156     BasicBlock* root = code.addBlock();
157     loadConstant(root, 1, Tmp(GPRInfo::regT0));
158     loadConstant(root, 2, Tmp(GPRInfo::regT1));
159     loadConstant(root, 3, Tmp(GPRInfo::regT2));
160     loadConstant(root, 4, Tmp(GPRInfo::regT3));
161     root->append(
162         Shuffle, nullptr,
163         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
164         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Width32));
165
166     int32_t things[4];
167     Tmp base = code.newTmp(GP);
168     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
169     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
170     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
171     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
172     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
173     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
174     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
175
176     memset(things, 0, sizeof(things));
177     
178     CHECK(!compileAndRun<int>(proc));
179
180     CHECK(things[0] == 1);
181     CHECK(things[1] == 2);
182     CHECK(things[2] == 4);
183     CHECK(things[3] == 3);
184 }
185
186 void testShuffleSimpleShift()
187 {
188     B3::Procedure proc;
189     Code& code = proc.code();
190
191     BasicBlock* root = code.addBlock();
192     loadConstant(root, 1, Tmp(GPRInfo::regT0));
193     loadConstant(root, 2, Tmp(GPRInfo::regT1));
194     loadConstant(root, 3, Tmp(GPRInfo::regT2));
195     loadConstant(root, 4, Tmp(GPRInfo::regT3));
196     root->append(
197         Shuffle, nullptr,
198         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
199         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32));
200
201     int32_t things[5];
202     Tmp base = code.newTmp(GP);
203     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
204     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
205     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
206     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
207     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
208     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
209     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
210     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
211
212     memset(things, 0, sizeof(things));
213     
214     CHECK(!compileAndRun<int>(proc));
215
216     CHECK(things[0] == 1);
217     CHECK(things[1] == 2);
218     CHECK(things[2] == 3);
219     CHECK(things[3] == 3);
220     CHECK(things[4] == 4);
221 }
222
223 void testShuffleLongShift()
224 {
225     B3::Procedure proc;
226     Code& code = proc.code();
227
228     BasicBlock* root = code.addBlock();
229     loadConstant(root, 1, Tmp(GPRInfo::regT0));
230     loadConstant(root, 2, Tmp(GPRInfo::regT1));
231     loadConstant(root, 3, Tmp(GPRInfo::regT2));
232     loadConstant(root, 4, Tmp(GPRInfo::regT3));
233     loadConstant(root, 5, Tmp(GPRInfo::regT4));
234     loadConstant(root, 6, Tmp(GPRInfo::regT5));
235     loadConstant(root, 7, Tmp(GPRInfo::regT6));
236     loadConstant(root, 8, Tmp(GPRInfo::regT7));
237     root->append(
238         Shuffle, nullptr,
239         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
240         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
241         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
242         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
243         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
244         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
245         Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT7), Arg::widthArg(Width32));
246
247     int32_t things[8];
248     Tmp base = code.newTmp(GP);
249     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
250     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
251     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
252     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
253     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
254     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
255     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
256     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
257     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
258     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
259     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
260
261     memset(things, 0, sizeof(things));
262     
263     CHECK(!compileAndRun<int>(proc));
264
265     CHECK(things[0] == 1);
266     CHECK(things[1] == 1);
267     CHECK(things[2] == 2);
268     CHECK(things[3] == 3);
269     CHECK(things[4] == 4);
270     CHECK(things[5] == 5);
271     CHECK(things[6] == 6);
272     CHECK(things[7] == 7);
273 }
274
275 void testShuffleLongShiftBackwards()
276 {
277     B3::Procedure proc;
278     Code& code = proc.code();
279
280     BasicBlock* root = code.addBlock();
281     loadConstant(root, 1, Tmp(GPRInfo::regT0));
282     loadConstant(root, 2, Tmp(GPRInfo::regT1));
283     loadConstant(root, 3, Tmp(GPRInfo::regT2));
284     loadConstant(root, 4, Tmp(GPRInfo::regT3));
285     loadConstant(root, 5, Tmp(GPRInfo::regT4));
286     loadConstant(root, 6, Tmp(GPRInfo::regT5));
287     loadConstant(root, 7, Tmp(GPRInfo::regT6));
288     loadConstant(root, 8, Tmp(GPRInfo::regT7));
289     root->append(
290         Shuffle, nullptr,
291         Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT7), Arg::widthArg(Width32),
292         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
293         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
294         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
295         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
296         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
297         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32));
298
299     int32_t things[8];
300     Tmp base = code.newTmp(GP);
301     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
302     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
303     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
304     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
305     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
306     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
307     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
308     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
309     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
310     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
311     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
312
313     memset(things, 0, sizeof(things));
314     
315     CHECK(!compileAndRun<int>(proc));
316
317     CHECK(things[0] == 1);
318     CHECK(things[1] == 1);
319     CHECK(things[2] == 2);
320     CHECK(things[3] == 3);
321     CHECK(things[4] == 4);
322     CHECK(things[5] == 5);
323     CHECK(things[6] == 6);
324     CHECK(things[7] == 7);
325 }
326
327 void testShuffleSimpleRotate()
328 {
329     B3::Procedure proc;
330     Code& code = proc.code();
331
332     BasicBlock* root = code.addBlock();
333     loadConstant(root, 1, Tmp(GPRInfo::regT0));
334     loadConstant(root, 2, Tmp(GPRInfo::regT1));
335     loadConstant(root, 3, Tmp(GPRInfo::regT2));
336     loadConstant(root, 4, Tmp(GPRInfo::regT3));
337     root->append(
338         Shuffle, nullptr,
339         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
340         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
341         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32));
342
343     int32_t things[4];
344     Tmp base = code.newTmp(GP);
345     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
346     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
347     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
348     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
349     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
350     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
351     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
352
353     memset(things, 0, sizeof(things));
354     
355     CHECK(!compileAndRun<int>(proc));
356
357     CHECK(things[0] == 3);
358     CHECK(things[1] == 1);
359     CHECK(things[2] == 2);
360     CHECK(things[3] == 4);
361 }
362
363 void testShuffleSimpleBroadcast()
364 {
365     B3::Procedure proc;
366     Code& code = proc.code();
367
368     BasicBlock* root = code.addBlock();
369     loadConstant(root, 1, Tmp(GPRInfo::regT0));
370     loadConstant(root, 2, Tmp(GPRInfo::regT1));
371     loadConstant(root, 3, Tmp(GPRInfo::regT2));
372     loadConstant(root, 4, Tmp(GPRInfo::regT3));
373     root->append(
374         Shuffle, nullptr,
375         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
376         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
377         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Width32));
378
379     int32_t things[4];
380     Tmp base = code.newTmp(GP);
381     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
382     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
383     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
384     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
385     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
386     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
387     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
388
389     memset(things, 0, sizeof(things));
390     
391     CHECK(!compileAndRun<int>(proc));
392
393     CHECK(things[0] == 1);
394     CHECK(things[1] == 1);
395     CHECK(things[2] == 1);
396     CHECK(things[3] == 1);
397 }
398
399 void testShuffleBroadcastAllRegs()
400 {
401     B3::Procedure proc;
402     Code& code = proc.code();
403
404     const Vector<Reg>& regs = code.regsInPriorityOrder(GP);
405
406     BasicBlock* root = code.addBlock();
407     root->append(Move, nullptr, Arg::imm(35), Tmp(GPRInfo::regT0));
408     unsigned count = 1;
409     for (Reg reg : regs) {
410         if (reg != Reg(GPRInfo::regT0))
411             loadConstant(root, count++, Tmp(reg));
412     }
413     Inst& shuffle = root->append(Shuffle, nullptr);
414     for (Reg reg : regs) {
415         if (reg != Reg(GPRInfo::regT0))
416             shuffle.append(Tmp(GPRInfo::regT0), Tmp(reg), Arg::widthArg(Width32));
417     }
418
419     StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked);
420     for (unsigned i = 0; i < regs.size(); ++i)
421         root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t)));
422
423     Vector<int32_t> things(regs.size(), 666);
424     Tmp base = code.newTmp(GP);
425     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base);
426     for (unsigned i = 0; i < regs.size(); ++i) {
427         root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0));
428         root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t)));
429     }
430     
431     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
432     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
433
434     CHECK(!compileAndRun<int>(proc));
435
436     for (int32_t thing : things)
437         CHECK(thing == 35);
438 }
439
440 void testShuffleTreeShift()
441 {
442     B3::Procedure proc;
443     Code& code = proc.code();
444
445     BasicBlock* root = code.addBlock();
446     loadConstant(root, 1, Tmp(GPRInfo::regT0));
447     loadConstant(root, 2, Tmp(GPRInfo::regT1));
448     loadConstant(root, 3, Tmp(GPRInfo::regT2));
449     loadConstant(root, 4, Tmp(GPRInfo::regT3));
450     loadConstant(root, 5, Tmp(GPRInfo::regT4));
451     loadConstant(root, 6, Tmp(GPRInfo::regT5));
452     loadConstant(root, 7, Tmp(GPRInfo::regT6));
453     loadConstant(root, 8, Tmp(GPRInfo::regT7));
454     root->append(
455         Shuffle, nullptr,
456         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
457         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
458         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
459         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
460         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
461         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
462         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT7), Arg::widthArg(Width32));
463
464     int32_t things[8];
465     Tmp base = code.newTmp(GP);
466     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
467     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
468     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
469     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
470     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
471     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
472     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
473     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
474     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
475     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
476     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
477
478     memset(things, 0, sizeof(things));
479     
480     CHECK(!compileAndRun<int>(proc));
481
482     CHECK(things[0] == 1);
483     CHECK(things[1] == 1);
484     CHECK(things[2] == 1);
485     CHECK(things[3] == 2);
486     CHECK(things[4] == 2);
487     CHECK(things[5] == 3);
488     CHECK(things[6] == 3);
489     CHECK(things[7] == 4);
490 }
491
492 void testShuffleTreeShiftBackward()
493 {
494     B3::Procedure proc;
495     Code& code = proc.code();
496
497     BasicBlock* root = code.addBlock();
498     loadConstant(root, 1, Tmp(GPRInfo::regT0));
499     loadConstant(root, 2, Tmp(GPRInfo::regT1));
500     loadConstant(root, 3, Tmp(GPRInfo::regT2));
501     loadConstant(root, 4, Tmp(GPRInfo::regT3));
502     loadConstant(root, 5, Tmp(GPRInfo::regT4));
503     loadConstant(root, 6, Tmp(GPRInfo::regT5));
504     loadConstant(root, 7, Tmp(GPRInfo::regT6));
505     loadConstant(root, 8, Tmp(GPRInfo::regT7));
506     root->append(
507         Shuffle, nullptr,
508         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT7), Arg::widthArg(Width32),
509         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
510         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
511         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
512         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
513         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
514         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32));
515
516     int32_t things[8];
517     Tmp base = code.newTmp(GP);
518     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
519     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
520     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
521     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
522     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
523     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
524     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
525     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
526     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
527     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
528     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
529
530     memset(things, 0, sizeof(things));
531     
532     CHECK(!compileAndRun<int>(proc));
533
534     CHECK(things[0] == 1);
535     CHECK(things[1] == 1);
536     CHECK(things[2] == 1);
537     CHECK(things[3] == 2);
538     CHECK(things[4] == 2);
539     CHECK(things[5] == 3);
540     CHECK(things[6] == 3);
541     CHECK(things[7] == 4);
542 }
543
544 void testShuffleTreeShiftOtherBackward()
545 {
546     // NOTE: This test was my original attempt at TreeShiftBackward but mistakes were made. So, this
547     // ends up being just a weird test. But weird tests are useful, so I kept it.
548     
549     B3::Procedure proc;
550     Code& code = proc.code();
551
552     BasicBlock* root = code.addBlock();
553     loadConstant(root, 1, Tmp(GPRInfo::regT0));
554     loadConstant(root, 2, Tmp(GPRInfo::regT1));
555     loadConstant(root, 3, Tmp(GPRInfo::regT2));
556     loadConstant(root, 4, Tmp(GPRInfo::regT3));
557     loadConstant(root, 5, Tmp(GPRInfo::regT4));
558     loadConstant(root, 6, Tmp(GPRInfo::regT5));
559     loadConstant(root, 7, Tmp(GPRInfo::regT6));
560     loadConstant(root, 8, Tmp(GPRInfo::regT7));
561     root->append(
562         Shuffle, nullptr,
563         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT7), Arg::widthArg(Width32),
564         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
565         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
566         Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
567         Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
568         Tmp(GPRInfo::regT7), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
569         Tmp(GPRInfo::regT7), Tmp(GPRInfo::regT1), Arg::widthArg(Width32));
570
571     int32_t things[8];
572     Tmp base = code.newTmp(GP);
573     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
574     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
575     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
576     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
577     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
578     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
579     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
580     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
581     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
582     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
583     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
584
585     memset(things, 0, sizeof(things));
586     
587     CHECK(!compileAndRun<int>(proc));
588
589     CHECK(things[0] == 1);
590     CHECK(things[1] == 8);
591     CHECK(things[2] == 8);
592     CHECK(things[3] == 7);
593     CHECK(things[4] == 7);
594     CHECK(things[5] == 6);
595     CHECK(things[6] == 6);
596     CHECK(things[7] == 5);
597 }
598
599 void testShuffleMultipleShifts()
600 {
601     B3::Procedure proc;
602     Code& code = proc.code();
603
604     BasicBlock* root = code.addBlock();
605     loadConstant(root, 1, Tmp(GPRInfo::regT0));
606     loadConstant(root, 2, Tmp(GPRInfo::regT1));
607     loadConstant(root, 3, Tmp(GPRInfo::regT2));
608     loadConstant(root, 4, Tmp(GPRInfo::regT3));
609     loadConstant(root, 5, Tmp(GPRInfo::regT4));
610     loadConstant(root, 6, Tmp(GPRInfo::regT5));
611     root->append(
612         Shuffle, nullptr,
613         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
614         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
615         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
616         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT5), Arg::widthArg(Width32));
617
618     int32_t things[6];
619     Tmp base = code.newTmp(GP);
620     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
621     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
622     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
623     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
624     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
625     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
626     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
627     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
628     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
629
630     memset(things, 0, sizeof(things));
631     
632     CHECK(!compileAndRun<int>(proc));
633
634     CHECK(things[0] == 1);
635     CHECK(things[1] == 1);
636     CHECK(things[2] == 3);
637     CHECK(things[3] == 3);
638     CHECK(things[4] == 3);
639     CHECK(things[5] == 1);
640 }
641
642 void testShuffleRotateWithFringe()
643 {
644     B3::Procedure proc;
645     Code& code = proc.code();
646
647     BasicBlock* root = code.addBlock();
648     loadConstant(root, 1, Tmp(GPRInfo::regT0));
649     loadConstant(root, 2, Tmp(GPRInfo::regT1));
650     loadConstant(root, 3, Tmp(GPRInfo::regT2));
651     loadConstant(root, 4, Tmp(GPRInfo::regT3));
652     loadConstant(root, 5, Tmp(GPRInfo::regT4));
653     loadConstant(root, 6, Tmp(GPRInfo::regT5));
654     root->append(
655         Shuffle, nullptr,
656         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
657         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
658         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
659         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
660         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
661         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Width32));
662
663     int32_t things[6];
664     Tmp base = code.newTmp(GP);
665     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
666     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
667     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
668     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
669     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
670     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
671     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
672     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
673     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
674
675     memset(things, 0, sizeof(things));
676     
677     CHECK(!compileAndRun<int>(proc));
678
679     CHECK(things[0] == 3);
680     CHECK(things[1] == 1);
681     CHECK(things[2] == 2);
682     CHECK(things[3] == 1);
683     CHECK(things[4] == 2);
684     CHECK(things[5] == 3);
685 }
686
687 void testShuffleRotateWithFringeInWeirdOrder()
688 {
689     B3::Procedure proc;
690     Code& code = proc.code();
691
692     BasicBlock* root = code.addBlock();
693     loadConstant(root, 1, Tmp(GPRInfo::regT0));
694     loadConstant(root, 2, Tmp(GPRInfo::regT1));
695     loadConstant(root, 3, Tmp(GPRInfo::regT2));
696     loadConstant(root, 4, Tmp(GPRInfo::regT3));
697     loadConstant(root, 5, Tmp(GPRInfo::regT4));
698     loadConstant(root, 6, Tmp(GPRInfo::regT5));
699     root->append(
700         Shuffle, nullptr,
701         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
702         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
703         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
704         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
705         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
706         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32));
707
708     int32_t things[6];
709     Tmp base = code.newTmp(GP);
710     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
711     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
712     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
713     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
714     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
715     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
716     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
717     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
718     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
719
720     memset(things, 0, sizeof(things));
721     
722     CHECK(!compileAndRun<int>(proc));
723
724     CHECK(things[0] == 3);
725     CHECK(things[1] == 1);
726     CHECK(things[2] == 2);
727     CHECK(things[3] == 1);
728     CHECK(things[4] == 2);
729     CHECK(things[5] == 3);
730 }
731
732 void testShuffleRotateWithLongFringe()
733 {
734     B3::Procedure proc;
735     Code& code = proc.code();
736
737     BasicBlock* root = code.addBlock();
738     loadConstant(root, 1, Tmp(GPRInfo::regT0));
739     loadConstant(root, 2, Tmp(GPRInfo::regT1));
740     loadConstant(root, 3, Tmp(GPRInfo::regT2));
741     loadConstant(root, 4, Tmp(GPRInfo::regT3));
742     loadConstant(root, 5, Tmp(GPRInfo::regT4));
743     loadConstant(root, 6, Tmp(GPRInfo::regT5));
744     root->append(
745         Shuffle, nullptr,
746         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
747         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
748         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
749         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
750         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
751         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32));
752
753     int32_t things[6];
754     Tmp base = code.newTmp(GP);
755     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
756     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
757     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
758     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
759     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
760     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
761     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
762     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
763     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
764
765     memset(things, 0, sizeof(things));
766     
767     CHECK(!compileAndRun<int>(proc));
768
769     CHECK(things[0] == 3);
770     CHECK(things[1] == 1);
771     CHECK(things[2] == 2);
772     CHECK(things[3] == 1);
773     CHECK(things[4] == 4);
774     CHECK(things[5] == 5);
775 }
776
777 void testShuffleMultipleRotates()
778 {
779     B3::Procedure proc;
780     Code& code = proc.code();
781
782     BasicBlock* root = code.addBlock();
783     loadConstant(root, 1, Tmp(GPRInfo::regT0));
784     loadConstant(root, 2, Tmp(GPRInfo::regT1));
785     loadConstant(root, 3, Tmp(GPRInfo::regT2));
786     loadConstant(root, 4, Tmp(GPRInfo::regT3));
787     loadConstant(root, 5, Tmp(GPRInfo::regT4));
788     loadConstant(root, 6, Tmp(GPRInfo::regT5));
789     root->append(
790         Shuffle, nullptr,
791         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
792         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
793         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
794         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
795         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
796         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT3), Arg::widthArg(Width32));
797
798     int32_t things[6];
799     Tmp base = code.newTmp(GP);
800     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
801     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
802     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
803     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
804     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
805     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
806     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
807     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
808     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
809
810     memset(things, 0, sizeof(things));
811     
812     CHECK(!compileAndRun<int>(proc));
813
814     CHECK(things[0] == 3);
815     CHECK(things[1] == 1);
816     CHECK(things[2] == 2);
817     CHECK(things[3] == 6);
818     CHECK(things[4] == 4);
819     CHECK(things[5] == 5);
820 }
821
822 void testShuffleShiftAndRotate()
823 {
824     B3::Procedure proc;
825     Code& code = proc.code();
826
827     BasicBlock* root = code.addBlock();
828     loadConstant(root, 1, Tmp(GPRInfo::regT0));
829     loadConstant(root, 2, Tmp(GPRInfo::regT1));
830     loadConstant(root, 3, Tmp(GPRInfo::regT2));
831     loadConstant(root, 4, Tmp(GPRInfo::regT3));
832     loadConstant(root, 5, Tmp(GPRInfo::regT4));
833     loadConstant(root, 6, Tmp(GPRInfo::regT5));
834     root->append(
835         Shuffle, nullptr,
836         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
837         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
838         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
839         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
840         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32));
841
842     int32_t things[6];
843     Tmp base = code.newTmp(GP);
844     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
845     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
846     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
847     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
848     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
849     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
850     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
851     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
852     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
853
854     memset(things, 0, sizeof(things));
855     
856     CHECK(!compileAndRun<int>(proc));
857
858     CHECK(things[0] == 3);
859     CHECK(things[1] == 1);
860     CHECK(things[2] == 2);
861     CHECK(things[3] == 4);
862     CHECK(things[4] == 4);
863     CHECK(things[5] == 5);
864 }
865
866 void testShuffleShiftAllRegs()
867 {
868     B3::Procedure proc;
869     Code& code = proc.code();
870
871     const Vector<Reg>& regs = code.regsInPriorityOrder(GP);
872
873     BasicBlock* root = code.addBlock();
874     for (unsigned i = 0; i < regs.size(); ++i)
875         loadConstant(root, 35 + i, Tmp(regs[i]));
876     Inst& shuffle = root->append(Shuffle, nullptr);
877     for (unsigned i = 1; i < regs.size(); ++i)
878         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width32));
879
880     StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked);
881     for (unsigned i = 0; i < regs.size(); ++i)
882         root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t)));
883
884     Vector<int32_t> things(regs.size(), 666);
885     Tmp base = code.newTmp(GP);
886     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base);
887     for (unsigned i = 0; i < regs.size(); ++i) {
888         root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0));
889         root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t)));
890     }
891     
892     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
893     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
894
895     CHECK(!compileAndRun<int>(proc));
896
897     CHECK(things[0] == 35);
898     for (unsigned i = 1; i < regs.size(); ++i)
899         CHECK(things[i] == 35 + static_cast<int32_t>(i) - 1);
900 }
901
902 void testShuffleRotateAllRegs()
903 {
904     B3::Procedure proc;
905     Code& code = proc.code();
906
907     const Vector<Reg>& regs = code.regsInPriorityOrder(GP);
908
909     BasicBlock* root = code.addBlock();
910     for (unsigned i = 0; i < regs.size(); ++i)
911         loadConstant(root, 35 + i, Tmp(regs[i]));
912     Inst& shuffle = root->append(Shuffle, nullptr);
913     for (unsigned i = 1; i < regs.size(); ++i)
914         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width32));
915     shuffle.append(Tmp(regs.last()), Tmp(regs[0]), Arg::widthArg(Width32));
916
917     StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked);
918     for (unsigned i = 0; i < regs.size(); ++i)
919         root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t)));
920
921     Vector<int32_t> things(regs.size(), 666);
922     Tmp base = code.newTmp(GP);
923     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base);
924     for (unsigned i = 0; i < regs.size(); ++i) {
925         root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0));
926         root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t)));
927     }
928     
929     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
930     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
931
932     CHECK(!compileAndRun<int>(proc));
933
934     CHECK(things[0] == 35 + static_cast<int32_t>(regs.size()) - 1);
935     for (unsigned i = 1; i < regs.size(); ++i)
936         CHECK(things[i] == 35 + static_cast<int32_t>(i) - 1);
937 }
938
939 void testShuffleSimpleSwap64()
940 {
941     B3::Procedure proc;
942     Code& code = proc.code();
943
944     BasicBlock* root = code.addBlock();
945     loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0));
946     loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1));
947     loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2));
948     loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3));
949     root->append(
950         Shuffle, nullptr,
951         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width64),
952         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Width64));
953
954     int64_t things[4];
955     Tmp base = code.newTmp(GP);
956     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
957     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
958     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
959     root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t)));
960     root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t)));
961     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
962     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
963
964     memset(things, 0, sizeof(things));
965     
966     CHECK(!compileAndRun<int>(proc));
967
968     CHECK(things[0] == 10000000000000000ll);
969     CHECK(things[1] == 20000000000000000ll);
970     CHECK(things[2] == 40000000000000000ll);
971     CHECK(things[3] == 30000000000000000ll);
972 }
973
974 void testShuffleSimpleShift64()
975 {
976     B3::Procedure proc;
977     Code& code = proc.code();
978
979     BasicBlock* root = code.addBlock();
980     loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0));
981     loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1));
982     loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2));
983     loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3));
984     loadConstant(root, 50000000000000000ll, Tmp(GPRInfo::regT4));
985     root->append(
986         Shuffle, nullptr,
987         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width64),
988         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width64));
989
990     int64_t things[5];
991     Tmp base = code.newTmp(GP);
992     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
993     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
994     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
995     root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t)));
996     root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t)));
997     root->append(Move, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int64_t)));
998     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
999     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1000
1001     memset(things, 0, sizeof(things));
1002     
1003     CHECK(!compileAndRun<int>(proc));
1004
1005     CHECK(things[0] == 10000000000000000ll);
1006     CHECK(things[1] == 20000000000000000ll);
1007     CHECK(things[2] == 30000000000000000ll);
1008     CHECK(things[3] == 30000000000000000ll);
1009     CHECK(things[4] == 40000000000000000ll);
1010 }
1011
1012 void testShuffleSwapMixedWidth()
1013 {
1014     B3::Procedure proc;
1015     Code& code = proc.code();
1016
1017     BasicBlock* root = code.addBlock();
1018     loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0));
1019     loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1));
1020     loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2));
1021     loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3));
1022     root->append(
1023         Shuffle, nullptr,
1024         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
1025         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Width64));
1026
1027     int64_t things[4];
1028     Tmp base = code.newTmp(GP);
1029     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1030     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
1031     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
1032     root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t)));
1033     root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t)));
1034     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1035     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1036
1037     memset(things, 0, sizeof(things));
1038     
1039     CHECK(!compileAndRun<int>(proc));
1040
1041     CHECK(things[0] == 10000000000000000ll);
1042     CHECK(things[1] == 20000000000000000ll);
1043     CHECK(things[2] == 40000000000000000ll);
1044     CHECK(things[3] == static_cast<uint32_t>(30000000000000000ll));
1045 }
1046
1047 void testShuffleShiftMixedWidth()
1048 {
1049     B3::Procedure proc;
1050     Code& code = proc.code();
1051
1052     BasicBlock* root = code.addBlock();
1053     loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0));
1054     loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1));
1055     loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2));
1056     loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3));
1057     loadConstant(root, 50000000000000000ll, Tmp(GPRInfo::regT4));
1058     root->append(
1059         Shuffle, nullptr,
1060         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width64),
1061         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32));
1062
1063     int64_t things[5];
1064     Tmp base = code.newTmp(GP);
1065     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1066     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
1067     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
1068     root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t)));
1069     root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t)));
1070     root->append(Move, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int64_t)));
1071     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1072     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1073
1074     memset(things, 0, sizeof(things));
1075     
1076     CHECK(!compileAndRun<int>(proc));
1077
1078     CHECK(things[0] == 10000000000000000ll);
1079     CHECK(things[1] == 20000000000000000ll);
1080     CHECK(things[2] == 30000000000000000ll);
1081     CHECK(things[3] == 30000000000000000ll);
1082     CHECK(things[4] == static_cast<uint32_t>(40000000000000000ll));
1083 }
1084
1085 void testShuffleShiftMemory()
1086 {
1087     B3::Procedure proc;
1088     Code& code = proc.code();
1089
1090     int32_t memory[2];
1091     memory[0] = 35;
1092     memory[1] = 36;
1093
1094     BasicBlock* root = code.addBlock();
1095     loadConstant(root, 1, Tmp(GPRInfo::regT0));
1096     loadConstant(root, 2, Tmp(GPRInfo::regT1));
1097     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2));
1098     root->append(
1099         Shuffle, nullptr,
1100         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
1101         Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)),
1102         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Arg::widthArg(Width32));
1103
1104     int32_t things[2];
1105     Tmp base = code.newTmp(GP);
1106     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1107     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
1108     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
1109     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1110     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1111
1112     memset(things, 0, sizeof(things));
1113     
1114     CHECK(!compileAndRun<int>(proc));
1115
1116     CHECK(things[0] == 1);
1117     CHECK(things[1] == 1);
1118     CHECK(memory[0] == 35);
1119     CHECK(memory[1] == 35);
1120 }
1121
1122 void testShuffleShiftMemoryLong()
1123 {
1124     B3::Procedure proc;
1125     Code& code = proc.code();
1126
1127     int32_t memory[2];
1128     memory[0] = 35;
1129     memory[1] = 36;
1130
1131     BasicBlock* root = code.addBlock();
1132     loadConstant(root, 1, Tmp(GPRInfo::regT0));
1133     loadConstant(root, 2, Tmp(GPRInfo::regT1));
1134     loadConstant(root, 3, Tmp(GPRInfo::regT2));
1135     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT3));
1136     root->append(
1137         Shuffle, nullptr,
1138         
1139         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
1140         
1141         Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT3), 0 * sizeof(int32_t)),
1142         Arg::widthArg(Width32),
1143         
1144         Arg::addr(Tmp(GPRInfo::regT3), 0 * sizeof(int32_t)),
1145         Arg::addr(Tmp(GPRInfo::regT3), 1 * sizeof(int32_t)), Arg::widthArg(Width32),
1146
1147         Arg::addr(Tmp(GPRInfo::regT3), 1 * sizeof(int32_t)), Tmp(GPRInfo::regT2),
1148         Arg::widthArg(Width32));
1149
1150     int32_t things[3];
1151     Tmp base = code.newTmp(GP);
1152     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1153     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
1154     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
1155     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
1156     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1157     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1158
1159     memset(things, 0, sizeof(things));
1160     
1161     CHECK(!compileAndRun<int>(proc));
1162
1163     CHECK(things[0] == 1);
1164     CHECK(things[1] == 1);
1165     CHECK(things[2] == 36);
1166     CHECK(memory[0] == 2);
1167     CHECK(memory[1] == 35);
1168 }
1169
1170 void testShuffleShiftMemoryAllRegs()
1171 {
1172     B3::Procedure proc;
1173     Code& code = proc.code();
1174
1175     int32_t memory[2];
1176     memory[0] = 35;
1177     memory[1] = 36;
1178
1179     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1180     regs.removeFirst(Reg(GPRInfo::regT0));
1181
1182     BasicBlock* root = code.addBlock();
1183     for (unsigned i = 0; i < regs.size(); ++i)
1184         loadConstant(root, i + 1, Tmp(regs[i]));
1185     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1186     Inst& shuffle = root->append(
1187         Shuffle, nullptr,
1188         
1189         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int32_t)),
1190         Arg::widthArg(Width32),
1191         
1192         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int32_t)),
1193         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int32_t)), Arg::widthArg(Width32),
1194
1195         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int32_t)), Tmp(regs[1]),
1196         Arg::widthArg(Width32));
1197
1198     for (unsigned i = 2; i < regs.size(); ++i)
1199         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width32));
1200
1201     Vector<int32_t> things(regs.size(), 666);
1202     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1203     for (unsigned i = 0; i < regs.size(); ++i) {
1204         root->append(
1205             Move32, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int32_t)));
1206     }
1207     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1208     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1209
1210     CHECK(!compileAndRun<int>(proc));
1211
1212     CHECK(things[0] == 1);
1213     CHECK(things[1] == 36);
1214     for (unsigned i = 2; i < regs.size(); ++i)
1215         CHECK(things[i] == static_cast<int32_t>(i));
1216     CHECK(memory[0] == 1);
1217     CHECK(memory[1] == 35);
1218 }
1219
1220 void testShuffleShiftMemoryAllRegs64()
1221 {
1222     B3::Procedure proc;
1223     Code& code = proc.code();
1224
1225     int64_t memory[2];
1226     memory[0] = 35000000000000ll;
1227     memory[1] = 36000000000000ll;
1228
1229     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1230     regs.removeFirst(Reg(GPRInfo::regT0));
1231
1232     BasicBlock* root = code.addBlock();
1233     for (unsigned i = 0; i < regs.size(); ++i)
1234         loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i]));
1235     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1236     Inst& shuffle = root->append(
1237         Shuffle, nullptr,
1238         
1239         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1240         Arg::widthArg(Width64),
1241         
1242         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1243         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1244
1245         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
1246         Arg::widthArg(Width64));
1247
1248     for (unsigned i = 2; i < regs.size(); ++i)
1249         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width64));
1250
1251     Vector<int64_t> things(regs.size(), 666);
1252     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1253     for (unsigned i = 0; i < regs.size(); ++i) {
1254         root->append(
1255             Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
1256     }
1257     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1258     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1259
1260     CHECK(!compileAndRun<int>(proc));
1261
1262     CHECK(things[0] == 1000000000000ll);
1263     CHECK(things[1] == 36000000000000ll);
1264     for (unsigned i = 2; i < regs.size(); ++i)
1265         CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll);
1266     CHECK(memory[0] == 1000000000000ll);
1267     CHECK(memory[1] == 35000000000000ll);
1268 }
1269
// Returns the value an int64_t initially equal to `high` holds after the low 32 bits of `low`
// are stored into its first four bytes (the bytes at its base address). The mixed-width
// shuffle tests use this to compute what a 64-bit memory cell should contain after a 32-bit
// move has been written to its address. On a little-endian target this keeps the upper 32
// bits of `high` and replaces the lower 32 bits with those of `low`.
int64_t combineHiLo(int64_t high, int64_t low)
{
    const int32_t firstWord = static_cast<int32_t>(low);
    int64_t combined = high;
    // Overwrite the first four bytes with memcpy rather than writing one union member and
    // reading another, which ISO C++ does not define. The bytes touched are the same ones
    // that element 0 of an overlaid int32_t[2] would occupy.
    memcpy(&combined, &firstWord, sizeof(firstWord));
    return combined;
}
1280
1281 void testShuffleShiftMemoryAllRegsMixedWidth()
1282 {
1283     B3::Procedure proc;
1284     Code& code = proc.code();
1285
1286     int64_t memory[2];
1287     memory[0] = 35000000000000ll;
1288     memory[1] = 36000000000000ll;
1289
1290     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1291     regs.removeFirst(Reg(GPRInfo::regT0));
1292
1293     BasicBlock* root = code.addBlock();
1294     for (unsigned i = 0; i < regs.size(); ++i)
1295         loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i]));
1296     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1297     Inst& shuffle = root->append(
1298         Shuffle, nullptr,
1299         
1300         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1301         Arg::widthArg(Width32),
1302         
1303         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1304         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1305
1306         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
1307         Arg::widthArg(Width32));
1308
1309     for (unsigned i = 2; i < regs.size(); ++i) {
1310         shuffle.append(
1311             Tmp(regs[i - 1]), Tmp(regs[i]),
1312             (i & 1) ? Arg::widthArg(Width32) : Arg::widthArg(Width64));
1313     }
1314
1315     Vector<int64_t> things(regs.size(), 666);
1316     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1317     for (unsigned i = 0; i < regs.size(); ++i) {
1318         root->append(
1319             Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
1320     }
1321     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1322     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1323
1324     CHECK(!compileAndRun<int>(proc));
1325
1326     CHECK(things[0] == 1000000000000ll);
1327     CHECK(things[1] == static_cast<uint32_t>(36000000000000ll));
1328     for (unsigned i = 2; i < regs.size(); ++i) {
1329         int64_t value = static_cast<int64_t>(i) * 1000000000000ll;
1330         CHECK(things[i] == ((i & 1) ? static_cast<uint32_t>(value) : value));
1331     }
1332     CHECK(memory[0] == combineHiLo(35000000000000ll, 1000000000000ll));
1333     CHECK(memory[1] == 35000000000000ll);
1334 }
1335
1336 void testShuffleRotateMemory()
1337 {
1338     B3::Procedure proc;
1339     Code& code = proc.code();
1340
1341     int32_t memory[2];
1342     memory[0] = 35;
1343     memory[1] = 36;
1344
1345     BasicBlock* root = code.addBlock();
1346     loadConstant(root, 1, Tmp(GPRInfo::regT0));
1347     loadConstant(root, 2, Tmp(GPRInfo::regT1));
1348     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2));
1349     root->append(
1350         Shuffle, nullptr,
1351         
1352         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
1353
1354         Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)),
1355         Arg::widthArg(Width32),
1356         
1357         Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)),
1358         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Arg::widthArg(Width32),
1359
1360         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Tmp(GPRInfo::regT0),
1361         Arg::widthArg(Width32));
1362
1363     int32_t things[2];
1364     Tmp base = code.newTmp(GP);
1365     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1366     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
1367     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
1368     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1369     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1370
1371     memset(things, 0, sizeof(things));
1372     
1373     CHECK(!compileAndRun<int>(proc));
1374
1375     CHECK(things[0] == 36);
1376     CHECK(things[1] == 1);
1377     CHECK(memory[0] == 2);
1378     CHECK(memory[1] == 35);
1379 }
1380
1381 void testShuffleRotateMemory64()
1382 {
1383     B3::Procedure proc;
1384     Code& code = proc.code();
1385
1386     int64_t memory[2];
1387     memory[0] = 35000000000000ll;
1388     memory[1] = 36000000000000ll;
1389
1390     BasicBlock* root = code.addBlock();
1391     loadConstant(root, 1000000000000ll, Tmp(GPRInfo::regT0));
1392     loadConstant(root, 2000000000000ll, Tmp(GPRInfo::regT1));
1393     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2));
1394     root->append(
1395         Shuffle, nullptr,
1396         
1397         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width64),
1398
1399         Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)),
1400         Arg::widthArg(Width64),
1401         
1402         Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)),
1403         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1404
1405         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Tmp(GPRInfo::regT0),
1406         Arg::widthArg(Width64));
1407
1408     int64_t things[2];
1409     Tmp base = code.newTmp(GP);
1410     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1411     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
1412     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
1413     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1414     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1415
1416     memset(things, 0, sizeof(things));
1417     
1418     CHECK(!compileAndRun<int>(proc));
1419
1420     CHECK(things[0] == 36000000000000ll);
1421     CHECK(things[1] == 1000000000000ll);
1422     CHECK(memory[0] == 2000000000000ll);
1423     CHECK(memory[1] == 35000000000000ll);
1424 }
1425
1426 void testShuffleRotateMemoryMixedWidth()
1427 {
1428     B3::Procedure proc;
1429     Code& code = proc.code();
1430
1431     int64_t memory[2];
1432     memory[0] = 35000000000000ll;
1433     memory[1] = 36000000000000ll;
1434
1435     BasicBlock* root = code.addBlock();
1436     loadConstant(root, 1000000000000ll, Tmp(GPRInfo::regT0));
1437     loadConstant(root, 2000000000000ll, Tmp(GPRInfo::regT1));
1438     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2));
1439     root->append(
1440         Shuffle, nullptr,
1441         
1442         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
1443
1444         Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)),
1445         Arg::widthArg(Width64),
1446         
1447         Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)),
1448         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Arg::widthArg(Width32),
1449
1450         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Tmp(GPRInfo::regT0),
1451         Arg::widthArg(Width64));
1452
1453     int64_t things[2];
1454     Tmp base = code.newTmp(GP);
1455     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1456     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
1457     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
1458     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1459     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1460
1461     memset(things, 0, sizeof(things));
1462     
1463     CHECK(!compileAndRun<int>(proc));
1464
1465     CHECK(things[0] == 36000000000000ll);
1466     CHECK(things[1] == static_cast<uint32_t>(1000000000000ll));
1467     CHECK(memory[0] == 2000000000000ll);
1468     CHECK(memory[1] == combineHiLo(36000000000000ll, 35000000000000ll));
1469 }
1470
1471 void testShuffleRotateMemoryAllRegs64()
1472 {
1473     B3::Procedure proc;
1474     Code& code = proc.code();
1475
1476     int64_t memory[2];
1477     memory[0] = 35000000000000ll;
1478     memory[1] = 36000000000000ll;
1479
1480     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1481     regs.removeFirst(Reg(GPRInfo::regT0));
1482
1483     BasicBlock* root = code.addBlock();
1484     for (unsigned i = 0; i < regs.size(); ++i)
1485         loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i]));
1486     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1487     Inst& shuffle = root->append(
1488         Shuffle, nullptr,
1489         
1490         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1491         Arg::widthArg(Width64),
1492         
1493         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1494         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1495
1496         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
1497         Arg::widthArg(Width64),
1498
1499         regs.last(), regs[0], Arg::widthArg(Width64));
1500
1501     for (unsigned i = 2; i < regs.size(); ++i)
1502         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width64));
1503
1504     Vector<int64_t> things(regs.size(), 666);
1505     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1506     for (unsigned i = 0; i < regs.size(); ++i) {
1507         root->append(
1508             Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
1509     }
1510     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1511     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1512
1513     CHECK(!compileAndRun<int>(proc));
1514
1515     CHECK(things[0] == static_cast<int64_t>(regs.size()) * 1000000000000ll);
1516     CHECK(things[1] == 36000000000000ll);
1517     for (unsigned i = 2; i < regs.size(); ++i)
1518         CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll);
1519     CHECK(memory[0] == 1000000000000ll);
1520     CHECK(memory[1] == 35000000000000ll);
1521 }
1522
1523 void testShuffleRotateMemoryAllRegsMixedWidth()
1524 {
1525     B3::Procedure proc;
1526     Code& code = proc.code();
1527
1528     int64_t memory[2];
1529     memory[0] = 35000000000000ll;
1530     memory[1] = 36000000000000ll;
1531
1532     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1533     regs.removeFirst(Reg(GPRInfo::regT0));
1534
1535     BasicBlock* root = code.addBlock();
1536     for (unsigned i = 0; i < regs.size(); ++i)
1537         loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i]));
1538     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1539     Inst& shuffle = root->append(
1540         Shuffle, nullptr,
1541         
1542         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1543         Arg::widthArg(Width32),
1544         
1545         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1546         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1547
1548         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
1549         Arg::widthArg(Width32),
1550
1551         regs.last(), regs[0], Arg::widthArg(Width32));
1552
1553     for (unsigned i = 2; i < regs.size(); ++i)
1554         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width64));
1555
1556     Vector<int64_t> things(regs.size(), 666);
1557     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1558     for (unsigned i = 0; i < regs.size(); ++i) {
1559         root->append(
1560             Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
1561     }
1562     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1563     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1564
1565     CHECK(!compileAndRun<int>(proc));
1566
1567     CHECK(things[0] == static_cast<uint32_t>(static_cast<int64_t>(regs.size()) * 1000000000000ll));
1568     CHECK(things[1] == static_cast<uint32_t>(36000000000000ll));
1569     for (unsigned i = 2; i < regs.size(); ++i)
1570         CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll);
1571     CHECK(memory[0] == combineHiLo(35000000000000ll, 1000000000000ll));
1572     CHECK(memory[1] == 35000000000000ll);
1573 }
1574
1575 void testShuffleSwapDouble()
1576 {
1577     B3::Procedure proc;
1578     Code& code = proc.code();
1579
1580     BasicBlock* root = code.addBlock();
1581     loadDoubleConstant(root, 1, Tmp(FPRInfo::fpRegT0), Tmp(GPRInfo::regT0));
1582     loadDoubleConstant(root, 2, Tmp(FPRInfo::fpRegT1), Tmp(GPRInfo::regT0));
1583     loadDoubleConstant(root, 3, Tmp(FPRInfo::fpRegT2), Tmp(GPRInfo::regT0));
1584     loadDoubleConstant(root, 4, Tmp(FPRInfo::fpRegT3), Tmp(GPRInfo::regT0));
1585     root->append(
1586         Shuffle, nullptr,
1587         Tmp(FPRInfo::fpRegT2), Tmp(FPRInfo::fpRegT3), Arg::widthArg(Width64),
1588         Tmp(FPRInfo::fpRegT3), Tmp(FPRInfo::fpRegT2), Arg::widthArg(Width64));
1589
1590     double things[4];
1591     Tmp base = code.newTmp(GP);
1592     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1593     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT0), Arg::addr(base, 0 * sizeof(double)));
1594     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT1), Arg::addr(base, 1 * sizeof(double)));
1595     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT2), Arg::addr(base, 2 * sizeof(double)));
1596     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT3), Arg::addr(base, 3 * sizeof(double)));
1597     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1598     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1599
1600     memset(things, 0, sizeof(things));
1601     
1602     CHECK(!compileAndRun<int>(proc));
1603
1604     CHECK(things[0] == 1);
1605     CHECK(things[1] == 2);
1606     CHECK(things[2] == 4);
1607     CHECK(things[3] == 3);
1608 }
1609
1610 void testShuffleShiftDouble()
1611 {
1612     B3::Procedure proc;
1613     Code& code = proc.code();
1614
1615     BasicBlock* root = code.addBlock();
1616     loadDoubleConstant(root, 1, Tmp(FPRInfo::fpRegT0), Tmp(GPRInfo::regT0));
1617     loadDoubleConstant(root, 2, Tmp(FPRInfo::fpRegT1), Tmp(GPRInfo::regT0));
1618     loadDoubleConstant(root, 3, Tmp(FPRInfo::fpRegT2), Tmp(GPRInfo::regT0));
1619     loadDoubleConstant(root, 4, Tmp(FPRInfo::fpRegT3), Tmp(GPRInfo::regT0));
1620     root->append(
1621         Shuffle, nullptr,
1622         Tmp(FPRInfo::fpRegT2), Tmp(FPRInfo::fpRegT3), Arg::widthArg(Width64));
1623
1624     double things[4];
1625     Tmp base = code.newTmp(GP);
1626     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1627     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT0), Arg::addr(base, 0 * sizeof(double)));
1628     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT1), Arg::addr(base, 1 * sizeof(double)));
1629     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT2), Arg::addr(base, 2 * sizeof(double)));
1630     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT3), Arg::addr(base, 3 * sizeof(double)));
1631     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1632     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1633
1634     memset(things, 0, sizeof(things));
1635     
1636     CHECK(!compileAndRun<int>(proc));
1637
1638     CHECK(things[0] == 1);
1639     CHECK(things[1] == 2);
1640     CHECK(things[2] == 3);
1641     CHECK(things[3] == 3);
1642 }
1643
1644 #if CPU(X86) || CPU(X86_64)
1645 void testX86VMULSD()
1646 {
1647     B3::Procedure proc;
1648     Code& code = proc.code();
1649
1650     BasicBlock* root = code.addBlock();
1651     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(FPRInfo::argumentFPR1), Tmp(FPRInfo::argumentFPR2));
1652     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
1653     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1654
1655     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1656 }
1657
1658 void testX86VMULSDDestRex()
1659 {
1660     B3::Procedure proc;
1661     Code& code = proc.code();
1662
1663     BasicBlock* root = code.addBlock();
1664     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm15));
1665     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1666     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1667
1668     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1669 }
1670
1671 void testX86VMULSDOp1DestRex()
1672 {
1673     B3::Procedure proc;
1674     Code& code = proc.code();
1675
1676     BasicBlock* root = code.addBlock();
1677     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
1678     root->append(MulDouble, nullptr, Tmp(X86Registers::xmm14), Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm15));
1679     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1680     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1681
1682     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1683 }
1684
1685 void testX86VMULSDOp2DestRex()
1686 {
1687     B3::Procedure proc;
1688     Code& code = proc.code();
1689
1690     BasicBlock* root = code.addBlock();
1691     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm14));
1692     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm15));
1693     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1694     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1695
1696     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1697 }
1698
1699 void testX86VMULSDOpsDestRex()
1700 {
1701     B3::Procedure proc;
1702     Code& code = proc.code();
1703
1704     BasicBlock* root = code.addBlock();
1705     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
1706     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm13));
1707     root->append(MulDouble, nullptr, Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm13), Tmp(X86Registers::xmm15));
1708     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1709     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1710
1711     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1712 }
1713
1714 void testX86VMULSDAddr()
1715 {
1716     B3::Procedure proc;
1717     Code& code = proc.code();
1718
1719     BasicBlock* root = code.addBlock();
1720     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(GPRInfo::argumentGPR0), - 16), Tmp(FPRInfo::argumentFPR2));
1721     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
1722     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1723
1724     double secondArg = 4.2;
1725     CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 2, pureNaN()) == 2.4 * 4.2);
1726 }
1727
1728 void testX86VMULSDAddrOpRexAddr()
1729 {
1730     B3::Procedure proc;
1731     Code& code = proc.code();
1732
1733     BasicBlock* root = code.addBlock();
1734     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
1735     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(X86Registers::r13), - 16), Tmp(FPRInfo::argumentFPR2));
1736     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
1737     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1738
1739     double secondArg = 4.2;
1740     CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 2, pureNaN()) == 2.4 * 4.2);
1741 }
1742
1743 void testX86VMULSDDestRexAddr()
1744 {
1745     B3::Procedure proc;
1746     Code& code = proc.code();
1747
1748     BasicBlock* root = code.addBlock();
1749     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(GPRInfo::argumentGPR0), 16), Tmp(X86Registers::xmm15));
1750     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1751     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1752
1753     double secondArg = 4.2;
1754     CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 2, pureNaN()) == 2.4 * 4.2);
1755 }
1756
1757 void testX86VMULSDRegOpDestRexAddr()
1758 {
1759     B3::Procedure proc;
1760     Code& code = proc.code();
1761
1762     BasicBlock* root = code.addBlock();
1763     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
1764     root->append(MulDouble, nullptr, Arg::addr(Tmp(GPRInfo::argumentGPR0)), Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm15));
1765     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1766     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1767
1768     double secondArg = 4.2;
1769     CHECK(compileAndRun<double>(proc, 2.4, &secondArg, pureNaN()) == 2.4 * 4.2);
1770 }
1771
1772 void testX86VMULSDAddrOpDestRexAddr()
1773 {
1774     B3::Procedure proc;
1775     Code& code = proc.code();
1776
1777     BasicBlock* root = code.addBlock();
1778     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
1779     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(X86Registers::r13), 8), Tmp(X86Registers::xmm15));
1780     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1781     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1782
1783     double secondArg = 4.2;
1784     CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, pureNaN()) == 2.4 * 4.2);
1785 }
1786
1787 void testX86VMULSDBaseNeedsRex()
1788 {
1789     B3::Procedure proc;
1790     Code& code = proc.code();
1791
1792     BasicBlock* root = code.addBlock();
1793     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
1794     root->append(MulDouble, nullptr, Arg::index(Tmp(X86Registers::r13), Tmp(GPRInfo::argumentGPR1)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
1795     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
1796     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1797
1798     double secondArg = 4.2;
1799     uint64_t index = 8;
1800     CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, index, pureNaN()) == 2.4 * 4.2);
1801 }
1802
1803 void testX86VMULSDIndexNeedsRex()
1804 {
1805     B3::Procedure proc;
1806     Code& code = proc.code();
1807
1808     BasicBlock* root = code.addBlock();
1809     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR1), Tmp(X86Registers::r13));
1810     root->append(MulDouble, nullptr, Arg::index(Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
1811     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
1812     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1813
1814     double secondArg = 4.2;
1815     uint64_t index = - 8;
1816     CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 1, index, pureNaN()) == 2.4 * 4.2);
1817 }
1818
1819 void testX86VMULSDBaseIndexNeedRex()
1820 {
1821     B3::Procedure proc;
1822     Code& code = proc.code();
1823
1824     BasicBlock* root = code.addBlock();
1825     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r12));
1826     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR1), Tmp(X86Registers::r13));
1827     root->append(MulDouble, nullptr, Arg::index(Tmp(X86Registers::r12), Tmp(X86Registers::r13)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
1828     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
1829     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1830
1831     double secondArg = 4.2;
1832     uint64_t index = 16;
1833     CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 2, index, pureNaN()) == 2.4 * 4.2);
1834 }
1835 #endif // #if CPU(X86) || CPU(X86_64)
1836
1837 void testArgumentRegPinned()
1838 {
1839     B3::Procedure proc;
1840     Code& code = proc.code();
1841     GPRReg pinned = GPRInfo::argumentGPR0;
1842     proc.pinRegister(pinned);
1843
1844     B3::Air::Special* patchpointSpecial = code.addSpecial(std::make_unique<B3::PatchpointSpecial>());
1845
1846     B3::BasicBlock* b3Root = proc.addBlock();
1847     B3::PatchpointValue* patchpoint = b3Root->appendNew<B3::PatchpointValue>(proc, B3::Void, B3::Origin());
1848     patchpoint->clobber(RegisterSet(pinned));
1849     patchpoint->setGenerator(
1850         [=] (CCallHelpers& jit, const B3::StackmapGenerationParams&) {
1851             jit.move(CCallHelpers::TrustedImm32(42), pinned);
1852         });
1853
1854     BasicBlock* root = code.addBlock();
1855
1856     Tmp t1 = code.newTmp(GP);
1857     Tmp t2 = code.newTmp(GP);
1858
1859     root->append(Move, nullptr, Tmp(pinned), t1);
1860     root->append(Patch, patchpoint, Arg::special(patchpointSpecial));
1861     root->append(Move, nullptr, Tmp(pinned), t2);
1862     root->append(Add32, nullptr, t1, t2, Tmp(GPRInfo::returnValueGPR));
1863     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1864
1865     int32_t r = compileAndRun<int32_t>(proc, 10);
1866     CHECK(r == 10 + 42);
1867 }
1868
1869 void testArgumentRegPinned2()
1870 {
1871     B3::Procedure proc;
1872     Code& code = proc.code();
1873     GPRReg pinned = GPRInfo::argumentGPR0;
1874     proc.pinRegister(pinned);
1875
1876     B3::Air::Special* patchpointSpecial = code.addSpecial(std::make_unique<B3::PatchpointSpecial>());
1877
1878     B3::BasicBlock* b3Root = proc.addBlock();
1879     B3::PatchpointValue* patchpoint = b3Root->appendNew<B3::PatchpointValue>(proc, B3::Void, B3::Origin());
1880     patchpoint->clobber({ }); 
1881     patchpoint->setGenerator(
1882         [=] (CCallHelpers& jit, const B3::StackmapGenerationParams&) {
1883             jit.move(CCallHelpers::TrustedImm32(42), pinned);
1884         });
1885
1886     BasicBlock* root = code.addBlock();
1887
1888     Tmp t1 = code.newTmp(GP);
1889     Tmp t2 = code.newTmp(GP);
1890
1891     // Since the patchpoint does not claim to clobber the pinned register,
1892     // the register allocator is allowed to either coalesce the first move,
1893     // the second move, or neither. The allowed results are:
1894     // - No move coalesced: 52
1895     // - The first move is coalesced: 84
1896     // - The second move is coalesced: 52
1897     root->append(Move, nullptr, Tmp(pinned), t1);
1898     root->append(Patch, patchpoint, Arg::special(patchpointSpecial));
1899     root->append(Move, nullptr, Tmp(pinned), t2);
1900     root->append(Add32, nullptr, t1, t2, Tmp(GPRInfo::returnValueGPR));
1901     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1902
1903     int32_t r = compileAndRun<int32_t>(proc, 10);
1904     CHECK(r == 52 || r == 84);
1905 }
1906
1907 void testArgumentRegPinned3()
1908 {
1909     B3::Procedure proc;
1910     Code& code = proc.code();
1911     GPRReg pinned = GPRInfo::argumentGPR0;
1912     proc.pinRegister(pinned);
1913
1914     B3::Air::Special* patchpointSpecial = code.addSpecial(std::make_unique<B3::PatchpointSpecial>());
1915
1916     B3::BasicBlock* b3Root = proc.addBlock();
1917     B3::PatchpointValue* patchpoint = b3Root->appendNew<B3::PatchpointValue>(proc, B3::Void, B3::Origin());
1918     patchpoint->clobber(RegisterSet(pinned));
1919     patchpoint->setGenerator(
1920         [=] (CCallHelpers& jit, const B3::StackmapGenerationParams&) {
1921             jit.move(CCallHelpers::TrustedImm32(42), pinned);
1922         });
1923
1924     BasicBlock* root = code.addBlock();
1925
1926     Tmp t1 = code.newTmp(GP);
1927     Tmp t2 = code.newTmp(GP);
1928     Tmp t3 = code.newTmp(GP);
1929
1930     root->append(Move, nullptr, Tmp(pinned), t1);
1931     root->append(Patch, patchpoint, Arg::special(patchpointSpecial));
1932     root->append(Move, nullptr, Tmp(pinned), t2);
1933     root->append(Patch, patchpoint, Arg::special(patchpointSpecial));
1934     root->append(Move, nullptr, Tmp(pinned), t3);
1935     root->append(Add32, nullptr, t1, t2, Tmp(GPRInfo::returnValueGPR));
1936     root->append(Add32, nullptr, Tmp(GPRInfo::returnValueGPR), t3, Tmp(GPRInfo::returnValueGPR));
1937     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1938
1939     int32_t r = compileAndRun<int32_t>(proc, 10);
1940     CHECK(r == 10 + 42 + 42);
1941 }
1942
// Queues one test for execution. `test` is the full call expression (for example
// testSimple()); its spelling is also the name matched by shouldRun() and the text
// printed before and after the test runs. Expects `shouldRun` and `tasks` to be in scope
// at the expansion site.
//
// The definition intentionally has no trailing semicolon: the caller's own `;`
// terminates the do/while, so `RUN(x);` expands to exactly one statement and remains
// safe inside an unbraced if/else.
#define RUN(test) do {                          \
        if (!shouldRun(#test))                  \
            break;                              \
        tasks.append(                           \
            createSharedTask<void()>(           \
                [&] () {                        \
                    dataLog(#test "...\n");     \
                    test;                       \
                    dataLog(#test ": OK!\n");   \
                }));                            \
    } while (false)
1954
1955 void run(const char* filter)
1956 {
1957     JSC::initializeThreading();
1958
1959     Deque<RefPtr<SharedTask<void()>>> tasks;
1960
1961     auto shouldRun = [&] (const char* testName) -> bool {
1962         return !filter || !!strcasestr(testName, filter);
1963     };
1964
1965     RUN(testSimple());
1966     
1967     RUN(testShuffleSimpleSwap());
1968     RUN(testShuffleSimpleShift());
1969     RUN(testShuffleLongShift());
1970     RUN(testShuffleLongShiftBackwards());
1971     RUN(testShuffleSimpleRotate());
1972     RUN(testShuffleSimpleBroadcast());
1973     RUN(testShuffleBroadcastAllRegs());
1974     RUN(testShuffleTreeShift());
1975     RUN(testShuffleTreeShiftBackward());
1976     RUN(testShuffleTreeShiftOtherBackward());
1977     RUN(testShuffleMultipleShifts());
1978     RUN(testShuffleRotateWithFringe());
1979     RUN(testShuffleRotateWithFringeInWeirdOrder());
1980     RUN(testShuffleRotateWithLongFringe());
1981     RUN(testShuffleMultipleRotates());
1982     RUN(testShuffleShiftAndRotate());
1983     RUN(testShuffleShiftAllRegs());
1984     RUN(testShuffleRotateAllRegs());
1985     RUN(testShuffleSimpleSwap64());
1986     RUN(testShuffleSimpleShift64());
1987     RUN(testShuffleSwapMixedWidth());
1988     RUN(testShuffleShiftMixedWidth());
1989     RUN(testShuffleShiftMemory());
1990     RUN(testShuffleShiftMemoryLong());
1991     RUN(testShuffleShiftMemoryAllRegs());
1992     RUN(testShuffleShiftMemoryAllRegs64());
1993     RUN(testShuffleShiftMemoryAllRegsMixedWidth());
1994     RUN(testShuffleRotateMemory());
1995     RUN(testShuffleRotateMemory64());
1996     RUN(testShuffleRotateMemoryMixedWidth());
1997     RUN(testShuffleRotateMemoryAllRegs64());
1998     RUN(testShuffleRotateMemoryAllRegsMixedWidth());
1999     RUN(testShuffleSwapDouble());
2000     RUN(testShuffleShiftDouble());
2001
2002 #if CPU(X86) || CPU(X86_64)
2003     RUN(testX86VMULSD());
2004     RUN(testX86VMULSDDestRex());
2005     RUN(testX86VMULSDOp1DestRex());
2006     RUN(testX86VMULSDOp2DestRex());
2007     RUN(testX86VMULSDOpsDestRex());
2008
2009     RUN(testX86VMULSDAddr());
2010     RUN(testX86VMULSDAddrOpRexAddr());
2011     RUN(testX86VMULSDDestRexAddr());
2012     RUN(testX86VMULSDRegOpDestRexAddr());
2013     RUN(testX86VMULSDAddrOpDestRexAddr());
2014
2015     RUN(testX86VMULSDBaseNeedsRex());
2016     RUN(testX86VMULSDIndexNeedsRex());
2017     RUN(testX86VMULSDBaseIndexNeedRex());
2018 #endif
2019
2020     RUN(testArgumentRegPinned());
2021     RUN(testArgumentRegPinned2());
2022     RUN(testArgumentRegPinned3());
2023
2024     if (tasks.isEmpty())
2025         usage();
2026
2027     Lock lock;
2028
2029     Vector<Ref<Thread>> threads;
2030     for (unsigned i = filter ? 1 : WTF::numberOfProcessorCores(); i--;) {
2031         threads.append(
2032             Thread::create(
2033                 "testair thread",
2034                 [&] () {
2035                     for (;;) {
2036                         RefPtr<SharedTask<void()>> task;
2037                         {
2038                             LockHolder locker(lock);
2039                             if (tasks.isEmpty())
2040                                 return;
2041                             task = tasks.takeFirst();
2042                         }
2043
2044                         task->run();
2045                     }
2046                 }));
2047     }
2048
2049     for (auto& thread : threads)
2050         thread->waitForCompletion();
2051     crashLock.lock();
2052 }
2053
2054 } // anonymous namespace
2055
2056 #else // ENABLE(B3_JIT)
2057
2058 static void run(const char*)
2059 {
2060     dataLog("B3 JIT is not enabled.\n");
2061 }
2062
2063 #endif // ENABLE(B3_JIT)
2064
2065 int main(int argc, char** argv)
2066 {
2067     const char* filter = nullptr;
2068     switch (argc) {
2069     case 1:
2070         break;
2071     case 2:
2072         filter = argv[1];
2073         break;
2074     default:
2075         usage();
2076         break;
2077     }
2078     
2079     run(filter);
2080     return 0;
2081 }