43b923a64cd5e54d9da96877cef5ace79ac685c9
[WebKit-https.git] / Source / JavaScriptCore / b3 / air / testair.cpp
1 /*
2  * Copyright (C) 2016-2018 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27
28 #include "AirCode.h"
29 #include "AirGenerate.h"
30 #include "AirInstInlines.h"
31 #include "AirSpecial.h"
32 #include "AllowMacroScratchRegisterUsage.h"
33 #include "B3BasicBlockInlines.h"
34 #include "B3Compilation.h"
35 #include "B3Procedure.h"
36 #include "B3PatchpointSpecial.h"
37 #include "CCallHelpers.h"
38 #include "InitializeThreading.h"
39 #include "JSCInlines.h"
40 #include "LinkBuffer.h"
41 #include "PureNaN.h"
42 #include <cmath>
43 #include <map>
44 #include <string>
45 #include <wtf/Lock.h>
46 #include <wtf/NumberOfCores.h>
47 #include <wtf/Threading.h>
48
// There is no NO_RETURN_DUE_TO_EXIT annotation (and we do not want one), so usage() guards
// its exit() call with this always-true predicate instead.
static bool hiddenTruthBecauseNoReturnIsStupid()
{
    return true;
}
51
52 static void usage()
53 {
54     dataLog("Usage: testair [<filter>]\n");
55     if (hiddenTruthBecauseNoReturnIsStupid())
56         exit(1);
57 }
58
59 #if ENABLE(B3_JIT)
60
61 using namespace JSC;
62 using namespace JSC::B3::Air;
63
64 using JSC::B3::FP;
65 using JSC::B3::GP;
66 using JSC::B3::Width;
67 using JSC::B3::Width8;
68 using JSC::B3::Width16;
69 using JSC::B3::Width32;
70 using JSC::B3::Width64;
71
72 namespace {
73
74 StaticLock crashLock;
75
76 // Nothing fancy for now; we just use the existing WTF assertion machinery.
77 #define CHECK(x) do {                                                   \
78         if (!!(x))                                                      \
79             break;                                                      \
80         crashLock.lock();                                               \
81         WTFReportAssertionFailure(__FILE__, __LINE__, WTF_PRETTY_FUNCTION, #x); \
82         CRASH();                                                        \
83     } while (false)
84
85 std::unique_ptr<B3::Compilation> compile(B3::Procedure& proc)
86 {
87     prepareForGeneration(proc.code());
88     CCallHelpers jit;
89     generate(proc.code(), jit);
90     LinkBuffer linkBuffer(jit, nullptr);
91
92     return std::make_unique<B3::Compilation>(
93         FINALIZE_CODE(linkBuffer, "testair compilation"), proc.releaseByproducts());
94 }
95
96 template<typename T, typename... Arguments>
97 T invoke(const B3::Compilation& code, Arguments... arguments)
98 {
99     T (*function)(Arguments...) = bitwise_cast<T(*)(Arguments...)>(code.code().executableAddress());
100     return function(arguments...);
101 }
102
103 template<typename T, typename... Arguments>
104 T compileAndRun(B3::Procedure& procedure, Arguments... arguments)
105 {
106     return invoke<T>(*compile(procedure), arguments...);
107 }
108
109 void testSimple()
110 {
111     B3::Procedure proc;
112     Code& code = proc.code();
113
114     BasicBlock* root = code.addBlock();
115     root->append(Move, nullptr, Arg::imm(42), Tmp(GPRInfo::returnValueGPR));
116     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
117
118     CHECK(compileAndRun<int>(proc) == 42);
119 }
120
121 // Use this to put a constant into a register without Air being able to see the constant.
122 template<typename T>
123 void loadConstantImpl(BasicBlock* block, T value, B3::Air::Opcode move, Tmp tmp, Tmp scratch)
124 {
125     static StaticLock lock;
126     static std::map<T, T*>* map; // I'm not messing with HashMap's problems with integers.
127
128     LockHolder locker(lock);
129     if (!map)
130         map = new std::map<T, T*>();
131
132     if (!map->count(value))
133         (*map)[value] = new T(value);
134
135     T* ptr = (*map)[value];
136     block->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(ptr)), scratch);
137     block->append(move, nullptr, Arg::addr(scratch), tmp);
138 }
139
140 void loadConstant(BasicBlock* block, intptr_t value, Tmp tmp)
141 {
142     loadConstantImpl<intptr_t>(block, value, Move, tmp, tmp);
143 }
144
145 void loadDoubleConstant(BasicBlock* block, double value, Tmp tmp, Tmp scratch)
146 {
147     loadConstantImpl<double>(block, value, MoveDouble, tmp, scratch);
148 }
149
150 void testShuffleSimpleSwap()
151 {
152     B3::Procedure proc;
153     Code& code = proc.code();
154
155     BasicBlock* root = code.addBlock();
156     loadConstant(root, 1, Tmp(GPRInfo::regT0));
157     loadConstant(root, 2, Tmp(GPRInfo::regT1));
158     loadConstant(root, 3, Tmp(GPRInfo::regT2));
159     loadConstant(root, 4, Tmp(GPRInfo::regT3));
160     root->append(
161         Shuffle, nullptr,
162         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
163         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Width32));
164
165     int32_t things[4];
166     Tmp base = code.newTmp(GP);
167     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
168     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
169     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
170     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
171     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
172     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
173     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
174
175     memset(things, 0, sizeof(things));
176     
177     CHECK(!compileAndRun<int>(proc));
178
179     CHECK(things[0] == 1);
180     CHECK(things[1] == 2);
181     CHECK(things[2] == 4);
182     CHECK(things[3] == 3);
183 }
184
185 void testShuffleSimpleShift()
186 {
187     B3::Procedure proc;
188     Code& code = proc.code();
189
190     BasicBlock* root = code.addBlock();
191     loadConstant(root, 1, Tmp(GPRInfo::regT0));
192     loadConstant(root, 2, Tmp(GPRInfo::regT1));
193     loadConstant(root, 3, Tmp(GPRInfo::regT2));
194     loadConstant(root, 4, Tmp(GPRInfo::regT3));
195     root->append(
196         Shuffle, nullptr,
197         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
198         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32));
199
200     int32_t things[5];
201     Tmp base = code.newTmp(GP);
202     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
203     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
204     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
205     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
206     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
207     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
208     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
209     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
210
211     memset(things, 0, sizeof(things));
212     
213     CHECK(!compileAndRun<int>(proc));
214
215     CHECK(things[0] == 1);
216     CHECK(things[1] == 2);
217     CHECK(things[2] == 3);
218     CHECK(things[3] == 3);
219     CHECK(things[4] == 4);
220 }
221
222 void testShuffleLongShift()
223 {
224     B3::Procedure proc;
225     Code& code = proc.code();
226
227     BasicBlock* root = code.addBlock();
228     loadConstant(root, 1, Tmp(GPRInfo::regT0));
229     loadConstant(root, 2, Tmp(GPRInfo::regT1));
230     loadConstant(root, 3, Tmp(GPRInfo::regT2));
231     loadConstant(root, 4, Tmp(GPRInfo::regT3));
232     loadConstant(root, 5, Tmp(GPRInfo::regT4));
233     loadConstant(root, 6, Tmp(GPRInfo::regT5));
234     loadConstant(root, 7, Tmp(GPRInfo::regT6));
235     loadConstant(root, 8, Tmp(GPRInfo::regT7));
236     root->append(
237         Shuffle, nullptr,
238         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
239         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
240         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
241         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
242         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
243         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
244         Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT7), Arg::widthArg(Width32));
245
246     int32_t things[8];
247     Tmp base = code.newTmp(GP);
248     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
249     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
250     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
251     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
252     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
253     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
254     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
255     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
256     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
257     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
258     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
259
260     memset(things, 0, sizeof(things));
261     
262     CHECK(!compileAndRun<int>(proc));
263
264     CHECK(things[0] == 1);
265     CHECK(things[1] == 1);
266     CHECK(things[2] == 2);
267     CHECK(things[3] == 3);
268     CHECK(things[4] == 4);
269     CHECK(things[5] == 5);
270     CHECK(things[6] == 6);
271     CHECK(things[7] == 7);
272 }
273
274 void testShuffleLongShiftBackwards()
275 {
276     B3::Procedure proc;
277     Code& code = proc.code();
278
279     BasicBlock* root = code.addBlock();
280     loadConstant(root, 1, Tmp(GPRInfo::regT0));
281     loadConstant(root, 2, Tmp(GPRInfo::regT1));
282     loadConstant(root, 3, Tmp(GPRInfo::regT2));
283     loadConstant(root, 4, Tmp(GPRInfo::regT3));
284     loadConstant(root, 5, Tmp(GPRInfo::regT4));
285     loadConstant(root, 6, Tmp(GPRInfo::regT5));
286     loadConstant(root, 7, Tmp(GPRInfo::regT6));
287     loadConstant(root, 8, Tmp(GPRInfo::regT7));
288     root->append(
289         Shuffle, nullptr,
290         Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT7), Arg::widthArg(Width32),
291         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
292         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
293         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
294         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
295         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
296         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32));
297
298     int32_t things[8];
299     Tmp base = code.newTmp(GP);
300     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
301     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
302     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
303     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
304     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
305     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
306     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
307     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
308     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
309     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
310     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
311
312     memset(things, 0, sizeof(things));
313     
314     CHECK(!compileAndRun<int>(proc));
315
316     CHECK(things[0] == 1);
317     CHECK(things[1] == 1);
318     CHECK(things[2] == 2);
319     CHECK(things[3] == 3);
320     CHECK(things[4] == 4);
321     CHECK(things[5] == 5);
322     CHECK(things[6] == 6);
323     CHECK(things[7] == 7);
324 }
325
326 void testShuffleSimpleRotate()
327 {
328     B3::Procedure proc;
329     Code& code = proc.code();
330
331     BasicBlock* root = code.addBlock();
332     loadConstant(root, 1, Tmp(GPRInfo::regT0));
333     loadConstant(root, 2, Tmp(GPRInfo::regT1));
334     loadConstant(root, 3, Tmp(GPRInfo::regT2));
335     loadConstant(root, 4, Tmp(GPRInfo::regT3));
336     root->append(
337         Shuffle, nullptr,
338         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
339         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
340         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32));
341
342     int32_t things[4];
343     Tmp base = code.newTmp(GP);
344     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
345     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
346     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
347     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
348     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
349     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
350     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
351
352     memset(things, 0, sizeof(things));
353     
354     CHECK(!compileAndRun<int>(proc));
355
356     CHECK(things[0] == 3);
357     CHECK(things[1] == 1);
358     CHECK(things[2] == 2);
359     CHECK(things[3] == 4);
360 }
361
362 void testShuffleSimpleBroadcast()
363 {
364     B3::Procedure proc;
365     Code& code = proc.code();
366
367     BasicBlock* root = code.addBlock();
368     loadConstant(root, 1, Tmp(GPRInfo::regT0));
369     loadConstant(root, 2, Tmp(GPRInfo::regT1));
370     loadConstant(root, 3, Tmp(GPRInfo::regT2));
371     loadConstant(root, 4, Tmp(GPRInfo::regT3));
372     root->append(
373         Shuffle, nullptr,
374         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
375         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
376         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Width32));
377
378     int32_t things[4];
379     Tmp base = code.newTmp(GP);
380     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
381     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
382     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
383     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
384     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
385     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
386     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
387
388     memset(things, 0, sizeof(things));
389     
390     CHECK(!compileAndRun<int>(proc));
391
392     CHECK(things[0] == 1);
393     CHECK(things[1] == 1);
394     CHECK(things[2] == 1);
395     CHECK(things[3] == 1);
396 }
397
398 void testShuffleBroadcastAllRegs()
399 {
400     B3::Procedure proc;
401     Code& code = proc.code();
402
403     const Vector<Reg>& regs = code.regsInPriorityOrder(GP);
404
405     BasicBlock* root = code.addBlock();
406     root->append(Move, nullptr, Arg::imm(35), Tmp(GPRInfo::regT0));
407     unsigned count = 1;
408     for (Reg reg : regs) {
409         if (reg != Reg(GPRInfo::regT0))
410             loadConstant(root, count++, Tmp(reg));
411     }
412     Inst& shuffle = root->append(Shuffle, nullptr);
413     for (Reg reg : regs) {
414         if (reg != Reg(GPRInfo::regT0))
415             shuffle.append(Tmp(GPRInfo::regT0), Tmp(reg), Arg::widthArg(Width32));
416     }
417
418     StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked);
419     for (unsigned i = 0; i < regs.size(); ++i)
420         root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t)));
421
422     Vector<int32_t> things(regs.size(), 666);
423     Tmp base = code.newTmp(GP);
424     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base);
425     for (unsigned i = 0; i < regs.size(); ++i) {
426         root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0));
427         root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t)));
428     }
429     
430     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
431     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
432
433     CHECK(!compileAndRun<int>(proc));
434
435     for (int32_t thing : things)
436         CHECK(thing == 35);
437 }
438
439 void testShuffleTreeShift()
440 {
441     B3::Procedure proc;
442     Code& code = proc.code();
443
444     BasicBlock* root = code.addBlock();
445     loadConstant(root, 1, Tmp(GPRInfo::regT0));
446     loadConstant(root, 2, Tmp(GPRInfo::regT1));
447     loadConstant(root, 3, Tmp(GPRInfo::regT2));
448     loadConstant(root, 4, Tmp(GPRInfo::regT3));
449     loadConstant(root, 5, Tmp(GPRInfo::regT4));
450     loadConstant(root, 6, Tmp(GPRInfo::regT5));
451     loadConstant(root, 7, Tmp(GPRInfo::regT6));
452     loadConstant(root, 8, Tmp(GPRInfo::regT7));
453     root->append(
454         Shuffle, nullptr,
455         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
456         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
457         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
458         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
459         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
460         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
461         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT7), Arg::widthArg(Width32));
462
463     int32_t things[8];
464     Tmp base = code.newTmp(GP);
465     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
466     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
467     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
468     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
469     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
470     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
471     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
472     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
473     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
474     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
475     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
476
477     memset(things, 0, sizeof(things));
478     
479     CHECK(!compileAndRun<int>(proc));
480
481     CHECK(things[0] == 1);
482     CHECK(things[1] == 1);
483     CHECK(things[2] == 1);
484     CHECK(things[3] == 2);
485     CHECK(things[4] == 2);
486     CHECK(things[5] == 3);
487     CHECK(things[6] == 3);
488     CHECK(things[7] == 4);
489 }
490
491 void testShuffleTreeShiftBackward()
492 {
493     B3::Procedure proc;
494     Code& code = proc.code();
495
496     BasicBlock* root = code.addBlock();
497     loadConstant(root, 1, Tmp(GPRInfo::regT0));
498     loadConstant(root, 2, Tmp(GPRInfo::regT1));
499     loadConstant(root, 3, Tmp(GPRInfo::regT2));
500     loadConstant(root, 4, Tmp(GPRInfo::regT3));
501     loadConstant(root, 5, Tmp(GPRInfo::regT4));
502     loadConstant(root, 6, Tmp(GPRInfo::regT5));
503     loadConstant(root, 7, Tmp(GPRInfo::regT6));
504     loadConstant(root, 8, Tmp(GPRInfo::regT7));
505     root->append(
506         Shuffle, nullptr,
507         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT7), Arg::widthArg(Width32),
508         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
509         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
510         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
511         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
512         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
513         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32));
514
515     int32_t things[8];
516     Tmp base = code.newTmp(GP);
517     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
518     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
519     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
520     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
521     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
522     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
523     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
524     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
525     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
526     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
527     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
528
529     memset(things, 0, sizeof(things));
530     
531     CHECK(!compileAndRun<int>(proc));
532
533     CHECK(things[0] == 1);
534     CHECK(things[1] == 1);
535     CHECK(things[2] == 1);
536     CHECK(things[3] == 2);
537     CHECK(things[4] == 2);
538     CHECK(things[5] == 3);
539     CHECK(things[6] == 3);
540     CHECK(things[7] == 4);
541 }
542
543 void testShuffleTreeShiftOtherBackward()
544 {
545     // NOTE: This test was my original attempt at TreeShiftBackward but mistakes were made. So, this
546     // ends up being just a weird test. But weird tests are useful, so I kept it.
547     
548     B3::Procedure proc;
549     Code& code = proc.code();
550
551     BasicBlock* root = code.addBlock();
552     loadConstant(root, 1, Tmp(GPRInfo::regT0));
553     loadConstant(root, 2, Tmp(GPRInfo::regT1));
554     loadConstant(root, 3, Tmp(GPRInfo::regT2));
555     loadConstant(root, 4, Tmp(GPRInfo::regT3));
556     loadConstant(root, 5, Tmp(GPRInfo::regT4));
557     loadConstant(root, 6, Tmp(GPRInfo::regT5));
558     loadConstant(root, 7, Tmp(GPRInfo::regT6));
559     loadConstant(root, 8, Tmp(GPRInfo::regT7));
560     root->append(
561         Shuffle, nullptr,
562         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT7), Arg::widthArg(Width32),
563         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Width32),
564         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
565         Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
566         Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
567         Tmp(GPRInfo::regT7), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
568         Tmp(GPRInfo::regT7), Tmp(GPRInfo::regT1), Arg::widthArg(Width32));
569
570     int32_t things[8];
571     Tmp base = code.newTmp(GP);
572     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
573     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
574     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
575     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
576     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
577     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
578     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
579     root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t)));
580     root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t)));
581     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
582     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
583
584     memset(things, 0, sizeof(things));
585     
586     CHECK(!compileAndRun<int>(proc));
587
588     CHECK(things[0] == 1);
589     CHECK(things[1] == 8);
590     CHECK(things[2] == 8);
591     CHECK(things[3] == 7);
592     CHECK(things[4] == 7);
593     CHECK(things[5] == 6);
594     CHECK(things[6] == 6);
595     CHECK(things[7] == 5);
596 }
597
598 void testShuffleMultipleShifts()
599 {
600     B3::Procedure proc;
601     Code& code = proc.code();
602
603     BasicBlock* root = code.addBlock();
604     loadConstant(root, 1, Tmp(GPRInfo::regT0));
605     loadConstant(root, 2, Tmp(GPRInfo::regT1));
606     loadConstant(root, 3, Tmp(GPRInfo::regT2));
607     loadConstant(root, 4, Tmp(GPRInfo::regT3));
608     loadConstant(root, 5, Tmp(GPRInfo::regT4));
609     loadConstant(root, 6, Tmp(GPRInfo::regT5));
610     root->append(
611         Shuffle, nullptr,
612         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
613         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
614         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
615         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT5), Arg::widthArg(Width32));
616
617     int32_t things[6];
618     Tmp base = code.newTmp(GP);
619     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
620     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
621     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
622     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
623     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
624     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
625     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
626     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
627     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
628
629     memset(things, 0, sizeof(things));
630     
631     CHECK(!compileAndRun<int>(proc));
632
633     CHECK(things[0] == 1);
634     CHECK(things[1] == 1);
635     CHECK(things[2] == 3);
636     CHECK(things[3] == 3);
637     CHECK(things[4] == 3);
638     CHECK(things[5] == 1);
639 }
640
641 void testShuffleRotateWithFringe()
642 {
643     B3::Procedure proc;
644     Code& code = proc.code();
645
646     BasicBlock* root = code.addBlock();
647     loadConstant(root, 1, Tmp(GPRInfo::regT0));
648     loadConstant(root, 2, Tmp(GPRInfo::regT1));
649     loadConstant(root, 3, Tmp(GPRInfo::regT2));
650     loadConstant(root, 4, Tmp(GPRInfo::regT3));
651     loadConstant(root, 5, Tmp(GPRInfo::regT4));
652     loadConstant(root, 6, Tmp(GPRInfo::regT5));
653     root->append(
654         Shuffle, nullptr,
655         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
656         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
657         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
658         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
659         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
660         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Width32));
661
662     int32_t things[6];
663     Tmp base = code.newTmp(GP);
664     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
665     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
666     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
667     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
668     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
669     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
670     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
671     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
672     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
673
674     memset(things, 0, sizeof(things));
675     
676     CHECK(!compileAndRun<int>(proc));
677
678     CHECK(things[0] == 3);
679     CHECK(things[1] == 1);
680     CHECK(things[2] == 2);
681     CHECK(things[3] == 1);
682     CHECK(things[4] == 2);
683     CHECK(things[5] == 3);
684 }
685
686 void testShuffleRotateWithFringeInWeirdOrder()
687 {
688     B3::Procedure proc;
689     Code& code = proc.code();
690
691     BasicBlock* root = code.addBlock();
692     loadConstant(root, 1, Tmp(GPRInfo::regT0));
693     loadConstant(root, 2, Tmp(GPRInfo::regT1));
694     loadConstant(root, 3, Tmp(GPRInfo::regT2));
695     loadConstant(root, 4, Tmp(GPRInfo::regT3));
696     loadConstant(root, 5, Tmp(GPRInfo::regT4));
697     loadConstant(root, 6, Tmp(GPRInfo::regT5));
698     root->append(
699         Shuffle, nullptr,
700         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
701         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
702         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
703         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
704         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
705         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32));
706
707     int32_t things[6];
708     Tmp base = code.newTmp(GP);
709     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
710     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
711     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
712     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
713     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
714     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
715     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
716     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
717     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
718
719     memset(things, 0, sizeof(things));
720     
721     CHECK(!compileAndRun<int>(proc));
722
723     CHECK(things[0] == 3);
724     CHECK(things[1] == 1);
725     CHECK(things[2] == 2);
726     CHECK(things[3] == 1);
727     CHECK(things[4] == 2);
728     CHECK(things[5] == 3);
729 }
730
731 void testShuffleRotateWithLongFringe()
732 {
733     B3::Procedure proc;
734     Code& code = proc.code();
735
736     BasicBlock* root = code.addBlock();
737     loadConstant(root, 1, Tmp(GPRInfo::regT0));
738     loadConstant(root, 2, Tmp(GPRInfo::regT1));
739     loadConstant(root, 3, Tmp(GPRInfo::regT2));
740     loadConstant(root, 4, Tmp(GPRInfo::regT3));
741     loadConstant(root, 5, Tmp(GPRInfo::regT4));
742     loadConstant(root, 6, Tmp(GPRInfo::regT5));
743     root->append(
744         Shuffle, nullptr,
745         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
746         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
747         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
748         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
749         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
750         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32));
751
752     int32_t things[6];
753     Tmp base = code.newTmp(GP);
754     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
755     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
756     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
757     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
758     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
759     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
760     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
761     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
762     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
763
764     memset(things, 0, sizeof(things));
765     
766     CHECK(!compileAndRun<int>(proc));
767
768     CHECK(things[0] == 3);
769     CHECK(things[1] == 1);
770     CHECK(things[2] == 2);
771     CHECK(things[3] == 1);
772     CHECK(things[4] == 4);
773     CHECK(things[5] == 5);
774 }
775
776 void testShuffleMultipleRotates()
777 {
778     B3::Procedure proc;
779     Code& code = proc.code();
780
781     BasicBlock* root = code.addBlock();
782     loadConstant(root, 1, Tmp(GPRInfo::regT0));
783     loadConstant(root, 2, Tmp(GPRInfo::regT1));
784     loadConstant(root, 3, Tmp(GPRInfo::regT2));
785     loadConstant(root, 4, Tmp(GPRInfo::regT3));
786     loadConstant(root, 5, Tmp(GPRInfo::regT4));
787     loadConstant(root, 6, Tmp(GPRInfo::regT5));
788     root->append(
789         Shuffle, nullptr,
790         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
791         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
792         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
793         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
794         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32),
795         Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT3), Arg::widthArg(Width32));
796
797     int32_t things[6];
798     Tmp base = code.newTmp(GP);
799     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
800     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
801     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
802     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
803     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
804     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
805     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
806     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
807     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
808
809     memset(things, 0, sizeof(things));
810     
811     CHECK(!compileAndRun<int>(proc));
812
813     CHECK(things[0] == 3);
814     CHECK(things[1] == 1);
815     CHECK(things[2] == 2);
816     CHECK(things[3] == 6);
817     CHECK(things[4] == 4);
818     CHECK(things[5] == 5);
819 }
820
821 void testShuffleShiftAndRotate()
822 {
823     B3::Procedure proc;
824     Code& code = proc.code();
825
826     BasicBlock* root = code.addBlock();
827     loadConstant(root, 1, Tmp(GPRInfo::regT0));
828     loadConstant(root, 2, Tmp(GPRInfo::regT1));
829     loadConstant(root, 3, Tmp(GPRInfo::regT2));
830     loadConstant(root, 4, Tmp(GPRInfo::regT3));
831     loadConstant(root, 5, Tmp(GPRInfo::regT4));
832     loadConstant(root, 6, Tmp(GPRInfo::regT5));
833     root->append(
834         Shuffle, nullptr,
835         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
836         Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Width32),
837         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Width32),
838         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32),
839         Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Width32));
840
841     int32_t things[6];
842     Tmp base = code.newTmp(GP);
843     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
844     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
845     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
846     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
847     root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t)));
848     root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t)));
849     root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t)));
850     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
851     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
852
853     memset(things, 0, sizeof(things));
854     
855     CHECK(!compileAndRun<int>(proc));
856
857     CHECK(things[0] == 3);
858     CHECK(things[1] == 1);
859     CHECK(things[2] == 2);
860     CHECK(things[3] == 4);
861     CHECK(things[4] == 4);
862     CHECK(things[5] == 5);
863 }
864
865 void testShuffleShiftAllRegs()
866 {
867     B3::Procedure proc;
868     Code& code = proc.code();
869
870     const Vector<Reg>& regs = code.regsInPriorityOrder(GP);
871
872     BasicBlock* root = code.addBlock();
873     for (unsigned i = 0; i < regs.size(); ++i)
874         loadConstant(root, 35 + i, Tmp(regs[i]));
875     Inst& shuffle = root->append(Shuffle, nullptr);
876     for (unsigned i = 1; i < regs.size(); ++i)
877         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width32));
878
879     StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked);
880     for (unsigned i = 0; i < regs.size(); ++i)
881         root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t)));
882
883     Vector<int32_t> things(regs.size(), 666);
884     Tmp base = code.newTmp(GP);
885     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base);
886     for (unsigned i = 0; i < regs.size(); ++i) {
887         root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0));
888         root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t)));
889     }
890     
891     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
892     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
893
894     CHECK(!compileAndRun<int>(proc));
895
896     CHECK(things[0] == 35);
897     for (unsigned i = 1; i < regs.size(); ++i)
898         CHECK(things[i] == 35 + static_cast<int32_t>(i) - 1);
899 }
900
901 void testShuffleRotateAllRegs()
902 {
903     B3::Procedure proc;
904     Code& code = proc.code();
905
906     const Vector<Reg>& regs = code.regsInPriorityOrder(GP);
907
908     BasicBlock* root = code.addBlock();
909     for (unsigned i = 0; i < regs.size(); ++i)
910         loadConstant(root, 35 + i, Tmp(regs[i]));
911     Inst& shuffle = root->append(Shuffle, nullptr);
912     for (unsigned i = 1; i < regs.size(); ++i)
913         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width32));
914     shuffle.append(Tmp(regs.last()), Tmp(regs[0]), Arg::widthArg(Width32));
915
916     StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked);
917     for (unsigned i = 0; i < regs.size(); ++i)
918         root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t)));
919
920     Vector<int32_t> things(regs.size(), 666);
921     Tmp base = code.newTmp(GP);
922     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base);
923     for (unsigned i = 0; i < regs.size(); ++i) {
924         root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0));
925         root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t)));
926     }
927     
928     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
929     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
930
931     CHECK(!compileAndRun<int>(proc));
932
933     CHECK(things[0] == 35 + static_cast<int32_t>(regs.size()) - 1);
934     for (unsigned i = 1; i < regs.size(); ++i)
935         CHECK(things[i] == 35 + static_cast<int32_t>(i) - 1);
936 }
937
938 void testShuffleSimpleSwap64()
939 {
940     B3::Procedure proc;
941     Code& code = proc.code();
942
943     BasicBlock* root = code.addBlock();
944     loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0));
945     loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1));
946     loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2));
947     loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3));
948     root->append(
949         Shuffle, nullptr,
950         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width64),
951         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Width64));
952
953     int64_t things[4];
954     Tmp base = code.newTmp(GP);
955     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
956     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
957     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
958     root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t)));
959     root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t)));
960     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
961     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
962
963     memset(things, 0, sizeof(things));
964     
965     CHECK(!compileAndRun<int>(proc));
966
967     CHECK(things[0] == 10000000000000000ll);
968     CHECK(things[1] == 20000000000000000ll);
969     CHECK(things[2] == 40000000000000000ll);
970     CHECK(things[3] == 30000000000000000ll);
971 }
972
973 void testShuffleSimpleShift64()
974 {
975     B3::Procedure proc;
976     Code& code = proc.code();
977
978     BasicBlock* root = code.addBlock();
979     loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0));
980     loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1));
981     loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2));
982     loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3));
983     loadConstant(root, 50000000000000000ll, Tmp(GPRInfo::regT4));
984     root->append(
985         Shuffle, nullptr,
986         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width64),
987         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width64));
988
989     int64_t things[5];
990     Tmp base = code.newTmp(GP);
991     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
992     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
993     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
994     root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t)));
995     root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t)));
996     root->append(Move, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int64_t)));
997     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
998     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
999
1000     memset(things, 0, sizeof(things));
1001     
1002     CHECK(!compileAndRun<int>(proc));
1003
1004     CHECK(things[0] == 10000000000000000ll);
1005     CHECK(things[1] == 20000000000000000ll);
1006     CHECK(things[2] == 30000000000000000ll);
1007     CHECK(things[3] == 30000000000000000ll);
1008     CHECK(things[4] == 40000000000000000ll);
1009 }
1010
1011 void testShuffleSwapMixedWidth()
1012 {
1013     B3::Procedure proc;
1014     Code& code = proc.code();
1015
1016     BasicBlock* root = code.addBlock();
1017     loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0));
1018     loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1));
1019     loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2));
1020     loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3));
1021     root->append(
1022         Shuffle, nullptr,
1023         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width32),
1024         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Width64));
1025
1026     int64_t things[4];
1027     Tmp base = code.newTmp(GP);
1028     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1029     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
1030     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
1031     root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t)));
1032     root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t)));
1033     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1034     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1035
1036     memset(things, 0, sizeof(things));
1037     
1038     CHECK(!compileAndRun<int>(proc));
1039
1040     CHECK(things[0] == 10000000000000000ll);
1041     CHECK(things[1] == 20000000000000000ll);
1042     CHECK(things[2] == 40000000000000000ll);
1043     CHECK(things[3] == static_cast<uint32_t>(30000000000000000ll));
1044 }
1045
1046 void testShuffleShiftMixedWidth()
1047 {
1048     B3::Procedure proc;
1049     Code& code = proc.code();
1050
1051     BasicBlock* root = code.addBlock();
1052     loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0));
1053     loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1));
1054     loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2));
1055     loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3));
1056     loadConstant(root, 50000000000000000ll, Tmp(GPRInfo::regT4));
1057     root->append(
1058         Shuffle, nullptr,
1059         Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Width64),
1060         Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Width32));
1061
1062     int64_t things[5];
1063     Tmp base = code.newTmp(GP);
1064     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1065     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
1066     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
1067     root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t)));
1068     root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t)));
1069     root->append(Move, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int64_t)));
1070     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1071     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1072
1073     memset(things, 0, sizeof(things));
1074     
1075     CHECK(!compileAndRun<int>(proc));
1076
1077     CHECK(things[0] == 10000000000000000ll);
1078     CHECK(things[1] == 20000000000000000ll);
1079     CHECK(things[2] == 30000000000000000ll);
1080     CHECK(things[3] == 30000000000000000ll);
1081     CHECK(things[4] == static_cast<uint32_t>(40000000000000000ll));
1082 }
1083
1084 void testShuffleShiftMemory()
1085 {
1086     B3::Procedure proc;
1087     Code& code = proc.code();
1088
1089     int32_t memory[2];
1090     memory[0] = 35;
1091     memory[1] = 36;
1092
1093     BasicBlock* root = code.addBlock();
1094     loadConstant(root, 1, Tmp(GPRInfo::regT0));
1095     loadConstant(root, 2, Tmp(GPRInfo::regT1));
1096     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2));
1097     root->append(
1098         Shuffle, nullptr,
1099         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
1100         Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)),
1101         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Arg::widthArg(Width32));
1102
1103     int32_t things[2];
1104     Tmp base = code.newTmp(GP);
1105     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1106     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
1107     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
1108     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1109     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1110
1111     memset(things, 0, sizeof(things));
1112     
1113     CHECK(!compileAndRun<int>(proc));
1114
1115     CHECK(things[0] == 1);
1116     CHECK(things[1] == 1);
1117     CHECK(memory[0] == 35);
1118     CHECK(memory[1] == 35);
1119 }
1120
1121 void testShuffleShiftMemoryLong()
1122 {
1123     B3::Procedure proc;
1124     Code& code = proc.code();
1125
1126     int32_t memory[2];
1127     memory[0] = 35;
1128     memory[1] = 36;
1129
1130     BasicBlock* root = code.addBlock();
1131     loadConstant(root, 1, Tmp(GPRInfo::regT0));
1132     loadConstant(root, 2, Tmp(GPRInfo::regT1));
1133     loadConstant(root, 3, Tmp(GPRInfo::regT2));
1134     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT3));
1135     root->append(
1136         Shuffle, nullptr,
1137         
1138         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
1139         
1140         Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT3), 0 * sizeof(int32_t)),
1141         Arg::widthArg(Width32),
1142         
1143         Arg::addr(Tmp(GPRInfo::regT3), 0 * sizeof(int32_t)),
1144         Arg::addr(Tmp(GPRInfo::regT3), 1 * sizeof(int32_t)), Arg::widthArg(Width32),
1145
1146         Arg::addr(Tmp(GPRInfo::regT3), 1 * sizeof(int32_t)), Tmp(GPRInfo::regT2),
1147         Arg::widthArg(Width32));
1148
1149     int32_t things[3];
1150     Tmp base = code.newTmp(GP);
1151     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1152     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
1153     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
1154     root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t)));
1155     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1156     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1157
1158     memset(things, 0, sizeof(things));
1159     
1160     CHECK(!compileAndRun<int>(proc));
1161
1162     CHECK(things[0] == 1);
1163     CHECK(things[1] == 1);
1164     CHECK(things[2] == 36);
1165     CHECK(memory[0] == 2);
1166     CHECK(memory[1] == 35);
1167 }
1168
1169 void testShuffleShiftMemoryAllRegs()
1170 {
1171     B3::Procedure proc;
1172     Code& code = proc.code();
1173
1174     int32_t memory[2];
1175     memory[0] = 35;
1176     memory[1] = 36;
1177
1178     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1179     regs.removeFirst(Reg(GPRInfo::regT0));
1180
1181     BasicBlock* root = code.addBlock();
1182     for (unsigned i = 0; i < regs.size(); ++i)
1183         loadConstant(root, i + 1, Tmp(regs[i]));
1184     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1185     Inst& shuffle = root->append(
1186         Shuffle, nullptr,
1187         
1188         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int32_t)),
1189         Arg::widthArg(Width32),
1190         
1191         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int32_t)),
1192         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int32_t)), Arg::widthArg(Width32),
1193
1194         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int32_t)), Tmp(regs[1]),
1195         Arg::widthArg(Width32));
1196
1197     for (unsigned i = 2; i < regs.size(); ++i)
1198         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width32));
1199
1200     Vector<int32_t> things(regs.size(), 666);
1201     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1202     for (unsigned i = 0; i < regs.size(); ++i) {
1203         root->append(
1204             Move32, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int32_t)));
1205     }
1206     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1207     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1208
1209     CHECK(!compileAndRun<int>(proc));
1210
1211     CHECK(things[0] == 1);
1212     CHECK(things[1] == 36);
1213     for (unsigned i = 2; i < regs.size(); ++i)
1214         CHECK(things[i] == static_cast<int32_t>(i));
1215     CHECK(memory[0] == 1);
1216     CHECK(memory[1] == 35);
1217 }
1218
1219 void testShuffleShiftMemoryAllRegs64()
1220 {
1221     B3::Procedure proc;
1222     Code& code = proc.code();
1223
1224     int64_t memory[2];
1225     memory[0] = 35000000000000ll;
1226     memory[1] = 36000000000000ll;
1227
1228     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1229     regs.removeFirst(Reg(GPRInfo::regT0));
1230
1231     BasicBlock* root = code.addBlock();
1232     for (unsigned i = 0; i < regs.size(); ++i)
1233         loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i]));
1234     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1235     Inst& shuffle = root->append(
1236         Shuffle, nullptr,
1237         
1238         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1239         Arg::widthArg(Width64),
1240         
1241         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1242         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1243
1244         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
1245         Arg::widthArg(Width64));
1246
1247     for (unsigned i = 2; i < regs.size(); ++i)
1248         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width64));
1249
1250     Vector<int64_t> things(regs.size(), 666);
1251     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1252     for (unsigned i = 0; i < regs.size(); ++i) {
1253         root->append(
1254             Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
1255     }
1256     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1257     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1258
1259     CHECK(!compileAndRun<int>(proc));
1260
1261     CHECK(things[0] == 1000000000000ll);
1262     CHECK(things[1] == 36000000000000ll);
1263     for (unsigned i = 2; i < regs.size(); ++i)
1264         CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll);
1265     CHECK(memory[0] == 1000000000000ll);
1266     CHECK(memory[1] == 35000000000000ll);
1267 }
1268
// Returns |high| with the four bytes at its lowest address replaced by the
// bytes of static_cast<int32_t>(low). This is the value an int64_t memory cell
// holding |high| has after a 32-bit store of |low| to the cell's address; on a
// little-endian target that means the low 32 bits come from |low| and the high
// 32 bits from |high|.
int64_t combineHiLo(int64_t high, int64_t low)
{
    // Overwrite the leading bytes with memcpy rather than writing one union
    // member and reading another, which is undefined behavior in ISO C++.
    const int32_t firstWord = static_cast<int32_t>(low);
    int64_t result = high;
    memcpy(&result, &firstWord, sizeof(firstWord));
    return result;
}
1279
1280 void testShuffleShiftMemoryAllRegsMixedWidth()
1281 {
1282     B3::Procedure proc;
1283     Code& code = proc.code();
1284
1285     int64_t memory[2];
1286     memory[0] = 35000000000000ll;
1287     memory[1] = 36000000000000ll;
1288
1289     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1290     regs.removeFirst(Reg(GPRInfo::regT0));
1291
1292     BasicBlock* root = code.addBlock();
1293     for (unsigned i = 0; i < regs.size(); ++i)
1294         loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i]));
1295     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1296     Inst& shuffle = root->append(
1297         Shuffle, nullptr,
1298         
1299         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1300         Arg::widthArg(Width32),
1301         
1302         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1303         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1304
1305         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
1306         Arg::widthArg(Width32));
1307
1308     for (unsigned i = 2; i < regs.size(); ++i) {
1309         shuffle.append(
1310             Tmp(regs[i - 1]), Tmp(regs[i]),
1311             (i & 1) ? Arg::widthArg(Width32) : Arg::widthArg(Width64));
1312     }
1313
1314     Vector<int64_t> things(regs.size(), 666);
1315     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1316     for (unsigned i = 0; i < regs.size(); ++i) {
1317         root->append(
1318             Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
1319     }
1320     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1321     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1322
1323     CHECK(!compileAndRun<int>(proc));
1324
1325     CHECK(things[0] == 1000000000000ll);
1326     CHECK(things[1] == static_cast<uint32_t>(36000000000000ll));
1327     for (unsigned i = 2; i < regs.size(); ++i) {
1328         int64_t value = static_cast<int64_t>(i) * 1000000000000ll;
1329         CHECK(things[i] == ((i & 1) ? static_cast<uint32_t>(value) : value));
1330     }
1331     CHECK(memory[0] == combineHiLo(35000000000000ll, 1000000000000ll));
1332     CHECK(memory[1] == 35000000000000ll);
1333 }
1334
1335 void testShuffleRotateMemory()
1336 {
1337     B3::Procedure proc;
1338     Code& code = proc.code();
1339
1340     int32_t memory[2];
1341     memory[0] = 35;
1342     memory[1] = 36;
1343
1344     BasicBlock* root = code.addBlock();
1345     loadConstant(root, 1, Tmp(GPRInfo::regT0));
1346     loadConstant(root, 2, Tmp(GPRInfo::regT1));
1347     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2));
1348     root->append(
1349         Shuffle, nullptr,
1350         
1351         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
1352
1353         Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)),
1354         Arg::widthArg(Width32),
1355         
1356         Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)),
1357         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Arg::widthArg(Width32),
1358
1359         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Tmp(GPRInfo::regT0),
1360         Arg::widthArg(Width32));
1361
1362     int32_t things[2];
1363     Tmp base = code.newTmp(GP);
1364     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1365     root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t)));
1366     root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t)));
1367     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1368     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1369
1370     memset(things, 0, sizeof(things));
1371     
1372     CHECK(!compileAndRun<int>(proc));
1373
1374     CHECK(things[0] == 36);
1375     CHECK(things[1] == 1);
1376     CHECK(memory[0] == 2);
1377     CHECK(memory[1] == 35);
1378 }
1379
1380 void testShuffleRotateMemory64()
1381 {
1382     B3::Procedure proc;
1383     Code& code = proc.code();
1384
1385     int64_t memory[2];
1386     memory[0] = 35000000000000ll;
1387     memory[1] = 36000000000000ll;
1388
1389     BasicBlock* root = code.addBlock();
1390     loadConstant(root, 1000000000000ll, Tmp(GPRInfo::regT0));
1391     loadConstant(root, 2000000000000ll, Tmp(GPRInfo::regT1));
1392     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2));
1393     root->append(
1394         Shuffle, nullptr,
1395         
1396         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width64),
1397
1398         Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)),
1399         Arg::widthArg(Width64),
1400         
1401         Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)),
1402         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1403
1404         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Tmp(GPRInfo::regT0),
1405         Arg::widthArg(Width64));
1406
1407     int64_t things[2];
1408     Tmp base = code.newTmp(GP);
1409     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1410     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
1411     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
1412     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1413     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1414
1415     memset(things, 0, sizeof(things));
1416     
1417     CHECK(!compileAndRun<int>(proc));
1418
1419     CHECK(things[0] == 36000000000000ll);
1420     CHECK(things[1] == 1000000000000ll);
1421     CHECK(memory[0] == 2000000000000ll);
1422     CHECK(memory[1] == 35000000000000ll);
1423 }
1424
1425 void testShuffleRotateMemoryMixedWidth()
1426 {
1427     B3::Procedure proc;
1428     Code& code = proc.code();
1429
1430     int64_t memory[2];
1431     memory[0] = 35000000000000ll;
1432     memory[1] = 36000000000000ll;
1433
1434     BasicBlock* root = code.addBlock();
1435     loadConstant(root, 1000000000000ll, Tmp(GPRInfo::regT0));
1436     loadConstant(root, 2000000000000ll, Tmp(GPRInfo::regT1));
1437     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2));
1438     root->append(
1439         Shuffle, nullptr,
1440         
1441         Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Width32),
1442
1443         Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)),
1444         Arg::widthArg(Width64),
1445         
1446         Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)),
1447         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Arg::widthArg(Width32),
1448
1449         Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Tmp(GPRInfo::regT0),
1450         Arg::widthArg(Width64));
1451
1452     int64_t things[2];
1453     Tmp base = code.newTmp(GP);
1454     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1455     root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t)));
1456     root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t)));
1457     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1458     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1459
1460     memset(things, 0, sizeof(things));
1461     
1462     CHECK(!compileAndRun<int>(proc));
1463
1464     CHECK(things[0] == 36000000000000ll);
1465     CHECK(things[1] == static_cast<uint32_t>(1000000000000ll));
1466     CHECK(memory[0] == 2000000000000ll);
1467     CHECK(memory[1] == combineHiLo(36000000000000ll, 35000000000000ll));
1468 }
1469
1470 void testShuffleRotateMemoryAllRegs64()
1471 {
1472     B3::Procedure proc;
1473     Code& code = proc.code();
1474
1475     int64_t memory[2];
1476     memory[0] = 35000000000000ll;
1477     memory[1] = 36000000000000ll;
1478
1479     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1480     regs.removeFirst(Reg(GPRInfo::regT0));
1481
1482     BasicBlock* root = code.addBlock();
1483     for (unsigned i = 0; i < regs.size(); ++i)
1484         loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i]));
1485     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1486     Inst& shuffle = root->append(
1487         Shuffle, nullptr,
1488         
1489         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1490         Arg::widthArg(Width64),
1491         
1492         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1493         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1494
1495         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
1496         Arg::widthArg(Width64),
1497
1498         regs.last(), regs[0], Arg::widthArg(Width64));
1499
1500     for (unsigned i = 2; i < regs.size(); ++i)
1501         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width64));
1502
1503     Vector<int64_t> things(regs.size(), 666);
1504     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1505     for (unsigned i = 0; i < regs.size(); ++i) {
1506         root->append(
1507             Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
1508     }
1509     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1510     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1511
1512     CHECK(!compileAndRun<int>(proc));
1513
1514     CHECK(things[0] == static_cast<int64_t>(regs.size()) * 1000000000000ll);
1515     CHECK(things[1] == 36000000000000ll);
1516     for (unsigned i = 2; i < regs.size(); ++i)
1517         CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll);
1518     CHECK(memory[0] == 1000000000000ll);
1519     CHECK(memory[1] == 35000000000000ll);
1520 }
1521
1522 void testShuffleRotateMemoryAllRegsMixedWidth()
1523 {
1524     B3::Procedure proc;
1525     Code& code = proc.code();
1526
1527     int64_t memory[2];
1528     memory[0] = 35000000000000ll;
1529     memory[1] = 36000000000000ll;
1530
1531     Vector<Reg> regs = code.regsInPriorityOrder(GP);
1532     regs.removeFirst(Reg(GPRInfo::regT0));
1533
1534     BasicBlock* root = code.addBlock();
1535     for (unsigned i = 0; i < regs.size(); ++i)
1536         loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i]));
1537     root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
1538     Inst& shuffle = root->append(
1539         Shuffle, nullptr,
1540         
1541         Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1542         Arg::widthArg(Width32),
1543         
1544         Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
1545         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Width64),
1546
1547         Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
1548         Arg::widthArg(Width32),
1549
1550         regs.last(), regs[0], Arg::widthArg(Width32));
1551
1552     for (unsigned i = 2; i < regs.size(); ++i)
1553         shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Width64));
1554
1555     Vector<int64_t> things(regs.size(), 666);
1556     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
1557     for (unsigned i = 0; i < regs.size(); ++i) {
1558         root->append(
1559             Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
1560     }
1561     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1562     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1563
1564     CHECK(!compileAndRun<int>(proc));
1565
1566     CHECK(things[0] == static_cast<uint32_t>(static_cast<int64_t>(regs.size()) * 1000000000000ll));
1567     CHECK(things[1] == static_cast<uint32_t>(36000000000000ll));
1568     for (unsigned i = 2; i < regs.size(); ++i)
1569         CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll);
1570     CHECK(memory[0] == combineHiLo(35000000000000ll, 1000000000000ll));
1571     CHECK(memory[1] == 35000000000000ll);
1572 }
1573
1574 void testShuffleSwapDouble()
1575 {
1576     B3::Procedure proc;
1577     Code& code = proc.code();
1578
1579     BasicBlock* root = code.addBlock();
1580     loadDoubleConstant(root, 1, Tmp(FPRInfo::fpRegT0), Tmp(GPRInfo::regT0));
1581     loadDoubleConstant(root, 2, Tmp(FPRInfo::fpRegT1), Tmp(GPRInfo::regT0));
1582     loadDoubleConstant(root, 3, Tmp(FPRInfo::fpRegT2), Tmp(GPRInfo::regT0));
1583     loadDoubleConstant(root, 4, Tmp(FPRInfo::fpRegT3), Tmp(GPRInfo::regT0));
1584     root->append(
1585         Shuffle, nullptr,
1586         Tmp(FPRInfo::fpRegT2), Tmp(FPRInfo::fpRegT3), Arg::widthArg(Width64),
1587         Tmp(FPRInfo::fpRegT3), Tmp(FPRInfo::fpRegT2), Arg::widthArg(Width64));
1588
1589     double things[4];
1590     Tmp base = code.newTmp(GP);
1591     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1592     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT0), Arg::addr(base, 0 * sizeof(double)));
1593     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT1), Arg::addr(base, 1 * sizeof(double)));
1594     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT2), Arg::addr(base, 2 * sizeof(double)));
1595     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT3), Arg::addr(base, 3 * sizeof(double)));
1596     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1597     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1598
1599     memset(things, 0, sizeof(things));
1600     
1601     CHECK(!compileAndRun<int>(proc));
1602
1603     CHECK(things[0] == 1);
1604     CHECK(things[1] == 2);
1605     CHECK(things[2] == 4);
1606     CHECK(things[3] == 3);
1607 }
1608
1609 void testShuffleShiftDouble()
1610 {
1611     B3::Procedure proc;
1612     Code& code = proc.code();
1613
1614     BasicBlock* root = code.addBlock();
1615     loadDoubleConstant(root, 1, Tmp(FPRInfo::fpRegT0), Tmp(GPRInfo::regT0));
1616     loadDoubleConstant(root, 2, Tmp(FPRInfo::fpRegT1), Tmp(GPRInfo::regT0));
1617     loadDoubleConstant(root, 3, Tmp(FPRInfo::fpRegT2), Tmp(GPRInfo::regT0));
1618     loadDoubleConstant(root, 4, Tmp(FPRInfo::fpRegT3), Tmp(GPRInfo::regT0));
1619     root->append(
1620         Shuffle, nullptr,
1621         Tmp(FPRInfo::fpRegT2), Tmp(FPRInfo::fpRegT3), Arg::widthArg(Width64));
1622
1623     double things[4];
1624     Tmp base = code.newTmp(GP);
1625     root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
1626     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT0), Arg::addr(base, 0 * sizeof(double)));
1627     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT1), Arg::addr(base, 1 * sizeof(double)));
1628     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT2), Arg::addr(base, 2 * sizeof(double)));
1629     root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT3), Arg::addr(base, 3 * sizeof(double)));
1630     root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
1631     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1632
1633     memset(things, 0, sizeof(things));
1634     
1635     CHECK(!compileAndRun<int>(proc));
1636
1637     CHECK(things[0] == 1);
1638     CHECK(things[1] == 2);
1639     CHECK(things[2] == 3);
1640     CHECK(things[3] == 3);
1641 }
1642
1643 #if CPU(X86) || CPU(X86_64)
1644 void testX86VMULSD()
1645 {
1646     B3::Procedure proc;
1647     Code& code = proc.code();
1648
1649     BasicBlock* root = code.addBlock();
1650     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(FPRInfo::argumentFPR1), Tmp(FPRInfo::argumentFPR2));
1651     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
1652     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1653
1654     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1655 }
1656
1657 void testX86VMULSDDestRex()
1658 {
1659     B3::Procedure proc;
1660     Code& code = proc.code();
1661
1662     BasicBlock* root = code.addBlock();
1663     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm15));
1664     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1665     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1666
1667     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1668 }
1669
1670 void testX86VMULSDOp1DestRex()
1671 {
1672     B3::Procedure proc;
1673     Code& code = proc.code();
1674
1675     BasicBlock* root = code.addBlock();
1676     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
1677     root->append(MulDouble, nullptr, Tmp(X86Registers::xmm14), Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm15));
1678     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1679     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1680
1681     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1682 }
1683
1684 void testX86VMULSDOp2DestRex()
1685 {
1686     B3::Procedure proc;
1687     Code& code = proc.code();
1688
1689     BasicBlock* root = code.addBlock();
1690     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm14));
1691     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm15));
1692     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1693     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1694
1695     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1696 }
1697
1698 void testX86VMULSDOpsDestRex()
1699 {
1700     B3::Procedure proc;
1701     Code& code = proc.code();
1702
1703     BasicBlock* root = code.addBlock();
1704     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
1705     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm13));
1706     root->append(MulDouble, nullptr, Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm13), Tmp(X86Registers::xmm15));
1707     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1708     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1709
1710     CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
1711 }
1712
1713 void testX86VMULSDAddr()
1714 {
1715     B3::Procedure proc;
1716     Code& code = proc.code();
1717
1718     BasicBlock* root = code.addBlock();
1719     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(GPRInfo::argumentGPR0), - 16), Tmp(FPRInfo::argumentFPR2));
1720     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
1721     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1722
1723     double secondArg = 4.2;
1724     CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 2, pureNaN()) == 2.4 * 4.2);
1725 }
1726
1727 void testX86VMULSDAddrOpRexAddr()
1728 {
1729     B3::Procedure proc;
1730     Code& code = proc.code();
1731
1732     BasicBlock* root = code.addBlock();
1733     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
1734     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(X86Registers::r13), - 16), Tmp(FPRInfo::argumentFPR2));
1735     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
1736     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1737
1738     double secondArg = 4.2;
1739     CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 2, pureNaN()) == 2.4 * 4.2);
1740 }
1741
1742 void testX86VMULSDDestRexAddr()
1743 {
1744     B3::Procedure proc;
1745     Code& code = proc.code();
1746
1747     BasicBlock* root = code.addBlock();
1748     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(GPRInfo::argumentGPR0), 16), Tmp(X86Registers::xmm15));
1749     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1750     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1751
1752     double secondArg = 4.2;
1753     CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 2, pureNaN()) == 2.4 * 4.2);
1754 }
1755
1756 void testX86VMULSDRegOpDestRexAddr()
1757 {
1758     B3::Procedure proc;
1759     Code& code = proc.code();
1760
1761     BasicBlock* root = code.addBlock();
1762     root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
1763     root->append(MulDouble, nullptr, Arg::addr(Tmp(GPRInfo::argumentGPR0)), Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm15));
1764     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1765     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1766
1767     double secondArg = 4.2;
1768     CHECK(compileAndRun<double>(proc, 2.4, &secondArg, pureNaN()) == 2.4 * 4.2);
1769 }
1770
1771 void testX86VMULSDAddrOpDestRexAddr()
1772 {
1773     B3::Procedure proc;
1774     Code& code = proc.code();
1775
1776     BasicBlock* root = code.addBlock();
1777     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
1778     root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(X86Registers::r13), 8), Tmp(X86Registers::xmm15));
1779     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
1780     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1781
1782     double secondArg = 4.2;
1783     CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, pureNaN()) == 2.4 * 4.2);
1784 }
1785
1786 void testX86VMULSDBaseNeedsRex()
1787 {
1788     B3::Procedure proc;
1789     Code& code = proc.code();
1790
1791     BasicBlock* root = code.addBlock();
1792     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
1793     root->append(MulDouble, nullptr, Arg::index(Tmp(X86Registers::r13), Tmp(GPRInfo::argumentGPR1)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
1794     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
1795     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1796
1797     double secondArg = 4.2;
1798     uint64_t index = 8;
1799     CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, index, pureNaN()) == 2.4 * 4.2);
1800 }
1801
1802 void testX86VMULSDIndexNeedsRex()
1803 {
1804     B3::Procedure proc;
1805     Code& code = proc.code();
1806
1807     BasicBlock* root = code.addBlock();
1808     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR1), Tmp(X86Registers::r13));
1809     root->append(MulDouble, nullptr, Arg::index(Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
1810     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
1811     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1812
1813     double secondArg = 4.2;
1814     uint64_t index = - 8;
1815     CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 1, index, pureNaN()) == 2.4 * 4.2);
1816 }
1817
1818 void testX86VMULSDBaseIndexNeedRex()
1819 {
1820     B3::Procedure proc;
1821     Code& code = proc.code();
1822
1823     BasicBlock* root = code.addBlock();
1824     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r12));
1825     root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR1), Tmp(X86Registers::r13));
1826     root->append(MulDouble, nullptr, Arg::index(Tmp(X86Registers::r12), Tmp(X86Registers::r13)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
1827     root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
1828     root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
1829
1830     double secondArg = 4.2;
1831     uint64_t index = 16;
1832     CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 2, index, pureNaN()) == 2.4 * 4.2);
1833 }
1834 #endif // #if CPU(X86) || CPU(X86_64)
1835
1836 void testArgumentRegPinned()
1837 {
1838     B3::Procedure proc;
1839     Code& code = proc.code();
1840     GPRReg pinned = GPRInfo::argumentGPR0;
1841     proc.pinRegister(pinned);
1842
1843     B3::Air::Special* patchpointSpecial = code.addSpecial(std::make_unique<B3::PatchpointSpecial>());
1844
1845     B3::BasicBlock* b3Root = proc.addBlock();
1846     B3::PatchpointValue* patchpoint = b3Root->appendNew<B3::PatchpointValue>(proc, B3::Void, B3::Origin());
1847     patchpoint->clobber(RegisterSet(pinned));
1848     patchpoint->setGenerator(
1849         [=] (CCallHelpers& jit, const B3::StackmapGenerationParams&) {
1850             jit.move(CCallHelpers::TrustedImm32(42), pinned);
1851         });
1852
1853     BasicBlock* root = code.addBlock();
1854
1855     Tmp t1 = code.newTmp(GP);
1856     Tmp t2 = code.newTmp(GP);
1857
1858     root->append(Move, nullptr, Tmp(pinned), t1);
1859     root->append(Patch, patchpoint, Arg::special(patchpointSpecial));
1860     root->append(Move, nullptr, Tmp(pinned), t2);
1861     root->append(Add32, nullptr, t1, t2, Tmp(GPRInfo::returnValueGPR));
1862     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1863
1864     int32_t r = compileAndRun<int32_t>(proc, 10);
1865     CHECK(r == 10 + 42);
1866 }
1867
1868 void testArgumentRegPinned2()
1869 {
1870     B3::Procedure proc;
1871     Code& code = proc.code();
1872     GPRReg pinned = GPRInfo::argumentGPR0;
1873     proc.pinRegister(pinned);
1874
1875     B3::Air::Special* patchpointSpecial = code.addSpecial(std::make_unique<B3::PatchpointSpecial>());
1876
1877     B3::BasicBlock* b3Root = proc.addBlock();
1878     B3::PatchpointValue* patchpoint = b3Root->appendNew<B3::PatchpointValue>(proc, B3::Void, B3::Origin());
1879     patchpoint->clobber({ }); 
1880     patchpoint->setGenerator(
1881         [=] (CCallHelpers& jit, const B3::StackmapGenerationParams&) {
1882             jit.move(CCallHelpers::TrustedImm32(42), pinned);
1883         });
1884
1885     BasicBlock* root = code.addBlock();
1886
1887     Tmp t1 = code.newTmp(GP);
1888     Tmp t2 = code.newTmp(GP);
1889
1890     // Since the patchpoint does not claim to clobber the pinned register,
1891     // the register allocator is allowed to either coalesce the first move,
1892     // the second move, or neither. The allowed results are:
1893     // - No move coalesced: 52
1894     // - The first move is coalesced: 84
1895     // - The second move is coalesced: 52
1896     root->append(Move, nullptr, Tmp(pinned), t1);
1897     root->append(Patch, patchpoint, Arg::special(patchpointSpecial));
1898     root->append(Move, nullptr, Tmp(pinned), t2);
1899     root->append(Add32, nullptr, t1, t2, Tmp(GPRInfo::returnValueGPR));
1900     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1901
1902     int32_t r = compileAndRun<int32_t>(proc, 10);
1903     CHECK(r == 52 || r == 84);
1904 }
1905
1906 void testArgumentRegPinned3()
1907 {
1908     B3::Procedure proc;
1909     Code& code = proc.code();
1910     GPRReg pinned = GPRInfo::argumentGPR0;
1911     proc.pinRegister(pinned);
1912
1913     B3::Air::Special* patchpointSpecial = code.addSpecial(std::make_unique<B3::PatchpointSpecial>());
1914
1915     B3::BasicBlock* b3Root = proc.addBlock();
1916     B3::PatchpointValue* patchpoint = b3Root->appendNew<B3::PatchpointValue>(proc, B3::Void, B3::Origin());
1917     patchpoint->clobber(RegisterSet(pinned));
1918     patchpoint->setGenerator(
1919         [=] (CCallHelpers& jit, const B3::StackmapGenerationParams&) {
1920             jit.move(CCallHelpers::TrustedImm32(42), pinned);
1921         });
1922
1923     BasicBlock* root = code.addBlock();
1924
1925     Tmp t1 = code.newTmp(GP);
1926     Tmp t2 = code.newTmp(GP);
1927     Tmp t3 = code.newTmp(GP);
1928
1929     root->append(Move, nullptr, Tmp(pinned), t1);
1930     root->append(Patch, patchpoint, Arg::special(patchpointSpecial));
1931     root->append(Move, nullptr, Tmp(pinned), t2);
1932     root->append(Patch, patchpoint, Arg::special(patchpointSpecial));
1933     root->append(Move, nullptr, Tmp(pinned), t3);
1934     root->append(Add32, nullptr, t1, t2, Tmp(GPRInfo::returnValueGPR));
1935     root->append(Add32, nullptr, Tmp(GPRInfo::returnValueGPR), t3, Tmp(GPRInfo::returnValueGPR));
1936     root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
1937
1938     int32_t r = compileAndRun<int32_t>(proc, 10);
1939     CHECK(r == 10 + 42 + 42);
1940 }
1941
// Queues `test` (a call expression such as `testSimple()`) as a task when its spelling passes
// `shouldRun`. Expects a callable `shouldRun` and a container `tasks` to be in scope where the
// macro is expanded. The queued task logs the test's spelling before and after running it.
//
// The definition deliberately does NOT end with a semicolon: the caller's own `;` completes
// the do/while statement, so `RUN(x);` is exactly one statement and stays safe as the body of
// an unbraced if/else.
#define RUN(test) do {                          \
        if (!shouldRun(#test))                  \
            break;                              \
        tasks.append(                           \
            createSharedTask<void()>(           \
                [&] () {                        \
                    dataLog(#test "...\n");     \
                    test;                       \
                    dataLog(#test ": OK!\n");   \
                }));                            \
    } while (false)
1953
1954 void run(const char* filter)
1955 {
1956     JSC::initializeThreading();
1957
1958     Deque<RefPtr<SharedTask<void()>>> tasks;
1959
1960     auto shouldRun = [&] (const char* testName) -> bool {
1961         return !filter || !!strcasestr(testName, filter);
1962     };
1963
1964     RUN(testSimple());
1965     
1966     RUN(testShuffleSimpleSwap());
1967     RUN(testShuffleSimpleShift());
1968     RUN(testShuffleLongShift());
1969     RUN(testShuffleLongShiftBackwards());
1970     RUN(testShuffleSimpleRotate());
1971     RUN(testShuffleSimpleBroadcast());
1972     RUN(testShuffleBroadcastAllRegs());
1973     RUN(testShuffleTreeShift());
1974     RUN(testShuffleTreeShiftBackward());
1975     RUN(testShuffleTreeShiftOtherBackward());
1976     RUN(testShuffleMultipleShifts());
1977     RUN(testShuffleRotateWithFringe());
1978     RUN(testShuffleRotateWithFringeInWeirdOrder());
1979     RUN(testShuffleRotateWithLongFringe());
1980     RUN(testShuffleMultipleRotates());
1981     RUN(testShuffleShiftAndRotate());
1982     RUN(testShuffleShiftAllRegs());
1983     RUN(testShuffleRotateAllRegs());
1984     RUN(testShuffleSimpleSwap64());
1985     RUN(testShuffleSimpleShift64());
1986     RUN(testShuffleSwapMixedWidth());
1987     RUN(testShuffleShiftMixedWidth());
1988     RUN(testShuffleShiftMemory());
1989     RUN(testShuffleShiftMemoryLong());
1990     RUN(testShuffleShiftMemoryAllRegs());
1991     RUN(testShuffleShiftMemoryAllRegs64());
1992     RUN(testShuffleShiftMemoryAllRegsMixedWidth());
1993     RUN(testShuffleRotateMemory());
1994     RUN(testShuffleRotateMemory64());
1995     RUN(testShuffleRotateMemoryMixedWidth());
1996     RUN(testShuffleRotateMemoryAllRegs64());
1997     RUN(testShuffleRotateMemoryAllRegsMixedWidth());
1998     RUN(testShuffleSwapDouble());
1999     RUN(testShuffleShiftDouble());
2000
2001 #if CPU(X86) || CPU(X86_64)
2002     RUN(testX86VMULSD());
2003     RUN(testX86VMULSDDestRex());
2004     RUN(testX86VMULSDOp1DestRex());
2005     RUN(testX86VMULSDOp2DestRex());
2006     RUN(testX86VMULSDOpsDestRex());
2007
2008     RUN(testX86VMULSDAddr());
2009     RUN(testX86VMULSDAddrOpRexAddr());
2010     RUN(testX86VMULSDDestRexAddr());
2011     RUN(testX86VMULSDRegOpDestRexAddr());
2012     RUN(testX86VMULSDAddrOpDestRexAddr());
2013
2014     RUN(testX86VMULSDBaseNeedsRex());
2015     RUN(testX86VMULSDIndexNeedsRex());
2016     RUN(testX86VMULSDBaseIndexNeedRex());
2017 #endif
2018
2019     RUN(testArgumentRegPinned());
2020     RUN(testArgumentRegPinned2());
2021     RUN(testArgumentRegPinned3());
2022
2023     if (tasks.isEmpty())
2024         usage();
2025
2026     Lock lock;
2027
2028     Vector<Ref<Thread>> threads;
2029     for (unsigned i = filter ? 1 : WTF::numberOfProcessorCores(); i--;) {
2030         threads.append(
2031             Thread::create(
2032                 "testair thread",
2033                 [&] () {
2034                     for (;;) {
2035                         RefPtr<SharedTask<void()>> task;
2036                         {
2037                             LockHolder locker(lock);
2038                             if (tasks.isEmpty())
2039                                 return;
2040                             task = tasks.takeFirst();
2041                         }
2042
2043                         task->run();
2044                     }
2045                 }));
2046     }
2047
2048     for (auto& thread : threads)
2049         thread->waitForCompletion();
2050     crashLock.lock();
2051 }
2052
2053 } // anonymous namespace
2054
2055 #else // ENABLE(B3_JIT)
2056
2057 static void run(const char*)
2058 {
2059     dataLog("B3 JIT is not enabled.\n");
2060 }
2061
2062 #endif // ENABLE(B3_JIT)
2063
2064 int main(int argc, char** argv)
2065 {
2066     const char* filter = nullptr;
2067     switch (argc) {
2068     case 1:
2069         break;
2070     case 2:
2071         filter = argv[1];
2072         break;
2073     default:
2074         usage();
2075         break;
2076     }
2077     
2078     run(filter);
2079     return 0;
2080 }