LinkBuffer::copyCompactAndLinkCode() needs to be aware of ENABLE(SEPARATED_WX_HEAP).
[WebKit-https.git] / Source / JavaScriptCore / assembler / LinkBuffer.cpp
1 /*
2  * Copyright (C) 2012-2018 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24  */
25
26 #include "config.h"
27 #include "LinkBuffer.h"
28
29 #if ENABLE(ASSEMBLER)
30
31 #include "CodeBlock.h"
32 #include "Disassembler.h"
33 #include "JITCode.h"
34 #include "JSCInlines.h"
35 #include "Options.h"
36 #include <wtf/CompilationThread.h>
37
38 #if OS(LINUX)
39 #include "PerfLog.h"
40 #endif
41
42 namespace JSC {
43
44 #if ENABLE(SEPARATED_WX_HEAP)
45 extern JS_EXPORT_PRIVATE bool useFastPermisionsJITCopy;
46 #endif // ENABLE(SEPARATED_WX_HEAP)
47
48 bool shouldDumpDisassemblyFor(CodeBlock* codeBlock)
49 {
50     if (codeBlock && JITCode::isOptimizingJIT(codeBlock->jitType()) && Options::dumpDFGDisassembly())
51         return true;
52     return Options::dumpDisassembly();
53 }
54
55 LinkBuffer::CodeRef<LinkBufferPtrTag> LinkBuffer::finalizeCodeWithoutDisassemblyImpl()
56 {
57     performFinalization();
58     
59     ASSERT(m_didAllocate);
60     if (m_executableMemory)
61         return CodeRef<LinkBufferPtrTag>(*m_executableMemory);
62     
63     return CodeRef<LinkBufferPtrTag>::createSelfManagedCodeRef(m_code);
64 }
65
66 LinkBuffer::CodeRef<LinkBufferPtrTag> LinkBuffer::finalizeCodeWithDisassemblyImpl(bool dumpDisassembly, const char* format, ...)
67 {
68     CodeRef<LinkBufferPtrTag> result = finalizeCodeWithoutDisassemblyImpl();
69
70 #if OS(LINUX)
71     if (Options::logJITCodeForPerf()) {
72         StringPrintStream out;
73         va_list argList;
74         va_start(argList, format);
75         va_start(argList, format);
76         out.vprintf(format, argList);
77         va_end(argList);
78         PerfLog::log(out.toCString(), result.code().untaggedExecutableAddress<const uint8_t*>(), result.size());
79     }
80 #endif
81
82     if (!dumpDisassembly || m_alreadyDisassembled)
83         return result;
84     
85     StringPrintStream out;
86     out.printf("Generated JIT code for ");
87     va_list argList;
88     va_start(argList, format);
89     out.vprintf(format, argList);
90     va_end(argList);
91     out.printf(":\n");
92
93     uint8_t* executableAddress = result.code().untaggedExecutableAddress<uint8_t*>();
94     out.printf("    Code at [%p, %p):\n", executableAddress, executableAddress + result.size());
95     
96     CString header = out.toCString();
97     
98     if (Options::asyncDisassembly()) {
99         CodeRef<DisassemblyPtrTag> codeRefForDisassembly = result.retagged<DisassemblyPtrTag>();
100         disassembleAsynchronously(header, WTFMove(codeRefForDisassembly), m_size, "    ");
101         return result;
102     }
103     
104     dataLog(header);
105     disassemble(result.retaggedCode<DisassemblyPtrTag>(), m_size, "    ", WTF::dataFile());
106     
107     return result;
108 }
109
110 #if ENABLE(BRANCH_COMPACTION)
111 static ALWAYS_INLINE void recordLinkOffsets(AssemblerData& assemblerData, int32_t regionStart, int32_t regionEnd, int32_t offset)
112 {
113     int32_t ptr = regionStart / sizeof(int32_t);
114     const int32_t end = regionEnd / sizeof(int32_t);
115     int32_t* offsets = reinterpret_cast_ptr<int32_t*>(assemblerData.buffer());
116     while (ptr < end)
117         offsets[ptr++] = offset;
118 }
119
// Copies the assembled instructions into executable memory while compacting
// branches (shrinking jumps whose targets turn out to be near), then links all
// recorded jumps. InstructionType is the minimal instruction unit (uint16_t on
// Thumb-2, uint32_t on ARM64).
//
// WX-policy awareness: with fast JIT permissions (ARM64E) the thread flips its
// own mapping to RW and writes straight into the executable region; otherwise
// (including SEPARATED_WX_HEAP builds where useFastPermisionsJITCopy is false
// at runtime) compaction happens in a scratch AssemblerData buffer that is
// copied into place at the end via performJITMemcpy.
template <typename InstructionType>
void LinkBuffer::copyCompactAndLinkCode(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
#if ENABLE(SEPARATED_WX_HEAP)
    // Runtime switch: separated-WX builds may still run on hardware without
    // fast permission support.
    const bool isUsingFastPermissionsJITCopy = useFastPermisionsJITCopy;
#else
    const bool isUsingFastPermissionsJITCopy = true;
#endif
#endif

    allocate(macroAssembler, ownerUID, effort);
    const size_t initialSize = macroAssembler.m_assembler.codeSize();
    if (didFailToAllocate())
        return;

    Vector<LinkRecord, 0, UnsafeVectorOverflow>& jumpsToLink = macroAssembler.jumpsToLink();
    // Take ownership of the assembler's buffer; it doubles as the offset table
    // filled in by recordLinkOffsets() as regions are consumed.
    m_assemblerStorage = macroAssembler.m_assembler.buffer().releaseAssemblerData();
    uint8_t* inData = reinterpret_cast<uint8_t*>(m_assemblerStorage.buffer());

    uint8_t* codeOutData = m_code.dataLocation<uint8_t*>();
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    // Hash of the uncompacted stream, recomputed below to detect tampering or
    // corruption between assembly and copy.
    const ARM64EHash assemblerBufferHash = macroAssembler.m_assembler.buffer().hash();
    ARM64EHash verifyUncompactedHash(assemblerBufferHash.randomSeed());
    uint8_t* outData = codeOutData;
#if ENABLE(SEPARATED_WX_HEAP)
    // Without fast permissions, compact into a scratch buffer instead of
    // writing directly to the executable mapping.
    AssemblerData outBuffer(m_size);
    if (!isUsingFastPermissionsJITCopy)
        outData = reinterpret_cast<uint8_t*>(outBuffer.buffer());
#endif // ENABLE(SEPARATED_WX_HEAP)
#else
    AssemblerData outBuffer(m_size);
    uint8_t* outData = reinterpret_cast<uint8_t*>(outBuffer.buffer());
#endif
#if CPU(ARM64)
    RELEASE_ASSERT(roundUpToMultipleOf<sizeof(unsigned)>(outData) == outData);
    RELEASE_ASSERT(roundUpToMultipleOf<sizeof(unsigned)>(codeOutData) == codeOutData);
#endif

    // readPtr tracks position in the uncompacted input; writePtr in the
    // compacted output. readPtr - writePtr is the bytes saved so far.
    int readPtr = 0;
    int writePtr = 0;
    unsigned jumpCount = jumpsToLink.size();

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    // Flip this thread's view of the JIT region to writable for the direct copy.
    if (isUsingFastPermissionsJITCopy)
        os_thread_self_restrict_rwx_to_rw();
#endif

    if (m_shouldPerformBranchCompaction) {
        for (unsigned i = 0; i < jumpCount; ++i) {
            int offset = readPtr - writePtr;
            ASSERT(!(offset & 1));
                
            // Copy the instructions from the last jump to the current one.
            size_t regionSize = jumpsToLink[i].from() - readPtr;
            InstructionType* copySource = reinterpret_cast_ptr<InstructionType*>(inData + readPtr);
            InstructionType* copyEnd = reinterpret_cast_ptr<InstructionType*>(inData + readPtr + regionSize);
            InstructionType* copyDst = reinterpret_cast_ptr<InstructionType*>(outData + writePtr);
            ASSERT(!(regionSize % 2));
            ASSERT(!(readPtr % 2));
            ASSERT(!(writePtr % 2));
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
            unsigned index = readPtr;
#endif
            while (copySource != copyEnd) {
                InstructionType insn = *copySource++;
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
                // Fold each copied instruction into the verification hash at
                // its original (pre-compaction) offset.
                static_assert(sizeof(InstructionType) == 4, "");
                verifyUncompactedHash.update(insn, index);
                index += sizeof(InstructionType);
#endif
                *copyDst++ = insn;
            }
            recordLinkOffsets(m_assemblerStorage, readPtr, jumpsToLink[i].from(), offset);
            readPtr += regionSize;
            writePtr += regionSize;
                
            // Calculate absolute address of the jump target, in the case of backwards
            // branches we need to be precise, forward branches we are pessimistic
            const uint8_t* target;
            if (jumpsToLink[i].to() >= jumpsToLink[i].from())
                target = codeOutData + jumpsToLink[i].to() - offset; // Compensate for what we have collapsed so far
            else
                target = codeOutData + jumpsToLink[i].to() - executableOffsetFor(jumpsToLink[i].to());
                
            JumpLinkType jumpLinkType = MacroAssembler::computeJumpType(jumpsToLink[i], codeOutData + writePtr, target);
            // Compact branch if we can...
            if (MacroAssembler::canCompact(jumpsToLink[i].type())) {
                // Step back in the write stream
                int32_t delta = MacroAssembler::jumpSizeDelta(jumpsToLink[i].type(), jumpLinkType);
                if (delta) {
                    writePtr -= delta;
                    recordLinkOffsets(m_assemblerStorage, jumpsToLink[i].from() - delta, readPtr, readPtr - writePtr);
                }
            }
            jumpsToLink[i].setFrom(writePtr);
        }
    } else {
        // Compaction disabled: sanity-check that no jump expected to be compacted.
        if (!ASSERT_DISABLED) {
            for (unsigned i = 0; i < jumpCount; ++i)
                ASSERT(!MacroAssembler::canCompact(jumpsToLink[i].type()));
        }
    }

    // Copy everything after the last jump
    {
        InstructionType* dst = bitwise_cast<InstructionType*>(outData + writePtr);
        InstructionType* src = bitwise_cast<InstructionType*>(inData + readPtr);
        size_t bytes = initialSize - readPtr;

        RELEASE_ASSERT(bitwise_cast<uintptr_t>(dst) % sizeof(InstructionType) == 0);
        RELEASE_ASSERT(bitwise_cast<uintptr_t>(src) % sizeof(InstructionType) == 0);
        RELEASE_ASSERT(bytes % sizeof(InstructionType) == 0);

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
        unsigned index = readPtr;
#endif

        for (size_t i = 0; i < bytes; i += sizeof(InstructionType)) {
            InstructionType insn = *src++;
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
            verifyUncompactedHash.update(insn, index);
            index += sizeof(InstructionType);
#endif
            *dst++ = insn;
        }
    }

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    // Every input instruction must have been visited exactly once; a mismatch
    // means the source buffer was corrupted, so crash rather than link bad code.
    if (verifyUncompactedHash.hash() != assemblerBufferHash.hash()) {
        dataLogLn("Hashes don't match: ", RawPointer(bitwise_cast<void*>(verifyUncompactedHash.hash())), " ", RawPointer(bitwise_cast<void*>(assemblerBufferHash.hash())));
        dataLogLn("Crashing!");
        CRASH();
    }
#endif

    recordLinkOffsets(m_assemblerStorage, readPtr, initialSize, readPtr - writePtr);
        
    for (unsigned i = 0; i < jumpCount; ++i) {
#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
        // Direct-copy mode writes jumps with plain memcpy (region is RW for this
        // thread); otherwise patch via performJITMemcpy.
        auto memcpyFunction = memcpy;
        if (!isUsingFastPermissionsJITCopy)
            memcpyFunction = performJITMemcpy;
#else
        auto memcpyFunction = performJITMemcpy;
#endif

        uint8_t* location = codeOutData + jumpsToLink[i].from();
        uint8_t* target = codeOutData + jumpsToLink[i].to() - executableOffsetFor(jumpsToLink[i].to());
        MacroAssembler::link(jumpsToLink[i], outData + jumpsToLink[i].from(), location, target, memcpyFunction);
    }

    size_t compactSize = writePtr + initialSize - readPtr;
    if (!m_executableMemory) {
        // Pre-provided buffer cannot shrink, so pad the tail with nops.
        size_t nopSizeInBytes = initialSize - compactSize;
        MacroAssembler::AssemblerType_T::fillNops(outData + compactSize, nopSizeInBytes, memcpy);
    }

#if CPU(ARM64E) && ENABLE(FAST_JIT_PERMISSIONS)
    // Restore this thread's JIT region to executable before anyone runs the code.
    if (isUsingFastPermissionsJITCopy)
        os_thread_self_restrict_rwx_to_rx();
#endif

    if (m_executableMemory) {
        m_size = compactSize;
        m_executableMemory->shrink(m_size);
    }

#if !CPU(ARM64E) || !ENABLE(FAST_JIT_PERMISSIONS)
    // Scratch-buffer path: publish the compacted code into executable memory.
    ASSERT(codeOutData != outData);
    performJITMemcpy(codeOutData, outData, m_size);
#else
    if (isUsingFastPermissionsJITCopy)
        ASSERT(codeOutData == outData);
    else {
        ASSERT(codeOutData != outData);
        performJITMemcpy(codeOutData, outData, m_size);
    }
#endif

    jumpsToLink.clear();

#if DUMP_LINK_STATISTICS
    dumpLinkStatistics(codeOutData, initialSize, m_size);
#endif
#if DUMP_CODE
    dumpCode(codeOutData, m_size);
#endif
}
309 #endif // ENABLE(BRANCH_COMPACTION)
310
311
// Entry point for turning an assembled MacroAssembler into linked machine code.
// On branch-compacting targets (Thumb-2, ARM64) this delegates to
// copyCompactAndLinkCode; otherwise it allocates and copies the buffer verbatim.
void LinkBuffer::linkCode(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    // Ensure that the end of the last invalidation point does not extend beyond the end of the buffer.
    macroAssembler.label();

#if !ENABLE(BRANCH_COMPACTION)
#if defined(ASSEMBLER_HAS_CONSTANT_POOL) && ASSEMBLER_HAS_CONSTANT_POOL
    macroAssembler.m_assembler.buffer().flushConstantPool(false);
#endif
    allocate(macroAssembler, ownerUID, effort);
    if (!m_didAllocate)
        return;
    ASSERT(m_code);
    AssemblerBuffer& buffer = macroAssembler.m_assembler.buffer();
    void* code = m_code.dataLocation();
#if CPU(ARM64)
    RELEASE_ASSERT(roundUpToMultipleOf<Assembler::instructionSize>(code) == code);
#endif
    // Straight copy into executable memory; no compaction on this target.
    performJITMemcpy(code, buffer.data(), buffer.codeSize());
#if CPU(MIPS)
    // MIPS jumps embed absolute addresses, so rewrite them for the new location.
    macroAssembler.m_assembler.relocateJumps(buffer.data(), code);
#endif
#elif CPU(ARM_THUMB2)
    copyCompactAndLinkCode<uint16_t>(macroAssembler, ownerUID, effort);
#elif CPU(ARM64)
    copyCompactAndLinkCode<uint32_t>(macroAssembler, ownerUID, effort);
#endif // !ENABLE(BRANCH_COMPACTION)

    // Deferred link tasks run later, in performFinalization().
    m_linkTasks = WTFMove(macroAssembler.m_linkTasks);
}
342
343 void LinkBuffer::allocate(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
344 {
345     size_t initialSize = macroAssembler.m_assembler.codeSize();
346     if (m_code) {
347         if (initialSize > m_size)
348             return;
349         
350         size_t nopsToFillInBytes = m_size - initialSize;
351         macroAssembler.emitNops(nopsToFillInBytes);
352         m_didAllocate = true;
353         return;
354     }
355     
356     while (initialSize % jitAllocationGranule) {
357         macroAssembler.breakpoint();
358         initialSize = macroAssembler.m_assembler.codeSize();
359     }
360
361     m_executableMemory = ExecutableAllocator::singleton().allocate(initialSize, ownerUID, effort);
362     if (!m_executableMemory)
363         return;
364     m_code = MacroAssemblerCodePtr<LinkBufferPtrTag>(m_executableMemory->start().retaggedPtr<LinkBufferPtrTag>());
365     m_size = initialSize;
366     m_didAllocate = true;
367 }
368
369 void LinkBuffer::performFinalization()
370 {
371     for (auto& task : m_linkTasks)
372         task->run(*this);
373
374 #ifndef NDEBUG
375     ASSERT(!isCompilationThread());
376     ASSERT(!m_completed);
377     ASSERT(isValid());
378     m_completed = true;
379 #endif
380     
381     MacroAssembler::cacheFlush(code(), m_size);
382 }
383
384 #if DUMP_LINK_STATISTICS
385 void LinkBuffer::dumpLinkStatistics(void* code, size_t initializeSize, size_t finalSize)
386 {
387     static unsigned linkCount = 0;
388     static unsigned totalInitialSize = 0;
389     static unsigned totalFinalSize = 0;
390     linkCount++;
391     totalInitialSize += initialSize;
392     totalFinalSize += finalSize;
393     dataLogF("link %p: orig %u, compact %u (delta %u, %.2f%%)\n", 
394             code, static_cast<unsigned>(initialSize), static_cast<unsigned>(finalSize),
395             static_cast<unsigned>(initialSize - finalSize),
396             100.0 * (initialSize - finalSize) / initialSize);
397     dataLogF("\ttotal %u: orig %u, compact %u (delta %u, %.2f%%)\n", 
398             linkCount, totalInitialSize, totalFinalSize, totalInitialSize - totalFinalSize,
399             100.0 * (totalInitialSize - totalFinalSize) / totalInitialSize);
400 }
401 #endif
402
403 #if DUMP_CODE
404 void LinkBuffer::dumpCode(void* code, size_t size)
405 {
406 #if CPU(ARM_THUMB2)
407     // Dump the generated code in an asm file format that can be assembled and then disassembled
408     // for debugging purposes. For example, save this output as jit.s:
409     //   gcc -arch armv7 -c jit.s
410     //   otool -tv jit.o
411     static unsigned codeCount = 0;
412     unsigned short* tcode = static_cast<unsigned short*>(code);
413     size_t tsize = size / sizeof(short);
414     char nameBuf[128];
415     snprintf(nameBuf, sizeof(nameBuf), "_jsc_jit%u", codeCount++);
416     dataLogF("\t.syntax unified\n"
417             "\t.section\t__TEXT,__text,regular,pure_instructions\n"
418             "\t.globl\t%s\n"
419             "\t.align 2\n"
420             "\t.code 16\n"
421             "\t.thumb_func\t%s\n"
422             "# %p\n"
423             "%s:\n", nameBuf, nameBuf, code, nameBuf);
424         
425     for (unsigned i = 0; i < tsize; i++)
426         dataLogF("\t.short\t0x%x\n", tcode[i]);
427 #endif
428 }
429 #endif
430
431 } // namespace JSC
432
433 #endif // ENABLE(ASSEMBLER)