Enhance the MacroAssembler and LinkBuffer to support pointer profiling.
Source/JavaScriptCore/assembler/LinkBuffer.cpp
/*
 * Copyright (C) 2012-2018 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "LinkBuffer.h"

#if ENABLE(ASSEMBLER)

#include "CodeBlock.h"
#include "Disassembler.h"
#include "JITCode.h"
#include "JSCInlines.h"
#include "Options.h"
#include <wtf/CompilationThread.h>

namespace JSC {

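// Returns true if disassembly should be dumped for this code: either
// Options::dumpDisassembly() is set globally, or Options::dumpDFGDisassembly()
// is set and the code belongs to an optimizing-JIT (DFG/FTL) CodeBlock.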
bool shouldDumpDisassemblyFor(CodeBlock* codeBlock)
{
    if (codeBlock && JITCode::isOptimizingJIT(codeBlock->jitType()) && Options::dumpDFGDisassembly())
        return true;
    return Options::dumpDisassembly();
}

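// Runs the deferred link tasks, flushes the instruction cache, and returns a
// CodeRef whose code pointer is tagged with the given PtrTag (part of the
// pointer profiling support this change adds).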
LinkBuffer::CodeRef LinkBuffer::finalizeCodeWithoutDisassembly(PtrTag tag)
{
    performFinalization();
    
    ASSERT(m_didAllocate);
    if (m_executableMemory)
        return CodeRef(*m_executableMemory, tag);
    
    return CodeRef::createSelfManagedCodeRef(MacroAssemblerCodePtr(tagCodePtr(m_code, tag)));
}

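// Same as finalizeCodeWithoutDisassembly(), but also dumps a header line and a
// disassembly of the generated code, either synchronously or asynchronously
// when Options::asyncDisassembly() is set. Callers normally reach this through
// the FINALIZE_CODE* macros in LinkBuffer.h; a rough sketch of a call site
// (exact macro arguments and tag names depend on the headers in this tree):
//
//     LinkBuffer patchBuffer(jit, codeBlock, JITCompilationCanFail);
//     if (patchBuffer.didFailToAllocate())
//         return;
//     CodeRef code = FINALIZE_CODE(patchBuffer, someResultTag, "thunk for %s", name);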
LinkBuffer::CodeRef LinkBuffer::finalizeCodeWithDisassembly(PtrTag tag, const char* format, ...)
{
    CodeRef result = finalizeCodeWithoutDisassembly(tag);

    if (m_alreadyDisassembled)
        return result;
    
    StringPrintStream out;
    out.printf("Generated JIT code for ");
    va_list argList;
    va_start(argList, format);
    out.vprintf(format, argList);
    va_end(argList);
    out.printf(":\n");

    uint8_t* executableAddress = removeCodePtrTag<uint8_t*>(result.code().executableAddress());
    out.printf("    Code at [%p, %p):\n", executableAddress, executableAddress + result.size());
    
    CString header = out.toCString();
    
    if (Options::asyncDisassembly()) {
        disassembleAsynchronously(header, result, m_size, "    ");
        return result;
    }
    
    dataLog(header);
    disassemble(result.code(), m_size, "    ", WTF::dataFile());
    
    return result;
}

#if ENABLE(BRANCH_COMPACTION)
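// Writes 'offset' into every int32_t slot of the assembler buffer covering
// [regionStart, regionEnd). The instructions in that region have already been
// copied out, so the buffer is reused as a side table mapping original code
// offsets to the number of bytes removed by branch compaction.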
static ALWAYS_INLINE void recordLinkOffsets(AssemblerData& assemblerData, int32_t regionStart, int32_t regionEnd, int32_t offset)
{
    int32_t ptr = regionStart / sizeof(int32_t);
    const int32_t end = regionEnd / sizeof(int32_t);
    int32_t* offsets = reinterpret_cast_ptr<int32_t*>(assemblerData.buffer());
    while (ptr < end)
        offsets[ptr++] = offset;
}

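// Copies the assembled code into place while performing branch compaction.
// InstructionType is the minimal instruction unit (uint16_t for Thumb-2,
// uint32_t for ARM64). For each recorded jump we copy the region up to the
// jump, pick the smallest branch encoding that still reaches its target, and
// record how many bytes have been saved so far via recordLinkOffsets(), so
// later jump targets can be translated from original to compacted offsets.
// Finally all jumps are linked against the compacted layout and the buffer is
// copied into executable memory with performJITMemcpy().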
template <typename InstructionType>
void LinkBuffer::copyCompactAndLinkCode(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    allocate(macroAssembler, ownerUID, effort);
    const size_t initialSize = macroAssembler.m_assembler.codeSize();
    if (didFailToAllocate())
        return;

    Vector<LinkRecord, 0, UnsafeVectorOverflow>& jumpsToLink = macroAssembler.jumpsToLink();
    m_assemblerStorage = macroAssembler.m_assembler.buffer().releaseAssemblerData();
    uint8_t* inData = reinterpret_cast<uint8_t*>(m_assemblerStorage.buffer());

    AssemblerData outBuffer(m_size);

    uint8_t* outData = reinterpret_cast<uint8_t*>(outBuffer.buffer());
    uint8_t* codeOutData = reinterpret_cast<uint8_t*>(m_code);

    int readPtr = 0;
    int writePtr = 0;
    unsigned jumpCount = jumpsToLink.size();
    if (m_shouldPerformBranchCompaction) {
        for (unsigned i = 0; i < jumpCount; ++i) {
            int offset = readPtr - writePtr;
            ASSERT(!(offset & 1));

            // Copy the instructions from the last jump to the current one.
            size_t regionSize = jumpsToLink[i].from() - readPtr;
            InstructionType* copySource = reinterpret_cast_ptr<InstructionType*>(inData + readPtr);
            InstructionType* copyEnd = reinterpret_cast_ptr<InstructionType*>(inData + readPtr + regionSize);
            InstructionType* copyDst = reinterpret_cast_ptr<InstructionType*>(outData + writePtr);
            ASSERT(!(regionSize % 2));
            ASSERT(!(readPtr % 2));
            ASSERT(!(writePtr % 2));
            while (copySource != copyEnd)
                *copyDst++ = *copySource++;
            recordLinkOffsets(m_assemblerStorage, readPtr, jumpsToLink[i].from(), offset);
            readPtr += regionSize;
            writePtr += regionSize;

            // Calculate the absolute address of the jump target. For backward branches
            // we need to be precise; for forward branches we can be pessimistic.
            const uint8_t* target;
            if (jumpsToLink[i].to() >= jumpsToLink[i].from())
                target = codeOutData + jumpsToLink[i].to() - offset; // Compensate for what we have collapsed so far.
            else
                target = codeOutData + jumpsToLink[i].to() - executableOffsetFor(jumpsToLink[i].to());

            JumpLinkType jumpLinkType = MacroAssembler::computeJumpType(jumpsToLink[i], codeOutData + writePtr, target);
            // Compact branch if we can...
            if (MacroAssembler::canCompact(jumpsToLink[i].type())) {
                // Step back in the write stream
                int32_t delta = MacroAssembler::jumpSizeDelta(jumpsToLink[i].type(), jumpLinkType);
                if (delta) {
                    writePtr -= delta;
                    recordLinkOffsets(m_assemblerStorage, jumpsToLink[i].from() - delta, readPtr, readPtr - writePtr);
                }
            }
            jumpsToLink[i].setFrom(writePtr);
        }
    } else {
        if (!ASSERT_DISABLED) {
            for (unsigned i = 0; i < jumpCount; ++i)
                ASSERT(!MacroAssembler::canCompact(jumpsToLink[i].type()));
        }
    }
    // Copy everything after the last jump
    memcpy(outData + writePtr, inData + readPtr, initialSize - readPtr);
    recordLinkOffsets(m_assemblerStorage, readPtr, initialSize, readPtr - writePtr);

    for (unsigned i = 0; i < jumpCount; ++i) {
        uint8_t* location = codeOutData + jumpsToLink[i].from();
        uint8_t* target = codeOutData + jumpsToLink[i].to() - executableOffsetFor(jumpsToLink[i].to());
        MacroAssembler::link(jumpsToLink[i], outData + jumpsToLink[i].from(), location, target);
    }

    jumpsToLink.clear();

    size_t compactSize = writePtr + initialSize - readPtr;
    if (m_executableMemory) {
        m_size = compactSize;
        m_executableMemory->shrink(m_size);
    } else {
        size_t nopSizeInBytes = initialSize - compactSize;
        bool isCopyingToExecutableMemory = false;
        MacroAssembler::AssemblerType_T::fillNops(outData + compactSize, nopSizeInBytes, isCopyingToExecutableMemory);
    }

    performJITMemcpy(m_code, outData, m_size);

#if DUMP_LINK_STATISTICS
    dumpLinkStatistics(m_code, initialSize, m_size);
#endif
#if DUMP_CODE
    dumpCode(m_code, m_size);
#endif
}
#endif

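// Copies the assembled code into the LinkBuffer's memory and resolves jumps.
// Without branch compaction this is a straight copy (plus constant-pool flush
// and per-CPU relocation fixups); on ARM Thumb-2 and ARM64 it goes through
// copyCompactAndLinkCode() with the appropriate instruction width. Deferred
// link tasks registered on the MacroAssembler are taken over here and run
// later by performFinalization().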
void LinkBuffer::linkCode(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    // Ensure that the end of the last invalidation point does not extend beyond the end of the buffer.
    macroAssembler.label();

#if !ENABLE(BRANCH_COMPACTION)
#if defined(ASSEMBLER_HAS_CONSTANT_POOL) && ASSEMBLER_HAS_CONSTANT_POOL
    macroAssembler.m_assembler.buffer().flushConstantPool(false);
#endif
    allocate(macroAssembler, ownerUID, effort);
    if (!m_didAllocate)
        return;
    ASSERT(m_code);
    AssemblerBuffer& buffer = macroAssembler.m_assembler.buffer();
#if CPU(ARM_TRADITIONAL)
    macroAssembler.m_assembler.prepareExecutableCopy(m_code);
#endif
    performJITMemcpy(m_code, buffer.data(), buffer.codeSize());
#if CPU(MIPS)
    macroAssembler.m_assembler.relocateJumps(buffer.data(), m_code);
#endif
#elif CPU(ARM_THUMB2)
    copyCompactAndLinkCode<uint16_t>(macroAssembler, ownerUID, effort);
#elif CPU(ARM64)
    copyCompactAndLinkCode<uint32_t>(macroAssembler, ownerUID, effort);
#endif // !ENABLE(BRANCH_COMPACTION)

    m_linkTasks = WTFMove(macroAssembler.m_linkTasks);
}

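// Reserves memory for the assembled code. If a code pointer was supplied up
// front (m_code is already set), the unused tail of that buffer is padded with
// nops; otherwise the code is padded with breakpoints up to the JIT allocation
// granule and a fresh block is requested from the ExecutableAllocator.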
void LinkBuffer::allocate(MacroAssembler& macroAssembler, void* ownerUID, JITCompilationEffort effort)
{
    size_t initialSize = macroAssembler.m_assembler.codeSize();
    if (m_code) {
        if (initialSize > m_size)
            return;
        
        size_t nopsToFillInBytes = m_size - initialSize;
        macroAssembler.emitNops(nopsToFillInBytes);
        m_didAllocate = true;
        return;
    }
    
    while (initialSize % jitAllocationGranule) {
        macroAssembler.breakpoint();
        initialSize = macroAssembler.m_assembler.codeSize();
    }
    
    m_executableMemory = ExecutableAllocator::singleton().allocate(initialSize, ownerUID, effort);
    if (!m_executableMemory)
        return;
    m_code = m_executableMemory->start();
    m_size = initialSize;
    m_didAllocate = true;
}

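// Runs every deferred link task against this LinkBuffer and flushes the
// instruction cache for the finished code. Finalization must happen exactly
// once, and never on a compilation thread (enforced by the debug asserts).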
void LinkBuffer::performFinalization()
{
    for (auto& task : m_linkTasks)
        task->run(*this);

#ifndef NDEBUG
    ASSERT(!isCompilationThread());
    ASSERT(!m_completed);
    ASSERT(isValid());
    m_completed = true;
#endif
    
    MacroAssembler::cacheFlush(code(), m_size);
}

#if DUMP_LINK_STATISTICS
void LinkBuffer::dumpLinkStatistics(void* code, size_t initialSize, size_t finalSize)
{
    static unsigned linkCount = 0;
    static unsigned totalInitialSize = 0;
    static unsigned totalFinalSize = 0;
    linkCount++;
    totalInitialSize += initialSize;
    totalFinalSize += finalSize;
    dataLogF("link %p: orig %u, compact %u (delta %u, %.2f%%)\n",
            code, static_cast<unsigned>(initialSize), static_cast<unsigned>(finalSize),
            static_cast<unsigned>(initialSize - finalSize),
            100.0 * (initialSize - finalSize) / initialSize);
    dataLogF("\ttotal %u: orig %u, compact %u (delta %u, %.2f%%)\n",
            linkCount, totalInitialSize, totalFinalSize, totalInitialSize - totalFinalSize,
            100.0 * (totalInitialSize - totalFinalSize) / totalInitialSize);
}
#endif

#if DUMP_CODE
void LinkBuffer::dumpCode(void* code, size_t size)
{
#if CPU(ARM_THUMB2)
    // Dump the generated code in an asm file format that can be assembled and then disassembled
    // for debugging purposes. For example, save this output as jit.s:
    //   gcc -arch armv7 -c jit.s
    //   otool -tv jit.o
    static unsigned codeCount = 0;
    unsigned short* tcode = static_cast<unsigned short*>(code);
    size_t tsize = size / sizeof(short);
    char nameBuf[128];
    snprintf(nameBuf, sizeof(nameBuf), "_jsc_jit%u", codeCount++);
    dataLogF("\t.syntax unified\n"
            "\t.section\t__TEXT,__text,regular,pure_instructions\n"
            "\t.globl\t%s\n"
            "\t.align 2\n"
            "\t.code 16\n"
            "\t.thumb_func\t%s\n"
            "# %p\n"
            "%s:\n", nameBuf, nameBuf, code, nameBuf);

    for (unsigned i = 0; i < tsize; i++)
        dataLogF("\t.short\t0x%x\n", tcode[i]);
#elif CPU(ARM_TRADITIONAL)
    //   gcc -c jit.s
    //   objdump -D jit.o
    static unsigned codeCount = 0;
    unsigned int* tcode = static_cast<unsigned int*>(code);
    size_t tsize = size / sizeof(unsigned int);
    char nameBuf[128];
    snprintf(nameBuf, sizeof(nameBuf), "_jsc_jit%u", codeCount++);
    dataLogF("\t.globl\t%s\n"
            "\t.align 4\n"
            "\t.code 32\n"
            "\t.text\n"
            "# %p\n"
            "%s:\n", nameBuf, code, nameBuf);

    for (unsigned i = 0; i < tsize; i++)
        dataLogF("\t.long\t0x%x\n", tcode[i]);
#endif
}
#endif

} // namespace JSC

#endif // ENABLE(ASSEMBLER)