/*
 * Copyright (C) 2007-2008, 2010, 2012-2013, 2015 Apple Inc. All rights reserved.
 * Copyright (C) 2007 Justin Haygood (jhaygood@reaktix.com)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Apple Inc. ("Apple") nor the names of
 *    its contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Note: The implementations of InterlockedIncrement and InterlockedDecrement are based
 * on atomic_increment and atomic_exchange_and_add from the Boost C++ Library. The license
 * is virtually identical to the Apple license above but is included here for completeness.
 *
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to whom the Software is furnished to
 * do so, all subject to the following:
 *
 * The copyright notices in the Software and this entire statement, including
 * the above license grant, this restriction and the following disclaimer,
 * must be included in all copies of the Software, in whole or in part, and
 * all derivative works of the Software, unless such copies or derivative
 * works are solely in the form of machine-executable object code generated by
 * a source language processor.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <atomic>
#include <wtf/StdLibExtras.h>

#if OS(WINDOWS)
#if !COMPILER(GCC_OR_CLANG)
extern "C" void _ReadWriteBarrier(void);
#pragma intrinsic(_ReadWriteBarrier)
#endif
#include <windows.h>
#endif
// Atomic wraps around std::atomic with the sole purpose of making the compare_exchange
// operations not alter the expected value. This is more in line with how we typically
// use CAS in our code.
//
// Atomic is a struct without explicitly defined constructors so that it can be
// initialized at compile time.

template<typename T>
struct Atomic {
    // Don't pass a non-default value for the order parameter unless you really know
    // what you are doing and have thought about it very hard. The cost of seq_cst
    // is usually not high enough to justify the risk.

    T load(std::memory_order order = std::memory_order_seq_cst) const { return value.load(order); }

    void store(T desired, std::memory_order order = std::memory_order_seq_cst) { value.store(desired, order); }

    bool compareExchangeWeak(T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    {
        T expectedOrActual = expected;
        return value.compare_exchange_weak(expectedOrActual, desired, order);
    }

    bool compareExchangeStrong(T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    {
        T expectedOrActual = expected;
        return value.compare_exchange_strong(expectedOrActual, desired, order);
    }

    std::atomic<T> value;
};
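// A minimal usage sketch, not part of the original header: because the compare-and-exchange
// members above do not alter the caller's expected value, a typical retry loop simply
// re-loads on every iteration. The function name atomicIncrementSketch is hypothetical and
// exists only for illustration.
inline void atomicIncrementSketch(Atomic<unsigned>& counter)
{
    for (;;) {
        unsigned oldValue = counter.load();
        // compareExchangeWeak may fail spuriously; on failure we re-load and retry.
        if (counter.compareExchangeWeak(oldValue, oldValue + 1))
            return;
    }
}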
#if OS(WINDOWS)
inline bool weakCompareAndSwap(volatile unsigned* location, unsigned expected, unsigned newValue)
{
    return InterlockedCompareExchange(reinterpret_cast<LONG volatile*>(location), static_cast<LONG>(newValue), static_cast<LONG>(expected)) == static_cast<LONG>(expected);
}

inline bool weakCompareAndSwap(void*volatile* location, void* expected, void* newValue)
{
    return InterlockedCompareExchangePointer(location, newValue, expected) == expected;
}
#else // OS(WINDOWS) --> not windows
inline bool weakCompareAndSwap(volatile unsigned* location, unsigned expected, unsigned newValue)
{
#if ENABLE(COMPARE_AND_SWAP)
#if CPU(X86) || CPU(X86_64)
    unsigned char result;
    asm volatile(
        "lock; cmpxchgl %3, %2\n\t"
        "sete %1"
        : "+a"(expected), "=q"(result), "+m"(*location)
        : "r"(newValue)
        : "memory"
        );
    return result;
#elif CPU(ARM_THUMB2)
    unsigned tmp;
    unsigned result;
    asm volatile(
        "movw %1, #1\n\t"
        "ldrex %2, %0\n\t"
        "cmp %3, %2\n\t"
        "bne.n 0f\n\t"
        "strex %1, %4, %0\n\t"
        "0:"
        : "+Q"(*location), "=&r"(result), "=&r"(tmp)
        : "r"(expected), "r"(newValue)
        : "memory");
    return !result;
#elif CPU(ARM64) && COMPILER(GCC_OR_CLANG)
    unsigned tmp;
    unsigned result;
    asm volatile(
        "mov %w1, #1\n\t"
        "ldxr %w2, [%0]\n\t"
        "cmp %w3, %w2\n\t"
        "b.ne 0f\n\t"
        "stxr %w1, %w4, [%0]\n\t"
        "0:"
        : "+r"(location), "=&r"(result), "=&r"(tmp)
        : "r"(expected), "r"(newValue)
        : "memory");
    return !result;
#elif CPU(ARM64)
    unsigned tmp;
    unsigned result;
    asm volatile(
        "mov %w1, #1\n\t"
        "ldxr %w2, %0\n\t"
        "cmp %w3, %w2\n\t"
        "b.ne 0f\n\t"
        "stxr %w1, %w4, %0\n\t"
        "0:"
        : "+m"(*location), "=&r"(result), "=&r"(tmp)
        : "r"(expected), "r"(newValue)
        : "memory");
    return !result;
#else
#error "Bad architecture for compare and swap."
#endif
#else
    UNUSED_PARAM(location);
    UNUSED_PARAM(expected);
    UNUSED_PARAM(newValue);
    CRASH();
    return false;
#endif
}
inline bool weakCompareAndSwap(void*volatile* location, void* expected, void* newValue)
{
#if ENABLE(COMPARE_AND_SWAP)
#if CPU(X86_64)
    bool result;
    asm volatile(
        "lock; cmpxchgq %3, %2\n\t"
        "sete %1"
        : "+a"(expected), "=q"(result), "+m"(*location)
        : "r"(newValue)
        : "memory"
        );
    return result;
#elif CPU(ARM64) && COMPILER(GCC_OR_CLANG)
    bool result;
    void* tmp;
    asm volatile(
        "mov %w1, #1\n\t"
        "ldxr %x2, [%0]\n\t"
        "cmp %x3, %x2\n\t"
        "b.ne 0f\n\t"
        "stxr %w1, %x4, [%0]\n\t"
        "0:"
        : "+r"(location), "=&r"(result), "=&r"(tmp)
        : "r"(expected), "r"(newValue)
        : "memory");
    return !result;
#elif CPU(ARM64)
    bool result;
    void* tmp;
    asm volatile(
        "mov %w1, #1\n\t"
        "ldxr %x2, %0\n\t"
        "cmp %x3, %x2\n\t"
        "b.ne 0f\n\t"
        "stxr %w1, %x4, %0\n\t"
        "0:"
        : "+m"(*location), "=&r"(result), "=&r"(tmp)
        : "r"(expected), "r"(newValue)
        : "memory");
    return !result;
#else
    return weakCompareAndSwap(bitwise_cast<unsigned*>(location), bitwise_cast<unsigned>(expected), bitwise_cast<unsigned>(newValue));
#endif
#else // ENABLE(COMPARE_AND_SWAP)
    UNUSED_PARAM(location);
    UNUSED_PARAM(expected);
    UNUSED_PARAM(newValue);
    CRASH();
    return false;
#endif // ENABLE(COMPARE_AND_SWAP)
}
#endif // OS(WINDOWS) (end of the not-windows case)
inline bool weakCompareAndSwapUIntPtr(volatile uintptr_t* location, uintptr_t expected, uintptr_t newValue)
{
    return weakCompareAndSwap(reinterpret_cast<void*volatile*>(location), reinterpret_cast<void*>(expected), reinterpret_cast<void*>(newValue));
}

inline bool weakCompareAndSwapSize(volatile size_t* location, size_t expected, size_t newValue)
{
    return weakCompareAndSwap(reinterpret_cast<void*volatile*>(location), reinterpret_cast<void*>(expected), reinterpret_cast<void*>(newValue));
}
// Just a compiler fence. Has no effect on the hardware, but tells the compiler
// not to move things around this call. Should not affect the compiler's ability
// to do things like register allocation and code motion over pure operations.
inline void compilerFence()
{
#if OS(WINDOWS) && !COMPILER(GCC_OR_CLANG)
    _ReadWriteBarrier();
#else
    asm volatile("" ::: "memory");
#endif
}
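// Illustrative sketch only, not part of the original header: compilerFence() constrains
// compiler reordering but emits no hardware barrier, so it is only sufficient when no second
// CPU is involved, e.g. ordering relative to a signal handler on the same thread. The names
// publishToSignalHandlerSketch, sketchData, and sketchReady are hypothetical.
inline void publishToSignalHandlerSketch(int& sketchData, volatile bool& sketchReady)
{
    sketchData = 42; // Must be written before the flag below from the handler's point of view.
    compilerFence(); // Keeps the compiler from sinking the data store past the flag store.
    sketchReady = true;
}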
#if CPU(ARM_THUMB2) || CPU(ARM64)

// Full memory fence. No accesses will float above this, and no accesses will sink
// below it.
inline void armV7_dmb()
{
    asm volatile("dmb sy" ::: "memory");
}

// Like the above, but only affects stores.
inline void armV7_dmb_st()
{
    asm volatile("dmb st" ::: "memory");
}

inline void loadLoadFence() { armV7_dmb(); }
inline void loadStoreFence() { armV7_dmb(); }
inline void storeLoadFence() { armV7_dmb(); }
inline void storeStoreFence() { armV7_dmb_st(); }
inline void memoryBarrierAfterLock() { armV7_dmb(); }
inline void memoryBarrierBeforeUnlock() { armV7_dmb(); }
#elif CPU(X86) || CPU(X86_64)

inline void x86_mfence()
{
#if OS(WINDOWS)
    // I think that this does the equivalent of a dummy interlocked instruction,
    // instead of using the 'mfence' instruction, at least according to MSDN. I
    // know that it is equivalent for our purposes, but it would be good to
    // investigate if that is actually better.
    MemoryBarrier();
#else
    asm volatile("mfence" ::: "memory");
#endif
}

inline void loadLoadFence() { compilerFence(); }
inline void loadStoreFence() { compilerFence(); }
inline void storeLoadFence() { x86_mfence(); }
inline void storeStoreFence() { compilerFence(); }
inline void memoryBarrierAfterLock() { compilerFence(); }
inline void memoryBarrierBeforeUnlock() { compilerFence(); }
#else

inline void loadLoadFence() { compilerFence(); }
inline void loadStoreFence() { compilerFence(); }
inline void storeLoadFence() { compilerFence(); }
inline void storeStoreFence() { compilerFence(); }
inline void memoryBarrierAfterLock() { compilerFence(); }
inline void memoryBarrierBeforeUnlock() { compilerFence(); }

#endif
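// Illustrative sketch only, not part of the original header: store->load is the one
// reordering x86 permits, which is why storeLoadFence() is the only fence above that costs a
// real instruction there. In the classic store-buffering pattern, two threads each publish
// their own flag and then read the other's; without the fence both can observe zero. The
// name storeLoadSketch and its parameters are hypothetical.
inline bool storeLoadSketch(volatile unsigned& myFlag, volatile unsigned& otherFlag)
{
    myFlag = 1;
    storeLoadFence(); // Without this, the store may still sit in the store buffer when the load executes.
    return otherFlag != 0;
}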
inline bool weakCompareAndSwap(uint8_t* location, uint8_t expected, uint8_t newValue)
{
#if ENABLE(COMPARE_AND_SWAP)
#if !OS(WINDOWS) && (CPU(X86) || CPU(X86_64))
    unsigned char result;
    asm volatile(
        "lock; cmpxchgb %3, %2\n\t"
        "sete %1"
        : "+a"(expected), "=q"(result), "+m"(*location)
        : "q"(newValue)
        : "memory"
        );
    return result;
#elif OS(WINDOWS) && CPU(X86)
    // FIXME: We need a 64-bit ASM implementation, but this cannot be inline due to
    // Microsoft's decision to exclude it from the compiler.
    bool result = false;

    __asm {
        mov al, expected
        mov edx, location
        mov cl, newValue
        lock cmpxchg byte ptr[edx], cl
        setz result
    }

    return result;
#else
    // Emulate a byte-wide CAS with a word-wide CAS on the aligned word that contains the
    // byte: splice the expected and new bytes into the word's current value, then CAS the
    // whole word.
    uintptr_t locationValue = bitwise_cast<uintptr_t>(location);
    uintptr_t alignedLocationValue = locationValue & ~(sizeof(unsigned) - 1);
    uintptr_t locationOffset = locationValue - alignedLocationValue;
    ASSERT(locationOffset < sizeof(unsigned));
    unsigned* alignedLocation = bitwise_cast<unsigned*>(alignedLocationValue);
    // Make sure that this load is always issued and never optimized away.
    unsigned oldAlignedValue = *const_cast<volatile unsigned*>(alignedLocation);

    struct Splicer {
        static unsigned splice(unsigned value, uint8_t byte, uintptr_t byteIndex)
        {
            union {
                unsigned word;
                uint8_t bytes[sizeof(unsigned)];
            } u;
            u.word = value;
            u.bytes[byteIndex] = byte;
            return u.word;
        }
    };

    unsigned expectedAlignedValue = Splicer::splice(oldAlignedValue, expected, locationOffset);
    unsigned newAlignedValue = Splicer::splice(oldAlignedValue, newValue, locationOffset);

    return weakCompareAndSwap(alignedLocation, expectedAlignedValue, newAlignedValue);
#endif
#else
    UNUSED_PARAM(location);
    UNUSED_PARAM(expected);
    UNUSED_PARAM(newValue);
    CRASH();
    return false;
#endif
}