// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_UTILS_MEMCOPY_H_
#define V8_UTILS_MEMCOPY_H_

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <type_traits>

#include "src/base/logging.h"
#include "src/base/macros.h"

namespace v8 {
namespace internal {

using Address = uintptr_t;

// ----------------------------------------------------------------------------
// Generated memcpy/memmove for ia32, arm, and mips.

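// Initializes the generated memcpy/memmove routines referenced by the
// architecture-specific MemCopy/MemMove implementations below (e.g.
// {memcopy_uint8_function} on arm and mips). Descriptive note: presumably a
// no-op on architectures that do not use generated routines.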
void init_memcopy_functions();

#if defined(V8_TARGET_ARCH_IA32)
// Limit below which the extra overhead of the MemCopy function is likely
// to outweigh the benefits of faster copying.
const size_t kMinComplexMemCopy = 64;

// Copy memory area. No restrictions.
V8_EXPORT_PRIVATE void MemMove(void* dest, const void* src, size_t size);
using MemMoveFunction = void (*)(void* dest, const void* src, size_t size);

// Keep the distinction of "move" vs. "copy" for the benefit of other
// architectures.
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  MemMove(dest, src, size);
}
#elif defined(V8_HOST_ARCH_ARM)
using MemCopyUint8Function = void (*)(uint8_t* dest, const uint8_t* src,
                                      size_t size);
V8_EXPORT_PRIVATE extern MemCopyUint8Function memcopy_uint8_function;
V8_INLINE void MemCopyUint8Wrapper(uint8_t* dest, const uint8_t* src,
                                   size_t chars) {
  memcpy(dest, src, chars);
}
// For values < 16, the assembler function is slower than the inlined C code.
const size_t kMinComplexMemCopy = 16;
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  (*memcopy_uint8_function)(reinterpret_cast<uint8_t*>(dest),
                            reinterpret_cast<const uint8_t*>(src), size);
}
V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
                                         size_t size) {
  memmove(dest, src, size);
}

// For values < 12, the assembler function is slower than the inlined C code.
const int kMinComplexConvertMemCopy = 12;
#elif defined(V8_HOST_ARCH_MIPS)
using MemCopyUint8Function = void (*)(uint8_t* dest, const uint8_t* src,
                                      size_t size);
V8_EXPORT_PRIVATE extern MemCopyUint8Function memcopy_uint8_function;
V8_INLINE void MemCopyUint8Wrapper(uint8_t* dest, const uint8_t* src,
                                   size_t chars) {
  memcpy(dest, src, chars);
}
// For values < 16, the assembler function is slower than the inlined C code.
const size_t kMinComplexMemCopy = 16;
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  (*memcopy_uint8_function)(reinterpret_cast<uint8_t*>(dest),
                            reinterpret_cast<const uint8_t*>(src), size);
}
V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
                                         size_t size) {
  memmove(dest, src, size);
}
#else
// Copy memory area to disjoint memory area.
inline void MemCopy(void* dest, const void* src, size_t size) {
  // Fast path for small sizes. The compiler will expand the {memcpy} for
  // small fixed sizes to a sequence of move instructions. This avoids the
  // overhead of the general {memcpy} function.
  switch (size) {
#define CASE(N)           \
  case N:                 \
    memcpy(dest, src, N); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      memcpy(dest, src, size);
      return;
  }
}
V8_EXPORT_PRIVATE inline void MemMove(void* dest, const void* src,
                                      size_t size) {
  // Fast path for small sizes. The compiler will expand the {memmove} for
  // small fixed sizes to a sequence of move instructions. This avoids the
  // overhead of the general {memmove} function.
  switch (size) {
#define CASE(N)            \
  case N:                  \
    memmove(dest, src, N); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      memmove(dest, src, size);
      return;
  }
}
const size_t kMinComplexMemCopy = 8;
#endif  // V8_TARGET_ARCH_IA32
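
// Usage sketch (illustrative only; {scratch} is a hypothetical local buffer):
// MemCopy requires the source and destination ranges to be disjoint, while
// MemMove, like memmove, tolerates overlap.
//
//   char scratch[16];
//   MemCopy(scratch, "hello", 6);      // Disjoint ranges.
//   MemMove(scratch + 1, scratch, 6);  // Overlapping shift within the buffer.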

// Copies words from |src| to |dst|. The data spans must not overlap.
// |src| and |dst| must be TWord-size aligned.
template <size_t kBlockCopyLimit, typename T>
inline void CopyImpl(T* dst_ptr, const T* src_ptr, size_t count) {
  constexpr int kTWordSize = sizeof(T);
#ifdef DEBUG
  Address dst = reinterpret_cast<Address>(dst_ptr);
  Address src = reinterpret_cast<Address>(src_ptr);
  DCHECK(IsAligned(dst, kTWordSize));
  DCHECK(IsAligned(src, kTWordSize));
  DCHECK(((src <= dst) && ((src + count * kTWordSize) <= dst)) ||
         ((dst <= src) && ((dst + count * kTWordSize) <= src)));
#endif
  if (count == 0) return;

  // Use block copying MemCopy if the segment we're copying is big enough to
  // justify the extra call/setup overhead.
  if (count < kBlockCopyLimit) {
    do {
      count--;
      *dst_ptr++ = *src_ptr++;
    } while (count > 0);
  } else {
    MemCopy(dst_ptr, src_ptr, count * kTWordSize);
  }
}

// Copies kSystemPointerSize-sized words from |src| to |dst|. The data spans
// must not overlap. |src| and |dst| must be kSystemPointerSize-aligned.
inline void CopyWords(Address dst, const Address src, size_t num_words) {
  static const size_t kBlockCopyLimit = 16;
  CopyImpl<kBlockCopyLimit>(reinterpret_cast<Address*>(dst),
                            reinterpret_cast<const Address*>(src), num_words);
}
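
// Usage sketch (illustrative only; the local word buffers are hypothetical):
// both addresses must be kSystemPointerSize-aligned and the spans must not
// overlap.
//
//   Address src_words[3] = {1, 2, 3};
//   Address dst_words[3];
//   CopyWords(reinterpret_cast<Address>(dst_words),
//             reinterpret_cast<Address>(src_words), 3);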

// Copies data from |src| to |dst|. The data spans must not overlap.
template <typename T>
inline void CopyBytes(T* dst, const T* src, size_t num_bytes) {
  STATIC_ASSERT(sizeof(T) == 1);
  if (num_bytes == 0) return;
  CopyImpl<kMinComplexMemCopy>(dst, src, num_bytes);
}
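
// Usage sketch (illustrative only; the local byte buffers are hypothetical):
// CopyBytes is restricted to byte-sized element types by the STATIC_ASSERT.
//
//   uint8_t from[4] = {'v', '8', '!', '\0'};
//   uint8_t to[4];
//   CopyBytes(to, from, sizeof(from));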

inline void MemsetUint32(uint32_t* dest, uint32_t value, size_t counter) {
#if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
#define STOS "stosl"
#endif

#if defined(MEMORY_SANITIZER)
  // MemorySanitizer does not understand inline assembly.
#undef STOS
#endif

#if defined(__GNUC__) && defined(STOS)
  asm volatile(
      "cld;"
      "rep ; " STOS
      : "+&c"(counter), "+&D"(dest)
      : "a"(value)
      : "memory", "cc");
#else
  for (size_t i = 0; i < counter; i++) {
    dest[i] = value;
  }
#endif

#undef STOS
}
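
// Usage sketch (illustrative only; the local buffer and fill value are
// hypothetical): fill 64 uint32_t slots with the same value.
//
//   uint32_t filler[64];
//   MemsetUint32(filler, 0xDEADBEEF, 64);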

inline void MemsetPointer(Address* dest, Address value, size_t counter) {
#if V8_HOST_ARCH_IA32
#define STOS "stosl"
#elif V8_HOST_ARCH_X64
#define STOS "stosq"
#endif

#if defined(MEMORY_SANITIZER)
  // MemorySanitizer does not understand inline assembly.
#undef STOS
#endif

#if defined(__GNUC__) && defined(STOS)
  asm volatile(
      "cld;"
      "rep ; " STOS
      : "+&c"(counter), "+&D"(dest)
      : "a"(value)
      : "memory", "cc");
#else
  for (size_t i = 0; i < counter; i++) {
    dest[i] = value;
  }
#endif

#undef STOS
}
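
// Usage sketch (illustrative only; the slot array is hypothetical): fill a
// block of pointer-sized slots with the same value.
//
//   Address slots[8];
//   MemsetPointer(slots, Address{0}, 8);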

template <typename T, typename U>
inline void MemsetPointer(T** dest, U* value, size_t counter) {
#ifdef DEBUG
  T* a = nullptr;
  U* b = nullptr;
  a = b;  // Fake assignment to check assignability.
  USE(a);
#endif  // DEBUG
  MemsetPointer(reinterpret_cast<Address*>(dest),
                reinterpret_cast<Address>(value), counter);
}
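
// Usage sketch (illustrative only; {Node} is a hypothetical type): the debug
// check above verifies that {U*} is assignable to {T*}.
//
//   struct Node;
//   Node* table[16];
//   MemsetPointer(table, static_cast<Node*>(nullptr), 16);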

// Copy from 8bit/16bit chars to 8bit/16bit chars. Values are zero-extended if
// needed. Ranges are not allowed to overlap.
// The separate declaration is needed for the V8_NONNULL, which is not allowed
// on a definition.
template <typename SrcType, typename DstType>
void CopyChars(DstType* dst, const SrcType* src, size_t count) V8_NONNULL(1, 2);

template <typename SrcType, typename DstType>
void CopyChars(DstType* dst, const SrcType* src, size_t count) {
  STATIC_ASSERT(std::is_integral<SrcType>::value);
  STATIC_ASSERT(std::is_integral<DstType>::value);
  using SrcTypeUnsigned = typename std::make_unsigned<SrcType>::type;
  using DstTypeUnsigned = typename std::make_unsigned<DstType>::type;

#ifdef DEBUG
  // Check for no overlap, otherwise {std::copy_n} cannot be used.
  Address src_start = reinterpret_cast<Address>(src);
  Address src_end = src_start + count * sizeof(SrcType);
  Address dst_start = reinterpret_cast<Address>(dst);
  Address dst_end = dst_start + count * sizeof(DstType);
  DCHECK(src_end <= dst_start || dst_end <= src_start);
#endif

  auto* dst_u = reinterpret_cast<DstTypeUnsigned*>(dst);
  auto* src_u = reinterpret_cast<const SrcTypeUnsigned*>(src);

  // Atom CPUs in particular profit from this explicit instantiation for small
  // counts. This gives up to a 20 percent improvement for microbenchmarks such
  // as joining an array of small integers (2019-10-16).
  switch (count) {
#define CASE(N)                   \
  case N:                         \
    std::copy_n(src_u, N, dst_u); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      std::copy_n(src_u, count, dst_u);
      return;
  }
}
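
// Usage sketch (illustrative only; the local buffers are hypothetical):
// widening one-byte characters to two-byte characters zero-extends each value.
//
//   const uint8_t one_byte[3] = {'f', 'o', 'o'};
//   uint16_t two_byte[3];
//   CopyChars(two_byte, one_byte, 3);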

}  // namespace internal
}  // namespace v8

#endif  // V8_UTILS_MEMCOPY_H_