// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_JUMP_TABLE_ASSEMBLER_H_
#define V8_WASM_JUMP_TABLE_ASSEMBLER_H_

#include "src/codegen/macro-assembler.h"

namespace v8 {
namespace internal {
namespace wasm {

// The jump table is the central dispatch point for all (direct and indirect)
// invocations in WebAssembly. It holds one slot per function in a module, with
// each slot containing a dispatch to the currently published {WasmCode} that
// corresponds to the function.
//
// In addition to this main jump table, there exist special jump tables for
// other purposes:
// - the far stub table contains one entry per wasm runtime stub (see
//   {WasmCode::RuntimeStubId}), which jumps to the corresponding embedded
//   builtin, plus (if the full address space cannot be reached via the jump
//   table) one entry per wasm function; see the layout sketch below.
// - the lazy compile table contains one entry per wasm function, which jumps
//   to the common {WasmCompileLazy} builtin and passes the index of the
//   function that was invoked.
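//
// As a sketch, a far stub table (called "far jump table" in the code below,
// see {GenerateFarJumpTable}) with {k} runtime stub slots and {n} function
// slots is laid out as:
//
//   [ stub 0 | stub 1 | ... | stub k-1 | func 0 | func 1 | ... | func n-1 ]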
//
// The main jump table is split into lines of fixed size, with lines laid out
// consecutively within the executable memory of the {NativeModule}. The slots
// in turn are consecutive within a line, but do not cross line boundaries.
//
// +- L1 -------------------+ +- L2 -------------------+ +- L3 ...
// | S1 | S2 | ... | Sn | x | | S1 | S2 | ... | Sn | x | | S1 ...
// +------------------------+ +------------------------+ +---- ...
//
// The above illustrates jump table lines {Li} containing slots {Si} with each
// line containing {n} slots and some padding {x} for alignment purposes.
// Other jump tables are just consecutive.
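//
// As a concrete example (using the x64 constants defined below), a line is
// {kJumpTableLineSize} = 64 bytes and a slot is {kJumpTableSlotSize} = 5
// bytes, so each line holds {kJumpTableSlotsPerLine} = 12 slots, followed by
// 4 bytes of padding.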
//
// The main jump table will be patched concurrently while other threads execute
// it. The code at the new target might also have been emitted concurrently, so
// we need to ensure that there is proper synchronization between code emission,
// jump table patching and code execution.
// On Intel platforms, this all works out of the box because there is cache
// coherency between i-cache and d-cache.
// On ARM, it is safe because the i-cache flush after code emission executes an
// "ic ivau" (Instruction Cache line Invalidate by Virtual Address to Point of
// Unification), which broadcasts to all cores. A core which sees the jump table
// update thus also sees the new code. Since the other core does not explicitly
// execute an "isb" (Instruction Synchronization Barrier), it might still
// execute the old code afterwards, which is not a problem, since that code
// remains available until it is garbage collected. Garbage collection itself
// is a synchronization barrier, though.
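//
// Roughly, the expected publishing sequence on the emitting thread is (a
// sketch; the actual orchestration is done by the surrounding code, e.g. the
// {NativeModule}):
//   1. emit the new code into the module's code space,
//   2. flush the instruction cache for the newly emitted code,
//   3. patch the jump table slot via {PatchJumpTableSlot} (which flushes the
//      patched slot itself).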
class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler {
 public:
  // Translate an offset into the continuous jump table to a jump table index.
  static uint32_t SlotOffsetToIndex(uint32_t slot_offset) {
    uint32_t line_index = slot_offset / kJumpTableLineSize;
    uint32_t line_offset = slot_offset % kJumpTableLineSize;
    DCHECK_EQ(0, line_offset % kJumpTableSlotSize);
    return line_index * kJumpTableSlotsPerLine +
           line_offset / kJumpTableSlotSize;
  }

  // Translate a jump table index to an offset into the continuous jump table.
  static uint32_t JumpSlotIndexToOffset(uint32_t slot_index) {
    uint32_t line_index = slot_index / kJumpTableSlotsPerLine;
    uint32_t line_offset =
        (slot_index % kJumpTableSlotsPerLine) * kJumpTableSlotSize;
    return line_index * kJumpTableLineSize + line_offset;
  }

  // Determine the size of a jump table containing the given number of slots.
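  // For example, on x64 (12 slots per 64-byte line) a table with 100 slots
  // needs ceil(100 / 12) = 9 lines, i.e. 9 * 64 = 576 bytes.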
  static constexpr uint32_t SizeForNumberOfSlots(uint32_t slot_count) {
    return ((slot_count + kJumpTableSlotsPerLine - 1) /
            kJumpTableSlotsPerLine) *
           kJumpTableLineSize;
  }

  // Translate a far jump table index to an offset into the table.
  static uint32_t FarJumpSlotIndexToOffset(uint32_t slot_index) {
    return slot_index * kFarJumpTableSlotSize;
  }

  // Translate a far jump table offset to the index into the table.
  static uint32_t FarJumpSlotOffsetToIndex(uint32_t offset) {
    DCHECK_EQ(0, offset % kFarJumpTableSlotSize);
    return offset / kFarJumpTableSlotSize;
  }

  // Determine the size of a far jump table containing the given number of
  // slots.
  static constexpr uint32_t SizeForNumberOfFarJumpSlots(
      int num_runtime_slots, int num_function_slots) {
    int num_entries = num_runtime_slots + num_function_slots;
    return num_entries * kFarJumpTableSlotSize;
  }

  // Translate a slot index to an offset into the lazy compile table.
  static uint32_t LazyCompileSlotIndexToOffset(uint32_t slot_index) {
    return slot_index * kLazyCompileTableSlotSize;
  }

  // Determine the size of a lazy compile table.
  static constexpr uint32_t SizeForNumberOfLazyFunctions(uint32_t slot_count) {
    return slot_count * kLazyCompileTableSlotSize;
  }

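  // Generate the lazy compile table at {base}: one slot per declared function,
  // each jumping to {wasm_compile_lazy_target} and passing the absolute index
  // (i.e. including imported functions) of its function.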
  static void GenerateLazyCompileTable(Address base, uint32_t num_slots,
                                       uint32_t num_imported_functions,
                                       Address wasm_compile_lazy_target) {
    uint32_t lazy_compile_table_size = num_slots * kLazyCompileTableSlotSize;
    // Assume enough space, so the Assembler does not try to grow the buffer.
    JumpTableAssembler jtasm(base, lazy_compile_table_size + 256);
    for (uint32_t slot_index = 0; slot_index < num_slots; ++slot_index) {
      DCHECK_EQ(slot_index * kLazyCompileTableSlotSize, jtasm.pc_offset());
      jtasm.EmitLazyCompileJumpSlot(slot_index + num_imported_functions,
                                    wasm_compile_lazy_target);
    }
    DCHECK_EQ(lazy_compile_table_size, jtasm.pc_offset());
    FlushInstructionCache(base, lazy_compile_table_size);
  }

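  // Generate the far jump table at {base}: the first {num_runtime_slots} slots
  // jump to the given {stub_targets}; the following {num_function_slots} slots
  // initially jump to themselves and are only used after being patched via
  // {PatchFarJumpSlot}.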
  static void GenerateFarJumpTable(Address base, Address* stub_targets,
                                   int num_runtime_slots,
                                   int num_function_slots) {
    uint32_t table_size =
        SizeForNumberOfFarJumpSlots(num_runtime_slots, num_function_slots);
    // Assume enough space, so the Assembler does not try to grow the buffer.
    JumpTableAssembler jtasm(base, table_size + 256);
    int offset = 0;
    for (int index = 0; index < num_runtime_slots + num_function_slots;
         ++index) {
      DCHECK_EQ(offset, FarJumpSlotIndexToOffset(index));
      // Function slots initially jump to themselves. They are patched before
      // being used.
      Address target =
          index < num_runtime_slots ? stub_targets[index] : base + offset;
      jtasm.EmitFarJumpSlot(target);
      offset += kFarJumpTableSlotSize;
      DCHECK_EQ(offset, jtasm.pc_offset());
    }
    FlushInstructionCache(base, table_size);
  }

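  // Patch the jump table slot at {jump_table_slot} to jump to {target}. If a
  // direct jump to {target} does not fit the slot, the far jump table slot at
  // {far_jump_table_slot} is patched instead, and the jump table slot is made
  // to jump to that far jump table slot. A hypothetical call site (the names
  // {jump_table_start}, {far_jump_table_start}, {num_runtime_slots},
  // {slot_index} and {new_target} are illustrative only) could look roughly
  // like this:
  //
  //   Address slot = jump_table_start + JumpSlotIndexToOffset(slot_index);
  //   Address far_slot =
  //       far_jump_table_start +
  //       FarJumpSlotIndexToOffset(num_runtime_slots + slot_index);
  //   JumpTableAssembler::PatchJumpTableSlot(slot, far_slot, new_target);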
  static void PatchJumpTableSlot(Address jump_table_slot,
                                 Address far_jump_table_slot, Address target) {
    // First, try to patch the jump table slot.
    JumpTableAssembler jtasm(jump_table_slot);
    if (!jtasm.EmitJumpSlot(target)) {
      // If that fails, we need to patch the far jump table slot, and then
      // update the jump table slot to jump to this far jump table slot.
      DCHECK_NE(kNullAddress, far_jump_table_slot);
      JumpTableAssembler::PatchFarJumpSlot(far_jump_table_slot, target);
      CHECK(jtasm.EmitJumpSlot(far_jump_table_slot));
    }
    jtasm.NopBytes(kJumpTableSlotSize - jtasm.pc_offset());
    FlushInstructionCache(jump_table_slot, kJumpTableSlotSize);
  }

 private:
  // Instantiate a {JumpTableAssembler} for patching.
  explicit JumpTableAssembler(Address slot_addr, int size = 256)
      : MacroAssembler(nullptr, JumpTableAssemblerOptions(),
                       CodeObjectRequired::kNo,
                       ExternalAssemblerBuffer(
                           reinterpret_cast<uint8_t*>(slot_addr), size)) {}

  // To allow concurrent patching of the jump table entries, we need to ensure
  // that the instruction containing the call target does not cross cache-line
  // boundaries. The jump table line size has been chosen to satisfy this.
#if V8_TARGET_ARCH_X64
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 5;
  static constexpr int kFarJumpTableSlotSize = 16;
  static constexpr int kLazyCompileTableSlotSize = 10;
#elif V8_TARGET_ARCH_IA32
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 5;
  static constexpr int kFarJumpTableSlotSize = 5;
  static constexpr int kLazyCompileTableSlotSize = 10;
#elif V8_TARGET_ARCH_ARM
  static constexpr int kJumpTableLineSize = 3 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 3 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 2 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 5 * kInstrSize;
#elif V8_TARGET_ARCH_ARM64 && V8_ENABLE_CONTROL_FLOW_INTEGRITY
  static constexpr int kJumpTableLineSize = 2 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 2 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 4 * kInstrSize;
#elif V8_TARGET_ARCH_ARM64 && !V8_ENABLE_CONTROL_FLOW_INTEGRITY
  static constexpr int kJumpTableLineSize = 1 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 1 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 3 * kInstrSize;
#elif V8_TARGET_ARCH_S390X
  static constexpr int kJumpTableLineSize = 128;
  static constexpr int kJumpTableSlotSize = 14;
  static constexpr int kFarJumpTableSlotSize = 14;
  static constexpr int kLazyCompileTableSlotSize = 20;
#elif V8_TARGET_ARCH_PPC64
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 7 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 7 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 12 * kInstrSize;
#elif V8_TARGET_ARCH_MIPS
  static constexpr int kJumpTableLineSize = 8 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 8 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 6 * kInstrSize;
#elif V8_TARGET_ARCH_MIPS64
  static constexpr int kJumpTableLineSize = 8 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 8 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize;
#else
#error Unknown architecture.
#endif

  static constexpr int kJumpTableSlotsPerLine =
      kJumpTableLineSize / kJumpTableSlotSize;
  STATIC_ASSERT(kJumpTableSlotsPerLine >= 1);

  // {JumpTableAssembler} is never used during snapshot generation, and its
  // code must be independent of the code range of any isolate anyway. Just
  // ensure that no relocation information is recorded: there is no buffer to
  // store it, since the assembler is instantiated in patching mode directly on
  // existing code.
  static AssemblerOptions JumpTableAssemblerOptions() {
    AssemblerOptions options;
    options.disable_reloc_info_for_patching = true;
    return options;
  }

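  // Emit a lazy compile table slot which passes {func_index} and jumps to the
  // {lazy_compile_target}.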
  void EmitLazyCompileJumpSlot(uint32_t func_index,
                               Address lazy_compile_target);

  // Returns {true} if the jump fits in the jump table slot, {false} otherwise.
  bool EmitJumpSlot(Address target);

  // Initially emit a far jump slot.
  void EmitFarJumpSlot(Address target);

  // Patch an existing far jump slot, and make sure that this update eventually
  // becomes available to all execution units that might execute this code.
  static void PatchFarJumpSlot(Address slot, Address target);

  void NopBytes(int bytes);
};

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_JUMP_TABLE_ASSEMBLER_H_