| // Copyright 2013 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/compiler/backend/code-generator.h" |
| |
| #include <limits> |
| |
| #include "src/base/overflowing-math.h" |
| #include "src/codegen/macro-assembler.h" |
| #include "src/codegen/optimized-compilation-info.h" |
| #include "src/codegen/x64/assembler-x64.h" |
| #include "src/compiler/backend/code-generator-impl.h" |
| #include "src/compiler/backend/gap-resolver.h" |
| #include "src/compiler/node-matchers.h" |
| #include "src/compiler/osr.h" |
| #include "src/heap/heap-inl.h" // crbug.com/v8/8499 |
| #include "src/objects/smi.h" |
| #include "src/wasm/wasm-code-manager.h" |
| #include "src/wasm/wasm-objects.h" |
| |
| namespace v8 { |
| namespace internal { |
| namespace compiler { |
| |
| #define __ tasm()-> |
| |
| // Adds X64-specific methods for decoding operands. |
| class X64OperandConverter : public InstructionOperandConverter { |
| public: |
| X64OperandConverter(CodeGenerator* gen, Instruction* instr) |
| : InstructionOperandConverter(gen, instr) {} |
| |
| Immediate InputImmediate(size_t index) { |
| return ToImmediate(instr_->InputAt(index)); |
| } |
| |
| Operand InputOperand(size_t index, int extra = 0) { |
| return ToOperand(instr_->InputAt(index), extra); |
| } |
| |
| Operand OutputOperand() { return ToOperand(instr_->Output()); } |
| |
| Immediate ToImmediate(InstructionOperand* operand) { |
| Constant constant = ToConstant(operand); |
| if (constant.type() == Constant::kFloat64) { |
| DCHECK_EQ(0, constant.ToFloat64().AsUint64()); |
| return Immediate(0); |
| } |
| if (RelocInfo::IsWasmReference(constant.rmode())) { |
| return Immediate(constant.ToInt32(), constant.rmode()); |
| } |
| return Immediate(constant.ToInt32()); |
| } |
| |
| Operand ToOperand(InstructionOperand* op, int extra = 0) { |
| DCHECK(op->IsStackSlot() || op->IsFPStackSlot()); |
| return SlotToOperand(AllocatedOperand::cast(op)->index(), extra); |
| } |
| |
| Operand SlotToOperand(int slot_index, int extra = 0) { |
| FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index); |
| return Operand(offset.from_stack_pointer() ? rsp : rbp, |
| offset.offset() + extra); |
| } |
| |
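| // Returns the current input offset and post-increments it; used to walk |
| // an instruction's inputs while decoding a memory operand below. |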
| static size_t NextOffset(size_t* offset) { |
| size_t i = *offset; |
| (*offset)++; |
| return i; |
| } |
| |
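| // Maps an addressing mode onto an x64 SIB scale factor. The *1/*2/*4/*8 |
| // addressing modes are declared consecutively, so the distance from the |
| // *1 mode gives the scale: e.g. kMode_MR4 - kMode_MR1 == 2 == times_4. |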
| static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) { |
| STATIC_ASSERT(0 == static_cast<int>(times_1)); |
| STATIC_ASSERT(1 == static_cast<int>(times_2)); |
| STATIC_ASSERT(2 == static_cast<int>(times_4)); |
| STATIC_ASSERT(3 == static_cast<int>(times_8)); |
| int scale = static_cast<int>(mode - one); |
| DCHECK(scale >= 0 && scale < 4); |
| return static_cast<ScaleFactor>(scale); |
| } |
| |
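| // Decodes the memory operand encoded in this instruction's inputs, |
| // starting at input *offset and advancing *offset past the inputs used. |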
| Operand MemoryOperand(size_t* offset) { |
| AddressingMode mode = AddressingModeField::decode(instr_->opcode()); |
| switch (mode) { |
| case kMode_MR: { |
| Register base = InputRegister(NextOffset(offset)); |
| int32_t disp = 0; |
| return Operand(base, disp); |
| } |
| case kMode_MRI: { |
| Register base = InputRegister(NextOffset(offset)); |
| int32_t disp = InputInt32(NextOffset(offset)); |
| return Operand(base, disp); |
| } |
| case kMode_MR1: |
| case kMode_MR2: |
| case kMode_MR4: |
| case kMode_MR8: { |
| Register base = InputRegister(NextOffset(offset)); |
| Register index = InputRegister(NextOffset(offset)); |
| ScaleFactor scale = ScaleFor(kMode_MR1, mode); |
| int32_t disp = 0; |
| return Operand(base, index, scale, disp); |
| } |
| case kMode_MR1I: |
| case kMode_MR2I: |
| case kMode_MR4I: |
| case kMode_MR8I: { |
| Register base = InputRegister(NextOffset(offset)); |
| Register index = InputRegister(NextOffset(offset)); |
| ScaleFactor scale = ScaleFor(kMode_MR1I, mode); |
| int32_t disp = InputInt32(NextOffset(offset)); |
| return Operand(base, index, scale, disp); |
| } |
| case kMode_M1: { |
| Register base = InputRegister(NextOffset(offset)); |
| int32_t disp = 0; |
| return Operand(base, disp); |
| } |
| case kMode_M2: |
| UNREACHABLE(); // Should use kMode_MR1 (base == index) instead; it has a more compact encoding |
| return Operand(no_reg, 0); |
| case kMode_M4: |
| case kMode_M8: { |
| Register index = InputRegister(NextOffset(offset)); |
| ScaleFactor scale = ScaleFor(kMode_M1, mode); |
| int32_t disp = 0; |
| return Operand(index, scale, disp); |
| } |
| case kMode_M1I: |
| case kMode_M2I: |
| case kMode_M4I: |
| case kMode_M8I: { |
| Register index = InputRegister(NextOffset(offset)); |
| ScaleFactor scale = ScaleFor(kMode_M1I, mode); |
| int32_t disp = InputInt32(NextOffset(offset)); |
| return Operand(index, scale, disp); |
| } |
| case kMode_Root: { |
| Register base = kRootRegister; |
| int32_t disp = InputInt32(NextOffset(offset)); |
| return Operand(base, disp); |
| } |
| case kMode_None: |
| UNREACHABLE(); |
| } |
| UNREACHABLE(); |
| } |
| |
| Operand MemoryOperand(size_t first_input = 0) { |
| return MemoryOperand(&first_input); |
| } |
| }; |
| |
| namespace { |
| |
| bool HasImmediateInput(Instruction* instr, size_t index) { |
| return instr->InputAt(index)->IsImmediate(); |
| } |
| |
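| // Out-of-line paths that materialize a quiet NaN in the result register |
| // by computing 0 / 0 of the corresponding width. |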
| class OutOfLineLoadFloat32NaN final : public OutOfLineCode { |
| public: |
| OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result) |
| : OutOfLineCode(gen), result_(result) {} |
| |
| void Generate() final { |
| __ Xorps(result_, result_); |
| __ Divss(result_, result_); |
| } |
| |
| private: |
| XMMRegister const result_; |
| }; |
| |
| class OutOfLineLoadFloat64NaN final : public OutOfLineCode { |
| public: |
| OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result) |
| : OutOfLineCode(gen), result_(result) {} |
| |
| void Generate() final { |
| __ Xorpd(result_, result_); |
| __ Divsd(result_, result_); |
| } |
| |
| private: |
| XMMRegister const result_; |
| }; |
| |
| class OutOfLineTruncateDoubleToI final : public OutOfLineCode { |
| public: |
| OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result, |
| XMMRegister input, StubCallMode stub_mode, |
| UnwindingInfoWriter* unwinding_info_writer) |
| : OutOfLineCode(gen), |
| result_(result), |
| input_(input), |
| stub_mode_(stub_mode), |
| unwinding_info_writer_(unwinding_info_writer), |
| isolate_(gen->isolate()), |
| zone_(gen->zone()) {} |
| |
| void Generate() final { |
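| // Spill the input below rsp; the fallback call expects its argument in |
| // that stack slot and leaves the result there as well. |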
| __ AllocateStackSpace(kDoubleSize); |
| unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kDoubleSize); |
| __ Movsd(MemOperand(rsp, 0), input_); |
| if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) { |
| // A direct call to a wasm runtime stub defined in this module. |
| // Just encode the stub index. This will be patched when the code |
| // is added to the native module and copied into wasm code space. |
| __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); |
| } else { |
| __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET); |
| } |
| __ movl(result_, MemOperand(rsp, 0)); |
| __ addq(rsp, Immediate(kDoubleSize)); |
| unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| -kDoubleSize); |
| } |
| |
| private: |
| Register const result_; |
| XMMRegister const input_; |
| StubCallMode stub_mode_; |
| UnwindingInfoWriter* const unwinding_info_writer_; |
| Isolate* isolate_; |
| Zone* zone_; |
| }; |
| |
| class OutOfLineRecordWrite final : public OutOfLineCode { |
| public: |
| OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand, |
| Register value, Register scratch0, Register scratch1, |
| RecordWriteMode mode, StubCallMode stub_mode) |
| : OutOfLineCode(gen), |
| object_(object), |
| operand_(operand), |
| value_(value), |
| scratch0_(scratch0), |
| scratch1_(scratch1), |
| mode_(mode), |
| stub_mode_(stub_mode), |
| zone_(gen->zone()) {} |
| |
| void Generate() final { |
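| // Filter out stores that need no barrier: Smi values (when the mode |
| // permits them) and values whose page is not interesting to the write |
| // barrier. |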
| if (mode_ > RecordWriteMode::kValueIsPointer) { |
| __ JumpIfSmi(value_, exit()); |
| } |
| if (COMPRESS_POINTERS_BOOL) { |
| __ DecompressTaggedPointer(value_, value_); |
| } |
| __ CheckPageFlag(value_, scratch0_, |
| MemoryChunk::kPointersToHereAreInterestingMask, zero, |
| exit()); |
| __ leaq(scratch1_, operand_); |
| |
| RememberedSetAction const remembered_set_action = |
| mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET |
| : OMIT_REMEMBERED_SET; |
| SaveFPRegsMode const save_fp_mode = |
| frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs; |
| |
| if (mode_ == RecordWriteMode::kValueIsEphemeronKey) { |
| __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode); |
| } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) { |
| // A direct call to a wasm runtime stub defined in this module. |
| // Just encode the stub index. This will be patched when the code |
| // is added to the native module and copied into wasm code space. |
| __ CallRecordWriteStub(object_, scratch1_, remembered_set_action, |
| save_fp_mode, wasm::WasmCode::kWasmRecordWrite); |
| } else { |
| __ CallRecordWriteStub(object_, scratch1_, remembered_set_action, |
| save_fp_mode); |
| } |
| } |
| |
| private: |
| Register const object_; |
| Operand const operand_; |
| Register const value_; |
| Register const scratch0_; |
| Register const scratch1_; |
| RecordWriteMode const mode_; |
| StubCallMode const stub_mode_; |
| Zone* zone_; |
| }; |
| |
| class WasmOutOfLineTrap : public OutOfLineCode { |
| public: |
| WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr) |
| : OutOfLineCode(gen), gen_(gen), instr_(instr) {} |
| |
| void Generate() override { |
| X64OperandConverter i(gen_, instr_); |
| TrapId trap_id = |
| static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1)); |
| GenerateWithTrapId(trap_id); |
| } |
| |
| protected: |
| CodeGenerator* gen_; |
| |
| void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); } |
| |
| private: |
| void GenerateCallToTrap(TrapId trap_id) { |
| if (!gen_->wasm_runtime_exception_support()) { |
| // We cannot test calls to the runtime in cctest/test-run-wasm. |
| // Therefore we emit a call to C here instead of a call to the runtime. |
| __ PrepareCallCFunction(0); |
| __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), |
| 0); |
| __ LeaveFrame(StackFrame::WASM_COMPILED); |
| auto call_descriptor = gen_->linkage()->GetIncomingDescriptor(); |
| size_t pop_size = |
| call_descriptor->StackParameterCount() * kSystemPointerSize; |
| // Use rcx as a scratch register; we return immediately anyway. |
| __ Ret(static_cast<int>(pop_size), rcx); |
| } else { |
| gen_->AssembleSourcePosition(instr_); |
| // A direct call to a wasm runtime stub defined in this module. |
| // Just encode the stub index. This will be patched when the code |
| // is added to the native module and copied into wasm code space. |
| __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL); |
| ReferenceMap* reference_map = |
| new (gen_->zone()) ReferenceMap(gen_->zone()); |
| gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt); |
| __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap); |
| } |
| } |
| |
| Instruction* instr_; |
| }; |
| |
| class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap { |
| public: |
| WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr) |
| : WasmOutOfLineTrap(gen, instr), pc_(pc) {} |
| |
| void Generate() final { |
| gen_->AddProtectedInstructionLanding(pc_, __ pc_offset()); |
| GenerateWithTrapId(TrapId::kTrapMemOutOfBounds); |
| } |
| |
| private: |
| int pc_; |
| }; |
| |
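| // For protected memory accesses, registers an out-of-line trap handler |
| // that serves as the landing pad when the access faults. |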
| void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen, |
| InstructionCode opcode, Instruction* instr, |
| X64OperandConverter& i, // NOLINT(runtime/references) |
| int pc) { |
| const MemoryAccessMode access_mode = |
| static_cast<MemoryAccessMode>(MiscField::decode(opcode)); |
| if (access_mode == kMemoryAccessProtected) { |
| new (zone) WasmProtectedInstructionTrap(codegen, pc, instr); |
| } |
| } |
| |
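| // Masks a just-loaded value with the speculation poison register, so a |
| // misspeculated load reads as zero. |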
| void EmitWordLoadPoisoningIfNeeded( |
| CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, |
| X64OperandConverter& i) { // NOLINT(runtime/references) |
| const MemoryAccessMode access_mode = |
| static_cast<MemoryAccessMode>(MiscField::decode(opcode)); |
| if (access_mode == kMemoryAccessPoisoned) { |
| Register value = i.OutputRegister(); |
| codegen->tasm()->andq(value, kSpeculationPoisonRegister); |
| } |
| } |
| |
| } // namespace |
| |
| #define ASSEMBLE_UNOP(asm_instr) \ |
| do { \ |
| if (instr->Output()->IsRegister()) { \ |
| __ asm_instr(i.OutputRegister()); \ |
| } else { \ |
| __ asm_instr(i.OutputOperand()); \ |
| } \ |
| } while (false) |
| |
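| // Emits a binary op in whichever form matches the inputs: memory operand, |
| // register/immediate, register/register, or register/memory. |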
| #define ASSEMBLE_BINOP(asm_instr) \ |
| do { \ |
| if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ |
| size_t index = 1; \ |
| Operand right = i.MemoryOperand(&index); \ |
| __ asm_instr(i.InputRegister(0), right); \ |
| } else { \ |
| if (HasImmediateInput(instr, 1)) { \ |
| if (instr->InputAt(0)->IsRegister()) { \ |
| __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ |
| } else { \ |
| __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ |
| } \ |
| } else { \ |
| if (instr->InputAt(1)->IsRegister()) { \ |
| __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ |
| } else { \ |
| __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ |
| } \ |
| } \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_COMPARE(asm_instr) \ |
| do { \ |
| if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ |
| size_t index = 0; \ |
| Operand left = i.MemoryOperand(&index); \ |
| if (HasImmediateInput(instr, index)) { \ |
| __ asm_instr(left, i.InputImmediate(index)); \ |
| } else { \ |
| __ asm_instr(left, i.InputRegister(index)); \ |
| } \ |
| } else { \ |
| if (HasImmediateInput(instr, 1)) { \ |
| if (instr->InputAt(0)->IsRegister()) { \ |
| __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ |
| } else { \ |
| __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ |
| } \ |
| } else { \ |
| if (instr->InputAt(1)->IsRegister()) { \ |
| __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ |
| } else { \ |
| __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ |
| } \ |
| } \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_MULT(asm_instr) \ |
| do { \ |
| if (HasImmediateInput(instr, 1)) { \ |
| if (instr->InputAt(0)->IsRegister()) { \ |
| __ asm_instr(i.OutputRegister(), i.InputRegister(0), \ |
| i.InputImmediate(1)); \ |
| } else { \ |
| __ asm_instr(i.OutputRegister(), i.InputOperand(0), \ |
| i.InputImmediate(1)); \ |
| } \ |
| } else { \ |
| if (instr->InputAt(1)->IsRegister()) { \ |
| __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \ |
| } else { \ |
| __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \ |
| } \ |
| } \ |
| } while (false) |
| |
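| // Emits a shift either by an immediate count (read as a 5- or 6-bit |
| // value to match the 32- or 64-bit operand width) or by the count in cl. |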
| #define ASSEMBLE_SHIFT(asm_instr, width) \ |
| do { \ |
| if (HasImmediateInput(instr, 1)) { \ |
| if (instr->Output()->IsRegister()) { \ |
| __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \ |
| } else { \ |
| __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \ |
| } \ |
| } else { \ |
| if (instr->Output()->IsRegister()) { \ |
| __ asm_instr##_cl(i.OutputRegister()); \ |
| } else { \ |
| __ asm_instr##_cl(i.OutputOperand()); \ |
| } \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_MOVX(asm_instr) \ |
| do { \ |
| if (instr->addressing_mode() != kMode_None) { \ |
| __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \ |
| } else if (instr->InputAt(0)->IsRegister()) { \ |
| __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \ |
| } else { \ |
| __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_SSE_BINOP(asm_instr) \ |
| do { \ |
| if (instr->InputAt(1)->IsFPRegister()) { \ |
| __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \ |
| } else { \ |
| __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_SSE_UNOP(asm_instr) \ |
| do { \ |
| if (instr->InputAt(0)->IsFPRegister()) { \ |
| __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \ |
| } else { \ |
| __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_AVX_BINOP(asm_instr) \ |
| do { \ |
| CpuFeatureScope avx_scope(tasm(), AVX); \ |
| if (instr->InputAt(1)->IsFPRegister()) { \ |
| __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \ |
| i.InputDoubleRegister(1)); \ |
| } else { \ |
| __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \ |
| i.InputOperand(1)); \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_IEEE754_BINOP(name) \ |
| do { \ |
| __ PrepareCallCFunction(2); \ |
| __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \ |
| } while (false) |
| |
| #define ASSEMBLE_IEEE754_UNOP(name) \ |
| do { \ |
| __ PrepareCallCFunction(1); \ |
| __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \ |
| } while (false) |
| |
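| // Implements an atomic read-modify-write as a compare-exchange loop: |
| // load the old value into rax, apply the operation to a temporary, then |
| // lock cmpxchg the result back, retrying if the location changed. |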
| #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \ |
| do { \ |
| Label binop; \ |
| __ bind(&binop); \ |
| __ mov_inst(rax, i.MemoryOperand(1)); \ |
| __ movl(i.TempRegister(0), rax); \ |
| __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \ |
| __ lock(); \ |
| __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \ |
| __ j(not_equal, &binop); \ |
| } while (false) |
| |
| #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \ |
| do { \ |
| Label binop; \ |
| __ bind(&binop); \ |
| __ mov_inst(rax, i.MemoryOperand(1)); \ |
| __ movq(i.TempRegister(0), rax); \ |
| __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \ |
| __ lock(); \ |
| __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \ |
| __ j(not_equal, &binop); \ |
| } while (false) |
| |
| #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \ |
| do { \ |
| if (instr->InputAt(index)->IsSimd128Register()) { \ |
| __ opcode(dst_operand, i.InputSimd128Register(index)); \ |
| } else { \ |
| __ opcode(dst_operand, i.InputOperand(index)); \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) \ |
| do { \ |
| if (instr->InputAt(index)->IsSimd128Register()) { \ |
| __ opcode(dst_operand, i.InputSimd128Register(index), imm); \ |
| } else { \ |
| __ opcode(dst_operand, i.InputOperand(index), imm); \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \ |
| do { \ |
| XMMRegister dst = i.OutputSimd128Register(); \ |
| DCHECK_EQ(dst, i.InputSimd128Register(0)); \ |
| byte input_index = instr->InputCount() == 2 ? 1 : 0; \ |
| ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \ |
| } while (false) |
| |
| #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \ |
| do { \ |
| CpuFeatureScope sse_scope(tasm(), SSELevel); \ |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \ |
| __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \ |
| } while (false) |
| |
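| // Computes an "all lanes true" reduction: compare the input against zero |
| // and ptest the result, so dst is 1 only if every lane is non-zero. |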
| #define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ |
| do { \ |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); \ |
| Register dst = i.OutputRegister(); \ |
| Register tmp = i.TempRegister(0); \ |
| __ movq(tmp, Immediate(1)); \ |
| __ xorq(dst, dst); \ |
| __ pxor(kScratchDoubleReg, kScratchDoubleReg); \ |
| __ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \ |
| __ ptest(kScratchDoubleReg, kScratchDoubleReg); \ |
| __ cmovq(zero, dst, tmp); \ |
| } while (false) |
| |
| void CodeGenerator::AssembleDeconstructFrame() { |
| unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset()); |
| __ movq(rsp, rbp); |
| __ popq(rbp); |
| } |
| |
| void CodeGenerator::AssemblePrepareTailCall() { |
| if (frame_access_state()->has_frame()) { |
| __ movq(rbp, MemOperand(rbp, 0)); |
| } |
| frame_access_state()->SetFrameAccessToSP(); |
| } |
| |
| void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg, |
| Register scratch1, |
| Register scratch2, |
| Register scratch3) { |
| DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3)); |
| Label done; |
| |
| // Check if the current frame is an arguments adaptor frame. |
| __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset), |
| Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); |
| __ j(not_equal, &done, Label::kNear); |
| |
| // Load the arguments count from the current arguments adaptor frame |
| // (note that it does not include the receiver). |
| Register caller_args_count_reg = scratch1; |
| __ SmiUntag(caller_args_count_reg, |
| Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset)); |
| |
| ParameterCount callee_args_count(args_reg); |
| __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2, |
| scratch3); |
| __ bind(&done); |
| } |
| |
| namespace { |
| |
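| // Adjusts rsp so that exactly new_slot_above_sp slots sit above it, |
| // growing the stack and, when allowed, shrinking it as well. |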
| void AdjustStackPointerForTailCall(TurboAssembler* assembler, |
| FrameAccessState* state, |
| int new_slot_above_sp, |
| bool allow_shrinkage = true) { |
| int current_sp_offset = state->GetSPToFPSlotCount() + |
| StandardFrameConstants::kFixedSlotCountAboveFp; |
| int stack_slot_delta = new_slot_above_sp - current_sp_offset; |
| if (stack_slot_delta > 0) { |
| assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize); |
| state->IncreaseSPDelta(stack_slot_delta); |
| } else if (allow_shrinkage && stack_slot_delta < 0) { |
| assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize)); |
| state->IncreaseSPDelta(stack_slot_delta); |
| } |
| } |
| |
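| // Materializes a 16-byte shuffle mask on the stack by pushing its two |
| // 8-byte halves, high quadword first, so the mask starts at the new rsp. |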
| void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) { |
| int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32); |
| assembler->movq(kScratchRegister, shuffle_mask); |
| assembler->Push(kScratchRegister); |
| shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32); |
| assembler->movq(kScratchRegister, shuffle_mask); |
| assembler->Push(kScratchRegister); |
| } |
| |
| } // namespace |
| |
| void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr, |
| int first_unused_stack_slot) { |
| CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush); |
| ZoneVector<MoveOperands*> pushes(zone()); |
| GetPushCompatibleMoves(instr, flags, &pushes); |
| |
| if (!pushes.empty() && |
| (LocationOperand::cast(pushes.back()->destination()).index() + 1 == |
| first_unused_stack_slot)) { |
| X64OperandConverter g(this, instr); |
| for (auto move : pushes) { |
| LocationOperand destination_location( |
| LocationOperand::cast(move->destination())); |
| InstructionOperand source(move->source()); |
| AdjustStackPointerForTailCall(tasm(), frame_access_state(), |
| destination_location.index()); |
| if (source.IsStackSlot()) { |
| LocationOperand source_location(LocationOperand::cast(source)); |
| __ Push(g.SlotToOperand(source_location.index())); |
| } else if (source.IsRegister()) { |
| LocationOperand source_location(LocationOperand::cast(source)); |
| __ Push(source_location.GetRegister()); |
| } else if (source.IsImmediate()) { |
| __ Push(Immediate(ImmediateOperand::cast(source).inline_value())); |
| } else { |
| // Pushes of non-scalar data types are not supported. |
| UNIMPLEMENTED(); |
| } |
| frame_access_state()->IncreaseSPDelta(1); |
| move->Eliminate(); |
| } |
| } |
| AdjustStackPointerForTailCall(tasm(), frame_access_state(), |
| first_unused_stack_slot, false); |
| } |
| |
| void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr, |
| int first_unused_stack_slot) { |
| AdjustStackPointerForTailCall(tasm(), frame_access_state(), |
| first_unused_stack_slot); |
| } |
| |
| // Check that {kJavaScriptCallCodeStartRegister} is correct. |
| void CodeGenerator::AssembleCodeStartRegisterCheck() { |
| __ ComputeCodeStartAddress(rbx); |
| __ cmpq(rbx, kJavaScriptCallCodeStartRegister); |
| __ Assert(equal, AbortReason::kWrongFunctionCodeStart); |
| } |
| |
| // Check if the code object is marked for deoptimization. If it is, then it |
| // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need |
| // to: |
| // 1. read from memory the word that contains that bit, which can be found in |
| // the flags in the referenced {CodeDataContainer} object; |
| // 2. test kMarkedForDeoptimizationBit in those flags; and |
| // 3. if it is not zero, jump to the builtin. |
| void CodeGenerator::BailoutIfDeoptimized() { |
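| // kJavaScriptCallCodeStartRegister points at the first instruction, not |
| // at the Code object, so compensate for the header size when loading the |
| // CodeDataContainer field. |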
| int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize; |
| __ LoadTaggedPointerField(rbx, |
| Operand(kJavaScriptCallCodeStartRegister, offset)); |
| __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset), |
| Immediate(1 << Code::kMarkedForDeoptimizationBit)); |
| __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode), |
| RelocInfo::CODE_TARGET, not_zero); |
| } |
| |
| void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() { |
| // Set a mask which has all bits set in the normal case, but has all |
| // bits cleared if we are speculatively executing the wrong PC. |
| __ ComputeCodeStartAddress(rbx); |
| __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister); |
| __ cmpq(kJavaScriptCallCodeStartRegister, rbx); |
| __ movq(rbx, Immediate(-1)); |
| __ cmovq(equal, kSpeculationPoisonRegister, rbx); |
| } |
| |
| void CodeGenerator::AssembleRegisterArgumentPoisoning() { |
| __ andq(kJSFunctionRegister, kSpeculationPoisonRegister); |
| __ andq(kContextRegister, kSpeculationPoisonRegister); |
| __ andq(rsp, kSpeculationPoisonRegister); |
| } |
| |
| // Assembles an instruction after register allocation, producing machine code. |
| CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| Instruction* instr) { |
| X64OperandConverter i(this, instr); |
| InstructionCode opcode = instr->opcode(); |
| ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode); |
| switch (arch_opcode) { |
| case kArchCallCodeObject: { |
| if (HasImmediateInput(instr, 0)) { |
| Handle<Code> code = i.InputCode(0); |
| __ Call(code, RelocInfo::CODE_TARGET); |
| } else { |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| __ LoadCodeObjectEntry(reg, reg); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineCall(reg); |
| } else { |
| __ call(reg); |
| } |
| } |
| RecordCallPosition(instr); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchCallBuiltinPointer: { |
| DCHECK(!HasImmediateInput(instr, 0)); |
| Register builtin_index = i.InputRegister(0); |
| __ CallBuiltinByIndex(builtin_index); |
| RecordCallPosition(instr); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchCallWasmFunction: { |
| if (HasImmediateInput(instr, 0)) { |
| Constant constant = i.ToConstant(instr->InputAt(0)); |
| Address wasm_code = static_cast<Address>(constant.ToInt64()); |
| if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) { |
| __ near_call(wasm_code, constant.rmode()); |
| } else { |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineCall(wasm_code, constant.rmode()); |
| } else { |
| __ Call(wasm_code, constant.rmode()); |
| } |
| } |
| } else { |
| Register reg = i.InputRegister(0); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineCall(reg); |
| } else { |
| __ call(reg); |
| } |
| } |
| RecordCallPosition(instr); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchTailCallCodeObjectFromJSFunction: |
| case kArchTailCallCodeObject: { |
| if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { |
| AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, |
| i.TempRegister(0), i.TempRegister(1), |
| i.TempRegister(2)); |
| } |
| if (HasImmediateInput(instr, 0)) { |
| Handle<Code> code = i.InputCode(0); |
| __ Jump(code, RelocInfo::CODE_TARGET); |
| } else { |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| __ LoadCodeObjectEntry(reg, reg); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineJump(reg); |
| } else { |
| __ jmp(reg); |
| } |
| } |
| unwinding_info_writer_.MarkBlockWillExit(); |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchTailCallWasm: { |
| if (HasImmediateInput(instr, 0)) { |
| Constant constant = i.ToConstant(instr->InputAt(0)); |
| Address wasm_code = static_cast<Address>(constant.ToInt64()); |
| if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) { |
| __ near_jmp(wasm_code, constant.rmode()); |
| } else { |
| __ Move(kScratchRegister, wasm_code, constant.rmode()); |
| __ jmp(kScratchRegister); |
| } |
| } else { |
| Register reg = i.InputRegister(0); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineJump(reg); |
| } else { |
| __ jmp(reg); |
| } |
| } |
| unwinding_info_writer_.MarkBlockWillExit(); |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchTailCallAddress: { |
| CHECK(!HasImmediateInput(instr, 0)); |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineJump(reg); |
| } else { |
| __ jmp(reg); |
| } |
| unwinding_info_writer_.MarkBlockWillExit(); |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchCallJSFunction: { |
| Register func = i.InputRegister(0); |
| if (FLAG_debug_code) { |
| // Check that the function's context matches the context argument. |
| __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset)); |
| __ Assert(equal, AbortReason::kWrongFunctionContext); |
| } |
| static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch"); |
| __ LoadTaggedPointerField(rcx, |
| FieldOperand(func, JSFunction::kCodeOffset)); |
| __ CallCodeObject(rcx); |
| frame_access_state()->ClearSPDelta(); |
| RecordCallPosition(instr); |
| break; |
| } |
| case kArchPrepareCallCFunction: { |
| // Frame alignment requires using FP-relative frame addressing. |
| frame_access_state()->SetFrameAccessToFP(); |
| int const num_parameters = MiscField::decode(instr->opcode()); |
| __ PrepareCallCFunction(num_parameters); |
| break; |
| } |
| case kArchSaveCallerRegisters: { |
| fp_mode_ = |
| static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())); |
| DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); |
| // kReturnRegister0 should have been saved before entering the stub. |
| int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0); |
| DCHECK(IsAligned(bytes, kSystemPointerSize)); |
| DCHECK_EQ(0, frame_access_state()->sp_delta()); |
| frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); |
| DCHECK(!caller_registers_saved_); |
| caller_registers_saved_ = true; |
| break; |
| } |
| case kArchRestoreCallerRegisters: { |
| DCHECK(fp_mode_ == |
| static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()))); |
| DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); |
| // Don't overwrite the returned value. |
| int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0); |
| frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize)); |
| DCHECK_EQ(0, frame_access_state()->sp_delta()); |
| DCHECK(caller_registers_saved_); |
| caller_registers_saved_ = false; |
| break; |
| } |
| case kArchPrepareTailCall: |
| AssemblePrepareTailCall(); |
| break; |
| case kArchCallCFunction: { |
| int const num_parameters = MiscField::decode(instr->opcode()); |
| Label return_location; |
| if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) { |
| // Put the return address in a stack slot. |
| __ leaq(kScratchRegister, Operand(&return_location, 0)); |
| __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset), |
| kScratchRegister); |
| } |
| if (HasImmediateInput(instr, 0)) { |
| ExternalReference ref = i.InputExternalReference(0); |
| __ CallCFunction(ref, num_parameters); |
| } else { |
| Register func = i.InputRegister(0); |
| __ CallCFunction(func, num_parameters); |
| } |
| __ bind(&return_location); |
| RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt); |
| frame_access_state()->SetFrameAccessToDefault(); |
| // Ideally, we should decrement the SP delta to match the change of the |
| // stack pointer in CallCFunction. However, for certain architectures |
| // (e.g. ARM), there may be stricter alignment requirements, causing the |
| // old SP to be saved on the stack. In those cases, we cannot calculate |
| // the SP delta statically. |
| frame_access_state()->ClearSPDelta(); |
| if (caller_registers_saved_) { |
| // Need to re-sync SP delta introduced in kArchSaveCallerRegisters. |
| // Here, we assume the sequence is: |
| // kArchSaveCallerRegisters; |
| // kArchCallCFunction; |
| // kArchRestoreCallerRegisters; |
| int bytes = |
| __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0); |
| frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); |
| } |
| // TODO(tebbi): Do we need an lfence here? |
| break; |
| } |
| case kArchJmp: |
| AssembleArchJump(i.InputRpo(0)); |
| break; |
| case kArchBinarySearchSwitch: |
| AssembleArchBinarySearchSwitch(instr); |
| break; |
| case kArchLookupSwitch: |
| AssembleArchLookupSwitch(instr); |
| break; |
| case kArchTableSwitch: |
| AssembleArchTableSwitch(instr); |
| break; |
| case kArchComment: |
| __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0))); |
| break; |
| case kArchAbortCSAAssert: |
| DCHECK(i.InputRegister(0) == rdx); |
| { |
| // We don't actually want to generate a pile of code for this, so just |
| // claim there is a stack frame, without generating one. |
| FrameScope scope(tasm(), StackFrame::NONE); |
| __ Call( |
| isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert), |
| RelocInfo::CODE_TARGET); |
| } |
| __ int3(); |
| unwinding_info_writer_.MarkBlockWillExit(); |
| break; |
| case kArchDebugBreak: |
| __ int3(); |
| break; |
| case kArchThrowTerminator: |
| unwinding_info_writer_.MarkBlockWillExit(); |
| break; |
| case kArchNop: |
| // Don't emit code for nops. |
| break; |
| case kArchDeoptimize: { |
| int deopt_state_id = |
| BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); |
| CodeGenResult result = |
| AssembleDeoptimizerCall(deopt_state_id, current_source_position_); |
| if (result != kSuccess) return result; |
| unwinding_info_writer_.MarkBlockWillExit(); |
| break; |
| } |
| case kArchRet: |
| AssembleReturn(instr->InputAt(0)); |
| break; |
| case kArchStackPointer: |
| __ movq(i.OutputRegister(), rsp); |
| break; |
| case kArchFramePointer: |
| __ movq(i.OutputRegister(), rbp); |
| break; |
| case kArchParentFramePointer: |
| if (frame_access_state()->has_frame()) { |
| __ movq(i.OutputRegister(), Operand(rbp, 0)); |
| } else { |
| __ movq(i.OutputRegister(), rbp); |
| } |
| break; |
| case kArchTruncateDoubleToI: { |
| auto result = i.OutputRegister(); |
| auto input = i.InputDoubleRegister(0); |
| auto ool = new (zone()) OutOfLineTruncateDoubleToI( |
| this, result, input, DetermineStubCallMode(), |
| &unwinding_info_writer_); |
| // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The |
| // use of Cvttsd2siq requires the movl below to avoid sign extension. |
| __ Cvttsd2siq(result, input); |
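| // Cvttsd2siq produces 0x8000000000000000 (INT64_MIN) for NaN or |
| // out-of-range inputs; subtracting 1 overflows exactly for that value, |
| // so the overflow flag routes such inputs to the out-of-line path. |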
| __ cmpq(result, Immediate(1)); |
| __ j(overflow, ool->entry()); |
| __ bind(ool->exit()); |
| __ movl(result, result); |
| break; |
| } |
| case kArchStoreWithWriteBarrier: { |
| RecordWriteMode mode = |
| static_cast<RecordWriteMode>(MiscField::decode(instr->opcode())); |
| Register object = i.InputRegister(0); |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| Register value = i.InputRegister(index); |
| Register scratch0 = i.TempRegister(0); |
| Register scratch1 = i.TempRegister(1); |
| auto ool = new (zone()) |
| OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1, |
| mode, DetermineStubCallMode()); |
| __ StoreTaggedField(operand, value); |
| __ CheckPageFlag(object, scratch0, |
| MemoryChunk::kPointersFromHereAreInterestingMask, |
| not_zero, ool->entry()); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kArchWordPoisonOnSpeculation: |
| DCHECK_EQ(i.OutputRegister(), i.InputRegister(0)); |
| __ andq(i.InputRegister(0), kSpeculationPoisonRegister); |
| break; |
| case kX64MFence: |
| __ mfence(); |
| break; |
| case kX64LFence: |
| __ lfence(); |
| break; |
| case kArchStackSlot: { |
| FrameOffset offset = |
| frame_access_state()->GetFrameOffset(i.InputInt32(0)); |
| Register base = offset.from_stack_pointer() ? rsp : rbp; |
| __ leaq(i.OutputRegister(), Operand(base, offset.offset())); |
| break; |
| } |
| case kIeee754Float64Acos: |
| ASSEMBLE_IEEE754_UNOP(acos); |
| break; |
| case kIeee754Float64Acosh: |
| ASSEMBLE_IEEE754_UNOP(acosh); |
| break; |
| case kIeee754Float64Asin: |
| ASSEMBLE_IEEE754_UNOP(asin); |
| break; |
| case kIeee754Float64Asinh: |
| ASSEMBLE_IEEE754_UNOP(asinh); |
| break; |
| case kIeee754Float64Atan: |
| ASSEMBLE_IEEE754_UNOP(atan); |
| break; |
| case kIeee754Float64Atanh: |
| ASSEMBLE_IEEE754_UNOP(atanh); |
| break; |
| case kIeee754Float64Atan2: |
| ASSEMBLE_IEEE754_BINOP(atan2); |
| break; |
| case kIeee754Float64Cbrt: |
| ASSEMBLE_IEEE754_UNOP(cbrt); |
| break; |
| case kIeee754Float64Cos: |
| ASSEMBLE_IEEE754_UNOP(cos); |
| break; |
| case kIeee754Float64Cosh: |
| ASSEMBLE_IEEE754_UNOP(cosh); |
| break; |
| case kIeee754Float64Exp: |
| ASSEMBLE_IEEE754_UNOP(exp); |
| break; |
| case kIeee754Float64Expm1: |
| ASSEMBLE_IEEE754_UNOP(expm1); |
| break; |
| case kIeee754Float64Log: |
| ASSEMBLE_IEEE754_UNOP(log); |
| break; |
| case kIeee754Float64Log1p: |
| ASSEMBLE_IEEE754_UNOP(log1p); |
| break; |
| case kIeee754Float64Log2: |
| ASSEMBLE_IEEE754_UNOP(log2); |
| break; |
| case kIeee754Float64Log10: |
| ASSEMBLE_IEEE754_UNOP(log10); |
| break; |
| case kIeee754Float64Pow: |
| ASSEMBLE_IEEE754_BINOP(pow); |
| break; |
| case kIeee754Float64Sin: |
| ASSEMBLE_IEEE754_UNOP(sin); |
| break; |
| case kIeee754Float64Sinh: |
| ASSEMBLE_IEEE754_UNOP(sinh); |
| break; |
| case kIeee754Float64Tan: |
| ASSEMBLE_IEEE754_UNOP(tan); |
| break; |
| case kIeee754Float64Tanh: |
| ASSEMBLE_IEEE754_UNOP(tanh); |
| break; |
| case kX64Add32: |
| ASSEMBLE_BINOP(addl); |
| break; |
| case kX64Add: |
| ASSEMBLE_BINOP(addq); |
| break; |
| case kX64Sub32: |
| ASSEMBLE_BINOP(subl); |
| break; |
| case kX64Sub: |
| ASSEMBLE_BINOP(subq); |
| break; |
| case kX64And32: |
| ASSEMBLE_BINOP(andl); |
| break; |
| case kX64And: |
| ASSEMBLE_BINOP(andq); |
| break; |
| case kX64Cmp8: |
| ASSEMBLE_COMPARE(cmpb); |
| break; |
| case kX64Cmp16: |
| ASSEMBLE_COMPARE(cmpw); |
| break; |
| case kX64Cmp32: |
| ASSEMBLE_COMPARE(cmpl); |
| break; |
| case kX64Cmp: |
| ASSEMBLE_COMPARE(cmpq); |
| break; |
| case kX64Test8: |
| ASSEMBLE_COMPARE(testb); |
| break; |
| case kX64Test16: |
| ASSEMBLE_COMPARE(testw); |
| break; |
| case kX64Test32: |
| ASSEMBLE_COMPARE(testl); |
| break; |
| case kX64Test: |
| ASSEMBLE_COMPARE(testq); |
| break; |
| case kX64Imul32: |
| ASSEMBLE_MULT(imull); |
| break; |
| case kX64Imul: |
| ASSEMBLE_MULT(imulq); |
| break; |
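| // The *MulHigh32 ops use the one-operand multiply forms, which multiply |
| // rax by the input and leave the high half of the product in rdx. |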
| case kX64ImulHigh32: |
| if (instr->InputAt(1)->IsRegister()) { |
| __ imull(i.InputRegister(1)); |
| } else { |
| __ imull(i.InputOperand(1)); |
| } |
| break; |
| case kX64UmulHigh32: |
| if (instr->InputAt(1)->IsRegister()) { |
| __ mull(i.InputRegister(1)); |
| } else { |
| __ mull(i.InputOperand(1)); |
| } |
| break; |
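| // The divisions take their dividend in rdx:rax; the signed variants |
| // sign-extend rax into rdx via cdq/cqo, the unsigned ones zero rdx. |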
| case kX64Idiv32: |
| __ cdq(); |
| __ idivl(i.InputRegister(1)); |
| break; |
| case kX64Idiv: |
| __ cqo(); |
| __ idivq(i.InputRegister(1)); |
| break; |
| case kX64Udiv32: |
| __ xorl(rdx, rdx); |
| __ divl(i.InputRegister(1)); |
| break; |
| case kX64Udiv: |
| __ xorq(rdx, rdx); |
| __ divq(i.InputRegister(1)); |
| break; |
| case kX64Not: |
| ASSEMBLE_UNOP(notq); |
| break; |
| case kX64Not32: |
| ASSEMBLE_UNOP(notl); |
| break; |
| case kX64Neg: |
| ASSEMBLE_UNOP(negq); |
| break; |
| case kX64Neg32: |
| ASSEMBLE_UNOP(negl); |
| break; |
| case kX64Or32: |
| ASSEMBLE_BINOP(orl); |
| break; |
| case kX64Or: |
| ASSEMBLE_BINOP(orq); |
| break; |
| case kX64Xor32: |
| ASSEMBLE_BINOP(xorl); |
| break; |
| case kX64Xor: |
| ASSEMBLE_BINOP(xorq); |
| break; |
| case kX64Shl32: |
| ASSEMBLE_SHIFT(shll, 5); |
| break; |
| case kX64Shl: |
| ASSEMBLE_SHIFT(shlq, 6); |
| break; |
| case kX64Shr32: |
| ASSEMBLE_SHIFT(shrl, 5); |
| break; |
| case kX64Shr: |
| ASSEMBLE_SHIFT(shrq, 6); |
| break; |
| case kX64Sar32: |
| ASSEMBLE_SHIFT(sarl, 5); |
| break; |
| case kX64Sar: |
| ASSEMBLE_SHIFT(sarq, 6); |
| break; |
| case kX64Ror32: |
| ASSEMBLE_SHIFT(rorl, 5); |
| break; |
| case kX64Ror: |
| ASSEMBLE_SHIFT(rorq, 6); |
| break; |
| case kX64Lzcnt: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Lzcntq(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ Lzcntq(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kX64Lzcnt32: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Lzcntl(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ Lzcntl(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kX64Tzcnt: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Tzcntq(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ Tzcntq(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kX64Tzcnt32: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Tzcntl(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ Tzcntl(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kX64Popcnt: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Popcntq(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ Popcntq(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kX64Popcnt32: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Popcntl(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ Popcntl(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kX64Bswap: |
| __ bswapq(i.OutputRegister()); |
| break; |
| case kX64Bswap32: |
| __ bswapl(i.OutputRegister()); |
| break; |
| case kSSEFloat32Cmp: |
| ASSEMBLE_SSE_BINOP(Ucomiss); |
| break; |
| case kSSEFloat32Add: |
| ASSEMBLE_SSE_BINOP(addss); |
| break; |
| case kSSEFloat32Sub: |
| ASSEMBLE_SSE_BINOP(subss); |
| break; |
| case kSSEFloat32Mul: |
| ASSEMBLE_SSE_BINOP(mulss); |
| break; |
| case kSSEFloat32Div: |
| ASSEMBLE_SSE_BINOP(divss); |
| // Don't delete this mov. It may improve performance on some CPUs when |
| // there is a (v)mulss depending on the result. |
| __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
| break; |
| case kSSEFloat32Abs: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ Psrlq(kScratchDoubleReg, 33); |
| __ Andps(i.OutputDoubleRegister(), kScratchDoubleReg); |
| break; |
| } |
| case kSSEFloat32Neg: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ Psllq(kScratchDoubleReg, 31); |
| __ Xorps(i.OutputDoubleRegister(), kScratchDoubleReg); |
| break; |
| } |
| case kSSEFloat32Sqrt: |
| ASSEMBLE_SSE_UNOP(sqrtss); |
| break; |
| case kSSEFloat32ToFloat64: |
| ASSEMBLE_SSE_UNOP(Cvtss2sd); |
| break; |
| case kSSEFloat32Round: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| RoundingMode const mode = |
| static_cast<RoundingMode>(MiscField::decode(instr->opcode())); |
| __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode); |
| break; |
| } |
| case kSSEFloat32ToInt32: |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } else { |
| __ Cvttss2si(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEFloat32ToUint32: { |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } else { |
| __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| } |
| case kSSEFloat64Cmp: |
| ASSEMBLE_SSE_BINOP(Ucomisd); |
| break; |
| case kSSEFloat64Add: |
| ASSEMBLE_SSE_BINOP(addsd); |
| break; |
| case kSSEFloat64Sub: |
| ASSEMBLE_SSE_BINOP(subsd); |
| break; |
| case kSSEFloat64Mul: |
| ASSEMBLE_SSE_BINOP(mulsd); |
| break; |
| case kSSEFloat64Div: |
| ASSEMBLE_SSE_BINOP(divsd); |
| // Don't delete this mov. It may improve performance on some CPUs when |
| // there is a (v)mulsd depending on the result. |
| __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
| break; |
| case kSSEFloat64Mod: { |
| __ AllocateStackSpace(kDoubleSize); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kDoubleSize); |
| // Move values to st(0) and st(1). |
| __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1)); |
| __ fld_d(Operand(rsp, 0)); |
| __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0)); |
| __ fld_d(Operand(rsp, 0)); |
| // Loop while fprem isn't done. |
| Label mod_loop; |
| __ bind(&mod_loop); |
| // This instruction traps on all kinds of inputs, but we are assuming |
| // the floating-point control word is set to ignore them all. |
| __ fprem(); |
| // The following two instructions implicitly use rax. |
| __ fnstsw_ax(); |
| if (CpuFeatures::IsSupported(SAHF)) { |
| CpuFeatureScope sahf_scope(tasm(), SAHF); |
| __ sahf(); |
| } else { |
| __ shrl(rax, Immediate(8)); |
| __ andl(rax, Immediate(0xFF)); |
| __ pushq(rax); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kSystemPointerSize); |
| __ popfq(); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| -kSystemPointerSize); |
| } |
| __ j(parity_even, &mod_loop); |
| // Move output to stack and clean up. |
| __ fstp(1); |
| __ fstp_d(Operand(rsp, 0)); |
| __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0)); |
| __ addq(rsp, Immediate(kDoubleSize)); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| -kDoubleSize); |
| break; |
| } |
| case kSSEFloat32Max: { |
| Label compare_swap, done_compare; |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| auto ool = |
| new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister()); |
| __ j(parity_even, ool->entry()); |
| __ j(above, &done_compare, Label::kNear); |
| __ j(below, &compare_swap, Label::kNear); |
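| // The operands compared equal, which is also the case for -0 and +0; |
| // check the sign bit of the left operand and take the right one (the |
| // potential +0) when the left operand is negative zero. |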
| __ Movmskps(kScratchRegister, i.InputDoubleRegister(0)); |
| __ testl(kScratchRegister, Immediate(1)); |
| __ j(zero, &done_compare, Label::kNear); |
| __ bind(&compare_swap); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ Movss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| __ bind(&done_compare); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kSSEFloat32Min: { |
| Label compare_swap, done_compare; |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| auto ool = |
| new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister()); |
| __ j(parity_even, ool->entry()); |
| __ j(below, &done_compare, Label::kNear); |
| __ j(above, &compare_swap, Label::kNear); |
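| // The operands compared equal; for min, take the right operand when its |
| // sign bit is set so that -0 wins over +0. |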
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Movmskps(kScratchRegister, i.InputDoubleRegister(1)); |
| } else { |
| __ Movss(kScratchDoubleReg, i.InputOperand(1)); |
| __ Movmskps(kScratchRegister, kScratchDoubleReg); |
| } |
| __ testl(kScratchRegister, Immediate(1)); |
| __ j(zero, &done_compare, Label::kNear); |
| __ bind(&compare_swap); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ Movss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| __ bind(&done_compare); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kSSEFloat64Max: { |
| Label compare_swap, done_compare; |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| auto ool = |
| new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister()); |
| __ j(parity_even, ool->entry()); |
| __ j(above, &done_compare, Label::kNear); |
| __ j(below, &compare_swap, Label::kNear); |
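| // Same -0/+0 disambiguation as in kSSEFloat32Max above. |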
| __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0)); |
| __ testl(kScratchRegister, Immediate(1)); |
| __ j(zero, &done_compare, Label::kNear); |
| __ bind(&compare_swap); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| __ bind(&done_compare); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kSSEFloat64Min: { |
| Label compare_swap, done_compare; |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| auto ool = |
| new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister()); |
| __ j(parity_even, ool->entry()); |
| __ j(below, &done_compare, Label::kNear); |
| __ j(above, &compare_swap, Label::kNear); |
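| // Same -0/+0 disambiguation as in kSSEFloat32Min above. |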
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1)); |
| } else { |
| __ Movsd(kScratchDoubleReg, i.InputOperand(1)); |
| __ Movmskpd(kScratchRegister, kScratchDoubleReg); |
| } |
| __ testl(kScratchRegister, Immediate(1)); |
| __ j(zero, &done_compare, Label::kNear); |
| __ bind(&compare_swap); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| __ bind(&done_compare); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kX64F64x2Abs: |
| case kSSEFloat64Abs: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ Psrlq(kScratchDoubleReg, 1); |
| __ Andpd(i.OutputDoubleRegister(), kScratchDoubleReg); |
| break; |
| } |
| case kX64F64x2Neg: |
| case kSSEFloat64Neg: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ Psllq(kScratchDoubleReg, 63); |
| __ Xorpd(i.OutputDoubleRegister(), kScratchDoubleReg); |
| break; |
| } |
| case kSSEFloat64Sqrt: |
| ASSEMBLE_SSE_UNOP(Sqrtsd); |
| break; |
| case kSSEFloat64Round: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| RoundingMode const mode = |
| static_cast<RoundingMode>(MiscField::decode(instr->opcode())); |
| __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode); |
| break; |
| } |
| case kSSEFloat64ToFloat32: |
| ASSEMBLE_SSE_UNOP(Cvtsd2ss); |
| break; |
| case kSSEFloat64ToInt32: |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } else { |
| __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEFloat64ToUint32: { |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } else { |
| __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0)); |
| } |
| if (MiscField::decode(instr->opcode())) { |
| __ AssertZeroExtended(i.OutputRegister()); |
| } |
| break; |
| } |
| case kSSEFloat32ToInt64: |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } else { |
| __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0)); |
| } |
| if (instr->OutputCount() > 1) { |
| __ Set(i.OutputRegister(1), 1); |
| Label done; |
| Label fail; |
| __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN)); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0)); |
| } else { |
| __ Ucomiss(kScratchDoubleReg, i.InputOperand(0)); |
| } |
| // If the input is NaN, then the conversion fails. |
| __ j(parity_even, &fail); |
| // If the input is INT64_MIN, then the conversion succeeds. |
| __ j(equal, &done); |
| __ cmpq(i.OutputRegister(0), Immediate(1)); |
| // If the conversion results in INT64_MIN, but the input was not |
| // INT64_MIN, then the conversion fails. |
| __ j(no_overflow, &done); |
| __ bind(&fail); |
| __ Set(i.OutputRegister(1), 0); |
| __ bind(&done); |
| } |
| break; |
| case kSSEFloat64ToInt64: |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0)); |
| } else { |
| __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0)); |
| } |
| if (instr->OutputCount() > 1) { |
| __ Set(i.OutputRegister(1), 1); |
| Label done; |
| Label fail; |
| __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN)); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0)); |
| } else { |
| __ Ucomisd(kScratchDoubleReg, i.InputOperand(0)); |
| } |
| // If the input is NaN, then the conversion fails. |
| __ j(parity_even, &fail); |
| // If the input is INT64_MIN, then the conversion succeeds. |
| __ j(equal, &done); |
| __ cmpq(i.OutputRegister(0), Immediate(1)); |
| // If the conversion results in INT64_MIN, but the input was not |
| // INT64_MIN, then the conversion fails. |
| __ j(no_overflow, &done); |
| __ bind(&fail); |
| __ Set(i.OutputRegister(1), 0); |
| __ bind(&done); |
| } |
| break; |
| case kSSEFloat32ToUint64: { |
| Label fail; |
| if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail); |
| } else { |
| __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail); |
| } |
| if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1); |
| __ bind(&fail); |
| break; |
| } |
| case kSSEFloat64ToUint64: { |
| Label fail; |
| if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail); |
| } else { |
| __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail); |
| } |
| if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1); |
| __ bind(&fail); |
| break; |
| } |
| case kSSEInt32ToFloat64: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEInt32ToFloat32: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEInt64ToFloat32: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEInt64ToFloat64: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEUint64ToFloat32: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEUint64ToFloat64: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEUint32ToFloat64: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEUint32ToFloat32: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kSSEFloat64ExtractLowWord32: |
| if (instr->InputAt(0)->IsFPStackSlot()) { |
| __ movl(i.OutputRegister(), i.InputOperand(0)); |
| } else { |
| __ Movd(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } |
| break; |
| case kSSEFloat64ExtractHighWord32: |
| if (instr->InputAt(0)->IsFPStackSlot()) { |
| __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2)); |
| } else { |
| __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1); |
| } |
| break; |
| case kSSEFloat64InsertLowWord32: |
| if (instr->InputAt(1)->IsRegister()) { |
| __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0); |
| } else { |
| __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0); |
| } |
| break; |
| case kSSEFloat64InsertHighWord32: |
| if (instr->InputAt(1)->IsRegister()) { |
| __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1); |
| } else { |
| __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1); |
| } |
| break; |
| case kSSEFloat64LoadLowWord32: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Movd(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Movd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kAVXFloat32Cmp: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| break; |
| } |
| case kAVXFloat32Add: |
| ASSEMBLE_AVX_BINOP(vaddss); |
| break; |
| case kAVXFloat32Sub: |
| ASSEMBLE_AVX_BINOP(vsubss); |
| break; |
| case kAVXFloat32Mul: |
| ASSEMBLE_AVX_BINOP(vmulss); |
| break; |
| case kAVXFloat32Div: |
| ASSEMBLE_AVX_BINOP(vdivss); |
      // Don't delete this mov. It may improve performance on some CPUs
      // when a (v)mulss depends on the result.
| __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
| break; |
| case kAVXFloat64Cmp: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| break; |
| } |
| case kAVXFloat64Add: |
| ASSEMBLE_AVX_BINOP(vaddsd); |
| break; |
| case kAVXFloat64Sub: |
| ASSEMBLE_AVX_BINOP(vsubsd); |
| break; |
| case kAVXFloat64Mul: |
| ASSEMBLE_AVX_BINOP(vmulsd); |
| break; |
| case kAVXFloat64Div: |
| ASSEMBLE_AVX_BINOP(vdivsd); |
      // Don't delete this mov. It may improve performance on some CPUs
      // when a (v)mulsd depends on the result.
| __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
| break; |
| case kAVXFloat32Abs: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| CpuFeatureScope avx_scope(tasm(), AVX); |
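      // Build an abs mask: all-ones shifted right by 33 leaves 0x7FFFFFFF
      // in the low word of each quadword, so the AND below clears the
      // float's sign bit. The Neg and Float64 variants below use the same
      // all-ones trick with different shift amounts.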
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, |
| i.InputDoubleRegister(0)); |
| } else { |
| __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, |
| i.InputOperand(0)); |
| } |
| break; |
| } |
| case kAVXFloat32Neg: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, |
| i.InputDoubleRegister(0)); |
| } else { |
| __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, |
| i.InputOperand(0)); |
| } |
| break; |
| } |
| case kAVXFloat64Abs: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, |
| i.InputDoubleRegister(0)); |
| } else { |
| __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, |
| i.InputOperand(0)); |
| } |
| break; |
| } |
| case kAVXFloat64Neg: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, |
| i.InputDoubleRegister(0)); |
| } else { |
| __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, |
| i.InputOperand(0)); |
| } |
| break; |
| } |
| case kSSEFloat64SilenceNaN: |
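      // Subtracting +0.0 leaves ordinary values (including -0.0)
      // unchanged, but turns a signalling NaN into a quiet one.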
| __ Xorpd(kScratchDoubleReg, kScratchDoubleReg); |
| __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg); |
| break; |
| case kX64Movsxbl: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movsxbl); |
| __ AssertZeroExtended(i.OutputRegister()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movzxbl: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movzxbl); |
| __ AssertZeroExtended(i.OutputRegister()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movsxbq: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movsxbq); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movzxbq: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movzxbq); |
| __ AssertZeroExtended(i.OutputRegister()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movb: { |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| if (HasImmediateInput(instr, index)) { |
| __ movb(operand, Immediate(i.InputInt8(index))); |
| } else { |
| __ movb(operand, i.InputRegister(index)); |
| } |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| } |
| case kX64Movsxwl: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movsxwl); |
| __ AssertZeroExtended(i.OutputRegister()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movzxwl: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movzxwl); |
| __ AssertZeroExtended(i.OutputRegister()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movsxwq: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movsxwq); |
| break; |
| case kX64Movzxwq: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movzxwq); |
| __ AssertZeroExtended(i.OutputRegister()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movw: { |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| if (HasImmediateInput(instr, index)) { |
| __ movw(operand, Immediate(i.InputInt16(index))); |
| } else { |
| __ movw(operand, i.InputRegister(index)); |
| } |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| } |
| case kX64Movl: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| if (instr->HasOutput()) { |
| if (instr->addressing_mode() == kMode_None) { |
| if (instr->InputAt(0)->IsRegister()) { |
| __ movl(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ movl(i.OutputRegister(), i.InputOperand(0)); |
| } |
| } else { |
| __ movl(i.OutputRegister(), i.MemoryOperand()); |
| } |
| __ AssertZeroExtended(i.OutputRegister()); |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| if (HasImmediateInput(instr, index)) { |
| __ movl(operand, i.InputImmediate(index)); |
| } else { |
| __ movl(operand, i.InputRegister(index)); |
| } |
| } |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movsxlq: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| ASSEMBLE_MOVX(movsxlq); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64MovqDecompressTaggedSigned: { |
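      // With pointer compression enabled, tagged fields are stored as
      // 32-bit values; the Decompress* macros widen them back to
      // full-width tagged values.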
| CHECK(instr->HasOutput()); |
| __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| } |
| case kX64MovqDecompressTaggedPointer: { |
| CHECK(instr->HasOutput()); |
| __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| } |
| case kX64MovqDecompressAnyTagged: { |
| CHECK(instr->HasOutput()); |
| __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| } |
| case kX64MovqCompressTagged: { |
| CHECK(!instr->HasOutput()); |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| if (HasImmediateInput(instr, index)) { |
| __ StoreTaggedField(operand, i.InputImmediate(index)); |
| } else { |
| __ StoreTaggedField(operand, i.InputRegister(index)); |
| } |
| break; |
| } |
| case kX64DecompressSigned: { |
| CHECK(instr->HasOutput()); |
| ASSEMBLE_MOVX(DecompressTaggedSigned); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| } |
| case kX64DecompressPointer: { |
| CHECK(instr->HasOutput()); |
| ASSEMBLE_MOVX(DecompressTaggedPointer); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| } |
| case kX64DecompressAny: { |
| CHECK(instr->HasOutput()); |
| ASSEMBLE_MOVX(DecompressAnyTagged); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| } |
| case kX64CompressSigned: // Fall through. |
| case kX64CompressPointer: // Fall through. |
| case kX64CompressAny: { |
| ASSEMBLE_MOVX(movl); |
| break; |
| } |
| case kX64Movq: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| if (instr->HasOutput()) { |
| __ movq(i.OutputRegister(), i.MemoryOperand()); |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| if (HasImmediateInput(instr, index)) { |
| __ movq(operand, i.InputImmediate(index)); |
| } else { |
| __ movq(operand, i.InputRegister(index)); |
| } |
| } |
| EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); |
| break; |
| case kX64Movss: |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| if (instr->HasOutput()) { |
| __ Movss(i.OutputDoubleRegister(), i.MemoryOperand()); |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| __ Movss(operand, i.InputDoubleRegister(index)); |
| } |
| break; |
| case kX64Movsd: { |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| if (instr->HasOutput()) { |
| const MemoryAccessMode access_mode = |
| static_cast<MemoryAccessMode>(MiscField::decode(opcode)); |
| if (access_mode == kMemoryAccessPoisoned) { |
| // If we have to poison the loaded value, we load into a general |
| // purpose register first, mask it with the poison, and move the |
| // value from the general purpose register into the double register. |
| __ movq(kScratchRegister, i.MemoryOperand()); |
| __ andq(kScratchRegister, kSpeculationPoisonRegister); |
| __ Movq(i.OutputDoubleRegister(), kScratchRegister); |
| } else { |
| __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand()); |
| } |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| __ Movsd(operand, i.InputDoubleRegister(index)); |
| } |
| break; |
| } |
| case kX64Movdqu: { |
| CpuFeatureScope sse_scope(tasm(), SSSE3); |
| EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); |
| if (instr->HasOutput()) { |
| __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand()); |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| __ Movdqu(operand, i.InputSimd128Register(index)); |
| } |
| break; |
| } |
| case kX64BitcastFI: |
| if (instr->InputAt(0)->IsFPStackSlot()) { |
| __ movl(i.OutputRegister(), i.InputOperand(0)); |
| } else { |
| __ Movd(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } |
| break; |
| case kX64BitcastDL: |
| if (instr->InputAt(0)->IsFPStackSlot()) { |
| __ movq(i.OutputRegister(), i.InputOperand(0)); |
| } else { |
| __ Movq(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } |
| break; |
| case kX64BitcastIF: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Movd(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Movss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kX64BitcastLD: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ Movq(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kX64Lea32: { |
| AddressingMode mode = AddressingModeField::decode(instr->opcode()); |
      // Shorten "leal" to "addl", "subl" or "shll" if the register
      // allocation and addressing mode happen to work out. The
      // "addl"/"subl" forms in these cases are faster, based on
      // measurements.
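      // For example, "leal rax, [rax + 8]" becomes "addl rax, 8" and
      // "leal rax, [rax + rax]" becomes "shll rax, 1".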
| if (i.InputRegister(0) == i.OutputRegister()) { |
| if (mode == kMode_MRI) { |
| int32_t constant_summand = i.InputInt32(1); |
| DCHECK_NE(0, constant_summand); |
| if (constant_summand > 0) { |
| __ addl(i.OutputRegister(), Immediate(constant_summand)); |
| } else { |
| __ subl(i.OutputRegister(), |
| Immediate(base::NegateWithWraparound(constant_summand))); |
| } |
| } else if (mode == kMode_MR1) { |
| if (i.InputRegister(1) == i.OutputRegister()) { |
| __ shll(i.OutputRegister(), Immediate(1)); |
| } else { |
| __ addl(i.OutputRegister(), i.InputRegister(1)); |
| } |
| } else if (mode == kMode_M2) { |
| __ shll(i.OutputRegister(), Immediate(1)); |
| } else if (mode == kMode_M4) { |
| __ shll(i.OutputRegister(), Immediate(2)); |
| } else if (mode == kMode_M8) { |
| __ shll(i.OutputRegister(), Immediate(3)); |
| } else { |
| __ leal(i.OutputRegister(), i.MemoryOperand()); |
| } |
| } else if (mode == kMode_MR1 && |
| i.InputRegister(1) == i.OutputRegister()) { |
| __ addl(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ leal(i.OutputRegister(), i.MemoryOperand()); |
| } |
| __ AssertZeroExtended(i.OutputRegister()); |
| break; |
| } |
| case kX64Lea: { |
| AddressingMode mode = AddressingModeField::decode(instr->opcode()); |
      // Shorten "leaq" to "addq", "subq" or "shlq" if the register
      // allocation and addressing mode happen to work out. The
      // "addq"/"subq" forms in these cases are faster, based on
      // measurements.
| if (i.InputRegister(0) == i.OutputRegister()) { |
| if (mode == kMode_MRI) { |
| int32_t constant_summand = i.InputInt32(1); |
| if (constant_summand > 0) { |
| __ addq(i.OutputRegister(), Immediate(constant_summand)); |
| } else if (constant_summand < 0) { |
| __ subq(i.OutputRegister(), Immediate(-constant_summand)); |
| } |
| } else if (mode == kMode_MR1) { |
| if (i.InputRegister(1) == i.OutputRegister()) { |
| __ shlq(i.OutputRegister(), Immediate(1)); |
| } else { |
| __ addq(i.OutputRegister(), i.InputRegister(1)); |
| } |
| } else if (mode == kMode_M2) { |
| __ shlq(i.OutputRegister(), Immediate(1)); |
| } else if (mode == kMode_M4) { |
| __ shlq(i.OutputRegister(), Immediate(2)); |
| } else if (mode == kMode_M8) { |
| __ shlq(i.OutputRegister(), Immediate(3)); |
| } else { |
| __ leaq(i.OutputRegister(), i.MemoryOperand()); |
| } |
| } else if (mode == kMode_MR1 && |
| i.InputRegister(1) == i.OutputRegister()) { |
| __ addq(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ leaq(i.OutputRegister(), i.MemoryOperand()); |
| } |
| break; |
| } |
| case kX64Dec32: |
| __ decl(i.OutputRegister()); |
| break; |
| case kX64Inc32: |
| __ incl(i.OutputRegister()); |
| break; |
| case kX64Push: |
| if (AddressingModeField::decode(instr->opcode()) != kMode_None) { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| __ pushq(operand); |
| frame_access_state()->IncreaseSPDelta(1); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kSystemPointerSize); |
| } else if (HasImmediateInput(instr, 0)) { |
| __ pushq(i.InputImmediate(0)); |
| frame_access_state()->IncreaseSPDelta(1); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kSystemPointerSize); |
| } else if (instr->InputAt(0)->IsRegister()) { |
| __ pushq(i.InputRegister(0)); |
| frame_access_state()->IncreaseSPDelta(1); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kSystemPointerSize); |
| } else if (instr->InputAt(0)->IsFloatRegister() || |
| instr->InputAt(0)->IsDoubleRegister()) { |
| // TODO(titzer): use another machine instruction? |
| __ AllocateStackSpace(kDoubleSize); |
| frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kDoubleSize); |
| __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0)); |
| } else if (instr->InputAt(0)->IsSimd128Register()) { |
| // TODO(titzer): use another machine instruction? |
| __ AllocateStackSpace(kSimd128Size); |
| frame_access_state()->IncreaseSPDelta(kSimd128Size / |
| kSystemPointerSize); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kSimd128Size); |
| __ Movups(Operand(rsp, 0), i.InputSimd128Register(0)); |
| } else if (instr->InputAt(0)->IsStackSlot() || |
| instr->InputAt(0)->IsFloatStackSlot() || |
| instr->InputAt(0)->IsDoubleStackSlot()) { |
| __ pushq(i.InputOperand(0)); |
| frame_access_state()->IncreaseSPDelta(1); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kSystemPointerSize); |
| } else { |
| DCHECK(instr->InputAt(0)->IsSimd128StackSlot()); |
| __ Movups(kScratchDoubleReg, i.InputOperand(0)); |
| // TODO(titzer): use another machine instruction? |
| __ AllocateStackSpace(kSimd128Size); |
| frame_access_state()->IncreaseSPDelta(kSimd128Size / |
| kSystemPointerSize); |
| unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), |
| kSimd128Size); |
| __ Movups(Operand(rsp, 0), kScratchDoubleReg); |
| } |
| break; |
| case kX64Poke: { |
| int slot = MiscField::decode(instr->opcode()); |
| if (HasImmediateInput(instr, 0)) { |
| __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0)); |
| } else { |
| __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0)); |
| } |
| break; |
| } |
| case kX64Peek: { |
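      // reverse_slot counts backwards from the total frame slot count, so
      // the rbp-relative offset is computed from the difference.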
| int reverse_slot = i.InputInt32(0); |
| int offset = |
| FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot); |
| if (instr->OutputAt(0)->IsFPRegister()) { |
| LocationOperand* op = LocationOperand::cast(instr->OutputAt(0)); |
| if (op->representation() == MachineRepresentation::kFloat64) { |
| __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset)); |
| } else { |
| DCHECK_EQ(MachineRepresentation::kFloat32, op->representation()); |
| __ Movss(i.OutputFloatRegister(), Operand(rbp, offset)); |
| } |
| } else { |
| __ movq(i.OutputRegister(), Operand(rbp, offset)); |
| } |
| break; |
| } |
| case kX64F64x2Splat: { |
| XMMRegister dst = i.OutputSimd128Register(); |
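      // Shuffle code 0x44 selects dwords {0, 1, 0, 1}, duplicating the
      // low double into both lanes.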
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ pshufd(dst, i.InputDoubleRegister(0), 0x44); |
| } else { |
| __ pshufd(dst, i.InputOperand(0), 0x44); |
| } |
| break; |
| } |
| case kX64F64x2ReplaceLane: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| if (instr->InputAt(2)->IsFPRegister()) { |
| __ movq(kScratchRegister, i.InputDoubleRegister(2)); |
| __ pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1)); |
| } else { |
| __ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); |
| } |
| break; |
| } |
| case kX64F64x2ExtractLane: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1)); |
| __ movq(i.OutputDoubleRegister(), kScratchRegister); |
| break; |
| } |
| case kX64F64x2Eq: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64F64x2Ne: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64F64x2Lt: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64F64x2Le: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below |
| case kX64F32x4Splat: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ movss(dst, i.InputDoubleRegister(0)); |
| } else { |
| __ movss(dst, i.InputOperand(0)); |
| } |
| __ shufps(dst, dst, 0x0); |
| break; |
| } |
| case kX64F32x4ExtractLane: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1)); |
| __ movd(i.OutputDoubleRegister(), kScratchRegister); |
| break; |
| } |
| case kX64F32x4ReplaceLane: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| // The insertps instruction uses imm8[5:4] to indicate the lane |
| // that needs to be replaced. |
| byte select = i.InputInt8(1) << 4 & 0x30; |
| if (instr->InputAt(2)->IsFPRegister()) { |
| __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), |
| select); |
| } else { |
| __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select); |
| } |
| break; |
| } |
| case kX64F32x4SConvertI32x4: { |
| __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kX64F32x4UConvertI32x4: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| XMMRegister dst = i.OutputSimd128Register(); |
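      // cvtdq2ps only converts signed inputs, so split each unsigned lane:
      // the low 16 bits convert exactly, while the remaining high part is
      // halved into signed range, converted, and doubled again.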
| __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros |
| __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits |
| __ psubd(dst, kScratchDoubleReg); // get hi 16 bits |
| __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly |
| __ psrld(dst, 1); // divide by 2 to get in unsigned range |
| __ cvtdq2ps(dst, dst); // convert hi exactly |
| __ addps(dst, dst); // double hi, exactly |
| __ addps(dst, kScratchDoubleReg); // add hi and lo, may round. |
| break; |
| } |
| case kX64F32x4Abs: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(0); |
| if (dst == src) { |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psrld(kScratchDoubleReg, 1); |
| __ andps(i.OutputSimd128Register(), kScratchDoubleReg); |
| } else { |
| __ pcmpeqd(dst, dst); |
| __ psrld(dst, 1); |
| __ andps(dst, i.InputSimd128Register(0)); |
| } |
| break; |
| } |
| case kX64F32x4Neg: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(0); |
| if (dst == src) { |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ pslld(kScratchDoubleReg, 31); |
| __ xorps(i.OutputSimd128Register(), kScratchDoubleReg); |
| } else { |
| __ pcmpeqd(dst, dst); |
| __ pslld(dst, 31); |
| __ xorps(dst, i.InputSimd128Register(0)); |
| } |
| break; |
| } |
| case kX64F32x4RecipApprox: { |
| __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kX64F32x4RecipSqrtApprox: { |
| __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kX64F32x4Add: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64F32x4AddHoriz: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE3); |
| __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64F32x4Sub: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64F32x4Mul: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64F32x4Min: { |
| XMMRegister src1 = i.InputSimd128Register(1), |
| dst = i.OutputSimd128Register(); |
| DCHECK_EQ(dst, i.InputSimd128Register(0)); |
| // The minps instruction doesn't propagate NaNs and +0's in its first |
      // operand. Perform minps in both orders, merge the results, and adjust.
| __ movaps(kScratchDoubleReg, src1); |
| __ minps(kScratchDoubleReg, dst); |
| __ minps(dst, src1); |
      // Propagate -0's and NaNs, which may be non-canonical.
| __ orps(kScratchDoubleReg, dst); |
| // Canonicalize NaNs by quieting and clearing the payload. |
| __ cmpps(dst, kScratchDoubleReg, 3); |
| __ orps(kScratchDoubleReg, dst); |
| __ psrld(dst, 10); |
| __ andnps(dst, kScratchDoubleReg); |
| break; |
| } |
| case kX64F32x4Max: { |
| XMMRegister src1 = i.InputSimd128Register(1), |
| dst = i.OutputSimd128Register(); |
| DCHECK_EQ(dst, i.InputSimd128Register(0)); |
| // The maxps instruction doesn't propagate NaNs and +0's in its first |
      // operand. Perform maxps in both orders, merge the results, and adjust.
| __ movaps(kScratchDoubleReg, src1); |
| __ maxps(kScratchDoubleReg, dst); |
| __ maxps(dst, src1); |
| // Find discrepancies. |
| __ xorps(dst, kScratchDoubleReg); |
| // Propagate NaNs, which may be non-canonical. |
| __ orps(kScratchDoubleReg, dst); |
| // Propagate sign discrepancy and (subtle) quiet NaNs. |
| __ subps(kScratchDoubleReg, dst); |
| // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. |
| __ cmpps(dst, kScratchDoubleReg, 3); |
| __ psrld(dst, 10); |
| __ andnps(dst, kScratchDoubleReg); |
| break; |
| } |
| case kX64F32x4Eq: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0); |
| break; |
| } |
| case kX64F32x4Ne: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4); |
| break; |
| } |
| case kX64F32x4Lt: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64F32x4Le: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64I64x2Splat: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| if (instr->InputAt(0)->IsRegister()) { |
| __ movq(dst, i.InputRegister(0)); |
| } else { |
| __ movq(dst, i.InputOperand(0)); |
| } |
| __ pshufd(dst, dst, 0x44); |
| break; |
| } |
| case kX64I64x2ExtractLane: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1)); |
| break; |
| } |
| case kX64I64x2ReplaceLane: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| if (instr->InputAt(2)->IsRegister()) { |
| __ pinsrq(i.OutputSimd128Register(), i.InputRegister(2), |
| i.InputInt8(1)); |
| } else { |
| __ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); |
| } |
| break; |
| } |
| case kX64I64x2Neg: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(0); |
| if (dst == src) { |
| __ movapd(kScratchDoubleReg, src); |
| src = kScratchDoubleReg; |
| } |
| __ pxor(dst, dst); |
| __ psubq(dst, src); |
| break; |
| } |
| case kX64I64x2Shl: { |
| __ psllq(i.OutputSimd128Register(), i.InputInt8(1)); |
| break; |
| } |
| case kX64I64x2ShrS: { |
      // TODO(zhin): there is vpsraq, but it requires AVX512.
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // SSE has no packed 64-bit arithmetic right shift, so perform ShrS
      // on each quadword, one at a time, via a general purpose register.
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(0); |
| |
| // lower quadword |
| __ pextrq(kScratchRegister, src, 0x0); |
| __ sarq(kScratchRegister, Immediate(i.InputInt8(1))); |
| __ pinsrq(dst, kScratchRegister, 0x0); |
| |
| // upper quadword |
| __ pextrq(kScratchRegister, src, 0x1); |
| __ sarq(kScratchRegister, Immediate(i.InputInt8(1))); |
| __ pinsrq(dst, kScratchRegister, 0x1); |
| break; |
| } |
| case kX64I64x2Add: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ paddq(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64I64x2Sub: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ psubq(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64I64x2Mul: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
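      // Decompose each quadword into 32-bit halves (a = a_hi * 2^32 + a_lo):
      //   a * b = a_lo * b_lo + 2^32 * (a_hi * b_lo + a_lo * b_hi) (mod 2^64)
      // pmuludq multiplies the low dwords of each quadword.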
| XMMRegister left = i.InputSimd128Register(0); |
| XMMRegister right = i.InputSimd128Register(1); |
| XMMRegister tmp1 = i.ToSimd128Register(instr->TempAt(0)); |
| XMMRegister tmp2 = i.ToSimd128Register(instr->TempAt(1)); |
| |
| __ movaps(tmp1, left); |
| __ movaps(tmp2, right); |
| |
| // Multiply high dword of each qword of left with right. |
| __ psrlq(tmp1, 32); |
| __ pmuludq(tmp1, right); |
| |
| // Multiply high dword of each qword of right with left. |
| __ psrlq(tmp2, 32); |
| __ pmuludq(tmp2, left); |
| |
| __ paddq(tmp2, tmp1); |
| __ psllq(tmp2, 32); |
| |
| __ pmuludq(left, right); |
| __ paddq(left, tmp2); // left == dst |
| break; |
| } |
| case kX64I64x2Eq: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64I64x2Ne: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
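      // Compute equality, then invert the result (XOR with all ones).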
| __ pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| __ pcmpeqq(kScratchDoubleReg, kScratchDoubleReg); |
| __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); |
| break; |
| } |
| case kX64I64x2GtS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_2); |
| __ pcmpgtq(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
| break; |
| } |
| case kX64I64x2GeS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_2); |
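      // Compute dst >= src as !(src > dst).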
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(1); |
| XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); |
| |
| __ movaps(tmp, src); |
| __ pcmpgtq(tmp, dst); |
| __ pcmpeqd(dst, dst); |
| __ pxor(dst, tmp); |
| break; |
| } |
| case kX64I64x2ShrU: { |
| __ psrlq(i.OutputSimd128Register(), i.InputInt8(1)); |
| break; |
| } |
| case kX64I64x2GtU: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_2); |
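      // There is no unsigned quadword compare, so flip the sign bit of
      // both operands (XOR with 0x8000'0000'0000'0000) and use the signed
      // compare pcmpgtq instead.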
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(1); |
| XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); |
| |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psllq(kScratchDoubleReg, 63); |
| |
| __ movaps(tmp, src); |
| __ pxor(tmp, kScratchDoubleReg); |
| __ pxor(dst, kScratchDoubleReg); |
| __ pcmpgtq(dst, tmp); |
| break; |
| } |
| case kX64I64x2GeU: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_2); |
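      // Combine the sign-bit flip from kX64I64x2GtU with the !(src > dst)
      // inversion from kX64I64x2GeS.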
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(1); |
| XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); |
| |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psllq(kScratchDoubleReg, 63); |
| |
| __ movaps(tmp, src); |
| __ pxor(dst, kScratchDoubleReg); |
| __ pxor(tmp, kScratchDoubleReg); |
| __ pcmpgtq(tmp, dst); |
| __ pcmpeqd(dst, dst); |
| __ pxor(dst, tmp); |
| break; |
| } |
| case kX64I32x4Splat: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| if (instr->InputAt(0)->IsRegister()) { |
| __ movd(dst, i.InputRegister(0)); |
| } else { |
| __ movd(dst, i.InputOperand(0)); |
| } |
| __ pshufd(dst, dst, 0x0); |
| break; |
| } |
| case kX64I32x4ExtractLane: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1)); |
| break; |
| } |
| case kX64I32x4ReplaceLane: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| if (instr->InputAt(2)->IsRegister()) { |
| __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2), |
| i.InputInt8(1)); |
| } else { |
| __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); |
| } |
| break; |
| } |
| case kX64I32x4SConvertF32x4: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| XMMRegister dst = i.OutputSimd128Register(); |
      // NaN -> 0
| __ movaps(kScratchDoubleReg, dst); |
| __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg); |
| __ pand(dst, kScratchDoubleReg); |
| // Set top bit if >= 0 (but not -0.0!) |
| __ pxor(kScratchDoubleReg, dst); |
| // Convert |
| __ cvttps2dq(dst, dst); |
| // Set top bit if >=0 is now < 0 |
| __ pand(kScratchDoubleReg, dst); |
| __ psrad(kScratchDoubleReg, 31); |
| // Set positive overflow lanes to 0x7FFFFFFF |
| __ pxor(dst, kScratchDoubleReg); |
|