| // Copyright 2013 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/compiler/backend/code-generator.h" |
| |
| #include "src/base/overflowing-math.h" |
| #include "src/codegen/assembler-inl.h" |
| #include "src/codegen/callable.h" |
| #include "src/codegen/ia32/assembler-ia32.h" |
| #include "src/codegen/macro-assembler.h" |
| #include "src/codegen/optimized-compilation-info.h" |
| #include "src/compiler/backend/code-generator-impl.h" |
| #include "src/compiler/backend/gap-resolver.h" |
| #include "src/compiler/node-matchers.h" |
| #include "src/compiler/osr.h" |
| #include "src/execution/frame-constants.h" |
| #include "src/execution/frames.h" |
| #include "src/heap/heap-inl.h" // crbug.com/v8/8499 |
| #include "src/objects/smi.h" |
| #include "src/wasm/wasm-code-manager.h" |
| #include "src/wasm/wasm-objects.h" |
| |
| namespace v8 { |
| namespace internal { |
| namespace compiler { |
| |
| #define __ tasm()-> |
| |
| #define kScratchDoubleReg xmm0 |
| |
| // Adds IA-32 specific methods for decoding operands. |
| class IA32OperandConverter : public InstructionOperandConverter { |
| public: |
| IA32OperandConverter(CodeGenerator* gen, Instruction* instr) |
| : InstructionOperandConverter(gen, instr) {} |
| |
| Operand InputOperand(size_t index, int extra = 0) { |
| return ToOperand(instr_->InputAt(index), extra); |
| } |
| |
| Immediate InputImmediate(size_t index) { |
| return ToImmediate(instr_->InputAt(index)); |
| } |
| |
| Operand OutputOperand() { return ToOperand(instr_->Output()); } |
| |
| Operand ToOperand(InstructionOperand* op, int extra = 0) { |
| if (op->IsRegister()) { |
| DCHECK_EQ(0, extra); |
| return Operand(ToRegister(op)); |
| } else if (op->IsFPRegister()) { |
| DCHECK_EQ(0, extra); |
| return Operand(ToDoubleRegister(op)); |
| } |
| DCHECK(op->IsStackSlot() || op->IsFPStackSlot()); |
| return SlotToOperand(AllocatedOperand::cast(op)->index(), extra); |
| } |
| |
| Operand SlotToOperand(int slot, int extra = 0) { |
| FrameOffset offset = frame_access_state()->GetFrameOffset(slot); |
| return Operand(offset.from_stack_pointer() ? esp : ebp, |
| offset.offset() + extra); |
| } |
| |
| Immediate ToImmediate(InstructionOperand* operand) { |
| Constant constant = ToConstant(operand); |
| if (constant.type() == Constant::kInt32 && |
| RelocInfo::IsWasmReference(constant.rmode())) { |
| return Immediate(static_cast<Address>(constant.ToInt32()), |
| constant.rmode()); |
| } |
| switch (constant.type()) { |
| case Constant::kInt32: |
| return Immediate(constant.ToInt32()); |
| case Constant::kFloat32: |
| return Immediate::EmbeddedNumber(constant.ToFloat32()); |
| case Constant::kFloat64: |
| return Immediate::EmbeddedNumber(constant.ToFloat64().value()); |
| case Constant::kExternalReference: |
| return Immediate(constant.ToExternalReference()); |
| case Constant::kHeapObject: |
| return Immediate(constant.ToHeapObject()); |
| case Constant::kCompressedHeapObject: |
| break; |
| case Constant::kDelayedStringConstant: |
| return Immediate::EmbeddedStringConstant( |
| constant.ToDelayedStringConstant()); |
| case Constant::kInt64: |
| break; |
| case Constant::kRpoNumber: |
| return Immediate::CodeRelativeOffset(ToLabel(operand)); |
| } |
| UNREACHABLE(); |
| } |
| |
| static size_t NextOffset(size_t* offset) { |
| size_t i = *offset; |
| (*offset)++; |
| return i; |
| } |
| |
| static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) { |
| STATIC_ASSERT(0 == static_cast<int>(times_1)); |
| STATIC_ASSERT(1 == static_cast<int>(times_2)); |
| STATIC_ASSERT(2 == static_cast<int>(times_4)); |
| STATIC_ASSERT(3 == static_cast<int>(times_8)); |
| int scale = static_cast<int>(mode - one); |
| DCHECK(scale >= 0 && scale < 4); |
| return static_cast<ScaleFactor>(scale); |
| } |
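| // For example, ScaleFor(kMode_MR1, kMode_MR4) yields times_4: the scaled |
| // modes are laid out so that the distance from the times_1 mode doubles as |
| // the SIB scale encoding, which the static asserts above pin down. |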
| |
| Operand MemoryOperand(size_t* offset) { |
| AddressingMode mode = AddressingModeField::decode(instr_->opcode()); |
| switch (mode) { |
| case kMode_MR: { |
| Register base = InputRegister(NextOffset(offset)); |
| int32_t disp = 0; |
| return Operand(base, disp); |
| } |
| case kMode_MRI: { |
| Register base = InputRegister(NextOffset(offset)); |
| Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset))); |
| return Operand(base, ctant.ToInt32(), ctant.rmode()); |
| } |
| case kMode_MR1: |
| case kMode_MR2: |
| case kMode_MR4: |
| case kMode_MR8: { |
| Register base = InputRegister(NextOffset(offset)); |
| Register index = InputRegister(NextOffset(offset)); |
| ScaleFactor scale = ScaleFor(kMode_MR1, mode); |
| int32_t disp = 0; |
| return Operand(base, index, scale, disp); |
| } |
| case kMode_MR1I: |
| case kMode_MR2I: |
| case kMode_MR4I: |
| case kMode_MR8I: { |
| Register base = InputRegister(NextOffset(offset)); |
| Register index = InputRegister(NextOffset(offset)); |
| ScaleFactor scale = ScaleFor(kMode_MR1I, mode); |
| Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset))); |
| return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode()); |
| } |
| case kMode_M1: |
| case kMode_M2: |
| case kMode_M4: |
| case kMode_M8: { |
| Register index = InputRegister(NextOffset(offset)); |
| ScaleFactor scale = ScaleFor(kMode_M1, mode); |
| int32_t disp = 0; |
| return Operand(index, scale, disp); |
| } |
| case kMode_M1I: |
| case kMode_M2I: |
| case kMode_M4I: |
| case kMode_M8I: { |
| Register index = InputRegister(NextOffset(offset)); |
| ScaleFactor scale = ScaleFor(kMode_M1I, mode); |
| Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset))); |
| return Operand(index, scale, ctant.ToInt32(), ctant.rmode()); |
| } |
| case kMode_MI: { |
| Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset))); |
| return Operand(ctant.ToInt32(), ctant.rmode()); |
| } |
| case kMode_None: |
| UNREACHABLE(); |
| } |
| UNREACHABLE(); |
| } |
| |
| Operand MemoryOperand(size_t first_input = 0) { |
| return MemoryOperand(&first_input); |
| } |
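| // A typical consumption pattern (a sketch mirroring the write-barrier store |
| // case below): the cursor form lets the caller read whatever value inputs |
| // follow the address inputs, e.g. for a store encoded with kMode_MR1I: |
| //   size_t index = 0; |
| //   Operand operand = i.MemoryOperand(&index);  // eats base, index, disp |
| //   Register value = i.InputRegister(index);    // first non-address input |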
| |
| Operand NextMemoryOperand(size_t offset = 0) { |
| AddressingMode mode = AddressingModeField::decode(instr_->opcode()); |
| Register base = InputRegister(NextOffset(&offset)); |
| const int32_t disp = 4; |
| if (mode == kMode_MR1) { |
| Register index = InputRegister(NextOffset(&offset)); |
| ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1); |
| return Operand(base, index, scale, disp); |
| } else if (mode == kMode_MRI) { |
| Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset))); |
| return Operand(base, ctant.ToInt32() + disp, ctant.rmode()); |
| } else { |
| UNREACHABLE(); |
| } |
| } |
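| // NextMemoryOperand serves the 64-bit atomic cases below: the fixed +4 |
| // displacement addresses the high word of an i64 slot, one pointer past |
| // the address MemoryOperand would produce from the same inputs. |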
| |
| void MoveInstructionOperandToRegister(Register destination, |
| InstructionOperand* op) { |
| if (op->IsImmediate() || op->IsConstant()) { |
| gen_->tasm()->mov(destination, ToImmediate(op)); |
| } else if (op->IsRegister()) { |
| gen_->tasm()->Move(destination, ToRegister(op)); |
| } else { |
| gen_->tasm()->mov(destination, ToOperand(op)); |
| } |
| } |
| }; |
| |
| namespace { |
| |
| bool HasImmediateInput(Instruction* instr, size_t index) { |
| return instr->InputAt(index)->IsImmediate(); |
| } |
| |
| class OutOfLineLoadFloat32NaN final : public OutOfLineCode { |
| public: |
| OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result) |
| : OutOfLineCode(gen), result_(result) {} |
| |
| void Generate() final { |
| __ xorps(result_, result_); |
| __ divss(result_, result_); |
| } |
| |
| private: |
| XMMRegister const result_; |
| }; |
| |
| class OutOfLineLoadFloat64NaN final : public OutOfLineCode { |
| public: |
| OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result) |
| : OutOfLineCode(gen), result_(result) {} |
| |
| void Generate() final { |
| __ xorpd(result_, result_); |
| __ divsd(result_, result_); |
| } |
| |
| private: |
| XMMRegister const result_; |
| }; |
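| // Both out-of-line paths above materialize a quiet NaN as 0.0 / 0.0 rather |
| // than loading a constant, presumably to keep the slow path free of extra |
| // constants and memory loads. |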
| |
| class OutOfLineTruncateDoubleToI final : public OutOfLineCode { |
| public: |
| OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result, |
| XMMRegister input, StubCallMode stub_mode) |
| : OutOfLineCode(gen), |
| result_(result), |
| input_(input), |
| stub_mode_(stub_mode), |
| isolate_(gen->isolate()), |
| zone_(gen->zone()) {} |
| |
| void Generate() final { |
| __ AllocateStackSpace(kDoubleSize); |
| __ movsd(MemOperand(esp, 0), input_); |
| if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) { |
| // A direct call to a wasm runtime stub defined in this module. |
| // Just encode the stub index. This will be patched when the code |
| // is added to the native module and copied into wasm code space. |
| __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); |
| } else { |
| __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET); |
| } |
| __ mov(result_, MemOperand(esp, 0)); |
| __ add(esp, Immediate(kDoubleSize)); |
| } |
| |
| private: |
| Register const result_; |
| XMMRegister const input_; |
| StubCallMode stub_mode_; |
| Isolate* isolate_; |
| Zone* zone_; |
| }; |
| |
| class OutOfLineRecordWrite final : public OutOfLineCode { |
| public: |
| OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand, |
| Register value, Register scratch0, Register scratch1, |
| RecordWriteMode mode, StubCallMode stub_mode) |
| : OutOfLineCode(gen), |
| object_(object), |
| operand_(operand), |
| value_(value), |
| scratch0_(scratch0), |
| scratch1_(scratch1), |
| mode_(mode), |
| stub_mode_(stub_mode), |
| zone_(gen->zone()) {} |
| |
| void Generate() final { |
| if (mode_ > RecordWriteMode::kValueIsPointer) { |
| __ JumpIfSmi(value_, exit()); |
| } |
| __ CheckPageFlag(value_, scratch0_, |
| MemoryChunk::kPointersToHereAreInterestingMask, zero, |
| exit()); |
| __ lea(scratch1_, operand_); |
| RememberedSetAction const remembered_set_action = |
| mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET |
| : OMIT_REMEMBERED_SET; |
| SaveFPRegsMode const save_fp_mode = |
| frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs; |
| if (mode_ == RecordWriteMode::kValueIsEphemeronKey) { |
| __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode); |
| } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) { |
| // A direct call to a wasm runtime stub defined in this module. |
| // Just encode the stub index. This will be patched when the code |
| // is added to the native module and copied into wasm code space. |
| __ CallRecordWriteStub(object_, scratch1_, remembered_set_action, |
| save_fp_mode, wasm::WasmCode::kRecordWrite); |
| } else { |
| __ CallRecordWriteStub(object_, scratch1_, remembered_set_action, |
| save_fp_mode); |
| } |
| } |
| |
| private: |
| Register const object_; |
| Operand const operand_; |
| Register const value_; |
| Register const scratch0_; |
| Register const scratch1_; |
| RecordWriteMode const mode_; |
| StubCallMode const stub_mode_; |
| Zone* zone_; |
| }; |
| |
| } // namespace |
| |
| #define ASSEMBLE_COMPARE(asm_instr) \ |
| do { \ |
| if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ |
| size_t index = 0; \ |
| Operand left = i.MemoryOperand(&index); \ |
| if (HasImmediateInput(instr, index)) { \ |
| __ asm_instr(left, i.InputImmediate(index)); \ |
| } else { \ |
| __ asm_instr(left, i.InputRegister(index)); \ |
| } \ |
| } else { \ |
| if (HasImmediateInput(instr, 1)) { \ |
| if (instr->InputAt(0)->IsRegister()) { \ |
| __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ |
| } else { \ |
| __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ |
| } \ |
| } else { \ |
| if (instr->InputAt(1)->IsRegister()) { \ |
| __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ |
| } else { \ |
| __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ |
| } \ |
| } \ |
| } \ |
| } while (0) |
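| // For the common case (no addressing mode, register input 0 and an |
| // immediate input 1) the macro above boils down to roughly: |
| //   __ cmp(i.InputRegister(0), i.InputImmediate(1));  // e.g. kIA32Cmp |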
| |
| #define ASSEMBLE_IEEE754_BINOP(name) \ |
| do { \ |
| /* Pass two doubles as arguments on the stack. */ \ |
| __ PrepareCallCFunction(4, eax); \ |
| __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \ |
| __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1)); \ |
| __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \ |
| /* Return value is in st(0) on ia32. */ \ |
| /* Store it into the result register. */ \ |
| __ AllocateStackSpace(kDoubleSize); \ |
| __ fstp_d(Operand(esp, 0)); \ |
| __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \ |
| __ add(esp, Immediate(kDoubleSize)); \ |
| } while (false) |
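| // Note that the count passed to PrepareCallCFunction/CallCFunction is in |
| // 32-bit stack slots, not C arguments: a double takes kDoubleSize / |
| // kSystemPointerSize = 2 slots on ia32, hence 4 here and 2 in the unary |
| // variant below. |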
| |
| #define ASSEMBLE_IEEE754_UNOP(name) \ |
| do { \ |
| /* Pass one double as argument on the stack. */ \ |
| __ PrepareCallCFunction(2, eax); \ |
| __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \ |
| __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \ |
| /* Return value is in st(0) on ia32. */ \ |
| /* Store it into the result register. */ \ |
| __ AllocateStackSpace(kDoubleSize); \ |
| __ fstp_d(Operand(esp, 0)); \ |
| __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \ |
| __ add(esp, Immediate(kDoubleSize)); \ |
| } while (false) |
| |
| #define ASSEMBLE_BINOP(asm_instr) \ |
| do { \ |
| if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ |
| size_t index = 1; \ |
| Operand right = i.MemoryOperand(&index); \ |
| __ asm_instr(i.InputRegister(0), right); \ |
| } else { \ |
| if (HasImmediateInput(instr, 1)) { \ |
| __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ |
| } else { \ |
| __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ |
| } \ |
| } \ |
| } while (0) |
| |
| #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \ |
| do { \ |
| Label binop; \ |
| __ bind(&binop); \ |
| __ mov_inst(eax, i.MemoryOperand(1)); \ |
| __ Move(i.TempRegister(0), eax); \ |
| __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \ |
| __ lock(); \ |
| __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \ |
| __ j(not_equal, &binop); \ |
| } while (false) |
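| // This is the standard lock-cmpxchg retry loop; for a word-sized atomic |
| // add it is morally: |
| //   do { |
| //     old = *addr;                    // mov_inst into eax |
| //     tmp = old + value;              // bin_inst on the temp register |
| //   } while (!CAS(addr, old, tmp));   // lock cmpxchg; ZF clear on failure |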
| |
| #define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \ |
| do { \ |
| Label binop; \ |
| __ bind(&binop); \ |
| __ mov(eax, i.MemoryOperand(2)); \ |
| __ mov(edx, i.NextMemoryOperand(2)); \ |
| __ push(ebx); \ |
| frame_access_state()->IncreaseSPDelta(1); \ |
| i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \ |
| __ push(i.InputRegister(1)); \ |
| __ instr1(ebx, eax); \ |
| __ instr2(i.InputRegister(1), edx); \ |
| __ lock(); \ |
| __ cmpxchg8b(i.MemoryOperand(2)); \ |
| __ pop(i.InputRegister(1)); \ |
| __ pop(ebx); \ |
| frame_access_state()->IncreaseSPDelta(-1); \ |
| __ j(not_equal, &binop); \ |
| } while (false) |
| |
| #define ASSEMBLE_MOVX(mov_instr) \ |
| do { \ |
| if (instr->addressing_mode() != kMode_None) { \ |
| __ mov_instr(i.OutputRegister(), i.MemoryOperand()); \ |
| } else if (instr->InputAt(0)->IsRegister()) { \ |
| __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \ |
| } else { \ |
| __ mov_instr(i.OutputRegister(), i.InputOperand(0)); \ |
| } \ |
| } while (0) |
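| // E.g. ASSEMBLE_MOVX(movsx_b) for kIA32Movsxbl expands (roughly) to |
| //   __ movsx_b(i.OutputRegister(), i.MemoryOperand()); |
| // when an addressing mode is present, sign-extending the loaded byte. |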
| |
| #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \ |
| do { \ |
| XMMRegister src0 = i.InputSimd128Register(0); \ |
| Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \ |
| if (CpuFeatures::IsSupported(AVX)) { \ |
| CpuFeatureScope avx_scope(tasm(), AVX); \ |
| __ v##opcode(i.OutputSimd128Register(), src0, src1); \ |
| } else { \ |
| DCHECK_EQ(i.OutputSimd128Register(), src0); \ |
| __ opcode(i.OutputSimd128Register(), src1); \ |
| } \ |
| } while (false) |
| |
| #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \ |
| if (CpuFeatures::IsSupported(AVX)) { \ |
| CpuFeatureScope avx_scope(tasm(), AVX); \ |
| __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \ |
| i.InputOperand(1), imm); \ |
| } else { \ |
| CpuFeatureScope sse_scope(tasm(), SSELevel); \ |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \ |
| __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \ |
| } |
| |
| #define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ |
| do { \ |
| Register dst = i.OutputRegister(); \ |
| Operand src = i.InputOperand(0); \ |
| Register tmp = i.TempRegister(0); \ |
| __ mov(tmp, Immediate(1)); \ |
| __ xor_(dst, dst); \ |
| __ Pxor(kScratchDoubleReg, kScratchDoubleReg); \ |
| __ opcode(kScratchDoubleReg, src); \ |
| __ Ptest(kScratchDoubleReg, kScratchDoubleReg); \ |
| __ cmov(zero, dst, tmp); \ |
| } while (false) |
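| // The reduction works because opcode is a packed compare against the zeroed |
| // scratch register: the scratch ends up all-zero iff no input lane was |
| // zero, ptest then sets ZF from that, and cmov picks the 1 in tmp over the |
| // pre-zeroed destination. |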
| |
| void CodeGenerator::AssembleDeconstructFrame() { |
| __ mov(esp, ebp); |
| __ pop(ebp); |
| } |
| |
| void CodeGenerator::AssemblePrepareTailCall() { |
| if (frame_access_state()->has_frame()) { |
| __ mov(ebp, MemOperand(ebp, 0)); |
| } |
| frame_access_state()->SetFrameAccessToSP(); |
| } |
| |
| void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg, |
| Register, Register, |
| Register) { |
| // There are not enough temp registers left on ia32 for a call instruction, |
| // so we pick some scratch registers and save/restore them manually here. |
| int scratch_count = 3; |
| Register scratch1 = esi; |
| Register scratch2 = ecx; |
| Register scratch3 = edx; |
| DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3)); |
| Label done; |
| |
| // Check if current frame is an arguments adaptor frame. |
| __ cmp(Operand(ebp, StandardFrameConstants::kContextOffset), |
| Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); |
| __ j(not_equal, &done, Label::kNear); |
| |
| __ push(scratch1); |
| __ push(scratch2); |
| __ push(scratch3); |
| |
| // Load the arguments count from the current arguments adaptor frame (note |
| // that it does not include the receiver). |
| Register caller_args_count_reg = scratch1; |
| __ mov(caller_args_count_reg, |
| Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset)); |
| __ SmiUntag(caller_args_count_reg); |
| |
| ParameterCount callee_args_count(args_reg); |
| __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2, |
| scratch3, scratch_count); |
| __ pop(scratch3); |
| __ pop(scratch2); |
| __ pop(scratch1); |
| |
| __ bind(&done); |
| } |
| |
| namespace { |
| |
| void AdjustStackPointerForTailCall(TurboAssembler* tasm, |
| FrameAccessState* state, |
| int new_slot_above_sp, |
| bool allow_shrinkage = true) { |
| int current_sp_offset = state->GetSPToFPSlotCount() + |
| StandardFrameConstants::kFixedSlotCountAboveFp; |
| int stack_slot_delta = new_slot_above_sp - current_sp_offset; |
| if (stack_slot_delta > 0) { |
| tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize); |
| state->IncreaseSPDelta(stack_slot_delta); |
| } else if (allow_shrinkage && stack_slot_delta < 0) { |
| tasm->add(esp, Immediate(-stack_slot_delta * kSystemPointerSize)); |
| state->IncreaseSPDelta(stack_slot_delta); |
| } |
| } |
| |
| #ifdef DEBUG |
| bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter, |
| const Instruction* instr) { |
| if (instr->OutputCount() > 0) { |
| if (converter->OutputRegister(0) != eax) return false; |
| if (instr->OutputCount() == 2 && converter->OutputRegister(1) != edx) |
| return false; |
| } |
| return true; |
| } |
| #endif |
| |
| } // namespace |
| |
| void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr, |
| int first_unused_stack_slot) { |
| CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush); |
| ZoneVector<MoveOperands*> pushes(zone()); |
| GetPushCompatibleMoves(instr, flags, &pushes); |
| |
| if (!pushes.empty() && |
| (LocationOperand::cast(pushes.back()->destination()).index() + 1 == |
| first_unused_stack_slot)) { |
| IA32OperandConverter g(this, instr); |
| for (auto move : pushes) { |
| LocationOperand destination_location( |
| LocationOperand::cast(move->destination())); |
| InstructionOperand source(move->source()); |
| AdjustStackPointerForTailCall(tasm(), frame_access_state(), |
| destination_location.index()); |
| if (source.IsStackSlot()) { |
| LocationOperand source_location(LocationOperand::cast(source)); |
| __ push(g.SlotToOperand(source_location.index())); |
| } else if (source.IsRegister()) { |
| LocationOperand source_location(LocationOperand::cast(source)); |
| __ push(source_location.GetRegister()); |
| } else if (source.IsImmediate()) { |
| __ Push(Immediate(ImmediateOperand::cast(source).inline_value())); |
| } else { |
| // Pushes of non-scalar data types are not supported. |
| UNIMPLEMENTED(); |
| } |
| frame_access_state()->IncreaseSPDelta(1); |
| move->Eliminate(); |
| } |
| } |
| AdjustStackPointerForTailCall(tasm(), frame_access_state(), |
| first_unused_stack_slot, false); |
| } |
| |
| void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr, |
| int first_unused_stack_slot) { |
| AdjustStackPointerForTailCall(tasm(), frame_access_state(), |
| first_unused_stack_slot); |
| } |
| |
| // Check that {kJavaScriptCallCodeStartRegister} is correct. |
| void CodeGenerator::AssembleCodeStartRegisterCheck() { |
| __ push(eax); // Push eax so we can use it as a scratch register. |
| __ ComputeCodeStartAddress(eax); |
| __ cmp(eax, kJavaScriptCallCodeStartRegister); |
| __ Assert(equal, AbortReason::kWrongFunctionCodeStart); |
| __ pop(eax); // Restore eax. |
| } |
| |
| // Check if the code object is marked for deoptimization. If it is, then it |
| // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need |
| // to: |
| // 1. read from memory the word that contains that bit, which can be found in |
| // the flags in the referenced {CodeDataContainer} object; |
| // 2. test kMarkedForDeoptimizationBit in those flags; and |
| // 3. if it is not zero then it jumps to the builtin. |
| void CodeGenerator::BailoutIfDeoptimized() { |
| int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize; |
| __ push(eax); // Push eax so we can use it as a scratch register. |
| __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset)); |
| __ test(FieldOperand(eax, CodeDataContainer::kKindSpecificFlagsOffset), |
| Immediate(1 << Code::kMarkedForDeoptimizationBit)); |
| __ pop(eax); // Restore eax. |
| |
| Label skip; |
| __ j(zero, &skip); |
| __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode), |
| RelocInfo::CODE_TARGET); |
| __ bind(&skip); |
| } |
| |
| void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() { |
| // TODO(860429): Remove remaining poisoning infrastructure on ia32. |
| UNREACHABLE(); |
| } |
| |
| void CodeGenerator::AssembleRegisterArgumentPoisoning() { |
| // TODO(860429): Remove remaining poisoning infrastructure on ia32. |
| UNREACHABLE(); |
| } |
| |
| // Assembles an instruction after register allocation, producing machine code. |
| CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| Instruction* instr) { |
| IA32OperandConverter i(this, instr); |
| InstructionCode opcode = instr->opcode(); |
| ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode); |
| switch (arch_opcode) { |
| case kArchCallCodeObject: { |
| InstructionOperand* op = instr->InputAt(0); |
| if (op->IsImmediate()) { |
| Handle<Code> code = i.InputCode(0); |
| __ Call(code, RelocInfo::CODE_TARGET); |
| } else if (op->IsRegister()) { |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| __ LoadCodeObjectEntry(reg, reg); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineCall(reg); |
| } else { |
| __ call(reg); |
| } |
| } else { |
| CHECK(tasm()->root_array_available()); |
| // This is used to allow calls to the arguments adaptor trampoline from |
| // code that only has 5 gp registers available and cannot call through |
| // an immediate. This happens when the arguments adaptor trampoline is |
| // not an embedded builtin. |
| // TODO(v8:6666): Remove once only embedded builtins are supported. |
| __ push(eax); |
| frame_access_state()->IncreaseSPDelta(1); |
| Operand virtual_call_target_register( |
| kRootRegister, IsolateData::virtual_call_target_register_offset()); |
| __ mov(eax, i.InputOperand(0)); |
| __ LoadCodeObjectEntry(eax, eax); |
| __ mov(virtual_call_target_register, eax); |
| __ pop(eax); |
| frame_access_state()->IncreaseSPDelta(-1); |
| __ call(virtual_call_target_register); |
| } |
| RecordCallPosition(instr); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchCallBuiltinPointer: { |
| DCHECK(!HasImmediateInput(instr, 0)); |
| Register builtin_index = i.InputRegister(0); |
| __ CallBuiltinByIndex(builtin_index); |
| RecordCallPosition(instr); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchCallWasmFunction: { |
| if (HasImmediateInput(instr, 0)) { |
| Constant constant = i.ToConstant(instr->InputAt(0)); |
| Address wasm_code = static_cast<Address>(constant.ToInt32()); |
| if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) { |
| __ wasm_call(wasm_code, constant.rmode()); |
| } else { |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineCall(wasm_code, constant.rmode()); |
| } else { |
| __ call(wasm_code, constant.rmode()); |
| } |
| } |
| } else { |
| Register reg = i.InputRegister(0); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineCall(reg); |
| } else { |
| __ call(reg); |
| } |
| } |
| RecordCallPosition(instr); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchTailCallCodeObjectFromJSFunction: |
| case kArchTailCallCodeObject: { |
| if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { |
| AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, |
| no_reg, no_reg, no_reg); |
| } |
| if (HasImmediateInput(instr, 0)) { |
| Handle<Code> code = i.InputCode(0); |
| __ Jump(code, RelocInfo::CODE_TARGET); |
| } else { |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| __ LoadCodeObjectEntry(reg, reg); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineJump(reg); |
| } else { |
| __ jmp(reg); |
| } |
| } |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchTailCallWasm: { |
| if (HasImmediateInput(instr, 0)) { |
| Constant constant = i.ToConstant(instr->InputAt(0)); |
| Address wasm_code = static_cast<Address>(constant.ToInt32()); |
| __ jmp(wasm_code, constant.rmode()); |
| } else { |
| Register reg = i.InputRegister(0); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineJump(reg); |
| } else { |
| __ jmp(reg); |
| } |
| } |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchTailCallAddress: { |
| CHECK(!HasImmediateInput(instr, 0)); |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
| __ RetpolineJump(reg); |
| } else { |
| __ jmp(reg); |
| } |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchCallJSFunction: { |
| Register func = i.InputRegister(0); |
| if (FLAG_debug_code) { |
| // Check the function's context matches the context argument. |
| __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset)); |
| __ Assert(equal, AbortReason::kWrongFunctionContext); |
| } |
| static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch"); |
| __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset)); |
| __ CallCodeObject(ecx); |
| RecordCallPosition(instr); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchPrepareCallCFunction: { |
| // Frame alignment requires using FP-relative frame addressing. |
| frame_access_state()->SetFrameAccessToFP(); |
| int const num_parameters = MiscField::decode(instr->opcode()); |
| __ PrepareCallCFunction(num_parameters, i.TempRegister(0)); |
| break; |
| } |
| case kArchSaveCallerRegisters: { |
| fp_mode_ = |
| static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())); |
| DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); |
| // kReturnRegister0 should have been saved before entering the stub. |
| int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0); |
| DCHECK(IsAligned(bytes, kSystemPointerSize)); |
| DCHECK_EQ(0, frame_access_state()->sp_delta()); |
| frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); |
| DCHECK(!caller_registers_saved_); |
| caller_registers_saved_ = true; |
| break; |
| } |
| case kArchRestoreCallerRegisters: { |
| DCHECK(fp_mode_ == |
| static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()))); |
| DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); |
| // Don't overwrite the returned value. |
| int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0); |
| frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize)); |
| DCHECK_EQ(0, frame_access_state()->sp_delta()); |
| DCHECK(caller_registers_saved_); |
| caller_registers_saved_ = false; |
| break; |
| } |
| case kArchPrepareTailCall: |
| AssemblePrepareTailCall(); |
| break; |
| case kArchCallCFunction: { |
| int const num_parameters = MiscField::decode(instr->opcode()); |
| Label return_location; |
| if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) { |
| // Put the return address in a stack slot. |
| Register scratch = eax; |
| __ push(scratch); |
| __ PushPC(); |
| int pc = __ pc_offset(); |
| __ pop(scratch); |
| __ sub(scratch, Immediate(pc + Code::kHeaderSize - kHeapObjectTag)); |
| __ add(scratch, Immediate::CodeRelativeOffset(&return_location)); |
| __ mov(MemOperand(ebp, WasmExitFrameConstants::kCallingPCOffset), |
| scratch); |
| __ pop(scratch); |
| } |
| if (HasImmediateInput(instr, 0)) { |
| ExternalReference ref = i.InputExternalReference(0); |
| __ CallCFunction(ref, num_parameters); |
| } else { |
| Register func = i.InputRegister(0); |
| __ CallCFunction(func, num_parameters); |
| } |
| __ bind(&return_location); |
| RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt); |
| frame_access_state()->SetFrameAccessToDefault(); |
| // Ideally, we should decrement SP delta to match the change of stack |
| // pointer in CallCFunction. However, for certain architectures (e.g. |
| // ARM), there may be a stricter alignment requirement, causing the old SP |
| // to be saved on the stack. In those cases, we cannot calculate the SP |
| // delta statically. |
| frame_access_state()->ClearSPDelta(); |
| if (caller_registers_saved_) { |
| // Need to re-sync SP delta introduced in kArchSaveCallerRegisters. |
| // Here, we assume the sequence to be: |
| // kArchSaveCallerRegisters; |
| // kArchCallCFunction; |
| // kArchRestoreCallerRegisters; |
| int bytes = |
| __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0); |
| frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); |
| } |
| break; |
| } |
| case kArchJmp: |
| AssembleArchJump(i.InputRpo(0)); |
| break; |
| case kArchBinarySearchSwitch: |
| AssembleArchBinarySearchSwitch(instr); |
| break; |
| case kArchLookupSwitch: |
| AssembleArchLookupSwitch(instr); |
| break; |
| case kArchTableSwitch: |
| AssembleArchTableSwitch(instr); |
| break; |
| case kArchComment: |
| __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0))); |
| break; |
| case kArchAbortCSAAssert: |
| DCHECK(i.InputRegister(0) == edx); |
| { |
| // We don't actually want to generate a pile of code for this, so just |
| // claim there is a stack frame, without generating one. |
| FrameScope scope(tasm(), StackFrame::NONE); |
| __ Call( |
| isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert), |
| RelocInfo::CODE_TARGET); |
| } |
| __ int3(); |
| break; |
| case kArchDebugBreak: |
| __ int3(); |
| break; |
| case kArchNop: |
| case kArchThrowTerminator: |
| // don't emit code for nops. |
| break; |
| case kArchDeoptimize: { |
| int deopt_state_id = |
| BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); |
| CodeGenResult result = |
| AssembleDeoptimizerCall(deopt_state_id, current_source_position_); |
| if (result != kSuccess) return result; |
| break; |
| } |
| case kArchRet: |
| AssembleReturn(instr->InputAt(0)); |
| break; |
| case kArchStackPointer: |
| __ mov(i.OutputRegister(), esp); |
| break; |
| case kArchFramePointer: |
| __ mov(i.OutputRegister(), ebp); |
| break; |
| case kArchParentFramePointer: |
| if (frame_access_state()->has_frame()) { |
| __ mov(i.OutputRegister(), Operand(ebp, 0)); |
| } else { |
| __ mov(i.OutputRegister(), ebp); |
| } |
| break; |
| case kArchTruncateDoubleToI: { |
| auto result = i.OutputRegister(); |
| auto input = i.InputDoubleRegister(0); |
| auto ool = new (zone()) OutOfLineTruncateDoubleToI( |
| this, result, input, DetermineStubCallMode()); |
| __ cvttsd2si(result, Operand(input)); |
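| // cvttsd2si saturates to INT32_MIN (0x80000000) when the input does not |
| // fit in an int32; cmp result, 1 overflows exactly for that value, so |
| // only such inputs take the out-of-line path below. |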
| __ cmp(result, 1); |
| __ j(overflow, ool->entry()); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kArchStoreWithWriteBarrier: { |
| RecordWriteMode mode = |
| static_cast<RecordWriteMode>(MiscField::decode(instr->opcode())); |
| Register object = i.InputRegister(0); |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| Register value = i.InputRegister(index); |
| Register scratch0 = i.TempRegister(0); |
| Register scratch1 = i.TempRegister(1); |
| auto ool = new (zone()) |
| OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1, |
| mode, DetermineStubCallMode()); |
| __ mov(operand, value); |
| __ CheckPageFlag(object, scratch0, |
| MemoryChunk::kPointersFromHereAreInterestingMask, |
| not_zero, ool->entry()); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kArchStackSlot: { |
| FrameOffset offset = |
| frame_access_state()->GetFrameOffset(i.InputInt32(0)); |
| Register base = offset.from_stack_pointer() ? esp : ebp; |
| __ lea(i.OutputRegister(), Operand(base, offset.offset())); |
| break; |
| } |
| case kIeee754Float64Acos: |
| ASSEMBLE_IEEE754_UNOP(acos); |
| break; |
| case kIeee754Float64Acosh: |
| ASSEMBLE_IEEE754_UNOP(acosh); |
| break; |
| case kIeee754Float64Asin: |
| ASSEMBLE_IEEE754_UNOP(asin); |
| break; |
| case kIeee754Float64Asinh: |
| ASSEMBLE_IEEE754_UNOP(asinh); |
| break; |
| case kIeee754Float64Atan: |
| ASSEMBLE_IEEE754_UNOP(atan); |
| break; |
| case kIeee754Float64Atanh: |
| ASSEMBLE_IEEE754_UNOP(atanh); |
| break; |
| case kIeee754Float64Atan2: |
| ASSEMBLE_IEEE754_BINOP(atan2); |
| break; |
| case kIeee754Float64Cbrt: |
| ASSEMBLE_IEEE754_UNOP(cbrt); |
| break; |
| case kIeee754Float64Cos: |
| ASSEMBLE_IEEE754_UNOP(cos); |
| break; |
| case kIeee754Float64Cosh: |
| ASSEMBLE_IEEE754_UNOP(cosh); |
| break; |
| case kIeee754Float64Expm1: |
| ASSEMBLE_IEEE754_UNOP(expm1); |
| break; |
| case kIeee754Float64Exp: |
| ASSEMBLE_IEEE754_UNOP(exp); |
| break; |
| case kIeee754Float64Log: |
| ASSEMBLE_IEEE754_UNOP(log); |
| break; |
| case kIeee754Float64Log1p: |
| ASSEMBLE_IEEE754_UNOP(log1p); |
| break; |
| case kIeee754Float64Log2: |
| ASSEMBLE_IEEE754_UNOP(log2); |
| break; |
| case kIeee754Float64Log10: |
| ASSEMBLE_IEEE754_UNOP(log10); |
| break; |
| case kIeee754Float64Pow: |
| ASSEMBLE_IEEE754_BINOP(pow); |
| break; |
| case kIeee754Float64Sin: |
| ASSEMBLE_IEEE754_UNOP(sin); |
| break; |
| case kIeee754Float64Sinh: |
| ASSEMBLE_IEEE754_UNOP(sinh); |
| break; |
| case kIeee754Float64Tan: |
| ASSEMBLE_IEEE754_UNOP(tan); |
| break; |
| case kIeee754Float64Tanh: |
| ASSEMBLE_IEEE754_UNOP(tanh); |
| break; |
| case kIA32Add: |
| ASSEMBLE_BINOP(add); |
| break; |
| case kIA32And: |
| ASSEMBLE_BINOP(and_); |
| break; |
| case kIA32Cmp: |
| ASSEMBLE_COMPARE(cmp); |
| break; |
| case kIA32Cmp16: |
| ASSEMBLE_COMPARE(cmpw); |
| break; |
| case kIA32Cmp8: |
| ASSEMBLE_COMPARE(cmpb); |
| break; |
| case kIA32Test: |
| ASSEMBLE_COMPARE(test); |
| break; |
| case kIA32Test16: |
| ASSEMBLE_COMPARE(test_w); |
| break; |
| case kIA32Test8: |
| ASSEMBLE_COMPARE(test_b); |
| break; |
| case kIA32Imul: |
| if (HasImmediateInput(instr, 1)) { |
| __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1)); |
| } else { |
| __ imul(i.OutputRegister(), i.InputOperand(1)); |
| } |
| break; |
| case kIA32ImulHigh: |
| __ imul(i.InputRegister(1)); |
| break; |
| case kIA32UmulHigh: |
| __ mul(i.InputRegister(1)); |
| break; |
| case kIA32Idiv: |
| __ cdq(); |
| __ idiv(i.InputOperand(1)); |
| break; |
| case kIA32Udiv: |
| __ Move(edx, Immediate(0)); |
| __ div(i.InputOperand(1)); |
| break; |
| case kIA32Not: |
| __ not_(i.OutputOperand()); |
| break; |
| case kIA32Neg: |
| __ neg(i.OutputOperand()); |
| break; |
| case kIA32Or: |
| ASSEMBLE_BINOP(or_); |
| break; |
| case kIA32Xor: |
| ASSEMBLE_BINOP(xor_); |
| break; |
| case kIA32Sub: |
| ASSEMBLE_BINOP(sub); |
| break; |
| case kIA32Shl: |
| if (HasImmediateInput(instr, 1)) { |
| __ shl(i.OutputOperand(), i.InputInt5(1)); |
| } else { |
| __ shl_cl(i.OutputOperand()); |
| } |
| break; |
| case kIA32Shr: |
| if (HasImmediateInput(instr, 1)) { |
| __ shr(i.OutputOperand(), i.InputInt5(1)); |
| } else { |
| __ shr_cl(i.OutputOperand()); |
| } |
| break; |
| case kIA32Sar: |
| if (HasImmediateInput(instr, 1)) { |
| __ sar(i.OutputOperand(), i.InputInt5(1)); |
| } else { |
| __ sar_cl(i.OutputOperand()); |
| } |
| break; |
| case kIA32AddPair: { |
| // i.OutputRegister(0) == i.InputRegister(0) ... left low word. |
| // i.InputRegister(1) ... left high word. |
| // i.InputRegister(2) ... right low word. |
| // i.InputRegister(3) ... right high word. |
| bool use_temp = false; |
| if ((instr->InputAt(1)->IsRegister() && |
| i.OutputRegister(0).code() == i.InputRegister(1).code()) || |
| i.OutputRegister(0).code() == i.InputRegister(3).code()) { |
| // We cannot write to the output register directly, because it would |
| // overwrite an input for adc. We have to use the temp register. |
| use_temp = true; |
| __ Move(i.TempRegister(0), i.InputRegister(0)); |
| __ add(i.TempRegister(0), i.InputRegister(2)); |
| } else { |
| __ add(i.OutputRegister(0), i.InputRegister(2)); |
| } |
| i.MoveInstructionOperandToRegister(i.OutputRegister(1), |
| instr->InputAt(1)); |
| __ adc(i.OutputRegister(1), Operand(i.InputRegister(3))); |
| if (use_temp) { |
| __ Move(i.OutputRegister(0), i.TempRegister(0)); |
| } |
| break; |
| } |
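| // The pair add is plain multi-precision arithmetic: add the low words, |
| // then adc folds the carry into the high words. E.g. 0x1'FFFFFFFF + 1: |
| // low 0xFFFFFFFF + 1 = 0 with CF = 1, high 1 + 0 + CF = 2, i.e. |
| // 0x2'00000000. |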
| case kIA32SubPair: { |
| // i.OutputRegister(0) == i.InputRegister(0) ... left low word. |
| // i.InputRegister(1) ... left high word. |
| // i.InputRegister(2) ... right low word. |
| // i.InputRegister(3) ... right high word. |
| bool use_temp = false; |
| if ((instr->InputAt(1)->IsRegister() && |
| i.OutputRegister(0).code() == i.InputRegister(1).code()) || |
| i.OutputRegister(0).code() == i.InputRegister(3).code()) { |
| // We cannot write to the output register directly, because it would |
| // overwrite an input for sbb. We have to use the temp register. |
| use_temp = true; |
| __ Move(i.TempRegister(0), i.InputRegister(0)); |
| __ sub(i.TempRegister(0), i.InputRegister(2)); |
| } else { |
| __ sub(i.OutputRegister(0), i.InputRegister(2)); |
| } |
| i.MoveInstructionOperandToRegister(i.OutputRegister(1), |
| instr->InputAt(1)); |
| __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3))); |
| if (use_temp) { |
| __ Move(i.OutputRegister(0), i.TempRegister(0)); |
| } |
| break; |
| } |
| case kIA32MulPair: { |
| __ imul(i.OutputRegister(1), i.InputOperand(0)); |
| i.MoveInstructionOperandToRegister(i.TempRegister(0), instr->InputAt(1)); |
| __ imul(i.TempRegister(0), i.InputOperand(2)); |
| __ add(i.OutputRegister(1), i.TempRegister(0)); |
| __ mov(i.OutputRegister(0), i.InputOperand(0)); |
| // Multiply the low words; mul leaves the 64-bit product in edx:eax, and |
| // the final add folds the carry word (edx) into the high output word. |
| __ mul(i.InputRegister(2)); |
| __ add(i.OutputRegister(1), edx); |
| break; |
| } |
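| // The pair multiply uses (aH*2^32 + aL) * (bH*2^32 + bL) mod 2^64 |
| //   = aL*bL + ((aH*bL + aL*bH) << 32): |
| // the two cross products are summed into the high output register, mul |
| // leaves aL*bL in edx:eax, and the final add folds the edx carry word into |
| // the high word. |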
| case kIA32ShlPair: |
| if (HasImmediateInput(instr, 2)) { |
| __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2)); |
| } else { |
| // Shift has been loaded into CL by the register allocator. |
| __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0)); |
| } |
| break; |
| case kIA32ShrPair: |
| if (HasImmediateInput(instr, 2)) { |
| __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2)); |
| } else { |
| // Shift has been loaded into CL by the register allocator. |
| __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0)); |
| } |
| break; |
| case kIA32SarPair: |
| if (HasImmediateInput(instr, 2)) { |
| __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2)); |
| } else { |
| // Shift has been loaded into CL by the register allocator. |
| __ SarPair_cl(i.InputRegister(1), i.InputRegister(0)); |
| } |
| break; |
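| // The pair shifts take their count modulo 64 (hence InputInt6); counts of |
| // 32 or more have to move a whole word between the two halves, which the |
| // ShlPair/ShrPair/SarPair helpers take care of. |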
| case kIA32Ror: |
| if (HasImmediateInput(instr, 1)) { |
| __ ror(i.OutputOperand(), i.InputInt5(1)); |
| } else { |
| __ ror_cl(i.OutputOperand()); |
| } |
| break; |
| case kIA32Lzcnt: |
| __ Lzcnt(i.OutputRegister(), i.InputOperand(0)); |
| break; |
| case kIA32Tzcnt: |
| __ Tzcnt(i.OutputRegister(), i.InputOperand(0)); |
| break; |
| case kIA32Popcnt: |
| __ Popcnt(i.OutputRegister(), i.InputOperand(0)); |
| break; |
| case kIA32Bswap: |
| __ bswap(i.OutputRegister()); |
| break; |
| case kArchWordPoisonOnSpeculation: |
| // TODO(860429): Remove remaining poisoning infrastructure on ia32. |
| UNREACHABLE(); |
| case kIA32MFence: |
| __ mfence(); |
| break; |
| case kIA32LFence: |
| __ lfence(); |
| break; |
| case kSSEFloat32Cmp: |
| __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| break; |
| case kSSEFloat32Add: |
| __ addss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| break; |
| case kSSEFloat32Sub: |
| __ subss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| break; |
| case kSSEFloat32Mul: |
| __ mulss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| break; |
| case kSSEFloat32Div: |
| __ divss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| // Don't delete this mov. It may improve performance on some CPUs, |
| // when there is a (v)mulss depending on the result. |
| __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
| break; |
| case kSSEFloat32Sqrt: |
| __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| break; |
| case kSSEFloat32Abs: { |
| // TODO(bmeurer): Use 128-bit constants. |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psrlq(kScratchDoubleReg, 33); |
| __ andps(i.OutputDoubleRegister(), kScratchDoubleReg); |
| break; |
| } |
| case kSSEFloat32Neg: { |
| // TODO(bmeurer): Use 128-bit constants. |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psllq(kScratchDoubleReg, 31); |
| __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg); |
| break; |
| } |
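| // pcmpeqd of a register with itself yields all-ones; psrlq by 33 then |
| // leaves 0x7FFFFFFF in the low dword of each quadword, a mask that clears |
| // just the float sign bit, so andps computes fabsf. psllq by 31 builds the |
| // complementary 0x80000000 mask, so xorps flips the sign. |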
| case kSSEFloat32Round: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| RoundingMode const mode = |
| static_cast<RoundingMode>(MiscField::decode(instr->opcode())); |
| __ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode); |
| break; |
| } |
| case kSSEFloat64Cmp: |
| __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| break; |
| case kSSEFloat64Add: |
| __ addsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| break; |
| case kSSEFloat64Sub: |
| __ subsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| break; |
| case kSSEFloat64Mul: |
| __ mulsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| break; |
| case kSSEFloat64Div: |
| __ divsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| // Don't delete this mov. It may improve performance on some CPUs, |
| // when there is a (v)mulsd depending on the result. |
| __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
| break; |
| case kSSEFloat32Max: { |
| Label compare_swap, done_compare; |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| auto ool = |
| new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister()); |
| __ j(parity_even, ool->entry()); |
| __ j(above, &done_compare, Label::kNear); |
| __ j(below, &compare_swap, Label::kNear); |
| __ movmskps(i.TempRegister(0), i.InputDoubleRegister(0)); |
| __ test(i.TempRegister(0), Immediate(1)); |
| __ j(zero, &done_compare, Label::kNear); |
| __ bind(&compare_swap); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ movss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| __ bind(&done_compare); |
| __ bind(ool->exit()); |
| break; |
| } |
| |
| case kSSEFloat64Max: { |
| Label compare_swap, done_compare; |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| auto ool = |
| new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister()); |
| __ j(parity_even, ool->entry()); |
| __ j(above, &done_compare, Label::kNear); |
| __ j(below, &compare_swap, Label::kNear); |
| __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0)); |
| __ test(i.TempRegister(0), Immediate(1)); |
| __ j(zero, &done_compare, Label::kNear); |
| __ bind(&compare_swap); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ movsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| __ bind(&done_compare); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kSSEFloat32Min: { |
| Label compare_swap, done_compare; |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| auto ool = |
| new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister()); |
| __ j(parity_even, ool->entry()); |
| __ j(below, &done_compare, Label::kNear); |
| __ j(above, &compare_swap, Label::kNear); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ movmskps(i.TempRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ movss(kScratchDoubleReg, i.InputOperand(1)); |
| __ movmskps(i.TempRegister(0), kScratchDoubleReg); |
| } |
| __ test(i.TempRegister(0), Immediate(1)); |
| __ j(zero, &done_compare, Label::kNear); |
| __ bind(&compare_swap); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ movss(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| __ bind(&done_compare); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kSSEFloat64Min: { |
| Label compare_swap, done_compare; |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| auto ool = |
| new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister()); |
| __ j(parity_even, ool->entry()); |
| __ j(below, &done_compare, Label::kNear); |
| __ j(above, &compare_swap, Label::kNear); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ movsd(kScratchDoubleReg, i.InputOperand(1)); |
| __ movmskpd(i.TempRegister(0), kScratchDoubleReg); |
| } |
| __ test(i.TempRegister(0), Immediate(1)); |
| __ j(zero, &done_compare, Label::kNear); |
| __ bind(&compare_swap); |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
| } else { |
| __ movsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
| } |
| __ bind(&done_compare); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kSSEFloat64Mod: { |
| Register tmp = i.TempRegister(1); |
| __ mov(tmp, esp); |
| __ AllocateStackSpace(kDoubleSize); |
| __ and_(esp, -8); // Align to an 8-byte boundary. |
| // Move values to st(0) and st(1). |
| __ movsd(Operand(esp, 0), i.InputDoubleRegister(1)); |
| __ fld_d(Operand(esp, 0)); |
| __ movsd(Operand(esp, 0), i.InputDoubleRegister(0)); |
| __ fld_d(Operand(esp, 0)); |
| // Loop while fprem isn't done. |
| Label mod_loop; |
| __ bind(&mod_loop); |
| // This instruction traps on all kinds of inputs, but we are assuming the |
| // floating point control word is set to ignore them all. |
| __ fprem(); |
| // fnstsw_ax clobbers eax. |
| DCHECK_EQ(eax, i.TempRegister(0)); |
| __ fnstsw_ax(); |
| __ sahf(); |
| __ j(parity_even, &mod_loop); |
| // Move output to stack and clean up. |
| __ fstp(1); |
| __ fstp_d(Operand(esp, 0)); |
| __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); |
| __ mov(esp, tmp); |
| break; |
| } |
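| // fprem computes only a partial remainder: the x87 C2 flag stays set while |
| // further reduction steps remain, and fnstsw/sahf map C2 onto PF, so the |
| // loop spins on parity_even until the remainder is final, matching the |
| // fmod() contract. |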
| case kSSEFloat64Abs: { |
| // TODO(bmeurer): Use 128-bit constants. |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psrlq(kScratchDoubleReg, 1); |
| __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg); |
| break; |
| } |
| case kSSEFloat64Neg: { |
| // TODO(bmeurer): Use 128-bit constants. |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psllq(kScratchDoubleReg, 63); |
| __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg); |
| break; |
| } |
| case kSSEFloat64Sqrt: |
| __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| break; |
| case kSSEFloat64Round: { |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| RoundingMode const mode = |
| static_cast<RoundingMode>(MiscField::decode(instr->opcode())); |
| __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode); |
| break; |
| } |
| case kSSEFloat32ToFloat64: |
| __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| break; |
| case kSSEFloat64ToFloat32: |
| __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| break; |
| case kSSEFloat32ToInt32: |
| __ cvttss2si(i.OutputRegister(), i.InputOperand(0)); |
| break; |
| case kSSEFloat32ToUint32: |
| __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg); |
| break; |
| case kSSEFloat64ToInt32: |
| __ cvttsd2si(i.OutputRegister(), i.InputOperand(0)); |
| break; |
| case kSSEFloat64ToUint32: |
| __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg); |
| break; |
| case kSSEInt32ToFloat32: |
| __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| break; |
| case kSSEUint32ToFloat32: |
| __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0), |
| i.TempRegister(0)); |
| break; |
| case kSSEInt32ToFloat64: |
| __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| break; |
| case kSSEUint32ToFloat64: |
| __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0), |
| i.TempRegister(0)); |
| break; |
| case kSSEFloat64ExtractLowWord32: |
| if (instr->InputAt(0)->IsFPStackSlot()) { |
| __ mov(i.OutputRegister(), i.InputOperand(0)); |
| } else { |
| __ movd(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } |
| break; |
| case kSSEFloat64ExtractHighWord32: |
| if (instr->InputAt(0)->IsFPStackSlot()) { |
| __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2)); |
| } else { |
| __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1); |
| } |
| break; |
| case kSSEFloat64InsertLowWord32: |
| __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0); |
| break; |
| case kSSEFloat64InsertHighWord32: |
| __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1); |
| break; |
| case kSSEFloat64LoadLowWord32: |
| __ movd(i.OutputDoubleRegister(), i.InputOperand(0)); |
| break; |
| case kAVXFloat32Add: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kAVXFloat32Sub: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vsubss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kAVXFloat32Mul: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kAVXFloat32Div: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputOperand(1)); |
| // Don't delete this mov. It may improve performance on some CPUs, |
| // when there is a (v)mulss depending on the result. |
| __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
| break; |
| } |
| case kAVXFloat64Add: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kAVXFloat64Sub: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kAVXFloat64Mul: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kAVXFloat64Div: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputOperand(1)); |
| // Don't delete this mov. It may improve performance on some CPUs, |
| // when there is a (v)mulsd depending on the result. |
| __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
| break; |
| } |
| case kAVXFloat32Abs: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psrlq(kScratchDoubleReg, 33); |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); |
| break; |
| } |
| case kAVXFloat32Neg: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psllq(kScratchDoubleReg, 31); |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); |
| break; |
| } |
| case kAVXFloat64Abs: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psrlq(kScratchDoubleReg, 1); |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); |
| break; |
| } |
| case kAVXFloat64Neg: { |
| // TODO(bmeurer): Use RIP relative 128-bit constants. |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psllq(kScratchDoubleReg, 63); |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); |
| break; |
| } |
| case kSSEFloat64SilenceNaN: |
| __ xorpd(kScratchDoubleReg, kScratchDoubleReg); |
| __ subsd(i.InputDoubleRegister(0), kScratchDoubleReg); |
| break; |
| case kIA32Movsxbl: |
| ASSEMBLE_MOVX(movsx_b); |
| break; |
| case kIA32Movzxbl: |
| ASSEMBLE_MOVX(movzx_b); |
| break; |
| case kIA32Movb: { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| if (HasImmediateInput(instr, index)) { |
| __ mov_b(operand, i.InputInt8(index)); |
| } else { |
| __ mov_b(operand, i.InputRegister(index)); |
| } |
| break; |
| } |
| case kIA32Movsxwl: |
| ASSEMBLE_MOVX(movsx_w); |
| break; |
| case kIA32Movzxwl: |
| ASSEMBLE_MOVX(movzx_w); |
| break; |
| case kIA32Movw: { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| if (HasImmediateInput(instr, index)) { |
| __ mov_w(operand, i.InputInt16(index)); |
| } else { |
| __ mov_w(operand, i.InputRegister(index)); |
| } |
| break; |
| } |
| case kIA32Movl: |
| if (instr->HasOutput()) { |
| __ mov(i.OutputRegister(), i.MemoryOperand()); |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| if (HasImmediateInput(instr, index)) { |
| __ mov(operand, i.InputImmediate(index)); |
| } else { |
| __ mov(operand, i.InputRegister(index)); |
| } |
| } |
| break; |
| case kIA32Movsd: |
| if (instr->HasOutput()) { |
| __ movsd(i.OutputDoubleRegister(), i.MemoryOperand()); |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| __ movsd(operand, i.InputDoubleRegister(index)); |
| } |
| break; |
| case kIA32Movss: |
| if (instr->HasOutput()) { |
| __ movss(i.OutputDoubleRegister(), i.MemoryOperand()); |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| __ movss(operand, i.InputDoubleRegister(index)); |
| } |
| break; |
| case kIA32Movdqu: |
| if (instr->HasOutput()) { |
| __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand()); |
| } else { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| __ Movdqu(operand, i.InputSimd128Register(index)); |
| } |
| break; |
| case kIA32BitcastFI: |
| if (instr->InputAt(0)->IsFPStackSlot()) { |
| __ mov(i.OutputRegister(), i.InputOperand(0)); |
| } else { |
| __ movd(i.OutputRegister(), i.InputDoubleRegister(0)); |
| } |
| break; |
| case kIA32BitcastIF: |
| if (instr->InputAt(0)->IsRegister()) { |
| __ movd(i.OutputDoubleRegister(), i.InputRegister(0)); |
| } else { |
| __ movss(i.OutputDoubleRegister(), i.InputOperand(0)); |
| } |
| break; |
| case kIA32Lea: { |
| AddressingMode mode = AddressingModeField::decode(instr->opcode()); |
| // Shorten "leal" to "addl", "subl" or "shll" if the register allocation |
| // and addressing mode just happens to work out. The "addl"/"subl" forms |
| // in these cases are faster based on measurements. |
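      // For example, with MRI addressing, "lea eax, [eax + 4]" becomes
      // "add eax, 4".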
| if (mode == kMode_MI) { |
| __ Move(i.OutputRegister(), Immediate(i.InputInt32(0))); |
| } else if (i.InputRegister(0) == i.OutputRegister()) { |
| if (mode == kMode_MRI) { |
| int32_t constant_summand = i.InputInt32(1); |
| if (constant_summand > 0) { |
| __ add(i.OutputRegister(), Immediate(constant_summand)); |
| } else if (constant_summand < 0) { |
| __ sub(i.OutputRegister(), |
| Immediate(base::NegateWithWraparound(constant_summand))); |
| } |
| } else if (mode == kMode_MR1) { |
| if (i.InputRegister(1) == i.OutputRegister()) { |
| __ shl(i.OutputRegister(), 1); |
| } else { |
| __ add(i.OutputRegister(), i.InputRegister(1)); |
| } |
| } else if (mode == kMode_M2) { |
| __ shl(i.OutputRegister(), 1); |
| } else if (mode == kMode_M4) { |
| __ shl(i.OutputRegister(), 2); |
| } else if (mode == kMode_M8) { |
| __ shl(i.OutputRegister(), 3); |
| } else { |
| __ lea(i.OutputRegister(), i.MemoryOperand()); |
| } |
| } else if (mode == kMode_MR1 && |
| i.InputRegister(1) == i.OutputRegister()) { |
| __ add(i.OutputRegister(), i.InputRegister(0)); |
| } else { |
| __ lea(i.OutputRegister(), i.MemoryOperand()); |
| } |
| break; |
| } |
| case kIA32PushFloat32: |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ AllocateStackSpace(kFloatSize); |
| __ movss(Operand(esp, 0), i.InputDoubleRegister(0)); |
| frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); |
| } else if (HasImmediateInput(instr, 0)) { |
| __ Move(kScratchDoubleReg, i.InputFloat32(0)); |
| __ AllocateStackSpace(kFloatSize); |
| __ movss(Operand(esp, 0), kScratchDoubleReg); |
| frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); |
| } else { |
| __ movss(kScratchDoubleReg, i.InputOperand(0)); |
| __ AllocateStackSpace(kFloatSize); |
| __ movss(Operand(esp, 0), kScratchDoubleReg); |
| frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); |
| } |
| break; |
| case kIA32PushFloat64: |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ AllocateStackSpace(kDoubleSize); |
| __ movsd(Operand(esp, 0), i.InputDoubleRegister(0)); |
| frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); |
| } else if (HasImmediateInput(instr, 0)) { |
| __ Move(kScratchDoubleReg, i.InputDouble(0)); |
| __ AllocateStackSpace(kDoubleSize); |
| __ movsd(Operand(esp, 0), kScratchDoubleReg); |
| frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); |
| } else { |
| __ movsd(kScratchDoubleReg, i.InputOperand(0)); |
| __ AllocateStackSpace(kDoubleSize); |
| __ movsd(Operand(esp, 0), kScratchDoubleReg); |
| frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); |
| } |
| break; |
| case kIA32PushSimd128: |
| if (instr->InputAt(0)->IsFPRegister()) { |
| __ AllocateStackSpace(kSimd128Size); |
| __ movups(Operand(esp, 0), i.InputSimd128Register(0)); |
| } else { |
| __ movups(kScratchDoubleReg, i.InputOperand(0)); |
| __ AllocateStackSpace(kSimd128Size); |
| __ movups(Operand(esp, 0), kScratchDoubleReg); |
| } |
| frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize); |
| break; |
| case kIA32Push: |
| if (AddressingModeField::decode(instr->opcode()) != kMode_None) { |
| size_t index = 0; |
| Operand operand = i.MemoryOperand(&index); |
| __ push(operand); |
| frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); |
| } else if (instr->InputAt(0)->IsFPRegister()) { |
| __ AllocateStackSpace(kFloatSize); |
| __ movsd(Operand(esp, 0), i.InputDoubleRegister(0)); |
| frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize); |
| } else if (HasImmediateInput(instr, 0)) { |
| __ push(i.InputImmediate(0)); |
| frame_access_state()->IncreaseSPDelta(1); |
| } else { |
| __ push(i.InputOperand(0)); |
| frame_access_state()->IncreaseSPDelta(1); |
| } |
| break; |
| case kIA32Poke: { |
| int slot = MiscField::decode(instr->opcode()); |
| if (HasImmediateInput(instr, 0)) { |
| __ mov(Operand(esp, slot * kSystemPointerSize), i.InputImmediate(0)); |
| } else { |
| __ mov(Operand(esp, slot * kSystemPointerSize), i.InputRegister(0)); |
| } |
| break; |
| } |
| case kIA32Peek: { |
| int reverse_slot = i.InputInt32(0) + 1; |
| int offset = |
| FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot); |
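      // reverse_slot counts backwards from the end of the frame, so the
      // subtraction above turns it into an ebp-relative frame offset.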
| if (instr->OutputAt(0)->IsFPRegister()) { |
| LocationOperand* op = LocationOperand::cast(instr->OutputAt(0)); |
| if (op->representation() == MachineRepresentation::kFloat64) { |
| __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset)); |
| } else { |
| DCHECK_EQ(MachineRepresentation::kFloat32, op->representation()); |
| __ movss(i.OutputFloatRegister(), Operand(ebp, offset)); |
| } |
| } else { |
| __ mov(i.OutputRegister(), Operand(ebp, offset)); |
| } |
| break; |
| } |
| case kSSEF32x4Splat: { |
| DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| XMMRegister dst = i.OutputSimd128Register(); |
| __ shufps(dst, dst, 0x0); |
| break; |
| } |
| case kAVXF32x4Splat: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister src = i.InputFloatRegister(0); |
| __ vshufps(i.OutputSimd128Register(), src, src, 0x0); |
| break; |
| } |
| case kSSEF32x4ExtractLane: { |
| DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| XMMRegister dst = i.OutputFloatRegister(); |
| int8_t lane = i.InputInt8(1); |
| if (lane != 0) { |
| DCHECK_LT(lane, 4); |
| __ shufps(dst, dst, lane); |
| } |
| break; |
| } |
| case kAVXF32x4ExtractLane: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister dst = i.OutputFloatRegister(); |
| XMMRegister src = i.InputSimd128Register(0); |
| int8_t lane = i.InputInt8(1); |
| if (lane == 0) { |
| if (dst != src) __ vmovaps(dst, src); |
| } else { |
| DCHECK_LT(lane, 4); |
| __ vshufps(dst, src, src, lane); |
| } |
| break; |
| } |
| case kSSEF32x4ReplaceLane: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ insertps(i.OutputSimd128Register(), i.InputOperand(2), |
| i.InputInt8(1) << 4); |
| break; |
| } |
| case kAVXF32x4ReplaceLane: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(2), i.InputInt8(1) << 4); |
| break; |
| } |
| case kIA32F32x4SConvertI32x4: { |
| __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0)); |
| break; |
| } |
| case kSSEF32x4UConvertI32x4: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| XMMRegister dst = i.OutputSimd128Register(); |
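      // There is no SSE instruction that converts unsigned int32 lanes to
      // float32, so split each lane into its low 16 bits and the remainder,
      // convert both halves exactly, and recombine with one (possibly
      // rounding) add. E.g. 0xFFFFFFFF: lo = 0xFFFF -> 65535.0f,
      // hi = 0xFFFF0000, hi >> 1 = 0x7FFF8000 -> 2147450880.0f, doubled to
      // 4294901760.0f; the final add rounds to 4294967296.0f (2^32).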
| __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros |
| __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits |
| __ psubd(dst, kScratchDoubleReg); // get hi 16 bits |
| __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly |
| __ psrld(dst, 1); // divide by 2 to get in unsigned range |
| __ cvtdq2ps(dst, dst); // convert hi exactly |
| __ addps(dst, dst); // double hi, exactly |
| __ addps(dst, kScratchDoubleReg); // add hi and lo, may round. |
| break; |
| } |
| case kAVXF32x4UConvertI32x4: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(0); |
| __ vpxor(kScratchDoubleReg, kScratchDoubleReg, |
| kScratchDoubleReg); // zeros |
| __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src, |
| 0x55); // get lo 16 bits |
| __ vpsubd(dst, src, kScratchDoubleReg); // get hi 16 bits |
| __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly |
| __ vpsrld(dst, dst, 1); // divide by 2 to get in unsigned range |
| __ vcvtdq2ps(dst, dst); // convert hi exactly |
| __ vaddps(dst, dst, dst); // double hi, exactly |
| __ vaddps(dst, dst, kScratchDoubleReg); // add hi and lo, may round. |
| break; |
| } |
| case kSSEF32x4Abs: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src = i.InputOperand(0); |
| if (src.is_reg(dst)) { |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psrld(kScratchDoubleReg, 1); |
| __ andps(dst, kScratchDoubleReg); |
| } else { |
| __ pcmpeqd(dst, dst); |
| __ psrld(dst, 1); |
| __ andps(dst, src); |
| } |
| break; |
| } |
| case kAVXF32x4Abs: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); |
| __ vandps(i.OutputSimd128Register(), kScratchDoubleReg, |
| i.InputOperand(0)); |
| break; |
| } |
| case kSSEF32x4Neg: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src = i.InputOperand(0); |
| if (src.is_reg(dst)) { |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ pslld(kScratchDoubleReg, 31); |
| __ xorps(dst, kScratchDoubleReg); |
| } else { |
| __ pcmpeqd(dst, dst); |
| __ pslld(dst, 31); |
| __ xorps(dst, src); |
| } |
| break; |
| } |
| case kAVXF32x4Neg: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31); |
| __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg, |
| i.InputOperand(0)); |
| break; |
| } |
| case kIA32F32x4RecipApprox: { |
| __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0)); |
| break; |
| } |
| case kIA32F32x4RecipSqrtApprox: { |
| __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0)); |
| break; |
| } |
| case kSSEF32x4Add: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ addps(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXF32x4Add: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEF32x4AddHoriz: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE3); |
| __ haddps(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXF32x4AddHoriz: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEF32x4Sub: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ subps(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXF32x4Sub: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEF32x4Mul: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ mulps(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXF32x4Mul: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEF32x4Min: { |
| XMMRegister src1 = i.InputSimd128Register(1), |
| dst = i.OutputSimd128Register(); |
| DCHECK_EQ(dst, i.InputSimd128Register(0)); |
      // The minps instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform minps in both orders, merge the results, and adjust.
| __ movaps(kScratchDoubleReg, src1); |
| __ minps(kScratchDoubleReg, dst); |
| __ minps(dst, src1); |
      // Propagate -0's and NaNs, which may be non-canonical.
| __ orps(kScratchDoubleReg, dst); |
| // Canonicalize NaNs by quieting and clearing the payload. |
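      // cmpps with immediate 3 (UNORD) sets all-ones in exactly the NaN
      // lanes.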
| __ cmpps(dst, kScratchDoubleReg, 3); |
| __ orps(kScratchDoubleReg, dst); |
| __ psrld(dst, 10); |
| __ andnps(dst, kScratchDoubleReg); |
| break; |
| } |
| case kAVXF32x4Min: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src1 = i.InputOperand(1); |
| // See comment above for correction of minps. |
| __ movups(kScratchDoubleReg, src1); |
| __ vminps(kScratchDoubleReg, kScratchDoubleReg, dst); |
| __ vminps(dst, dst, src1); |
| __ vorps(dst, dst, kScratchDoubleReg); |
| __ vcmpneqps(kScratchDoubleReg, dst, dst); |
| __ vorps(dst, dst, kScratchDoubleReg); |
| __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10); |
| __ vandnps(dst, kScratchDoubleReg, dst); |
| break; |
| } |
| case kSSEF32x4Max: { |
| XMMRegister src1 = i.InputSimd128Register(1), |
| dst = i.OutputSimd128Register(); |
| DCHECK_EQ(dst, i.InputSimd128Register(0)); |
      // The maxps instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform maxps in both orders, merge the results, and adjust.
| __ movaps(kScratchDoubleReg, src1); |
| __ maxps(kScratchDoubleReg, dst); |
| __ maxps(dst, src1); |
| // Find discrepancies. |
| __ xorps(dst, kScratchDoubleReg); |
| // Propagate NaNs, which may be non-canonical. |
| __ orps(kScratchDoubleReg, dst); |
| // Propagate sign discrepancy and (subtle) quiet NaNs. |
| __ subps(kScratchDoubleReg, dst); |
| // Canonicalize NaNs by clearing the payload. |
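      // As above, cmpps with immediate 3 (UNORD) masks the NaN lanes.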
| __ cmpps(dst, kScratchDoubleReg, 3); |
| __ psrld(dst, 10); |
| __ andnps(dst, kScratchDoubleReg); |
| break; |
| } |
| case kAVXF32x4Max: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src1 = i.InputOperand(1); |
| // See comment above for correction of maxps. |
      __ movups(kScratchDoubleReg, src1);  // src1 may be unaligned memory.
| __ vmaxps(kScratchDoubleReg, kScratchDoubleReg, dst); |
| __ vmaxps(dst, dst, src1); |
| __ vxorps(dst, dst, kScratchDoubleReg); |
| __ vorps(kScratchDoubleReg, kScratchDoubleReg, dst); |
| __ vsubps(kScratchDoubleReg, kScratchDoubleReg, dst); |
| __ vcmpneqps(dst, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpsrld(dst, dst, 10); |
| __ vandnps(dst, dst, kScratchDoubleReg); |
| break; |
| } |
| case kSSEF32x4Eq: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpeqps(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXF32x4Eq: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vcmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEF32x4Ne: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpneqps(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXF32x4Ne: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vcmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEF32x4Lt: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpltps(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXF32x4Lt: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vcmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEF32x4Le: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ cmpleps(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXF32x4Le: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vcmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kIA32I32x4Splat: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| __ Movd(dst, i.InputOperand(0)); |
| __ Pshufd(dst, dst, 0x0); |
| break; |
| } |
| case kIA32I32x4ExtractLane: { |
| __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1)); |
| break; |
| } |
| case kSSEI32x4ReplaceLane: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); |
| break; |
| } |
| case kAVXI32x4ReplaceLane: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(2), i.InputInt8(1)); |
| break; |
| } |
| case kSSEI32x4SConvertF32x4: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| XMMRegister dst = i.OutputSimd128Register(); |
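      // cvttps2dq returns 0x80000000 (INT32_MIN) for NaN and out-of-range
      // lanes. Zero the NaN lanes up front, then patch lanes that were
      // >= 2^31 from 0x80000000 to 0x7FFFFFFF after the conversion.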
      // NaN -> 0
| __ movaps(kScratchDoubleReg, dst); |
| __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg); |
| __ pand(dst, kScratchDoubleReg); |
| // Set top bit if >= 0 (but not -0.0!) |
| __ pxor(kScratchDoubleReg, dst); |
| // Convert |
| __ cvttps2dq(dst, dst); |
      // Set the top bit if a lane that was >= 0 came out < 0 (overflow)
| __ pand(kScratchDoubleReg, dst); |
| __ psrad(kScratchDoubleReg, 31); |
| // Set positive overflow lanes to 0x7FFFFFFF |
| __ pxor(dst, kScratchDoubleReg); |
| break; |
| } |
| case kAVXI32x4SConvertF32x4: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src = i.InputSimd128Register(0); |
      // NaN -> 0
| __ vcmpeqps(kScratchDoubleReg, src, src); |
| __ vpand(dst, src, kScratchDoubleReg); |
| // Set top bit if >= 0 (but not -0.0!) |
| __ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst); |
| // Convert |
| __ vcvttps2dq(dst, dst); |
      // Set the top bit if a lane that was >= 0 came out < 0 (overflow)
| __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst); |
| __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31); |
| // Set positive overflow lanes to 0x7FFFFFFF |
| __ vpxor(dst, dst, kScratchDoubleReg); |
| break; |
| } |
| case kIA32I32x4SConvertI16x8Low: { |
| __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0)); |
| break; |
| } |
| case kIA32I32x4SConvertI16x8High: { |
| XMMRegister dst = i.OutputSimd128Register(); |
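      // palignr by 8 shifts the upper 8 bytes into the lower half; the
      // pmovsxwd then sign-extends those four int16 lanes to int32.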
| __ Palignr(dst, i.InputOperand(0), 8); |
| __ Pmovsxwd(dst, dst); |
| break; |
| } |
| case kIA32I32x4Neg: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src = i.InputOperand(0); |
| if (src.is_reg(dst)) { |
| __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
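        // Psignd with an all-ones (-1) operand negates every lane in place.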
| __ Psignd(dst, kScratchDoubleReg); |
| } else { |
| __ Pxor(dst, dst); |
| __ Psubd(dst, src); |
| } |
| break; |
| } |
| case kSSEI32x4Shl: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pslld(i.OutputSimd128Register(), i.InputInt8(1)); |
| break; |
| } |
| case kAVXI32x4Shl: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputInt8(1)); |
| break; |
| } |
| case kSSEI32x4ShrS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ psrad(i.OutputSimd128Register(), i.InputInt8(1)); |
| break; |
| } |
| case kAVXI32x4ShrS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputInt8(1)); |
| break; |
| } |
| case kSSEI32x4Add: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ paddd(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4Add: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4AddHoriz: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSSE3); |
| __ phaddd(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4AddHoriz: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4Sub: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ psubd(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4Sub: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4Mul: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pmulld(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4Mul: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpmulld(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4MinS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pminsd(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4MinS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpminsd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4MaxS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pmaxsd(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4MaxS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4Eq: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4Eq: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4Ne: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1)); |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); |
| break; |
| } |
| case kAVXI32x4Ne: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(), |
| kScratchDoubleReg); |
| break; |
| } |
| case kSSEI32x4GtS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4GtS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4GeS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src = i.InputOperand(1); |
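      // Signed a >= b is computed as min(a, b) == b.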
| __ pminsd(dst, src); |
| __ pcmpeqd(dst, src); |
| break; |
| } |
| case kAVXI32x4GeS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister src1 = i.InputSimd128Register(0); |
| Operand src2 = i.InputOperand(1); |
| __ vpminsd(kScratchDoubleReg, src1, src2); |
| __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2); |
| break; |
| } |
| case kSSEI32x4UConvertF32x4: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); |
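      // There is no unsigned variant of cvttps2dq. Clamp NaN and negative
      // lanes to zero, convert lanes below 2^31 directly, and for larger
      // lanes convert (src - 2^31) and add it to the 0x80000000 that
      // cvttps2dq produces for them; lanes >= 2^32 saturate to 0xFFFFFFFF.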
      // NaN -> 0, negative -> 0
| __ pxor(kScratchDoubleReg, kScratchDoubleReg); |
| __ maxps(dst, kScratchDoubleReg); |
| // scratch: float representation of max_signed |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ psrld(kScratchDoubleReg, 1); // 0x7fffffff |
| __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000 |
| // tmp: convert (src-max_signed). |
| // Positive overflow lanes -> 0x7FFFFFFF |
| // Negative lanes -> 0 |
| __ movaps(tmp, dst); |
| __ subps(tmp, kScratchDoubleReg); |
| __ cmpleps(kScratchDoubleReg, tmp); |
| __ cvttps2dq(tmp, tmp); |
| __ pxor(tmp, kScratchDoubleReg); |
| __ pxor(kScratchDoubleReg, kScratchDoubleReg); |
| __ pmaxsd(tmp, kScratchDoubleReg); |
      // Convert. Overflow lanes above max_signed will be 0x80000000.
| __ cvttps2dq(dst, dst); |
| // Add (src-max_signed) for overflow lanes. |
| __ paddd(dst, tmp); |
| break; |
| } |
| case kAVXI32x4UConvertF32x4: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); |
      // NaN -> 0, negative -> 0
| __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vmaxps(dst, dst, kScratchDoubleReg); |
| // scratch: float representation of max_signed |
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); // 0x7fffffff |
| __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000 |
| // tmp: convert (src-max_signed). |
| // Positive overflow lanes -> 0x7FFFFFFF |
| // Negative lanes -> 0 |
| __ vsubps(tmp, dst, kScratchDoubleReg); |
| __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp); |
| __ vcvttps2dq(tmp, tmp); |
| __ vpxor(tmp, tmp, kScratchDoubleReg); |
| __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpmaxsd(tmp, tmp, kScratchDoubleReg); |
      // Convert. Overflow lanes above max_signed will be 0x80000000.
| __ vcvttps2dq(dst, dst); |
| // Add (src-max_signed) for overflow lanes. |
| __ vpaddd(dst, dst, tmp); |
| break; |
| } |
| case kIA32I32x4UConvertI16x8Low: { |
| __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0)); |
| break; |
| } |
| case kIA32I32x4UConvertI16x8High: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| __ Palignr(dst, i.InputOperand(0), 8); |
| __ Pmovzxwd(dst, dst); |
| break; |
| } |
| case kSSEI32x4ShrU: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ psrld(i.OutputSimd128Register(), i.InputInt8(1)); |
| break; |
| } |
| case kAVXI32x4ShrU: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputInt8(1)); |
| break; |
| } |
| case kSSEI32x4MinU: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pminud(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4MinU: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpminud(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4MaxU: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| __ pmaxud(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI32x4MaxU: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI32x4GtU: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src = i.InputOperand(1); |
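      // Unsigned a > b is computed as NOT(max(a, b) == b).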
| __ pmaxud(dst, src); |
| __ pcmpeqd(dst, src); |
| __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ pxor(dst, kScratchDoubleReg); |
| break; |
| } |
| case kAVXI32x4GtU: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister dst = i.OutputSimd128Register(); |
| XMMRegister src1 = i.InputSimd128Register(0); |
| Operand src2 = i.InputOperand(1); |
| __ vpmaxud(kScratchDoubleReg, src1, src2); |
| __ vpcmpeqd(dst, kScratchDoubleReg, src2); |
| __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpxor(dst, dst, kScratchDoubleReg); |
| break; |
| } |
| case kSSEI32x4GeU: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSE4_1); |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src = i.InputOperand(1); |
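      // Unsigned a >= b is computed as min(a, b) == b.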
| __ pminud(dst, src); |
| __ pcmpeqd(dst, src); |
| break; |
| } |
| case kAVXI32x4GeU: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| XMMRegister src1 = i.InputSimd128Register(0); |
| Operand src2 = i.InputOperand(1); |
| __ vpminud(kScratchDoubleReg, src1, src2); |
| __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2); |
| break; |
| } |
| case kIA32I16x8Splat: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| __ Movd(dst, i.InputOperand(0)); |
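      // Broadcast: replicate word 0 across the low quadword, then dword 0
      // across the whole register.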
| __ Pshuflw(dst, dst, 0x0); |
| __ Pshufd(dst, dst, 0x0); |
| break; |
| } |
| case kIA32I16x8ExtractLane: { |
| Register dst = i.OutputRegister(); |
| __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1)); |
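      // Pextrw zero-extends; the lane holds a signed int16, so sign-extend
      // it to the full register.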
| __ movsx_w(dst, dst); |
| break; |
| } |
| case kSSEI16x8ReplaceLane: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); |
| break; |
| } |
| case kAVXI16x8ReplaceLane: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(2), i.InputInt8(1)); |
| break; |
| } |
| case kIA32I16x8SConvertI8x16Low: { |
| __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0)); |
| break; |
| } |
| case kIA32I16x8SConvertI8x16High: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| __ Palignr(dst, i.InputOperand(0), 8); |
| __ Pmovsxbw(dst, dst); |
| break; |
| } |
| case kIA32I16x8Neg: { |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src = i.InputOperand(0); |
| if (src.is_reg(dst)) { |
| __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
| __ Psignw(dst, kScratchDoubleReg); |
| } else { |
| __ Pxor(dst, dst); |
| __ Psubw(dst, src); |
| } |
| break; |
| } |
| case kSSEI16x8Shl: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ psllw(i.OutputSimd128Register(), i.InputInt8(1)); |
| break; |
| } |
| case kAVXI16x8Shl: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputInt8(1)); |
| break; |
| } |
| case kSSEI16x8ShrS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ psraw(i.OutputSimd128Register(), i.InputInt8(1)); |
| break; |
| } |
| case kAVXI16x8ShrS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputInt8(1)); |
| break; |
| } |
| case kSSEI16x8SConvertI32x4: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ packssdw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8SConvertI32x4: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8Add: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ paddw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8Add: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpaddw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8AddSaturateS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ paddsw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8AddSaturateS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpaddsw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8AddHoriz: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| CpuFeatureScope sse_scope(tasm(), SSSE3); |
| __ phaddw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8AddHoriz: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8Sub: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ psubw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8Sub: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpsubw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8SubSaturateS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ psubsw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8SubSaturateS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpsubsw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8Mul: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pmullw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8Mul: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpmullw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8MinS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pminsw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8MinS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpminsw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8MaxS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pmaxsw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8MaxS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8Eq: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8Eq: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8Ne: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1)); |
| __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); |
| __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); |
| break; |
| } |
| case kAVXI16x8Ne: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(), |
| kScratchDoubleReg); |
| break; |
| } |
| case kSSEI16x8GtS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| __ pcmpgtw(i.OutputSimd128Register(), i.InputOperand(1)); |
| break; |
| } |
| case kAVXI16x8GtS: { |
| CpuFeatureScope avx_scope(tasm(), AVX); |
| __ vpcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputOperand(1)); |
| break; |
| } |
| case kSSEI16x8GeS: { |
| DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| XMMRegister dst = i.OutputSimd128Register(); |
| Operand src = i.InputOperand(1); |
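      // As for I32x4GeS: signed a >= b is computed as min(a, b) == b.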
| __ pminsw(dst, src); |
| __ pcmpeqw(dst, src); |
| break; |
|