blob: a108edeff0e592fd4520075c0583e37e695b6d33 [file] [log] [blame]
// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/backend/code-generator.h"
#include <limits>
#include "src/base/overflowing-math.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/codegen/x64/assembler-x64.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/heap/heap-inl.h" // crbug.com/v8/8499
#include "src/objects/smi.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"
namespace v8 {
namespace internal {
namespace compiler {
#define __ tasm()->
// Adds X64 specific methods for decoding operands.
class X64OperandConverter : public InstructionOperandConverter {
public:
X64OperandConverter(CodeGenerator* gen, Instruction* instr)
: InstructionOperandConverter(gen, instr) {}
Immediate InputImmediate(size_t index) {
return ToImmediate(instr_->InputAt(index));
}
Operand InputOperand(size_t index, int extra = 0) {
return ToOperand(instr_->InputAt(index), extra);
}
Operand OutputOperand() { return ToOperand(instr_->Output()); }
Immediate ToImmediate(InstructionOperand* operand) {
Constant constant = ToConstant(operand);
if (constant.type() == Constant::kFloat64) {
DCHECK_EQ(0, constant.ToFloat64().AsUint64());
return Immediate(0);
}
if (RelocInfo::IsWasmReference(constant.rmode())) {
return Immediate(constant.ToInt32(), constant.rmode());
}
return Immediate(constant.ToInt32());
}
Operand ToOperand(InstructionOperand* op, int extra = 0) {
DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
}
Operand SlotToOperand(int slot_index, int extra = 0) {
FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
return Operand(offset.from_stack_pointer() ? rsp : rbp,
offset.offset() + extra);
}
static size_t NextOffset(size_t* offset) {
size_t i = *offset;
(*offset)++;
return i;
}
static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
STATIC_ASSERT(0 == static_cast<int>(times_1));
STATIC_ASSERT(1 == static_cast<int>(times_2));
STATIC_ASSERT(2 == static_cast<int>(times_4));
STATIC_ASSERT(3 == static_cast<int>(times_8));
int scale = static_cast<int>(mode - one);
DCHECK(scale >= 0 && scale < 4);
return static_cast<ScaleFactor>(scale);
}
Operand MemoryOperand(size_t* offset) {
AddressingMode mode = AddressingModeField::decode(instr_->opcode());
switch (mode) {
case kMode_MR: {
Register base = InputRegister(NextOffset(offset));
int32_t disp = 0;
return Operand(base, disp);
}
case kMode_MRI: {
Register base = InputRegister(NextOffset(offset));
int32_t disp = InputInt32(NextOffset(offset));
return Operand(base, disp);
}
case kMode_MR1:
case kMode_MR2:
case kMode_MR4:
case kMode_MR8: {
Register base = InputRegister(NextOffset(offset));
Register index = InputRegister(NextOffset(offset));
ScaleFactor scale = ScaleFor(kMode_MR1, mode);
int32_t disp = 0;
return Operand(base, index, scale, disp);
}
case kMode_MR1I:
case kMode_MR2I:
case kMode_MR4I:
case kMode_MR8I: {
Register base = InputRegister(NextOffset(offset));
Register index = InputRegister(NextOffset(offset));
ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
int32_t disp = InputInt32(NextOffset(offset));
return Operand(base, index, scale, disp);
}
case kMode_M1: {
Register base = InputRegister(NextOffset(offset));
int32_t disp = 0;
return Operand(base, disp);
}
case kMode_M2:
UNREACHABLE(); // Should use kModeMR with more compact encoding instead
return Operand(no_reg, 0);
case kMode_M4:
case kMode_M8: {
Register index = InputRegister(NextOffset(offset));
ScaleFactor scale = ScaleFor(kMode_M1, mode);
int32_t disp = 0;
return Operand(index, scale, disp);
}
case kMode_M1I:
case kMode_M2I:
case kMode_M4I:
case kMode_M8I: {
Register index = InputRegister(NextOffset(offset));
ScaleFactor scale = ScaleFor(kMode_M1I, mode);
int32_t disp = InputInt32(NextOffset(offset));
return Operand(index, scale, disp);
}
case kMode_Root: {
Register base = kRootRegister;
int32_t disp = InputInt32(NextOffset(offset));
return Operand(base, disp);
}
case kMode_None:
UNREACHABLE();
}
UNREACHABLE();
}
Operand MemoryOperand(size_t first_input = 0) {
return MemoryOperand(&first_input);
}
};
namespace {
bool HasImmediateInput(Instruction* instr, size_t index) {
return instr->InputAt(index)->IsImmediate();
}
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
public:
OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
: OutOfLineCode(gen), result_(result) {}
void Generate() final {
__ Xorps(result_, result_);
__ Divss(result_, result_);
}
private:
XMMRegister const result_;
};
class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
public:
OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
: OutOfLineCode(gen), result_(result) {}
void Generate() final {
__ Xorpd(result_, result_);
__ Divsd(result_, result_);
}
private:
XMMRegister const result_;
};
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
public:
OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
XMMRegister input, StubCallMode stub_mode,
UnwindingInfoWriter* unwinding_info_writer)
: OutOfLineCode(gen),
result_(result),
input_(input),
stub_mode_(stub_mode),
unwinding_info_writer_(unwinding_info_writer),
isolate_(gen->isolate()),
zone_(gen->zone()) {}
void Generate() final {
__ AllocateStackSpace(kDoubleSize);
unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kDoubleSize);
__ Movsd(MemOperand(rsp, 0), input_);
if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
// A direct call to a wasm runtime stub defined in this module.
// Just encode the stub index. This will be patched when the code
// is added to the native module and copied into wasm code space.
__ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
} else {
__ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
}
__ movl(result_, MemOperand(rsp, 0));
__ addq(rsp, Immediate(kDoubleSize));
unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-kDoubleSize);
}
private:
Register const result_;
XMMRegister const input_;
StubCallMode stub_mode_;
UnwindingInfoWriter* const unwinding_info_writer_;
Isolate* isolate_;
Zone* zone_;
};
class OutOfLineRecordWrite final : public OutOfLineCode {
public:
OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
Register value, Register scratch0, Register scratch1,
RecordWriteMode mode, StubCallMode stub_mode)
: OutOfLineCode(gen),
object_(object),
operand_(operand),
value_(value),
scratch0_(scratch0),
scratch1_(scratch1),
mode_(mode),
stub_mode_(stub_mode),
zone_(gen->zone()) {}
void Generate() final {
if (mode_ > RecordWriteMode::kValueIsPointer) {
__ JumpIfSmi(value_, exit());
}
if (COMPRESS_POINTERS_BOOL) {
__ DecompressTaggedPointer(value_, value_);
}
__ CheckPageFlag(value_, scratch0_,
MemoryChunk::kPointersToHereAreInterestingMask, zero,
exit());
__ leaq(scratch1_, operand_);
RememberedSetAction const remembered_set_action =
mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
: OMIT_REMEMBERED_SET;
SaveFPRegsMode const save_fp_mode =
frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
__ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
} else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
// A direct call to a wasm runtime stub defined in this module.
// Just encode the stub index. This will be patched when the code
// is added to the native module and copied into wasm code space.
__ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
} else {
__ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
save_fp_mode);
}
}
private:
Register const object_;
Operand const operand_;
Register const value_;
Register const scratch0_;
Register const scratch1_;
RecordWriteMode const mode_;
StubCallMode const stub_mode_;
Zone* zone_;
};
class WasmOutOfLineTrap : public OutOfLineCode {
public:
WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
: OutOfLineCode(gen), gen_(gen), instr_(instr) {}
void Generate() override {
X64OperandConverter i(gen_, instr_);
TrapId trap_id =
static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
GenerateWithTrapId(trap_id);
}
protected:
CodeGenerator* gen_;
void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
private:
void GenerateCallToTrap(TrapId trap_id) {
if (!gen_->wasm_runtime_exception_support()) {
// We cannot test calls to the runtime in cctest/test-run-wasm.
// Therefore we emit a call to C here instead of a call to the runtime.
__ PrepareCallCFunction(0);
__ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
0);
__ LeaveFrame(StackFrame::WASM_COMPILED);
auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
size_t pop_size =
call_descriptor->StackParameterCount() * kSystemPointerSize;
// Use rcx as a scratch register, we return anyways immediately.
__ Ret(static_cast<int>(pop_size), rcx);
} else {
gen_->AssembleSourcePosition(instr_);
// A direct call to a wasm runtime stub defined in this module.
// Just encode the stub index. This will be patched when the code
// is added to the native module and copied into wasm code space.
__ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
ReferenceMap* reference_map =
new (gen_->zone()) ReferenceMap(gen_->zone());
gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
__ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
}
}
Instruction* instr_;
};
class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
public:
WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
: WasmOutOfLineTrap(gen, instr), pc_(pc) {}
void Generate() final {
gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
}
private:
int pc_;
};
void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
InstructionCode opcode, Instruction* instr,
X64OperandConverter& i, // NOLINT(runtime/references)
int pc) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessProtected) {
new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
}
}
void EmitWordLoadPoisoningIfNeeded(
CodeGenerator* codegen, InstructionCode opcode, Instruction* instr,
X64OperandConverter& i) { // NOLINT(runtime/references)
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
Register value = i.OutputRegister();
codegen->tasm()->andq(value, kSpeculationPoisonRegister);
}
}
} // namespace
#define ASSEMBLE_UNOP(asm_instr) \
do { \
if (instr->Output()->IsRegister()) { \
__ asm_instr(i.OutputRegister()); \
} else { \
__ asm_instr(i.OutputOperand()); \
} \
} while (false)
#define ASSEMBLE_BINOP(asm_instr) \
do { \
if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
size_t index = 1; \
Operand right = i.MemoryOperand(&index); \
__ asm_instr(i.InputRegister(0), right); \
} else { \
if (HasImmediateInput(instr, 1)) { \
if (instr->InputAt(0)->IsRegister()) { \
__ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
} else { \
__ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
} \
} else { \
if (instr->InputAt(1)->IsRegister()) { \
__ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
} else { \
__ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
} \
} \
} \
} while (false)
#define ASSEMBLE_COMPARE(asm_instr) \
do { \
if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
size_t index = 0; \
Operand left = i.MemoryOperand(&index); \
if (HasImmediateInput(instr, index)) { \
__ asm_instr(left, i.InputImmediate(index)); \
} else { \
__ asm_instr(left, i.InputRegister(index)); \
} \
} else { \
if (HasImmediateInput(instr, 1)) { \
if (instr->InputAt(0)->IsRegister()) { \
__ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
} else { \
__ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
} \
} else { \
if (instr->InputAt(1)->IsRegister()) { \
__ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
} else { \
__ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
} \
} \
} \
} while (false)
#define ASSEMBLE_MULT(asm_instr) \
do { \
if (HasImmediateInput(instr, 1)) { \
if (instr->InputAt(0)->IsRegister()) { \
__ asm_instr(i.OutputRegister(), i.InputRegister(0), \
i.InputImmediate(1)); \
} else { \
__ asm_instr(i.OutputRegister(), i.InputOperand(0), \
i.InputImmediate(1)); \
} \
} else { \
if (instr->InputAt(1)->IsRegister()) { \
__ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
} else { \
__ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
} \
} \
} while (false)
#define ASSEMBLE_SHIFT(asm_instr, width) \
do { \
if (HasImmediateInput(instr, 1)) { \
if (instr->Output()->IsRegister()) { \
__ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
} else { \
__ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
} \
} else { \
if (instr->Output()->IsRegister()) { \
__ asm_instr##_cl(i.OutputRegister()); \
} else { \
__ asm_instr##_cl(i.OutputOperand()); \
} \
} \
} while (false)
#define ASSEMBLE_MOVX(asm_instr) \
do { \
if (instr->addressing_mode() != kMode_None) { \
__ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
} else if (instr->InputAt(0)->IsRegister()) { \
__ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
} else { \
__ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
} \
} while (false)
#define ASSEMBLE_SSE_BINOP(asm_instr) \
do { \
if (instr->InputAt(1)->IsFPRegister()) { \
__ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
} else { \
__ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
} \
} while (false)
#define ASSEMBLE_SSE_UNOP(asm_instr) \
do { \
if (instr->InputAt(0)->IsFPRegister()) { \
__ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
} else { \
__ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
} \
} while (false)
#define ASSEMBLE_AVX_BINOP(asm_instr) \
do { \
CpuFeatureScope avx_scope(tasm(), AVX); \
if (instr->InputAt(1)->IsFPRegister()) { \
__ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
i.InputDoubleRegister(1)); \
} else { \
__ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
i.InputOperand(1)); \
} \
} while (false)
#define ASSEMBLE_IEEE754_BINOP(name) \
do { \
__ PrepareCallCFunction(2); \
__ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
} while (false)
#define ASSEMBLE_IEEE754_UNOP(name) \
do { \
__ PrepareCallCFunction(1); \
__ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
} while (false)
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
do { \
Label binop; \
__ bind(&binop); \
__ mov_inst(rax, i.MemoryOperand(1)); \
__ movl(i.TempRegister(0), rax); \
__ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
__ lock(); \
__ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
__ j(not_equal, &binop); \
} while (false)
#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
do { \
Label binop; \
__ bind(&binop); \
__ mov_inst(rax, i.MemoryOperand(1)); \
__ movq(i.TempRegister(0), rax); \
__ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
__ lock(); \
__ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
__ j(not_equal, &binop); \
} while (false)
#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \
do { \
if (instr->InputAt(index)->IsSimd128Register()) { \
__ opcode(dst_operand, i.InputSimd128Register(index)); \
} else { \
__ opcode(dst_operand, i.InputOperand(index)); \
} \
} while (false)
#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) \
do { \
if (instr->InputAt(index)->IsSimd128Register()) { \
__ opcode(dst_operand, i.InputSimd128Register(index), imm); \
} else { \
__ opcode(dst_operand, i.InputOperand(index), imm); \
} \
} while (false)
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
do { \
XMMRegister dst = i.OutputSimd128Register(); \
DCHECK_EQ(dst, i.InputSimd128Register(0)); \
byte input_index = instr->InputCount() == 2 ? 1 : 0; \
ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
} while (false)
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \
do { \
CpuFeatureScope sse_scope(tasm(), SSELevel); \
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
__ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
} while (false)
#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
do { \
CpuFeatureScope sse_scope(tasm(), SSE4_1); \
Register dst = i.OutputRegister(); \
Register tmp = i.TempRegister(0); \
__ movq(tmp, Immediate(1)); \
__ xorq(dst, dst); \
__ pxor(kScratchDoubleReg, kScratchDoubleReg); \
__ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \
__ ptest(kScratchDoubleReg, kScratchDoubleReg); \
__ cmovq(zero, dst, tmp); \
} while (false)
void CodeGenerator::AssembleDeconstructFrame() {
unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
__ movq(rsp, rbp);
__ popq(rbp);
}
void CodeGenerator::AssemblePrepareTailCall() {
if (frame_access_state()->has_frame()) {
__ movq(rbp, MemOperand(rbp, 0));
}
frame_access_state()->SetFrameAccessToSP();
}
void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
Register scratch1,
Register scratch2,
Register scratch3) {
DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
Label done;
// Check if current frame is an arguments adaptor frame.
__ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
__ j(not_equal, &done, Label::kNear);
// Load arguments count from current arguments adaptor frame (note, it
// does not include receiver).
Register caller_args_count_reg = scratch1;
__ SmiUntag(caller_args_count_reg,
Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
ParameterCount callee_args_count(args_reg);
__ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
scratch3);
__ bind(&done);
}
namespace {
void AdjustStackPointerForTailCall(TurboAssembler* assembler,
FrameAccessState* state,
int new_slot_above_sp,
bool allow_shrinkage = true) {
int current_sp_offset = state->GetSPToFPSlotCount() +
StandardFrameConstants::kFixedSlotCountAboveFp;
int stack_slot_delta = new_slot_above_sp - current_sp_offset;
if (stack_slot_delta > 0) {
assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
state->IncreaseSPDelta(stack_slot_delta);
} else if (allow_shrinkage && stack_slot_delta < 0) {
assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
state->IncreaseSPDelta(stack_slot_delta);
}
}
void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
assembler->movq(kScratchRegister, shuffle_mask);
assembler->Push(kScratchRegister);
shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
assembler->movq(kScratchRegister, shuffle_mask);
assembler->Push(kScratchRegister);
}
} // namespace
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
int first_unused_stack_slot) {
CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
ZoneVector<MoveOperands*> pushes(zone());
GetPushCompatibleMoves(instr, flags, &pushes);
if (!pushes.empty() &&
(LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
first_unused_stack_slot)) {
X64OperandConverter g(this, instr);
for (auto move : pushes) {
LocationOperand destination_location(
LocationOperand::cast(move->destination()));
InstructionOperand source(move->source());
AdjustStackPointerForTailCall(tasm(), frame_access_state(),
destination_location.index());
if (source.IsStackSlot()) {
LocationOperand source_location(LocationOperand::cast(source));
__ Push(g.SlotToOperand(source_location.index()));
} else if (source.IsRegister()) {
LocationOperand source_location(LocationOperand::cast(source));
__ Push(source_location.GetRegister());
} else if (source.IsImmediate()) {
__ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
} else {
// Pushes of non-scalar data types is not supported.
UNIMPLEMENTED();
}
frame_access_state()->IncreaseSPDelta(1);
move->Eliminate();
}
}
AdjustStackPointerForTailCall(tasm(), frame_access_state(),
first_unused_stack_slot, false);
}
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
int first_unused_stack_slot) {
AdjustStackPointerForTailCall(tasm(), frame_access_state(),
first_unused_stack_slot);
}
// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
__ ComputeCodeStartAddress(rbx);
__ cmpq(rbx, kJavaScriptCallCodeStartRegister);
__ Assert(equal, AbortReason::kWrongFunctionCodeStart);
}
// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
// to:
// 1. read from memory the word that contains that bit, which can be found in
// the flags in the referenced {CodeDataContainer} object;
// 2. test kMarkedForDeoptimizationBit in those flags; and
// 3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
__ LoadTaggedPointerField(rbx,
Operand(kJavaScriptCallCodeStartRegister, offset));
__ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
Immediate(1 << Code::kMarkedForDeoptimizationBit));
__ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
RelocInfo::CODE_TARGET, not_zero);
}
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
// Set a mask which has all bits set in the normal case, but has all
// bits cleared if we are speculatively executing the wrong PC.
__ ComputeCodeStartAddress(rbx);
__ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
__ cmpq(kJavaScriptCallCodeStartRegister, rbx);
__ movq(rbx, Immediate(-1));
__ cmovq(equal, kSpeculationPoisonRegister, rbx);
}
void CodeGenerator::AssembleRegisterArgumentPoisoning() {
__ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
__ andq(kContextRegister, kSpeculationPoisonRegister);
__ andq(rsp, kSpeculationPoisonRegister);
}
// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Instruction* instr) {
X64OperandConverter i(this, instr);
InstructionCode opcode = instr->opcode();
ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
switch (arch_opcode) {
case kArchCallCodeObject: {
if (HasImmediateInput(instr, 0)) {
Handle<Code> code = i.InputCode(0);
__ Call(code, RelocInfo::CODE_TARGET);
} else {
Register reg = i.InputRegister(0);
DCHECK_IMPLIES(
HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
reg == kJavaScriptCallCodeStartRegister);
__ LoadCodeObjectEntry(reg, reg);
if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
__ RetpolineCall(reg);
} else {
__ call(reg);
}
}
RecordCallPosition(instr);
frame_access_state()->ClearSPDelta();
break;
}
case kArchCallBuiltinPointer: {
DCHECK(!HasImmediateInput(instr, 0));
Register builtin_index = i.InputRegister(0);
__ CallBuiltinByIndex(builtin_index);
RecordCallPosition(instr);
frame_access_state()->ClearSPDelta();
break;
}
case kArchCallWasmFunction: {
if (HasImmediateInput(instr, 0)) {
Constant constant = i.ToConstant(instr->InputAt(0));
Address wasm_code = static_cast<Address>(constant.ToInt64());
if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
__ near_call(wasm_code, constant.rmode());
} else {
if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
__ RetpolineCall(wasm_code, constant.rmode());
} else {
__ Call(wasm_code, constant.rmode());
}
}
} else {
Register reg = i.InputRegister(0);
if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
__ RetpolineCall(reg);
} else {
__ call(reg);
}
}
RecordCallPosition(instr);
frame_access_state()->ClearSPDelta();
break;
}
case kArchTailCallCodeObjectFromJSFunction:
case kArchTailCallCodeObject: {
if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
i.TempRegister(0), i.TempRegister(1),
i.TempRegister(2));
}
if (HasImmediateInput(instr, 0)) {
Handle<Code> code = i.InputCode(0);
__ Jump(code, RelocInfo::CODE_TARGET);
} else {
Register reg = i.InputRegister(0);
DCHECK_IMPLIES(
HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
reg == kJavaScriptCallCodeStartRegister);
__ LoadCodeObjectEntry(reg, reg);
if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
__ RetpolineJump(reg);
} else {
__ jmp(reg);
}
}
unwinding_info_writer_.MarkBlockWillExit();
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchTailCallWasm: {
if (HasImmediateInput(instr, 0)) {
Constant constant = i.ToConstant(instr->InputAt(0));
Address wasm_code = static_cast<Address>(constant.ToInt64());
if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
__ near_jmp(wasm_code, constant.rmode());
} else {
__ Move(kScratchRegister, wasm_code, constant.rmode());
__ jmp(kScratchRegister);
}
} else {
Register reg = i.InputRegister(0);
if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
__ RetpolineJump(reg);
} else {
__ jmp(reg);
}
}
unwinding_info_writer_.MarkBlockWillExit();
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchTailCallAddress: {
CHECK(!HasImmediateInput(instr, 0));
Register reg = i.InputRegister(0);
DCHECK_IMPLIES(
HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
reg == kJavaScriptCallCodeStartRegister);
if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
__ RetpolineJump(reg);
} else {
__ jmp(reg);
}
unwinding_info_writer_.MarkBlockWillExit();
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchCallJSFunction: {
Register func = i.InputRegister(0);
if (FLAG_debug_code) {
// Check the function's context matches the context argument.
__ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
__ Assert(equal, AbortReason::kWrongFunctionContext);
}
static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
__ LoadTaggedPointerField(rcx,
FieldOperand(func, JSFunction::kCodeOffset));
__ CallCodeObject(rcx);
frame_access_state()->ClearSPDelta();
RecordCallPosition(instr);
break;
}
case kArchPrepareCallCFunction: {
// Frame alignment requires using FP-relative frame addressing.
frame_access_state()->SetFrameAccessToFP();
int const num_parameters = MiscField::decode(instr->opcode());
__ PrepareCallCFunction(num_parameters);
break;
}
case kArchSaveCallerRegisters: {
fp_mode_ =
static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
// kReturnRegister0 should have been saved before entering the stub.
int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
DCHECK(IsAligned(bytes, kSystemPointerSize));
DCHECK_EQ(0, frame_access_state()->sp_delta());
frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
DCHECK(!caller_registers_saved_);
caller_registers_saved_ = true;
break;
}
case kArchRestoreCallerRegisters: {
DCHECK(fp_mode_ ==
static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
// Don't overwrite the returned value.
int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
DCHECK_EQ(0, frame_access_state()->sp_delta());
DCHECK(caller_registers_saved_);
caller_registers_saved_ = false;
break;
}
case kArchPrepareTailCall:
AssemblePrepareTailCall();
break;
case kArchCallCFunction: {
int const num_parameters = MiscField::decode(instr->opcode());
Label return_location;
if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
// Put the return address in a stack slot.
__ leaq(kScratchRegister, Operand(&return_location, 0));
__ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
kScratchRegister);
}
if (HasImmediateInput(instr, 0)) {
ExternalReference ref = i.InputExternalReference(0);
__ CallCFunction(ref, num_parameters);
} else {
Register func = i.InputRegister(0);
__ CallCFunction(func, num_parameters);
}
__ bind(&return_location);
RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
frame_access_state()->SetFrameAccessToDefault();
// Ideally, we should decrement SP delta to match the change of stack
// pointer in CallCFunction. However, for certain architectures (e.g.
// ARM), there may be more strict alignment requirement, causing old SP
// to be saved on the stack. In those cases, we can not calculate the SP
// delta statically.
frame_access_state()->ClearSPDelta();
if (caller_registers_saved_) {
// Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
// Here, we assume the sequence to be:
// kArchSaveCallerRegisters;
// kArchCallCFunction;
// kArchRestoreCallerRegisters;
int bytes =
__ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
}
// TODO(tebbi): Do we need an lfence here?
break;
}
case kArchJmp:
AssembleArchJump(i.InputRpo(0));
break;
case kArchBinarySearchSwitch:
AssembleArchBinarySearchSwitch(instr);
break;
case kArchLookupSwitch:
AssembleArchLookupSwitch(instr);
break;
case kArchTableSwitch:
AssembleArchTableSwitch(instr);
break;
case kArchComment:
__ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
break;
case kArchAbortCSAAssert:
DCHECK(i.InputRegister(0) == rdx);
{
// We don't actually want to generate a pile of code for this, so just
// claim there is a stack frame, without generating one.
FrameScope scope(tasm(), StackFrame::NONE);
__ Call(
isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
RelocInfo::CODE_TARGET);
}
__ int3();
unwinding_info_writer_.MarkBlockWillExit();
break;
case kArchDebugBreak:
__ int3();
break;
case kArchThrowTerminator:
unwinding_info_writer_.MarkBlockWillExit();
break;
case kArchNop:
// don't emit code for nops.
break;
case kArchDeoptimize: {
int deopt_state_id =
BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
CodeGenResult result =
AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
if (result != kSuccess) return result;
unwinding_info_writer_.MarkBlockWillExit();
break;
}
case kArchRet:
AssembleReturn(instr->InputAt(0));
break;
case kArchStackPointer:
__ movq(i.OutputRegister(), rsp);
break;
case kArchFramePointer:
__ movq(i.OutputRegister(), rbp);
break;
case kArchParentFramePointer:
if (frame_access_state()->has_frame()) {
__ movq(i.OutputRegister(), Operand(rbp, 0));
} else {
__ movq(i.OutputRegister(), rbp);
}
break;
case kArchTruncateDoubleToI: {
auto result = i.OutputRegister();
auto input = i.InputDoubleRegister(0);
auto ool = new (zone()) OutOfLineTruncateDoubleToI(
this, result, input, DetermineStubCallMode(),
&unwinding_info_writer_);
// We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
// use of Cvttsd2siq requires the movl below to avoid sign extension.
__ Cvttsd2siq(result, input);
__ cmpq(result, Immediate(1));
__ j(overflow, ool->entry());
__ bind(ool->exit());
__ movl(result, result);
break;
}
case kArchStoreWithWriteBarrier: {
RecordWriteMode mode =
static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
Register object = i.InputRegister(0);
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
Register value = i.InputRegister(index);
Register scratch0 = i.TempRegister(0);
Register scratch1 = i.TempRegister(1);
auto ool = new (zone())
OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
mode, DetermineStubCallMode());
__ StoreTaggedField(operand, value);
__ CheckPageFlag(object, scratch0,
MemoryChunk::kPointersFromHereAreInterestingMask,
not_zero, ool->entry());
__ bind(ool->exit());
break;
}
case kArchWordPoisonOnSpeculation:
DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
__ andq(i.InputRegister(0), kSpeculationPoisonRegister);
break;
case kX64MFence:
__ mfence();
break;
case kX64LFence:
__ lfence();
break;
case kArchStackSlot: {
FrameOffset offset =
frame_access_state()->GetFrameOffset(i.InputInt32(0));
Register base = offset.from_stack_pointer() ? rsp : rbp;
__ leaq(i.OutputRegister(), Operand(base, offset.offset()));
break;
}
case kIeee754Float64Acos:
ASSEMBLE_IEEE754_UNOP(acos);
break;
case kIeee754Float64Acosh:
ASSEMBLE_IEEE754_UNOP(acosh);
break;
case kIeee754Float64Asin:
ASSEMBLE_IEEE754_UNOP(asin);
break;
case kIeee754Float64Asinh:
ASSEMBLE_IEEE754_UNOP(asinh);
break;
case kIeee754Float64Atan:
ASSEMBLE_IEEE754_UNOP(atan);
break;
case kIeee754Float64Atanh:
ASSEMBLE_IEEE754_UNOP(atanh);
break;
case kIeee754Float64Atan2:
ASSEMBLE_IEEE754_BINOP(atan2);
break;
case kIeee754Float64Cbrt:
ASSEMBLE_IEEE754_UNOP(cbrt);
break;
case kIeee754Float64Cos:
ASSEMBLE_IEEE754_UNOP(cos);
break;
case kIeee754Float64Cosh:
ASSEMBLE_IEEE754_UNOP(cosh);
break;
case kIeee754Float64Exp:
ASSEMBLE_IEEE754_UNOP(exp);
break;
case kIeee754Float64Expm1:
ASSEMBLE_IEEE754_UNOP(expm1);
break;
case kIeee754Float64Log:
ASSEMBLE_IEEE754_UNOP(log);
break;
case kIeee754Float64Log1p:
ASSEMBLE_IEEE754_UNOP(log1p);
break;
case kIeee754Float64Log2:
ASSEMBLE_IEEE754_UNOP(log2);
break;
case kIeee754Float64Log10:
ASSEMBLE_IEEE754_UNOP(log10);
break;
case kIeee754Float64Pow:
ASSEMBLE_IEEE754_BINOP(pow);
break;
case kIeee754Float64Sin:
ASSEMBLE_IEEE754_UNOP(sin);
break;
case kIeee754Float64Sinh:
ASSEMBLE_IEEE754_UNOP(sinh);
break;
case kIeee754Float64Tan:
ASSEMBLE_IEEE754_UNOP(tan);
break;
case kIeee754Float64Tanh:
ASSEMBLE_IEEE754_UNOP(tanh);
break;
case kX64Add32:
ASSEMBLE_BINOP(addl);
break;
case kX64Add:
ASSEMBLE_BINOP(addq);
break;
case kX64Sub32:
ASSEMBLE_BINOP(subl);
break;
case kX64Sub:
ASSEMBLE_BINOP(subq);
break;
case kX64And32:
ASSEMBLE_BINOP(andl);
break;
case kX64And:
ASSEMBLE_BINOP(andq);
break;
case kX64Cmp8:
ASSEMBLE_COMPARE(cmpb);
break;
case kX64Cmp16:
ASSEMBLE_COMPARE(cmpw);
break;
case kX64Cmp32:
ASSEMBLE_COMPARE(cmpl);
break;
case kX64Cmp:
ASSEMBLE_COMPARE(cmpq);
break;
case kX64Test8:
ASSEMBLE_COMPARE(testb);
break;
case kX64Test16:
ASSEMBLE_COMPARE(testw);
break;
case kX64Test32:
ASSEMBLE_COMPARE(testl);
break;
case kX64Test:
ASSEMBLE_COMPARE(testq);
break;
case kX64Imul32:
ASSEMBLE_MULT(imull);
break;
case kX64Imul:
ASSEMBLE_MULT(imulq);
break;
case kX64ImulHigh32:
if (instr->InputAt(1)->IsRegister()) {
__ imull(i.InputRegister(1));
} else {
__ imull(i.InputOperand(1));
}
break;
case kX64UmulHigh32:
if (instr->InputAt(1)->IsRegister()) {
__ mull(i.InputRegister(1));
} else {
__ mull(i.InputOperand(1));
}
break;
case kX64Idiv32:
__ cdq();
__ idivl(i.InputRegister(1));
break;
case kX64Idiv:
__ cqo();
__ idivq(i.InputRegister(1));
break;
case kX64Udiv32:
__ xorl(rdx, rdx);
__ divl(i.InputRegister(1));
break;
case kX64Udiv:
__ xorq(rdx, rdx);
__ divq(i.InputRegister(1));
break;
case kX64Not:
ASSEMBLE_UNOP(notq);
break;
case kX64Not32:
ASSEMBLE_UNOP(notl);
break;
case kX64Neg:
ASSEMBLE_UNOP(negq);
break;
case kX64Neg32:
ASSEMBLE_UNOP(negl);
break;
case kX64Or32:
ASSEMBLE_BINOP(orl);
break;
case kX64Or:
ASSEMBLE_BINOP(orq);
break;
case kX64Xor32:
ASSEMBLE_BINOP(xorl);
break;
case kX64Xor:
ASSEMBLE_BINOP(xorq);
break;
case kX64Shl32:
ASSEMBLE_SHIFT(shll, 5);
break;
case kX64Shl:
ASSEMBLE_SHIFT(shlq, 6);
break;
case kX64Shr32:
ASSEMBLE_SHIFT(shrl, 5);
break;
case kX64Shr:
ASSEMBLE_SHIFT(shrq, 6);
break;
case kX64Sar32:
ASSEMBLE_SHIFT(sarl, 5);
break;
case kX64Sar:
ASSEMBLE_SHIFT(sarq, 6);
break;
case kX64Ror32:
ASSEMBLE_SHIFT(rorl, 5);
break;
case kX64Ror:
ASSEMBLE_SHIFT(rorq, 6);
break;
case kX64Lzcnt:
if (instr->InputAt(0)->IsRegister()) {
__ Lzcntq(i.OutputRegister(), i.InputRegister(0));
} else {
__ Lzcntq(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Lzcnt32:
if (instr->InputAt(0)->IsRegister()) {
__ Lzcntl(i.OutputRegister(), i.InputRegister(0));
} else {
__ Lzcntl(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Tzcnt:
if (instr->InputAt(0)->IsRegister()) {
__ Tzcntq(i.OutputRegister(), i.InputRegister(0));
} else {
__ Tzcntq(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Tzcnt32:
if (instr->InputAt(0)->IsRegister()) {
__ Tzcntl(i.OutputRegister(), i.InputRegister(0));
} else {
__ Tzcntl(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Popcnt:
if (instr->InputAt(0)->IsRegister()) {
__ Popcntq(i.OutputRegister(), i.InputRegister(0));
} else {
__ Popcntq(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Popcnt32:
if (instr->InputAt(0)->IsRegister()) {
__ Popcntl(i.OutputRegister(), i.InputRegister(0));
} else {
__ Popcntl(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Bswap:
__ bswapq(i.OutputRegister());
break;
case kX64Bswap32:
__ bswapl(i.OutputRegister());
break;
case kSSEFloat32Cmp:
ASSEMBLE_SSE_BINOP(Ucomiss);
break;
case kSSEFloat32Add:
ASSEMBLE_SSE_BINOP(addss);
break;
case kSSEFloat32Sub:
ASSEMBLE_SSE_BINOP(subss);
break;
case kSSEFloat32Mul:
ASSEMBLE_SSE_BINOP(mulss);
break;
case kSSEFloat32Div:
ASSEMBLE_SSE_BINOP(divss);
// Don't delete this mov. It may improve performance on some CPUs,
// when there is a (v)mulss depending on the result.
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kSSEFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrlq(kScratchDoubleReg, 33);
__ Andps(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kSSEFloat32Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psllq(kScratchDoubleReg, 31);
__ Xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kSSEFloat32Sqrt:
ASSEMBLE_SSE_UNOP(sqrtss);
break;
case kSSEFloat32ToFloat64:
ASSEMBLE_SSE_UNOP(Cvtss2sd);
break;
case kSSEFloat32Round: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
break;
}
case kSSEFloat32ToInt32:
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
} else {
__ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
}
break;
case kSSEFloat32ToUint32: {
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
} else {
__ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
}
break;
}
case kSSEFloat64Cmp:
ASSEMBLE_SSE_BINOP(Ucomisd);
break;
case kSSEFloat64Add:
ASSEMBLE_SSE_BINOP(addsd);
break;
case kSSEFloat64Sub:
ASSEMBLE_SSE_BINOP(subsd);
break;
case kSSEFloat64Mul:
ASSEMBLE_SSE_BINOP(mulsd);
break;
case kSSEFloat64Div:
ASSEMBLE_SSE_BINOP(divsd);
// Don't delete this mov. It may improve performance on some CPUs,
// when there is a (v)mulsd depending on the result.
__ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kSSEFloat64Mod: {
__ AllocateStackSpace(kDoubleSize);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kDoubleSize);
// Move values to st(0) and st(1).
__ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
__ fld_d(Operand(rsp, 0));
__ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
__ fld_d(Operand(rsp, 0));
// Loop while fprem isn't done.
Label mod_loop;
__ bind(&mod_loop);
// This instructions traps on all kinds inputs, but we are assuming the
// floating point control word is set to ignore them all.
__ fprem();
// The following 2 instruction implicitly use rax.
__ fnstsw_ax();
if (CpuFeatures::IsSupported(SAHF)) {
CpuFeatureScope sahf_scope(tasm(), SAHF);
__ sahf();
} else {
__ shrl(rax, Immediate(8));
__ andl(rax, Immediate(0xFF));
__ pushq(rax);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSystemPointerSize);
__ popfq();
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-kSystemPointerSize);
}
__ j(parity_even, &mod_loop);
// Move output to stack and clean up.
__ fstp(1);
__ fstp_d(Operand(rsp, 0));
__ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
__ addq(rsp, Immediate(kDoubleSize));
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
-kDoubleSize);
break;
}
case kSSEFloat32Max: {
Label compare_swap, done_compare;
if (instr->InputAt(1)->IsFPRegister()) {
__ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
}
auto ool =
new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
__ j(parity_even, ool->entry());
__ j(above, &done_compare, Label::kNear);
__ j(below, &compare_swap, Label::kNear);
__ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
__ testl(kScratchRegister, Immediate(1));
__ j(zero, &done_compare, Label::kNear);
__ bind(&compare_swap);
if (instr->InputAt(1)->IsFPRegister()) {
__ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
}
__ bind(&done_compare);
__ bind(ool->exit());
break;
}
case kSSEFloat32Min: {
Label compare_swap, done_compare;
if (instr->InputAt(1)->IsFPRegister()) {
__ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
}
auto ool =
new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
__ j(parity_even, ool->entry());
__ j(below, &done_compare, Label::kNear);
__ j(above, &compare_swap, Label::kNear);
if (instr->InputAt(1)->IsFPRegister()) {
__ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
} else {
__ Movss(kScratchDoubleReg, i.InputOperand(1));
__ Movmskps(kScratchRegister, kScratchDoubleReg);
}
__ testl(kScratchRegister, Immediate(1));
__ j(zero, &done_compare, Label::kNear);
__ bind(&compare_swap);
if (instr->InputAt(1)->IsFPRegister()) {
__ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
}
__ bind(&done_compare);
__ bind(ool->exit());
break;
}
case kSSEFloat64Max: {
Label compare_swap, done_compare;
if (instr->InputAt(1)->IsFPRegister()) {
__ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
}
auto ool =
new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
__ j(parity_even, ool->entry());
__ j(above, &done_compare, Label::kNear);
__ j(below, &compare_swap, Label::kNear);
__ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
__ testl(kScratchRegister, Immediate(1));
__ j(zero, &done_compare, Label::kNear);
__ bind(&compare_swap);
if (instr->InputAt(1)->IsFPRegister()) {
__ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
}
__ bind(&done_compare);
__ bind(ool->exit());
break;
}
case kSSEFloat64Min: {
Label compare_swap, done_compare;
if (instr->InputAt(1)->IsFPRegister()) {
__ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
}
auto ool =
new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
__ j(parity_even, ool->entry());
__ j(below, &done_compare, Label::kNear);
__ j(above, &compare_swap, Label::kNear);
if (instr->InputAt(1)->IsFPRegister()) {
__ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
} else {
__ Movsd(kScratchDoubleReg, i.InputOperand(1));
__ Movmskpd(kScratchRegister, kScratchDoubleReg);
}
__ testl(kScratchRegister, Immediate(1));
__ j(zero, &done_compare, Label::kNear);
__ bind(&compare_swap);
if (instr->InputAt(1)->IsFPRegister()) {
__ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
}
__ bind(&done_compare);
__ bind(ool->exit());
break;
}
case kX64F64x2Abs:
case kSSEFloat64Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrlq(kScratchDoubleReg, 1);
__ Andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kX64F64x2Neg:
case kSSEFloat64Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psllq(kScratchDoubleReg, 63);
__ Xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kSSEFloat64Sqrt:
ASSEMBLE_SSE_UNOP(Sqrtsd);
break;
case kSSEFloat64Round: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
break;
}
case kSSEFloat64ToFloat32:
ASSEMBLE_SSE_UNOP(Cvtsd2ss);
break;
case kSSEFloat64ToInt32:
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
} else {
__ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
}
break;
case kSSEFloat64ToUint32: {
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
} else {
__ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
}
if (MiscField::decode(instr->opcode())) {
__ AssertZeroExtended(i.OutputRegister());
}
break;
}
case kSSEFloat32ToInt64:
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
} else {
__ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
}
if (instr->OutputCount() > 1) {
__ Set(i.OutputRegister(1), 1);
Label done;
Label fail;
__ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
if (instr->InputAt(0)->IsFPRegister()) {
__ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
} else {
__ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
}
// If the input is NaN, then the conversion fails.
__ j(parity_even, &fail);
// If the input is INT64_MIN, then the conversion succeeds.
__ j(equal, &done);
__ cmpq(i.OutputRegister(0), Immediate(1));
// If the conversion results in INT64_MIN, but the input was not
// INT64_MIN, then the conversion fails.
__ j(no_overflow, &done);
__ bind(&fail);
__ Set(i.OutputRegister(1), 0);
__ bind(&done);
}
break;
case kSSEFloat64ToInt64:
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
} else {
__ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
}
if (instr->OutputCount() > 1) {
__ Set(i.OutputRegister(1), 1);
Label done;
Label fail;
__ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
if (instr->InputAt(0)->IsFPRegister()) {
__ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
} else {
__ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
}
// If the input is NaN, then the conversion fails.
__ j(parity_even, &fail);
// If the input is INT64_MIN, then the conversion succeeds.
__ j(equal, &done);
__ cmpq(i.OutputRegister(0), Immediate(1));
// If the conversion results in INT64_MIN, but the input was not
// INT64_MIN, then the conversion fails.
__ j(no_overflow, &done);
__ bind(&fail);
__ Set(i.OutputRegister(1), 0);
__ bind(&done);
}
break;
case kSSEFloat32ToUint64: {
Label fail;
if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
} else {
__ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
}
if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
__ bind(&fail);
break;
}
case kSSEFloat64ToUint64: {
Label fail;
if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
} else {
__ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
}
if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
__ bind(&fail);
break;
}
case kSSEInt32ToFloat64:
if (instr->InputAt(0)->IsRegister()) {
__ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kSSEInt32ToFloat32:
if (instr->InputAt(0)->IsRegister()) {
__ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kSSEInt64ToFloat32:
if (instr->InputAt(0)->IsRegister()) {
__ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kSSEInt64ToFloat64:
if (instr->InputAt(0)->IsRegister()) {
__ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kSSEUint64ToFloat32:
if (instr->InputAt(0)->IsRegister()) {
__ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kSSEUint64ToFloat64:
if (instr->InputAt(0)->IsRegister()) {
__ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kSSEUint32ToFloat64:
if (instr->InputAt(0)->IsRegister()) {
__ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kSSEUint32ToFloat32:
if (instr->InputAt(0)->IsRegister()) {
__ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kSSEFloat64ExtractLowWord32:
if (instr->InputAt(0)->IsFPStackSlot()) {
__ movl(i.OutputRegister(), i.InputOperand(0));
} else {
__ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
}
break;
case kSSEFloat64ExtractHighWord32:
if (instr->InputAt(0)->IsFPStackSlot()) {
__ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
} else {
__ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
}
break;
case kSSEFloat64InsertLowWord32:
if (instr->InputAt(1)->IsRegister()) {
__ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
} else {
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
}
break;
case kSSEFloat64InsertHighWord32:
if (instr->InputAt(1)->IsRegister()) {
__ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
} else {
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
}
break;
case kSSEFloat64LoadLowWord32:
if (instr->InputAt(0)->IsRegister()) {
__ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kAVXFloat32Cmp: {
CpuFeatureScope avx_scope(tasm(), AVX);
if (instr->InputAt(1)->IsFPRegister()) {
__ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
}
break;
}
case kAVXFloat32Add:
ASSEMBLE_AVX_BINOP(vaddss);
break;
case kAVXFloat32Sub:
ASSEMBLE_AVX_BINOP(vsubss);
break;
case kAVXFloat32Mul:
ASSEMBLE_AVX_BINOP(vmulss);
break;
case kAVXFloat32Div:
ASSEMBLE_AVX_BINOP(vdivss);
// Don't delete this mov. It may improve performance on some CPUs,
// when there is a (v)mulss depending on the result.
__ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kAVXFloat64Cmp: {
CpuFeatureScope avx_scope(tasm(), AVX);
if (instr->InputAt(1)->IsFPRegister()) {
__ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
} else {
__ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
}
break;
}
case kAVXFloat64Add:
ASSEMBLE_AVX_BINOP(vaddsd);
break;
case kAVXFloat64Sub:
ASSEMBLE_AVX_BINOP(vsubsd);
break;
case kAVXFloat64Mul:
ASSEMBLE_AVX_BINOP(vmulsd);
break;
case kAVXFloat64Div:
ASSEMBLE_AVX_BINOP(vdivsd);
// Don't delete this mov. It may improve performance on some CPUs,
// when there is a (v)mulsd depending on the result.
__ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kAVXFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
if (instr->InputAt(0)->IsFPRegister()) {
__ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputDoubleRegister(0));
} else {
__ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
}
break;
}
case kAVXFloat32Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
if (instr->InputAt(0)->IsFPRegister()) {
__ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputDoubleRegister(0));
} else {
__ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
}
break;
}
case kAVXFloat64Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
if (instr->InputAt(0)->IsFPRegister()) {
__ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputDoubleRegister(0));
} else {
__ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
}
break;
}
case kAVXFloat64Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
if (instr->InputAt(0)->IsFPRegister()) {
__ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputDoubleRegister(0));
} else {
__ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
}
break;
}
case kSSEFloat64SilenceNaN:
__ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
__ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
break;
case kX64Movsxbl:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movsxbl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxbl:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movzxbl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxbq:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movsxbq);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxbq:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movzxbq);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movb: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
__ movb(operand, Immediate(i.InputInt8(index)));
} else {
__ movb(operand, i.InputRegister(index));
}
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kX64Movsxwl:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movsxwl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movzxwl:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movzxwl);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxwq:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movsxwq);
break;
case kX64Movzxwq:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movzxwq);
__ AssertZeroExtended(i.OutputRegister());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movw: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
__ movw(operand, Immediate(i.InputInt16(index)));
} else {
__ movw(operand, i.InputRegister(index));
}
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kX64Movl:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
if (instr->HasOutput()) {
if (instr->addressing_mode() == kMode_None) {
if (instr->InputAt(0)->IsRegister()) {
__ movl(i.OutputRegister(), i.InputRegister(0));
} else {
__ movl(i.OutputRegister(), i.InputOperand(0));
}
} else {
__ movl(i.OutputRegister(), i.MemoryOperand());
}
__ AssertZeroExtended(i.OutputRegister());
} else {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
__ movl(operand, i.InputImmediate(index));
} else {
__ movl(operand, i.InputRegister(index));
}
}
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movsxlq:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
ASSEMBLE_MOVX(movsxlq);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64MovqDecompressTaggedSigned: {
CHECK(instr->HasOutput());
__ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kX64MovqDecompressTaggedPointer: {
CHECK(instr->HasOutput());
__ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kX64MovqDecompressAnyTagged: {
CHECK(instr->HasOutput());
__ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kX64MovqCompressTagged: {
CHECK(!instr->HasOutput());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
__ StoreTaggedField(operand, i.InputImmediate(index));
} else {
__ StoreTaggedField(operand, i.InputRegister(index));
}
break;
}
case kX64DecompressSigned: {
CHECK(instr->HasOutput());
ASSEMBLE_MOVX(DecompressTaggedSigned);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kX64DecompressPointer: {
CHECK(instr->HasOutput());
ASSEMBLE_MOVX(DecompressTaggedPointer);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kX64DecompressAny: {
CHECK(instr->HasOutput());
ASSEMBLE_MOVX(DecompressAnyTagged);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kX64CompressSigned: // Fall through.
case kX64CompressPointer: // Fall through.
case kX64CompressAny: {
ASSEMBLE_MOVX(movl);
break;
}
case kX64Movq:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
if (instr->HasOutput()) {
__ movq(i.OutputRegister(), i.MemoryOperand());
} else {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
if (HasImmediateInput(instr, index)) {
__ movq(operand, i.InputImmediate(index));
} else {
__ movq(operand, i.InputRegister(index));
}
}
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kX64Movss:
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
if (instr->HasOutput()) {
__ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
} else {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Movss(operand, i.InputDoubleRegister(index));
}
break;
case kX64Movsd: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
if (instr->HasOutput()) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
// If we have to poison the loaded value, we load into a general
// purpose register first, mask it with the poison, and move the
// value from the general purpose register into the double register.
__ movq(kScratchRegister, i.MemoryOperand());
__ andq(kScratchRegister, kSpeculationPoisonRegister);
__ Movq(i.OutputDoubleRegister(), kScratchRegister);
} else {
__ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
}
} else {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Movsd(operand, i.InputDoubleRegister(index));
}
break;
}
case kX64Movdqu: {
CpuFeatureScope sse_scope(tasm(), SSSE3);
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
if (instr->HasOutput()) {
__ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
} else {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Movdqu(operand, i.InputSimd128Register(index));
}
break;
}
case kX64BitcastFI:
if (instr->InputAt(0)->IsFPStackSlot()) {
__ movl(i.OutputRegister(), i.InputOperand(0));
} else {
__ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
}
break;
case kX64BitcastDL:
if (instr->InputAt(0)->IsFPStackSlot()) {
__ movq(i.OutputRegister(), i.InputOperand(0));
} else {
__ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
}
break;
case kX64BitcastIF:
if (instr->InputAt(0)->IsRegister()) {
__ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kX64BitcastLD:
if (instr->InputAt(0)->IsRegister()) {
__ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kX64Lea32: {
AddressingMode mode = AddressingModeField::decode(instr->opcode());
// Shorten "leal" to "addl", "subl" or "shll" if the register allocation
// and addressing mode just happens to work out. The "addl"/"subl" forms
// in these cases are faster based on measurements.
if (i.InputRegister(0) == i.OutputRegister()) {
if (mode == kMode_MRI) {
int32_t constant_summand = i.InputInt32(1);
DCHECK_NE(0, constant_summand);
if (constant_summand > 0) {
__ addl(i.OutputRegister(), Immediate(constant_summand));
} else {
__ subl(i.OutputRegister(),
Immediate(base::NegateWithWraparound(constant_summand)));
}
} else if (mode == kMode_MR1) {
if (i.InputRegister(1) == i.OutputRegister()) {
__ shll(i.OutputRegister(), Immediate(1));
} else {
__ addl(i.OutputRegister(), i.InputRegister(1));
}
} else if (mode == kMode_M2) {
__ shll(i.OutputRegister(), Immediate(1));
} else if (mode == kMode_M4) {
__ shll(i.OutputRegister(), Immediate(2));
} else if (mode == kMode_M8) {
__ shll(i.OutputRegister(), Immediate(3));
} else {
__ leal(i.OutputRegister(), i.MemoryOperand());
}
} else if (mode == kMode_MR1 &&
i.InputRegister(1) == i.OutputRegister()) {
__ addl(i.OutputRegister(), i.InputRegister(0));
} else {
__ leal(i.OutputRegister(), i.MemoryOperand());
}
__ AssertZeroExtended(i.OutputRegister());
break;
}
case kX64Lea: {
AddressingMode mode = AddressingModeField::decode(instr->opcode());
// Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
// and addressing mode just happens to work out. The "addq"/"subq" forms
// in these cases are faster based on measurements.
if (i.InputRegister(0) == i.OutputRegister()) {
if (mode == kMode_MRI) {
int32_t constant_summand = i.InputInt32(1);
if (constant_summand > 0) {
__ addq(i.OutputRegister(), Immediate(constant_summand));
} else if (constant_summand < 0) {
__ subq(i.OutputRegister(), Immediate(-constant_summand));
}
} else if (mode == kMode_MR1) {
if (i.InputRegister(1) == i.OutputRegister()) {
__ shlq(i.OutputRegister(), Immediate(1));
} else {
__ addq(i.OutputRegister(), i.InputRegister(1));
}
} else if (mode == kMode_M2) {
__ shlq(i.OutputRegister(), Immediate(1));
} else if (mode == kMode_M4) {
__ shlq(i.OutputRegister(), Immediate(2));
} else if (mode == kMode_M8) {
__ shlq(i.OutputRegister(), Immediate(3));
} else {
__ leaq(i.OutputRegister(), i.MemoryOperand());
}
} else if (mode == kMode_MR1 &&
i.InputRegister(1) == i.OutputRegister()) {
__ addq(i.OutputRegister(), i.InputRegister(0));
} else {
__ leaq(i.OutputRegister(), i.MemoryOperand());
}
break;
}
case kX64Dec32:
__ decl(i.OutputRegister());
break;
case kX64Inc32:
__ incl(i.OutputRegister());
break;
case kX64Push:
if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ pushq(operand);
frame_access_state()->IncreaseSPDelta(1);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSystemPointerSize);
} else if (HasImmediateInput(instr, 0)) {
__ pushq(i.InputImmediate(0));
frame_access_state()->IncreaseSPDelta(1);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSystemPointerSize);
} else if (instr->InputAt(0)->IsRegister()) {
__ pushq(i.InputRegister(0));
frame_access_state()->IncreaseSPDelta(1);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSystemPointerSize);
} else if (instr->InputAt(0)->IsFloatRegister() ||
instr->InputAt(0)->IsDoubleRegister()) {
// TODO(titzer): use another machine instruction?
__ AllocateStackSpace(kDoubleSize);
frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kDoubleSize);
__ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
} else if (instr->InputAt(0)->IsSimd128Register()) {
// TODO(titzer): use another machine instruction?
__ AllocateStackSpace(kSimd128Size);
frame_access_state()->IncreaseSPDelta(kSimd128Size /
kSystemPointerSize);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSimd128Size);
__ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
} else if (instr->InputAt(0)->IsStackSlot() ||
instr->InputAt(0)->IsFloatStackSlot() ||
instr->InputAt(0)->IsDoubleStackSlot()) {
__ pushq(i.InputOperand(0));
frame_access_state()->IncreaseSPDelta(1);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSystemPointerSize);
} else {
DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
__ Movups(kScratchDoubleReg, i.InputOperand(0));
// TODO(titzer): use another machine instruction?
__ AllocateStackSpace(kSimd128Size);
frame_access_state()->IncreaseSPDelta(kSimd128Size /
kSystemPointerSize);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSimd128Size);
__ Movups(Operand(rsp, 0), kScratchDoubleReg);
}
break;
case kX64Poke: {
int slot = MiscField::decode(instr->opcode());
if (HasImmediateInput(instr, 0)) {
__ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
} else {
__ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
}
break;
}
case kX64Peek: {
int reverse_slot = i.InputInt32(0);
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
if (instr->OutputAt(0)->IsFPRegister()) {
LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
if (op->representation() == MachineRepresentation::kFloat64) {
__ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
__ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
}
} else {
__ movq(i.OutputRegister(), Operand(rbp, offset));
}
break;
}
case kX64F64x2Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsFPRegister()) {
__ pshufd(dst, i.InputDoubleRegister(0), 0x44);
} else {
__ pshufd(dst, i.InputOperand(0), 0x44);
}
break;
}
case kX64F64x2ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (instr->InputAt(2)->IsFPRegister()) {
__ movq(kScratchRegister, i.InputDoubleRegister(2));
__ pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
} else {
__ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
case kX64F64x2ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
__ movq(i.OutputDoubleRegister(), kScratchRegister);
break;
}
case kX64F64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Lt: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F64x2Le: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
// TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
case kX64F32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsFPRegister()) {
__ movss(dst, i.InputDoubleRegister(0));
} else {
__ movss(dst, i.InputOperand(0));
}
__ shufps(dst, dst, 0x0);
break;
}
case kX64F32x4ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
__ movd(i.OutputDoubleRegister(), kScratchRegister);
break;
}
case kX64F32x4ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
// The insertps instruction uses imm8[5:4] to indicate the lane
// that needs to be replaced.
byte select = i.InputInt8(1) << 4 & 0x30;
if (instr->InputAt(2)->IsFPRegister()) {
__ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
select);
} else {
__ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
}
break;
}
case kX64F32x4SConvertI32x4: {
__ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
__ psubd(dst, kScratchDoubleReg); // get hi 16 bits
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ psrld(dst, 1); // divide by 2 to get in unsigned range
__ cvtdq2ps(dst, dst); // convert hi exactly
__ addps(dst, dst); // double hi, exactly
__ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kX64F32x4Abs: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psrld(kScratchDoubleReg, 1);
__ andps(i.OutputSimd128Register(), kScratchDoubleReg);
} else {
__ pcmpeqd(dst, dst);
__ psrld(dst, 1);
__ andps(dst, i.InputSimd128Register(0));
}
break;
}
case kX64F32x4Neg: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pslld(kScratchDoubleReg, 31);
__ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
} else {
__ pcmpeqd(dst, dst);
__ pslld(dst, 31);
__ xorps(dst, i.InputSimd128Register(0));
}
break;
}
case kX64F32x4RecipApprox: {
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4RecipSqrtApprox: {
__ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4AddHoriz: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE3);
__ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Mul: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Min: {
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The minps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minps in both orders, merge the resuls, and adjust.
__ movaps(kScratchDoubleReg, src1);
__ minps(kScratchDoubleReg, dst);
__ minps(dst, src1);
// propagate -0's and NaNs, which may be non-canonical.
__ orps(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload.
__ cmpps(dst, kScratchDoubleReg, 3);
__ orps(kScratchDoubleReg, dst);
__ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg);
break;
}
case kX64F32x4Max: {
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxps in both orders, merge the resuls, and adjust.
__ movaps(kScratchDoubleReg, src1);
__ maxps(kScratchDoubleReg, dst);
__ maxps(dst, src1);
// Find discrepancies.
__ xorps(dst, kScratchDoubleReg);
// Propagate NaNs, which may be non-canonical.
__ orps(kScratchDoubleReg, dst);
// Propagate sign discrepancy and (subtle) quiet NaNs.
__ subps(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ cmpps(dst, kScratchDoubleReg, 3);
__ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg);
break;
}
case kX64F32x4Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
break;
}
case kX64F32x4Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
break;
}
case kX64F32x4Lt: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Le: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I64x2Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsRegister()) {
__ movq(dst, i.InputRegister(0));
} else {
__ movq(dst, i.InputOperand(0));
}
__ pshufd(dst, dst, 0x44);
break;
}
case kX64I64x2ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I64x2ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (instr->InputAt(2)->IsRegister()) {
__ pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
__ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
case kX64I64x2Neg: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ movapd(kScratchDoubleReg, src);
src = kScratchDoubleReg;
}
__ pxor(dst, dst);
__ psubq(dst, src);
break;
}
case kX64I64x2Shl: {
__ psllq(i.OutputSimd128Register(), i.InputInt8(1));
break;
}
case kX64I64x2ShrS: {
// TODO(zhin): there is vpsraq but requires AVX512
CpuFeatureScope sse_scope(tasm(), SSE4_1);
// ShrS on each quadword one at a time
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
// lower quadword
__ pextrq(kScratchRegister, src, 0x0);
__ sarq(kScratchRegister, Immediate(i.InputInt8(1)));
__ pinsrq(dst, kScratchRegister, 0x0);
// upper quadword
__ pextrq(kScratchRegister, src, 0x1);
__ sarq(kScratchRegister, Immediate(i.InputInt8(1)));
__ pinsrq(dst, kScratchRegister, 0x1);
break;
}
case kX64I64x2Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddq(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I64x2Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ psubq(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I64x2Mul: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister left = i.InputSimd128Register(0);
XMMRegister right = i.InputSimd128Register(1);
XMMRegister tmp1 = i.ToSimd128Register(instr->TempAt(0));
XMMRegister tmp2 = i.ToSimd128Register(instr->TempAt(1));
__ movaps(tmp1, left);
__ movaps(tmp2, right);
// Multiply high dword of each qword of left with right.
__ psrlq(tmp1, 32);
__ pmuludq(tmp1, right);
// Multiply high dword of each qword of right with left.
__ psrlq(tmp2, 32);
__ pmuludq(tmp2, left);
__ paddq(tmp2, tmp1);
__ psllq(tmp2, 32);
__ pmuludq(left, right);
__ paddq(left, tmp2); // left == dst
break;
}
case kX64I64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
__ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
break;
}
case kX64I64x2GtS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
__ pcmpgtq(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I64x2GeS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
__ movaps(tmp, src);
__ pcmpgtq(tmp, dst);
__ pcmpeqd(dst, dst);
__ pxor(dst, tmp);
break;
}
case kX64I64x2ShrU: {
__ psrlq(i.OutputSimd128Register(), i.InputInt8(1));
break;
}
case kX64I64x2GtU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psllq(kScratchDoubleReg, 63);
__ movaps(tmp, src);
__ pxor(tmp, kScratchDoubleReg);
__ pxor(dst, kScratchDoubleReg);
__ pcmpgtq(dst, tmp);
break;
}
case kX64I64x2GeU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psllq(kScratchDoubleReg, 63);
__ movaps(tmp, src);
__ pxor(dst, kScratchDoubleReg);
__ pxor(tmp, kScratchDoubleReg);
__ pcmpgtq(tmp, dst);
__ pcmpeqd(dst, dst);
__ pxor(dst, tmp);
break;
}
case kX64I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsRegister()) {
__ movd(dst, i.InputRegister(0));
} else {
__ movd(dst, i.InputOperand(0));
}
__ pshufd(dst, dst, 0x0);
break;
}
case kX64I32x4ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I32x4ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (instr->InputAt(2)->IsRegister()) {
__ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
__ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
case kX64I32x4SConvertF32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister dst = i.OutputSimd128Register();
// NAN->0
__ movaps(kScratchDoubleReg, dst);
__ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
__ pand(dst, kScratchDoubleReg);
// Set top bit if >= 0 (but not -0.0!)
__ pxor(kScratchDoubleReg, dst);
// Convert
__ cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0
__ pand(kScratchDoubleReg, dst);
__ psrad(kScratchDoubleReg, 31);
// Set positive overflow lanes to 0x7FFFFFFF
__ pxor(dst, kScratchDoubleReg);