// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/heap/memory-chunk.h"
#include "src/numbers/double.h"
#include "src/utils/boxed-float.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"
namespace v8 {
namespace internal {
namespace compiler {
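// The "__" shorthand below expands to tasm()->, the standard V8 convention
// for emitting instructions through the TurboAssembler.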
#define __ tasm()->
// Adds Arm-specific methods to convert InstructionOperands.
class ArmOperandConverter final : public InstructionOperandConverter {
public:
ArmOperandConverter(CodeGenerator* gen, Instruction* instr)
: InstructionOperandConverter(gen, instr) {}
SBit OutputSBit() const {
switch (instr_->flags_mode()) {
case kFlags_branch:
case kFlags_branch_and_poison:
case kFlags_deoptimize:
case kFlags_deoptimize_and_poison:
case kFlags_set:
case kFlags_trap:
return SetCC;
case kFlags_none:
return LeaveCC;
}
UNREACHABLE();
}
Operand InputImmediate(size_t index) const {
return ToImmediate(instr_->InputAt(index));
}
Operand InputOperand2(size_t first_index) {
const size_t index = first_index;
switch (AddressingModeField::decode(instr_->opcode())) {
case kMode_None:
case kMode_Offset_RI:
case kMode_Offset_RR:
case kMode_Root:
break;
case kMode_Operand2_I:
return InputImmediate(index + 0);
case kMode_Operand2_R:
return Operand(InputRegister(index + 0));
case kMode_Operand2_R_ASR_I:
return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1));
case kMode_Operand2_R_ASR_R:
return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1));
case kMode_Operand2_R_LSL_I:
return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1));
case kMode_Operand2_R_LSL_R:
return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1));
case kMode_Operand2_R_LSR_I:
return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1));
case kMode_Operand2_R_LSR_R:
return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1));
case kMode_Operand2_R_ROR_I:
return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1));
case kMode_Operand2_R_ROR_R:
return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1));
}
UNREACHABLE();
}
MemOperand InputOffset(size_t* first_index) {
const size_t index = *first_index;
switch (AddressingModeField::decode(instr_->opcode())) {
case kMode_None:
case kMode_Operand2_I:
case kMode_Operand2_R:
case kMode_Operand2_R_ASR_I:
case kMode_Operand2_R_ASR_R:
case kMode_Operand2_R_LSL_R:
case kMode_Operand2_R_LSR_I:
case kMode_Operand2_R_LSR_R:
case kMode_Operand2_R_ROR_I:
case kMode_Operand2_R_ROR_R:
break;
case kMode_Operand2_R_LSL_I:
*first_index += 3;
return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
LSL, InputInt32(index + 2));
case kMode_Offset_RI:
*first_index += 2;
return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
case kMode_Offset_RR:
*first_index += 2;
return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
case kMode_Root:
*first_index += 1;
return MemOperand(kRootRegister, InputInt32(index));
}
UNREACHABLE();
}
MemOperand InputOffset(size_t first_index = 0) {
return InputOffset(&first_index);
}
Operand ToImmediate(InstructionOperand* operand) const {
Constant constant = ToConstant(operand);
switch (constant.type()) {
case Constant::kInt32:
if (RelocInfo::IsWasmReference(constant.rmode())) {
return Operand(constant.ToInt32(), constant.rmode());
} else {
return Operand(constant.ToInt32());
}
case Constant::kFloat32:
return Operand::EmbeddedNumber(constant.ToFloat32());
case Constant::kFloat64:
return Operand::EmbeddedNumber(constant.ToFloat64().value());
case Constant::kExternalReference:
return Operand(constant.ToExternalReference());
case Constant::kDelayedStringConstant:
return Operand::EmbeddedStringConstant(
constant.ToDelayedStringConstant());
case Constant::kInt64:
case Constant::kCompressedHeapObject:
case Constant::kHeapObject:
// TODO(dcarney): loading RPO constants on arm.
case Constant::kRpoNumber:
break;
}
UNREACHABLE();
}
MemOperand ToMemOperand(InstructionOperand* op) const {
DCHECK_NOT_NULL(op);
DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
return SlotToMemOperand(AllocatedOperand::cast(op)->index());
}
MemOperand SlotToMemOperand(int slot) const {
FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
}
NeonMemOperand NeonInputOperand(size_t first_index) {
const size_t index = first_index;
switch (AddressingModeField::decode(instr_->opcode())) {
case kMode_Operand2_R:
return NeonMemOperand(InputRegister(index + 0));
default:
break;
}
UNREACHABLE();
}
};
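// Illustrative example (hypothetical operands): for an instruction encoded
// with kMode_Operand2_R_LSL_I and inputs {r1, #3}, InputOperand2(0) yields
// Operand(r1, LSL, 3), i.e. the flexible second operand "r1, lsl #3"; with
// kMode_Offset_RI and inputs {r1, #8}, InputOffset(0) yields
// MemOperand(r1, 8), i.e. the address "[r1, #8]".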
namespace {
class OutOfLineRecordWrite final : public OutOfLineCode {
public:
OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
Register value, RecordWriteMode mode,
StubCallMode stub_mode,
UnwindingInfoWriter* unwinding_info_writer)
: OutOfLineCode(gen),
object_(object),
offset_(offset),
value_(value),
mode_(mode),
stub_mode_(stub_mode),
must_save_lr_(!gen->frame_access_state()->has_frame()),
unwinding_info_writer_(unwinding_info_writer),
zone_(gen->zone()) {}
void Generate() final {
if (mode_ > RecordWriteMode::kValueIsPointer) {
__ JumpIfSmi(value_, exit());
}
__ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, eq,
exit());
RememberedSetAction const remembered_set_action =
mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
: OMIT_REMEMBERED_SET;
SaveFPRegsMode const save_fp_mode =
frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
if (must_save_lr_) {
// We need to save and restore lr if the frame was elided.
__ Push(lr);
unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
}
if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
__ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
} else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
__ CallRecordWriteStub(object_, offset_, remembered_set_action,
save_fp_mode, wasm::WasmCode::kRecordWrite);
} else {
__ CallRecordWriteStub(object_, offset_, remembered_set_action,
save_fp_mode);
}
if (must_save_lr_) {
__ Pop(lr);
unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
}
}
private:
Register const object_;
Operand const offset_;
Register const value_;
RecordWriteMode const mode_;
StubCallMode stub_mode_;
bool must_save_lr_;
UnwindingInfoWriter* const unwinding_info_writer_;
Zone* zone_;
};
template <typename T>
class OutOfLineFloatMin final : public OutOfLineCode {
public:
OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right)
: OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); }
private:
T const result_;
T const left_;
T const right_;
};
using OutOfLineFloat32Min = OutOfLineFloatMin<SwVfpRegister>;
using OutOfLineFloat64Min = OutOfLineFloatMin<DwVfpRegister>;
template <typename T>
class OutOfLineFloatMax final : public OutOfLineCode {
public:
OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right)
: OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); }
private:
T const result_;
T const left_;
T const right_;
};
using OutOfLineFloat32Max = OutOfLineFloatMax<SwVfpRegister>;
using OutOfLineFloat64Max = OutOfLineFloatMax<DwVfpRegister>;
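// FloatMin/FloatMax emit the fast path inline and branch to the out-of-line
// code above only for the hard cases (NaN inputs and +/-0.0, where IEEE
// min/max cannot be expressed with a single conditional move).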
Condition FlagsConditionToCondition(FlagsCondition condition) {
switch (condition) {
case kEqual:
return eq;
case kNotEqual:
return ne;
case kSignedLessThan:
return lt;
case kSignedGreaterThanOrEqual:
return ge;
case kSignedLessThanOrEqual:
return le;
case kSignedGreaterThan:
return gt;
case kUnsignedLessThan:
return lo;
case kUnsignedGreaterThanOrEqual:
return hs;
case kUnsignedLessThanOrEqual:
return ls;
case kUnsignedGreaterThan:
return hi;
case kFloatLessThanOrUnordered:
return lt;
case kFloatGreaterThanOrEqual:
return ge;
case kFloatLessThanOrEqual:
return ls;
case kFloatGreaterThanOrUnordered:
return hi;
case kFloatLessThan:
return lo;
case kFloatGreaterThanOrEqualOrUnordered:
return hs;
case kFloatLessThanOrEqualOrUnordered:
return le;
case kFloatGreaterThan:
return gt;
case kOverflow:
return vs;
case kNotOverflow:
return vc;
case kPositiveOrZero:
return pl;
case kNegative:
return mi;
default:
break;
}
UNREACHABLE();
}
void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
InstructionCode opcode,
ArmOperandConverter const& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
Register value = i.OutputRegister();
codegen->tasm()->and_(value, value, Operand(kSpeculationPoisonRegister));
}
}
void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
InstructionCode opcode,
ArmOperandConverter const& i,
Register address) {
DCHECK_EQ(kMemoryAccessPoisoned,
static_cast<MemoryAccessMode>(MiscField::decode(opcode)));
switch (AddressingModeField::decode(opcode)) {
case kMode_Offset_RI:
codegen->tasm()->mov(address, i.InputImmediate(1));
codegen->tasm()->add(address, address, i.InputRegister(0));
break;
case kMode_Offset_RR:
codegen->tasm()->add(address, i.InputRegister(0), i.InputRegister(1));
break;
default:
UNREACHABLE();
}
codegen->tasm()->and_(address, address, Operand(kSpeculationPoisonRegister));
}
} // namespace
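// The atomic macros below implement sequentially consistent accesses with
// dmb(ISH) barriers and, for read-modify-write operations, the classic ARM
// load-linked/store-conditional retry loop: ldrex loads exclusively, strex
// writes back and returns 0 on success, and teq/b(ne) retries until no other
// observer touched the location in between.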
#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \
do { \
__ asm_instr(i.OutputRegister(), \
MemOperand(i.InputRegister(0), i.InputRegister(1))); \
__ dmb(ISH); \
} while (0)
#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr) \
do { \
__ dmb(ISH); \
__ asm_instr(i.InputRegister(2), \
MemOperand(i.InputRegister(0), i.InputRegister(1))); \
__ dmb(ISH); \
} while (0)
#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr) \
do { \
Label exchange; \
__ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
__ dmb(ISH); \
__ bind(&exchange); \
__ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
__ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \
__ teq(i.TempRegister(0), Operand(0)); \
__ b(ne, &exchange); \
__ dmb(ISH); \
} while (0)
#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, \
cmp_reg) \
do { \
Label compareExchange; \
Label exit; \
__ dmb(ISH); \
__ bind(&compareExchange); \
__ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
__ teq(cmp_reg, Operand(i.OutputRegister(0))); \
__ b(ne, &exit); \
__ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \
__ teq(i.TempRegister(0), Operand(0)); \
__ b(ne, &compareExchange); \
__ bind(&exit); \
__ dmb(ISH); \
} while (0)
#define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr) \
do { \
Label binop; \
__ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
__ dmb(ISH); \
__ bind(&binop); \
__ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
__ bin_instr(i.TempRegister(0), i.OutputRegister(0), \
Operand(i.InputRegister(2))); \
__ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \
__ teq(i.TempRegister(2), Operand(0)); \
__ b(ne, &binop); \
__ dmb(ISH); \
} while (0)
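// Note: in A32, ldrexd/strexd require an even/odd consecutive register pair
// for the data, which is why the 64-bit variants below hard-code r2/r3.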
#define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2) \
do { \
Label binop; \
__ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
__ dmb(ISH); \
__ bind(&binop); \
__ ldrexd(r2, r3, i.TempRegister(0)); \
__ instr1(i.TempRegister(1), r2, i.InputRegister(0), SBit::SetCC); \
__ instr2(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \
DCHECK_EQ(LeaveCC, i.OutputSBit()); \
__ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
i.TempRegister(0)); \
__ teq(i.TempRegister(3), Operand(0)); \
__ b(ne, &binop); \
__ dmb(ISH); \
} while (0)
#define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr) \
do { \
Label binop; \
__ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
__ dmb(ISH); \
__ bind(&binop); \
__ ldrexd(r2, r3, i.TempRegister(0)); \
__ instr(i.TempRegister(1), r2, Operand(i.InputRegister(0))); \
__ instr(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \
__ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
i.TempRegister(0)); \
__ teq(i.TempRegister(3), Operand(0)); \
__ b(ne, &binop); \
__ dmb(ISH); \
} while (0)
#define ASSEMBLE_IEEE754_BINOP(name) \
do { \
/* TODO(bmeurer): We should really get rid of this special instruction, */ \
/* and generate a CallAddress instruction instead. */ \
FrameScope scope(tasm(), StackFrame::MANUAL); \
__ PrepareCallCFunction(0, 2); \
__ MovToFloatParameters(i.InputDoubleRegister(0), \
i.InputDoubleRegister(1)); \
__ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
/* Move the result into the double result register. */ \
__ MovFromFloatResult(i.OutputDoubleRegister()); \
DCHECK_EQ(LeaveCC, i.OutputSBit()); \
} while (0)
#define ASSEMBLE_IEEE754_UNOP(name) \
do { \
/* TODO(bmeurer): We should really get rid of this special instruction, */ \
/* and generate a CallAddress instruction instead. */ \
FrameScope scope(tasm(), StackFrame::MANUAL); \
__ PrepareCallCFunction(0, 1); \
__ MovToFloatParameter(i.InputDoubleRegister(0)); \
__ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
/* Move the result into the double result register. */ \
__ MovFromFloatResult(i.OutputDoubleRegister()); \
DCHECK_EQ(LeaveCC, i.OutputSBit()); \
} while (0)
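// In the narrowing macro below, vqmovn saturates and narrows each 128-bit
// source into a D register; the aliasing cases order the two narrowing moves
// so that neither source is clobbered before it has been read.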
#define ASSEMBLE_NEON_NARROWING_OP(dt, sdt) \
do { \
Simd128Register dst = i.OutputSimd128Register(), \
src0 = i.InputSimd128Register(0), \
src1 = i.InputSimd128Register(1); \
if (dst == src0 && dst == src1) { \
__ vqmovn(dt, sdt, dst.low(), src0); \
__ vmov(dst.high(), dst.low()); \
} else if (dst == src0) { \
__ vqmovn(dt, sdt, dst.low(), src0); \
__ vqmovn(dt, sdt, dst.high(), src1); \
} else { \
__ vqmovn(dt, sdt, dst.high(), src1); \
__ vqmovn(dt, sdt, dst.low(), src0); \
} \
} while (0)
#define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \
do { \
Simd128Register dst = i.OutputSimd128Register(), \
src0 = i.InputSimd128Register(0), \
src1 = i.InputSimd128Register(1); \
if (dst == src0) { \
__ op(size, dst.low(), src0.low(), src0.high()); \
if (dst == src1) { \
__ vmov(dst.high(), dst.low()); \
} else { \
__ op(size, dst.high(), src1.low(), src1.high()); \
} \
} else { \
__ op(size, dst.high(), src1.low(), src1.high()); \
__ op(size, dst.low(), src0.low(), src0.high()); \
} \
} while (0)
#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op) \
do { \
__ op(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low(), \
i.InputSimd128Register(1).low()); \
__ op(i.OutputSimd128Register().high(), i.InputSimd128Register(0).high(), \
i.InputSimd128Register(1).high()); \
} while (0)
// If the shift value is an immediate, we can call asm_imm, taking the shift
// value modulo 2^width. Otherwise, emit code to perform the modulus operation
// and call vshl.
#define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, sz, dt) \
do { \
QwNeonRegister dst = i.OutputSimd128Register(); \
QwNeonRegister src = i.InputSimd128Register(0); \
if (instr->InputAt(1)->IsImmediate()) { \
__ asm_imm(dt, dst, src, i.InputInt##width(1)); \
} else { \
QwNeonRegister tmp = i.TempSimd128Register(0); \
Register shift = i.TempRegister(1); \
constexpr int mask = (1 << width) - 1; \
__ and_(shift, i.InputRegister(1), Operand(mask)); \
__ vdup(sz, tmp, shift); \
__ vshl(dt, dst, src, tmp); \
} \
} while (0)
// If the shift value is an immediate, we can call asm_imm, taking the shift
// value modulo 2^width. Otherwise, emit code to perform the modulus operation
// and call vshl, passing in the negated shift value (treated as a right
// shift).
#define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, sz, dt) \
do { \
QwNeonRegister dst = i.OutputSimd128Register(); \
QwNeonRegister src = i.InputSimd128Register(0); \
if (instr->InputAt(1)->IsImmediate()) { \
__ asm_imm(dt, dst, src, i.InputInt##width(1)); \
} else { \
QwNeonRegister tmp = i.TempSimd128Register(0); \
Register shift = i.TempRegister(1); \
constexpr int mask = (1 << width) - 1; \
__ and_(shift, i.InputRegister(1), Operand(mask)); \
__ vdup(sz, tmp, shift); \
__ vneg(sz, tmp, tmp); \
__ vshl(dt, dst, src, tmp); \
} \
} while (0)
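// Illustrative example for the shift macros above: for 32-bit lanes they are
// instantiated with width == 5, so a variable shift amount of 33 is masked to
// 33 & 31 == 1 before being broadcast with vdup and applied with vshl (negated
// first for right shifts, since vshl treats a negative element count as a
// right shift).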
void CodeGenerator::AssembleDeconstructFrame() {
__ LeaveFrame(StackFrame::MANUAL);
unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
}
void CodeGenerator::AssemblePrepareTailCall() {
if (frame_access_state()->has_frame()) {
__ ldm(ia, fp, lr.bit() | fp.bit());
}
frame_access_state()->SetFrameAccessToSP();
}
void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
Register scratch1,
Register scratch2,
Register scratch3) {
DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
Label done;
// Check if the current frame is an arguments adaptor frame.
__ ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
__ cmp(scratch1,
Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
__ b(ne, &done);
// Load the arguments count from the current arguments adaptor frame (note
// that it does not include the receiver).
Register caller_args_count_reg = scratch1;
__ ldr(caller_args_count_reg,
MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
__ SmiUntag(caller_args_count_reg);
__ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
__ bind(&done);
}
namespace {
void FlushPendingPushRegisters(TurboAssembler* tasm,
FrameAccessState* frame_access_state,
ZoneVector<Register>* pending_pushes) {
switch (pending_pushes->size()) {
case 0:
break;
case 1:
tasm->push((*pending_pushes)[0]);
break;
case 2:
tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
break;
case 3:
tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
(*pending_pushes)[2]);
break;
default:
UNREACHABLE();
}
frame_access_state->IncreaseSPDelta(pending_pushes->size());
pending_pushes->clear();
}
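// In AdjustStackPointerForTailCall below, a positive stack_slot_delta means
// the stack must grow (sp moves down); a negative one means it may shrink.
// Pending pushes are flushed first so the adjustment does not reorder past
// them.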
void AdjustStackPointerForTailCall(
TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
ZoneVector<Register>* pending_pushes = nullptr,
bool allow_shrinkage = true) {
int current_sp_offset = state->GetSPToFPSlotCount() +
StandardFrameConstants::kFixedSlotCountAboveFp;
int stack_slot_delta = new_slot_above_sp - current_sp_offset;
if (stack_slot_delta > 0) {
if (pending_pushes != nullptr) {
FlushPendingPushRegisters(tasm, state, pending_pushes);
}
tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
state->IncreaseSPDelta(stack_slot_delta);
} else if (allow_shrinkage && stack_slot_delta < 0) {
if (pending_pushes != nullptr) {
FlushPendingPushRegisters(tasm, state, pending_pushes);
}
tasm->add(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
state->IncreaseSPDelta(stack_slot_delta);
}
}
#if DEBUG
bool VerifyOutputOfAtomicPairInstr(ArmOperandConverter* converter,
const Instruction* instr, Register low,
Register high) {
DCHECK_GE(instr->OutputCount() + instr->TempCount(), 2);
if (instr->OutputCount() == 2) {
return (converter->OutputRegister(0) == low &&
converter->OutputRegister(1) == high);
}
if (instr->OutputCount() == 1) {
return (converter->OutputRegister(0) == low &&
converter->TempRegister(instr->TempCount() - 1) == high) ||
(converter->OutputRegister(0) == high &&
converter->TempRegister(instr->TempCount() - 1) == low);
}
DCHECK_EQ(instr->OutputCount(), 0);
return (converter->TempRegister(instr->TempCount() - 2) == low &&
converter->TempRegister(instr->TempCount() - 1) == high);
}
#endif
} // namespace
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
int first_unused_stack_slot) {
ZoneVector<MoveOperands*> pushes(zone());
GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
if (!pushes.empty() &&
(LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
first_unused_stack_slot)) {
ArmOperandConverter g(this, instr);
ZoneVector<Register> pending_pushes(zone());
for (auto move : pushes) {
LocationOperand destination_location(
LocationOperand::cast(move->destination()));
InstructionOperand source(move->source());
AdjustStackPointerForTailCall(
tasm(), frame_access_state(),
destination_location.index() - pending_pushes.size(),
&pending_pushes);
// Pushes of non-register data types are not supported.
DCHECK(source.IsRegister());
LocationOperand source_location(LocationOperand::cast(source));
pending_pushes.push_back(source_location.GetRegister());
// TODO(arm): We can push more than 3 registers at once. Add support in
// the macro-assembler for pushing a list of registers.
if (pending_pushes.size() == 3) {
FlushPendingPushRegisters(tasm(), frame_access_state(),
&pending_pushes);
}
move->Eliminate();
}
FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
}
AdjustStackPointerForTailCall(tasm(), frame_access_state(),
first_unused_stack_slot, nullptr, false);
}
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
int first_unused_stack_slot) {
AdjustStackPointerForTailCall(tasm(), frame_access_state(),
first_unused_stack_slot);
}
// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
__ ComputeCodeStartAddress(scratch);
__ cmp(scratch, kJavaScriptCallCodeStartRegister);
__ Assert(eq, AbortReason::kWrongFunctionCodeStart);
}
// Check if the code object is marked for deoptimization. If it is, jump to
// the CompileLazyDeoptimizedCode builtin. To do this we need to:
// 1. read from memory the word that contains the mark bit, which can be
// found in the flags of the referenced {CodeDataContainer} object;
// 2. test kMarkedForDeoptimizationBit in those flags; and
// 3. if it is not zero, jump to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
__ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
__ ldr(scratch,
FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
__ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
__ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
RelocInfo::CODE_TARGET, ne);
}
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
// Set a mask which has all bits set in the normal case, but has all
// bits cleared if we are speculatively executing the wrong PC.
__ ComputeCodeStartAddress(scratch);
__ cmp(kJavaScriptCallCodeStartRegister, scratch);
__ mov(kSpeculationPoisonRegister, Operand(-1), SBit::LeaveCC, eq);
__ mov(kSpeculationPoisonRegister, Operand(0), SBit::LeaveCC, ne);
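// The csdb barrier keeps speculative execution from running ahead with an
// unresolved poison value (the Arm CSDB speculation barrier).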
__ csdb();
}
void CodeGenerator::AssembleRegisterArgumentPoisoning() {
__ and_(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
__ and_(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
__ and_(sp, sp, kSpeculationPoisonRegister);
}
// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Instruction* instr) {
ArmOperandConverter i(this, instr);
__ MaybeCheckConstPool();
InstructionCode opcode = instr->opcode();
ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
switch (arch_opcode) {
case kArchCallCodeObject: {
if (instr->InputAt(0)->IsImmediate()) {
__ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
} else {
Register reg = i.InputRegister(0);
DCHECK_IMPLIES(
instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
reg == kJavaScriptCallCodeStartRegister);
__ CallCodeObject(reg);
}
RecordCallPosition(instr);
DCHECK_EQ(LeaveCC, i.OutputSBit());
frame_access_state()->ClearSPDelta();
break;
}
case kArchCallBuiltinPointer: {
DCHECK(!instr->InputAt(0)->IsImmediate());
Register builtin_index = i.InputRegister(0);
__ CallBuiltinByIndex(builtin_index);
RecordCallPosition(instr);
frame_access_state()->ClearSPDelta();
break;
}
case kArchCallWasmFunction: {
if (instr->InputAt(0)->IsImmediate()) {
Constant constant = i.ToConstant(instr->InputAt(0));
Address wasm_code = static_cast<Address>(constant.ToInt32());
__ Call(wasm_code, constant.rmode());
} else {
__ Call(i.InputRegister(0));
}
RecordCallPosition(instr);
DCHECK_EQ(LeaveCC, i.OutputSBit());
frame_access_state()->ClearSPDelta();
break;
}
case kArchTailCallCodeObjectFromJSFunction:
case kArchTailCallCodeObject: {
if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
i.TempRegister(0), i.TempRegister(1),
i.TempRegister(2));
}
if (instr->InputAt(0)->IsImmediate()) {
__ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
} else {
Register reg = i.InputRegister(0);
DCHECK_IMPLIES(
instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
reg == kJavaScriptCallCodeStartRegister);
__ JumpCodeObject(reg);
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
unwinding_info_writer_.MarkBlockWillExit();
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchTailCallWasm: {
if (instr->InputAt(0)->IsImmediate()) {
Constant constant = i.ToConstant(instr->InputAt(0));
Address wasm_code = static_cast<Address>(constant.ToInt32());
__ Jump(wasm_code, constant.rmode());
} else {
__ Jump(i.InputRegister(0));
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
unwinding_info_writer_.MarkBlockWillExit();
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchTailCallAddress: {
CHECK(!instr->InputAt(0)->IsImmediate());
Register reg = i.InputRegister(0);
DCHECK_IMPLIES(
instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
reg == kJavaScriptCallCodeStartRegister);
__ Jump(reg);
unwinding_info_writer_.MarkBlockWillExit();
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchCallJSFunction: {
Register func = i.InputRegister(0);
if (FLAG_debug_code) {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
// Check the function's context matches the context argument.
__ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
__ cmp(cp, scratch);
__ Assert(eq, AbortReason::kWrongFunctionContext);
}
static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch");
__ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset));
__ CallCodeObject(r2);
RecordCallPosition(instr);
DCHECK_EQ(LeaveCC, i.OutputSBit());
frame_access_state()->ClearSPDelta();
break;
}
case kArchPrepareCallCFunction: {
int const num_parameters = MiscField::decode(instr->opcode());
__ PrepareCallCFunction(num_parameters);
// Frame alignment requires using FP-relative frame addressing.
frame_access_state()->SetFrameAccessToFP();
break;
}
case kArchSaveCallerRegisters: {
fp_mode_ =
static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
// kReturnRegister0 should have been saved before entering the stub.
int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
DCHECK(IsAligned(bytes, kSystemPointerSize));
DCHECK_EQ(0, frame_access_state()->sp_delta());
frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
DCHECK(!caller_registers_saved_);
caller_registers_saved_ = true;
break;
}
case kArchRestoreCallerRegisters: {
DCHECK(fp_mode_ ==
static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
// Don't overwrite the returned value.
int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
DCHECK_EQ(0, frame_access_state()->sp_delta());
DCHECK(caller_registers_saved_);
caller_registers_saved_ = false;
break;
}
case kArchPrepareTailCall:
AssemblePrepareTailCall();
break;
case kArchCallCFunction: {
int const num_parameters = MiscField::decode(instr->opcode());
if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
// Put the return address in a stack slot.
__ str(pc, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
}
if (instr->InputAt(0)->IsImmediate()) {
ExternalReference ref = i.InputExternalReference(0);
__ CallCFunction(ref, num_parameters);
} else {
Register func = i.InputRegister(0);
__ CallCFunction(func, num_parameters);
}
if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
}
frame_access_state()->SetFrameAccessToDefault();
// Ideally, we should decrement the SP delta to match the change of the
// stack pointer in CallCFunction. However, on certain architectures (e.g.
// ARM) stricter alignment requirements may cause the old SP to be saved
// on the stack, in which case the SP delta cannot be computed statically.
frame_access_state()->ClearSPDelta();
if (caller_registers_saved_) {
// Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
// Here, we assume the sequence to be:
// kArchSaveCallerRegisters;
// kArchCallCFunction;
// kArchRestoreCallerRegisters;
int bytes =
__ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
}
break;
}
case kArchJmp:
AssembleArchJump(i.InputRpo(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArchBinarySearchSwitch:
AssembleArchBinarySearchSwitch(instr);
break;
case kArchTableSwitch:
AssembleArchTableSwitch(instr);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArchAbortCSAAssert:
DCHECK(i.InputRegister(0) == r1);
{
// We don't actually want to generate a pile of code for this, so just
// claim there is a stack frame, without generating one.
FrameScope scope(tasm(), StackFrame::NONE);
__ Call(
isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
RelocInfo::CODE_TARGET);
}
__ stop();
unwinding_info_writer_.MarkBlockWillExit();
break;
case kArchDebugBreak:
__ DebugBreak();
break;
case kArchComment:
__ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
break;
case kArchThrowTerminator:
DCHECK_EQ(LeaveCC, i.OutputSBit());
unwinding_info_writer_.MarkBlockWillExit();
break;
case kArchNop:
// Don't emit code for nops.
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArchDeoptimize: {
DeoptimizationExit* exit =
BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
__ b(exit->label());
break;
}
case kArchRet:
AssembleReturn(instr->InputAt(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArchFramePointer:
__ mov(i.OutputRegister(), fp);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArchParentFramePointer:
if (frame_access_state()->has_frame()) {
__ ldr(i.OutputRegister(), MemOperand(fp, 0));
} else {
__ mov(i.OutputRegister(), fp);
}
break;
case kArchStackPointerGreaterThan: {
// Potentially apply an offset to the current stack pointer before the
// comparison, to account for the size difference between an optimized frame
// and the unoptimized frames it contains.
Register lhs_register = sp;
uint32_t offset;
if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
lhs_register = i.TempRegister(0);
__ sub(lhs_register, sp, Operand(offset));
}
constexpr size_t kValueIndex = 0;
DCHECK(instr->InputAt(kValueIndex)->IsRegister());
__ cmp(lhs_register, i.InputRegister(kValueIndex));
break;
}
case kArchStackCheckOffset:
__ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
break;
case kArchTruncateDoubleToI:
__ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
i.InputDoubleRegister(0), DetermineStubCallMode());
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArchStoreWithWriteBarrier: {
RecordWriteMode mode =
static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
Register object = i.InputRegister(0);
Register value = i.InputRegister(2);
AddressingMode addressing_mode =
AddressingModeField::decode(instr->opcode());
Operand offset(0);
if (addressing_mode == kMode_Offset_RI) {
int32_t immediate = i.InputInt32(1);
offset = Operand(immediate);
__ str(value, MemOperand(object, immediate));
} else {
DCHECK_EQ(kMode_Offset_RR, addressing_mode);
Register reg = i.InputRegister(1);
offset = Operand(reg);
__ str(value, MemOperand(object, reg));
}
auto ool = zone()->New<OutOfLineRecordWrite>(
this, object, offset, value, mode, DetermineStubCallMode(),
&unwinding_info_writer_);
__ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask,
ne, ool->entry());
__ bind(ool->exit());
break;
}
case kArchStackSlot: {
FrameOffset offset =
frame_access_state()->GetFrameOffset(i.InputInt32(0));
Register base = offset.from_stack_pointer() ? sp : fp;
__ add(i.OutputRegister(0), base, Operand(offset.offset()));
break;
}
case kIeee754Float64Acos:
ASSEMBLE_IEEE754_UNOP(acos);
break;
case kIeee754Float64Acosh:
ASSEMBLE_IEEE754_UNOP(acosh);
break;
case kIeee754Float64Asin:
ASSEMBLE_IEEE754_UNOP(asin);
break;
case kIeee754Float64Asinh:
ASSEMBLE_IEEE754_UNOP(asinh);
break;
case kIeee754Float64Atan:
ASSEMBLE_IEEE754_UNOP(atan);
break;
case kIeee754Float64Atanh:
ASSEMBLE_IEEE754_UNOP(atanh);
break;
case kIeee754Float64Atan2:
ASSEMBLE_IEEE754_BINOP(atan2);
break;
case kIeee754Float64Cbrt:
ASSEMBLE_IEEE754_UNOP(cbrt);
break;
case kIeee754Float64Cos:
ASSEMBLE_IEEE754_UNOP(cos);
break;
case kIeee754Float64Cosh:
ASSEMBLE_IEEE754_UNOP(cosh);
break;
case kIeee754Float64Exp:
ASSEMBLE_IEEE754_UNOP(exp);
break;
case kIeee754Float64Expm1:
ASSEMBLE_IEEE754_UNOP(expm1);
break;
case kIeee754Float64Log:
ASSEMBLE_IEEE754_UNOP(log);
break;
case kIeee754Float64Log1p:
ASSEMBLE_IEEE754_UNOP(log1p);
break;
case kIeee754Float64Log2:
ASSEMBLE_IEEE754_UNOP(log2);
break;
case kIeee754Float64Log10:
ASSEMBLE_IEEE754_UNOP(log10);
break;
case kIeee754Float64Pow:
ASSEMBLE_IEEE754_BINOP(pow);
break;
case kIeee754Float64Sin:
ASSEMBLE_IEEE754_UNOP(sin);
break;
case kIeee754Float64Sinh:
ASSEMBLE_IEEE754_UNOP(sinh);
break;
case kIeee754Float64Tan:
ASSEMBLE_IEEE754_UNOP(tan);
break;
case kIeee754Float64Tanh:
ASSEMBLE_IEEE754_UNOP(tanh);
break;
case kArmAdd:
__ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
i.OutputSBit());
break;
case kArmAnd:
__ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
i.OutputSBit());
break;
case kArmBic:
__ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
i.OutputSBit());
break;
case kArmMul:
__ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.OutputSBit());
break;
case kArmMla:
__ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.InputRegister(2), i.OutputSBit());
break;
case kArmMls: {
CpuFeatureScope scope(tasm(), ARMv7);
__ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.InputRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmSmull:
__ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
i.InputRegister(1));
break;
case kArmSmmul:
__ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmSmmla:
__ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.InputRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmUmull:
__ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
i.InputRegister(1), i.OutputSBit());
break;
case kArmSdiv: {
CpuFeatureScope scope(tasm(), SUDIV);
__ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmUdiv: {
CpuFeatureScope scope(tasm(), SUDIV);
__ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmMov:
__ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
break;
case kArmMvn:
__ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
break;
case kArmOrr:
__ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
i.OutputSBit());
break;
case kArmEor:
__ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
i.OutputSBit());
break;
case kArmSub:
__ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
i.OutputSBit());
break;
case kArmRsb:
__ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
i.OutputSBit());
break;
case kArmBfc: {
CpuFeatureScope scope(tasm(), ARMv7);
__ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmUbfx: {
CpuFeatureScope scope(tasm(), ARMv7);
__ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
i.InputInt8(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmSbfx: {
CpuFeatureScope scope(tasm(), ARMv7);
__ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
i.InputInt8(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmSxtb:
__ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmSxth:
__ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmSxtab:
__ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.InputInt32(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmSxtah:
__ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.InputInt32(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmUxtb:
__ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmUxth:
__ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmUxtab:
__ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.InputInt32(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmUxtah:
__ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.InputInt32(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmRbit: {
CpuFeatureScope scope(tasm(), ARMv7);
__ rbit(i.OutputRegister(), i.InputRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmRev:
__ rev(i.OutputRegister(), i.InputRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmClz:
__ clz(i.OutputRegister(), i.InputRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmCmp:
__ cmp(i.InputRegister(0), i.InputOperand2(1));
DCHECK_EQ(SetCC, i.OutputSBit());
break;
case kArmCmn:
__ cmn(i.InputRegister(0), i.InputOperand2(1));
DCHECK_EQ(SetCC, i.OutputSBit());
break;
case kArmTst:
__ tst(i.InputRegister(0), i.InputOperand2(1));
DCHECK_EQ(SetCC, i.OutputSBit());
break;
case kArmTeq:
__ teq(i.InputRegister(0), i.InputOperand2(1));
DCHECK_EQ(SetCC, i.OutputSBit());
break;
case kArmAddPair:
// i.InputRegister(0) ... left low word.
// i.InputRegister(1) ... left high word.
// i.InputRegister(2) ... right low word.
// i.InputRegister(3) ... right high word.
__ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
SBit::SetCC);
__ adc(i.OutputRegister(1), i.InputRegister(1),
Operand(i.InputRegister(3)));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmSubPair:
// i.InputRegister(0) ... left low word.
// i.InputRegister(1) ... left high word.
// i.InputRegister(2) ... right low word.
// i.InputRegister(3) ... right high word.
__ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
SBit::SetCC);
__ sbc(i.OutputRegister(1), i.InputRegister(1),
Operand(i.InputRegister(3)));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmMulPair:
// i.InputRegister(0) ... left low word.
// i.InputRegister(1) ... left high word.
// i.InputRegister(2) ... right low word.
// i.InputRegister(3) ... right high word.
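// 64-bit product modulo 2^64: umull forms the full low*low product, and the
// two mla instructions fold the low*high cross terms into the high word (the
// high*high term lies entirely above bit 63 and is dropped).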
__ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
i.InputRegister(2));
__ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
i.OutputRegister(1));
__ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
i.OutputRegister(1));
break;
case kArmLslPair: {
Register second_output =
instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
if (instr->InputAt(2)->IsImmediate()) {
__ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
i.InputRegister(1), i.InputInt32(2));
} else {
__ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
i.InputRegister(1), i.InputRegister(2));
}
break;
}
case kArmLsrPair: {
Register second_output =
instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
if (instr->InputAt(2)->IsImmediate()) {
__ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
i.InputRegister(1), i.InputInt32(2));
} else {
__ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
i.InputRegister(1), i.InputRegister(2));
}
break;
}
case kArmAsrPair: {
Register second_output =
instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
if (instr->InputAt(2)->IsImmediate()) {
__ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
i.InputRegister(1), i.InputInt32(2));
} else {
__ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
i.InputRegister(1), i.InputRegister(2));
}
break;
}
case kArmVcmpF32:
if (instr->InputAt(1)->IsFPRegister()) {
__ VFPCompareAndSetFlags(i.InputFloatRegister(0),
i.InputFloatRegister(1));
} else {
DCHECK(instr->InputAt(1)->IsImmediate());
// 0.0 is the only immediate supported by vcmp instructions.
DCHECK_EQ(0.0f, i.InputFloat32(1));
__ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
}
DCHECK_EQ(SetCC, i.OutputSBit());
break;
case kArmVaddF32:
__ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVsubF32:
__ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmulF32:
__ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmlaF32:
__ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
i.InputFloatRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmlsF32:
__ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
i.InputFloatRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVdivF32:
__ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVsqrtF32:
__ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVabsF32:
__ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVnegF32:
__ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVcmpF64:
if (instr->InputAt(1)->IsFPRegister()) {
__ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
} else {
DCHECK(instr->InputAt(1)->IsImmediate());
// 0.0 is the only immediate supported by vcmp instructions.
DCHECK_EQ(0.0, i.InputDouble(1));
__ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
}
DCHECK_EQ(SetCC, i.OutputSBit());
break;
case kArmVaddF64:
__ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVsubF64:
__ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmulF64:
__ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmlaF64:
__ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
i.InputDoubleRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmlsF64:
__ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
i.InputDoubleRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVdivF64:
__ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmodF64: {
// TODO(bmeurer): We should really get rid of this special instruction,
// and generate a CallAddress instruction instead.
FrameScope scope(tasm(), StackFrame::MANUAL);
__ PrepareCallCFunction(0, 2);
__ MovToFloatParameters(i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
__ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
// Move the result into the double result register.
__ MovFromFloatResult(i.OutputDoubleRegister());
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVsqrtF64:
__ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
case kArmVabsF64:
__ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
case kArmVnegF64:
__ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
case kArmVrintmF32: {
CpuFeatureScope scope(tasm(), ARMv8);
if (instr->InputAt(0)->IsSimd128Register()) {
__ vrintm(NeonS32, i.OutputSimd128Register(),
i.InputSimd128Register(0));
} else {
__ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
}
break;
}
case kArmVrintmF64: {
CpuFeatureScope scope(tasm(), ARMv8);
__ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kArmVrintpF32: {
CpuFeatureScope scope(tasm(), ARMv8);
if (instr->InputAt(0)->IsSimd128Register()) {
__ vrintp(NeonS32, i.OutputSimd128Register(),
i.InputSimd128Register(0));
} else {
__ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
}
break;
}
case kArmVrintpF64: {
CpuFeatureScope scope(tasm(), ARMv8);
__ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kArmVrintzF32: {
CpuFeatureScope scope(tasm(), ARMv8);
if (instr->InputAt(0)->IsSimd128Register()) {
__ vrintz(NeonS32, i.OutputSimd128Register(),
i.InputSimd128Register(0));
} else {
__ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
}
break;
}
case kArmVrintzF64: {
CpuFeatureScope scope(tasm(), ARMv8);
__ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kArmVrintaF64: {
CpuFeatureScope scope(tasm(), ARMv8);
__ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kArmVrintnF32: {
CpuFeatureScope scope(tasm(), ARMv8);
if (instr->InputAt(0)->IsSimd128Register()) {
__ vrintn(NeonS32, i.OutputSimd128Register(),
i.InputSimd128Register(0));
} else {
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
}
break;
}
case kArmVrintnF64: {
CpuFeatureScope scope(tasm(), ARMv8);
__ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kArmVcvtF32F64: {
__ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF64F32: {
__ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF32S32: {
UseScratchRegisterScope temps(tasm());
SwVfpRegister scratch = temps.AcquireS();
__ vmov(scratch, i.InputRegister(0));
__ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF32U32: {
UseScratchRegisterScope temps(tasm());
SwVfpRegister scratch = temps.AcquireS();
__ vmov(scratch, i.InputRegister(0));
__ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF64S32: {
UseScratchRegisterScope temps(tasm());
SwVfpRegister scratch = temps.AcquireS();
__ vmov(scratch, i.InputRegister(0));
__ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF64U32: {
UseScratchRegisterScope temps(tasm());
SwVfpRegister scratch = temps.AcquireS();
__ vmov(scratch, i.InputRegister(0));
__ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtS32F32: {
UseScratchRegisterScope temps(tasm());
SwVfpRegister scratch = temps.AcquireS();
__ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
__ vmov(i.OutputRegister(), scratch);
bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
if (set_overflow_to_min_i32) {
// Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
// because INT32_MIN allows easier out-of-bounds detection.
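// cmn r, #1 computes r + 1 and sets the V flag exactly when r holds
// INT32_MAX (the VFP saturation value), so the conditional mov rewrites
// only that case to INT32_MIN.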
__ cmn(i.OutputRegister(), Operand(1));
__ mov(i.OutputRegister(), Operand(INT32_MIN), SBit::LeaveCC, vs);
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtU32F32: {
UseScratchRegisterScope temps(tasm());
SwVfpRegister scratch = temps.AcquireS();
__ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
__ vmov(i.OutputRegister(), scratch);
bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
if (set_overflow_to_min_u32) {
// Avoid UINT32_MAX as an overflow indicator and use 0 instead,
// because 0 allows easier out-of-bounds detection.
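// cmn r, #1 sets the carry flag only when r + 1 wraps, i.e. when r holds
// UINT32_MAX (the saturation value); adc then wraps exactly that case to 0.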
__ cmn(i.OutputRegister(), Operand(1));
__ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtS32F64: {
UseScratchRegisterScope temps(tasm());
SwVfpRegister scratch = temps.AcquireS();
__ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
__ vmov(i.OutputRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtU32F64: {
UseScratchRegisterScope temps(tasm());
SwVfpRegister scratch = temps.AcquireS();
__ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
__ vmov(i.OutputRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVmovU32F32:
__ vmov(i.OutputRegister(), i.InputFloatRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovF32U32:
__ vmov(i.OutputFloatRegister(), i.InputRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovLowU32F64:
__ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovLowF64U32:
__ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovHighU32F64:
__ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovHighF64U32:
__ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovF64U32U32:
__ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovU32U32F64:
__ vmov(i.OutputRegister(0), i.OutputRegister(1),
i.InputDoubleRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmLdrb:
__ ldrb(i.OutputRegister(), i.InputOffset());
DCHECK_EQ(LeaveCC, i.OutputSBit());
EmitWordLoadPoisoningIfNeeded(this, opcode, i);
break;
case kArmLdrsb:
__ ldrsb(i.OutputRegister(), i.InputOffset());
DCHECK_EQ(LeaveCC, i.OutputSBit());
EmitWordLoadPoisoningIfNeeded(this, opcode, i);
break;
case kArmStrb:
__ strb(i.InputRegister(0), i.InputOffset(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmLdrh:
__ ldrh(i.OutputRegister(), i.InputOffset());
EmitWordLoadPoisoningIfNeeded(this, opcode, i);
break;
case kArmLdrsh:
__ ldrsh(i.OutputRegister(), i.InputOffset());
EmitWordLoadPoisoningIfNeeded(this, opcode, i);
break;
case kArmStrh:
__ strh(i.InputRegister(0), i.InputOffset(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmLdr:
__ ldr(i.OutputRegister(), i.InputOffset());
EmitWordLoadPoisoningIfNeeded(this, opcode, i);
break;
case kArmStr:
__ str(i.InputRegister(0), i.InputOffset(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVldrF32: {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
UseScratchRegisterScope temps(tasm());
Register address = temps.Acquire();
ComputePoisonedAddressForLoad(this, opcode, i, address);
__ vldr(i.OutputFloatRegister(), address, 0);
} else {
__ vldr(i.OutputFloatRegister(), i.InputOffset());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVstrF32:
__ vstr(i.InputFloatRegister(0), i.InputOffset(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVld1F64: {
__ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
i.NeonInputOperand(0));
break;
}
case kArmVst1F64: {
__ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)),
i.NeonInputOperand(1));
break;
}
case kArmVld1S128: {
__ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
i.NeonInputOperand(0));
break;
}
case kArmVst1S128: {
__ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
i.NeonInputOperand(1));
break;
}
case kArmVldrF64: {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
UseScratchRegisterScope temps(tasm());
Register address = temps.Acquire();
ComputePoisonedAddressForLoad(this, opcode, i, address);
__ vldr(i.OutputDoubleRegister(), address, 0);
} else {
__ vldr(i.OutputDoubleRegister(), i.InputOffset());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVstrF64:
__ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmFloat32Max: {
SwVfpRegister result = i.OutputFloatRegister();
SwVfpRegister left = i.InputFloatRegister(0);
SwVfpRegister right = i.InputFloatRegister(1);
if (left == right) {
__ Move(result, left);
} else {
auto ool = zone()->New<OutOfLineFloat32Max>(this, result, left, right);
__ FloatMax(result, left, right, ool->entry());
__ bind(ool->exit());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmFloat64Max: {
DwVfpRegister result = i.OutputDoubleRegister();
DwVfpRegister left = i.InputDoubleRegister(0);
DwVfpRegister right = i.InputDoubleRegister(1);
if (left == right) {
__ Move(result, left);
} else {
auto ool = zone()->New<OutOfLineFloat64Max>(this, result, left, right);
__ FloatMax(result, left, right, ool->entry());
__ bind(ool->exit());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmFloat32Min: {
SwVfpRegister result = i.OutputFloatRegister();
SwVfpRegister left = i.InputFloatRegister(0);
SwVfpRegister right = i.InputFloatRegister(1);
if (left == right) {
__ Move(result, left);
} else {
auto ool = zone()->New<OutOfLineFloat32Min>(this, result, left, right);
__ FloatMin(result, left, right, ool->entry());
__ bind(ool->exit());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmFloat64Min: {
DwVfpRegister result = i.OutputDoubleRegister();
DwVfpRegister left = i.InputDoubleRegister(0);
DwVfpRegister right = i.InputDoubleRegister(1);
if (left == right) {
__ Move(result, left);
} else {
auto ool = zone()->New<OutOfLineFloat64Min>(this, result, left, right);
__ FloatMin(result, left, right, ool->entry());
__ bind(ool->exit());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmFloat64SilenceNaN: {
DwVfpRegister value = i.InputDoubleRegister(0);
DwVfpRegister result = i.OutputDoubleRegister();
__ VFPCanonicalizeNaN(result, value);
break;
}
case kArmPush:
if (instr->InputAt(0)->IsFPRegister()) {
LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
switch (op->representation()) {
case MachineRepresentation::kFloat32:
__ vpush(i.InputFloatRegister(0));
frame_access_state()->IncreaseSPDelta(1);
break;
case MachineRepresentation::kFloat64:
__ vpush(i.InputDoubleRegister(0));
frame_access_state()->IncreaseSPDelta(kDoubleSize /
kSystemPointerSize);
break;
case MachineRepresentation::kSimd128: {
__ vpush(i.InputSimd128Register(0));
frame_access_state()->IncreaseSPDelta(kSimd128Size /
kSystemPointerSize);
break;
}
default:
UNREACHABLE();
break;
}
} else {
__ push(i.InputRegister(0));
frame_access_state()->IncreaseSPDelta(1);
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmPoke: {
int const slot = MiscField::decode(instr->opcode());
__ str(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmPeek: {
int reverse_slot = i.InputInt32(0);
int offset =
FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
if (instr->OutputAt(0)->IsFPRegister()) {
LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
if (op->representation() == MachineRepresentation::kFloat64) {
__ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
} else if (op->representation() == MachineRepresentation::kFloat32) {
__ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
__ add(scratch, fp, Operand(offset));
__ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
NeonMemOperand(scratch));
}
} else {
__ ldr(i.OutputRegister(), MemOperand(fp, offset));
}
break;
}
case kArmDmbIsh: {
__ dmb(ISH);
break;
}
case kArmDsbIsb: {
__ dsb(SY);
__ isb(SY);
break;
}
case kArchWordPoisonOnSpeculation:
__ and_(i.OutputRegister(0), i.InputRegister(0),
Operand(kSpeculationPoisonRegister));
break;
case kArmF64x2Splat: {
Simd128Register dst = i.OutputSimd128Register();
DoubleRegister src = i.InputDoubleRegister(0);
__ Move(dst.low(), src);
__ Move(dst.high(), src);
break;
}
case kArmF64x2ExtractLane: {
__ ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
i.InputInt8(1));
break;
}
case kArmF64x2ReplaceLane: {
__ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputDoubleRegister(2), i.InputInt8(1));
break;
}
case kArmF64x2Abs: {
__ vabs(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
__ vabs(i.OutputSimd128Register().high(),
i.InputSimd128Register(0).high());
break;
}
case kArmF64x2Neg: {
__ vneg(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
__ vneg(i.OutputSimd128Register().high(),
i.InputSimd128Register(0).high());
break;
}
case kArmF64x2Sqrt: {
__ vsqrt(i.OutputSimd128Register().low(),
i.InputSimd128Register(0).low());
__ vsqrt(i.OutputSimd128Register().high(),
i.InputSimd128Register(0).high());
break;
}
case kArmF64x2Add: {
ASSEMBLE_F64X2_ARITHMETIC_BINOP(vadd);
break;
}
case kArmF64x2Sub: {
ASSEMBLE_F64X2_ARITHMETIC_BINOP(vsub);
break;
}
case kArmF64x2Mul: {
ASSEMBLE_F64X2_ARITHMETIC_BINOP(vmul);
break;
}
case kArmF64x2Div: {
ASSEMBLE_F64X2_ARITHMETIC_BINOP(vdiv);
break;
}
case kArmF64x2Min: {
Simd128Register result = i.OutputSimd128Register();
Simd128Register left = i.InputSimd128Register(0);
Simd128Register right = i.InputSimd128Register(1);
if (left == right) {
__ Move(result, left);
} else {
auto ool_low = zone()->New<OutOfLineFloat64Min>(
this, result.low(), left.low(), right.low());
auto ool_high = zone()->New<OutOfLineFloat64Min>(
this, result.high(), left.high(), right.high());
__ FloatMin(result.low(), left.low(), right.low(), ool_low->entry());
__ bind(ool_low->exit());
__ FloatMin(result.high(), left.high(), right.high(),
ool_high->entry());
__ bind(ool_high->exit());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmF64x2Max: {
Simd128Register result = i.OutputSimd128Register();
Simd128Register left = i.InputSimd128Register(0);
Simd128Register right = i.InputSimd128Register(1);
if (left == right) {
__ Move(result, left);
} else {
auto ool_low = zone()->New<OutOfLineFloat64Max>(
this, result.low(), left.low(), right.low());
auto ool_high = zone()->New<OutOfLineFloat64Max>(
this, result.high(), left.high(), right.high());
__ FloatMax(result.low(), left.low(), right.low(), ool_low->entry());
__ bind(ool_low->exit());
__ FloatMax(result.high(), left.high(), right.high(),
ool_high->entry());
__ bind(ool_high->exit());
}
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
#undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
case kArmF64x2Eq: {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
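// Compare the two input double lanes one at a time and materialize an
// all-ones or all-zero 64-bit lane mask by writing the flag-dependent
// scratch word into both halves of the corresponding output lane.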
__ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
i.InputSimd128Register(1).low());
__ mov(scratch, Operand(-1), LeaveCC, eq);
__ vmov(i.OutputSimd128Register().low(), scratch, scratch);
__ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
i.InputSimd128Register(1).high());
__ mov(scratch, Operand(-1), LeaveCC, eq);
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
case kArmF64x2Ne: {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
__ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
i.InputSimd128Register(1).low());
__ mov(scratch, Operand(-1), LeaveCC, ne);
__ vmov(i.OutputSimd128Register().low(), scratch, scratch);
__ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
i.InputSimd128Register(1).high());
__ mov(scratch, Operand(-1), LeaveCC, ne);
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
case kArmF64x2Lt: {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
i.InputSimd128Register(1).low());
__ mov(scratch, Operand(0), LeaveCC, cs);
__ mov(scratch, Operand(-1), LeaveCC, mi);
__ vmov(i.OutputSimd128Register().low(), scratch, scratch);
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
i.InputSimd128Register(1).high());
__ mov(scratch, Operand(0), LeaveCC, cs);
__ mov(scratch, Operand(-1), LeaveCC, mi);
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
case kArmF64x2Le: {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
i.InputSimd128Register(1).low());
__ mov(scratch, Operand(0), LeaveCC, hi);
__ mov(scratch, Operand(-1), LeaveCC, ls);
__ vmov(i.OutputSimd128Register().low(), scratch, scratch);
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
i.InputSimd128Register(1).high());
__ mov(scratch, Operand(0), LeaveCC, hi);
__ mov(scratch, Operand(-1), LeaveCC, ls);
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
case kArmF64x2Pmin: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
DCHECK_EQ(dst, lhs);
// Move rhs into dst only when rhs is strictly less than lhs (mi).
__ VFPCompareAndSetFlags(rhs.low(), lhs.low());
__ vmov(dst.low(), rhs.low(), mi);
__ VFPCompareAndSetFlags(rhs.high(), lhs.high());
__ vmov(dst.high(), rhs.high(), mi);
break;
}
case kArmF64x2Pmax: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
DCHECK_EQ(dst, lhs);
// Move rhs into dst only when rhs is strictly greater than lhs (gt).
__ VFPCompareAndSetFlags(rhs.low(), lhs.low());
__ vmov(dst.low(), rhs.low(), gt);
__ VFPCompareAndSetFlags(rhs.high(), lhs.high());
__ vmov(dst.high(), rhs.high(), gt);
break;
}
case kArmF64x2Ceil: {
CpuFeatureScope scope(tasm(), ARMv8);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register src = i.InputSimd128Register(0);
__ vrintp(dst.low(), src.low());
__ vrintp(dst.high(), src.high());
break;
}
case kArmF64x2Floor: {
CpuFeatureScope scope(tasm(), ARMv8);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register src = i.InputSimd128Register(0);
__ vrintm(dst.low(), src.low());
__ vrintm(dst.high(), src.high());
break;
}
case kArmF64x2Trunc: {
CpuFeatureScope scope(tasm(), ARMv8);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register src = i.InputSimd128Register(0);
__ vrintz(dst.low(), src.low());
__ vrintz(dst.high(), src.high());
break;
}
case kArmF64x2NearestInt: {
CpuFeatureScope scope(tasm(), ARMv8);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register src = i.InputSimd128Register(0);
__ vrintn(dst.low(), src.low());
__ vrintn(dst.high(), src.high());
break;
}
case kArmI64x2SplatI32Pair: {
Simd128Register dst = i.OutputSimd128Register();
__ vdup(Neon32, dst, i.InputRegister(0));
__ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 1);
__ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 3);
break;
}
case kArmI64x2ReplaceLaneI32Pair: {
Simd128Register dst = i.OutputSimd128Register();
int8_t lane = i.InputInt8(1);
__ ReplaceLane(dst, dst, i.InputRegister(2), NeonS32, lane * 2);
__ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
break;
}
case kArmI64x2Add: {
__ vadd(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI64x2Sub: {
__ vsub(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI64x2Mul: {
QwNeonRegister dst = i.OutputSimd128Register();
QwNeonRegister left = i.InputSimd128Register(0);
QwNeonRegister right = i.InputSimd128Register(1);
QwNeonRegister tmp1 = i.TempSimd128Register(0);
QwNeonRegister tmp2 = i.TempSimd128Register(1);
// This algorithm uses vector operations to perform 64-bit integer
// multiplication by splitting each operand into high and low 32-bit
// halves. The tricky part is getting the halves into the right places
// inside the NEON registers, so that we need as few vmull and vmlal
// instructions as possible.
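// Per 64-bit lane, writing A = a1:a0 and B = b1:b0 (high:low 32-bit
// halves), the product modulo 2**64 is
//   A * B = ((a1 * b0 + a0 * b1) << 32) + a0 * b0,
// which is exactly what the vmull/vmlal/vshl sequence below computes.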
// Move left and right into temporaries; they will be modified by vtrn.
__ vmov(tmp1, left);
__ vmov(tmp2, right);
// This diagram shows how the 64-bit integers fit into NEON registers.
//
// [q.high()| q.low()]
// left/tmp1: [ a3, a2 | a1, a0 ]
// right/tmp2: [ b3, b2 | b1, b0 ]
//
// We want to multiply the low 32 bits of left with the high 32 bits of
// right, for each lane, i.e. a2 * b3, a0 * b1. However, vmull takes two
// input d registers and multiplies the corresponding 32-bit lanes, giving
// 64-bit products such as a1 * b1, a0 * b0. To make this work we transpose
// the vectors, so that the low 32 bits of each 64-bit integer end up in
// the same d register, and similarly for the high 32 bits.
__ vtrn(Neon32, tmp1.low(), tmp1.high());
// tmp1: [ a3, a1 | a2, a0 ]
__ vtrn(Neon32, tmp2.low(), tmp2.high());
// tmp2: [ b3, b1 | b2, b0 ]
__ vmull(NeonU32, dst, tmp1.low(), tmp2.high());
// dst: [ a2*b3 | a0*b1 ]
__ vmlal(NeonU32, dst, tmp1.high(), tmp2.low());
// dst: [ a2*b3 + a3*b2 | a0*b1 + a1*b0 ]
__ vshl(NeonU64, dst, dst, 32);
// dst: [ (a2*b3 + a3*b2) << 32 | (a0*b1 + a1*b0) << 32 ]
__ vmlal(NeonU32, dst, tmp1.low(), tmp2.low());
// dst: [ (a2*b3 + a3*b2)<<32 + (a2*b2) | (a0*b1 + a1*b0)<<32 + (a0*b0) ]
break;
}
case kArmI64x2Neg: {
Simd128Register dst = i.OutputSimd128Register();
__ vmov(dst, uint64_t{0});
__ vsub(Neon64, dst, dst, i.InputSimd128Register(0));
break;
}
case kArmI64x2Shl: {
ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 6, Neon32, NeonS64);
break;
}
case kArmI64x2ShrS: {
// Only the least significant byte of each lane is used, so we can use
// Neon32 as the size.
ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonS64);
break;
}
case kArmI64x2ShrU: {
// Only the least significant byte of each lane is used, so we can use
// Neon32 as the size.
ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonU64);
break;
}
case kArmF32x4Splat: {
int src_code = i.InputFloatRegister(0).code();
__ vdup(Neon32, i.OutputSimd128Register(),
DwVfpRegister::from_code(src_code / 2), src_code % 2);
break;
}
case kArmF32x4ExtractLane: {
__ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
i.InputInt8(1));
break;
}
case kArmF32x4ReplaceLane: {
__ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputFloatRegister(2), i.InputInt8(1));
break;
}
case kArmF32x4SConvertI32x4: {
__ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmF32x4UConvertI32x4: {
__ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmF32x4Abs: {
__ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmF32x4Neg: {
__ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmF32x4Sqrt: {
QwNeonRegister dst = i.OutputSimd128Register();
QwNeonRegister src1 = i.InputSimd128Register(0);
DCHECK_EQ(dst, q0);
DCHECK_EQ(src1, q0);
#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
__ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0));
__ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1));
__ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2));
__ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3));
#undef S_FROM_Q
break;
}
case kArmF32x4RecipApprox: {
__ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmF32x4RecipSqrtApprox: {
__ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmF32x4Add: {
__ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmF32x4AddHoriz: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
// Make sure we don't overwrite source data before it's used.
if (dst == src0) {
__ vpadd(dst.low(), src0.low(), src0.high());
if (dst == src1) {
__ vmov(dst.high(), dst.low());
} else {
__ vpadd(dst.high(), src1.low(), src1.high());
}
} else {
__ vpadd(dst.high(), src1.low(), src1.high());
__ vpadd(dst.low(), src0.low(), src0.high());
}
break;
}
case kArmF32x4Sub: {
__ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmF32x4Mul: {
__ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmF32x4Div: {
QwNeonRegister dst = i.OutputSimd128Register();
QwNeonRegister src1 = i.InputSimd128Register(0);
QwNeonRegister src2 = i.InputSimd128Register(1);
DCHECK_EQ(dst, q0);
DCHECK_EQ(src1, q0);
DCHECK_EQ(src2, q1);
#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
__ vdiv(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0), S_FROM_Q(src2, 0));
__ vdiv(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1), S_FROM_Q(src2, 1));
__ vdiv(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2), S_FROM_Q(src2, 2));
__ vdiv(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3), S_FROM_Q(src2, 3));
#undef S_FROM_Q
break;
}
case kArmF32x4Min: {
__ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmF32x4Max: {
__ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmF32x4Eq: {
__ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmF32x4Ne: {
Simd128Register dst = i.OutputSimd128Register();
__ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
__ vmvn(dst, dst);
break;
}
case kArmF32x4Lt: {
__ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0));
break;
}
case kArmF32x4Le: {
__ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0));
break;
}
case kArmF32x4Pmin: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
DCHECK_NE(dst, lhs);
DCHECK_NE(dst, rhs);
// f32x4.pmin(lhs, rhs)
// = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
// = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
__ vcgt(dst, lhs, rhs);
__ vbsl(dst, rhs, lhs);
break;
}
case kArmF32x4Pmax: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
DCHECK_NE(dst, lhs);
DCHECK_NE(dst, rhs);
// f32x4.pmax(lhs, rhs)
// = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
__ vcgt(dst, rhs, lhs);
__ vbsl(dst, rhs, lhs);
break;
}
case kArmI32x4Splat: {
__ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
break;
}
case kArmI32x4ExtractLane: {
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
i.InputInt8(1));
break;
}
case kArmI32x4ReplaceLane: {
__ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(2), NeonS32, i.InputInt8(1));
break;
}
case kArmI32x4SConvertF32x4: {
__ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmI32x4SConvertI16x8Low: {
__ vmovl(NeonS16, i.OutputSimd128Register(),
i.InputSimd128Register(0).low());
break;
}
case kArmI32x4SConvertI16x8High: {
__ vmovl(NeonS16, i.OutputSimd128Register(),
i.InputSimd128Register(0).high());
break;
}
case kArmI32x4Neg: {
__ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmI32x4Shl: {
ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 5, Neon32, NeonS32);
break;
}
case kArmI32x4ShrS: {
ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonS32);
break;
}
case kArmI32x4Add: {
__ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4AddHoriz:
ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);
break;
case kArmI32x4Sub: {
__ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4Mul: {
__ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4MinS: {
__ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4MaxS: {
__ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4Eq: {
__ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4Ne: {
Simd128Register dst = i.OutputSimd128Register();
__ vceq(Neon32, dst, i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ vmvn(dst, dst);
break;
}
case kArmI32x4GtS: {
__ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4GeS: {
__ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4UConvertF32x4: {
__ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmI32x4UConvertI16x8Low: {
__ vmovl(NeonU16, i.OutputSimd128Register(),
i.InputSimd128Register(0).low());
break;
}
case kArmI32x4UConvertI16x8High: {
__ vmovl(NeonU16, i.OutputSimd128Register(),
i.InputSimd128Register(0).high());
break;
}
case kArmI32x4ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonU32);
break;
}
case kArmI32x4MinU: {
__ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4MaxU: {
__ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4GtU: {
__ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4GeU: {
__ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI32x4Abs: {
__ vabs(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmI32x4BitMask: {
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
Simd128Register tmp2 = i.TempSimd128Register(0);
Simd128Register mask = i.TempSimd128Register(1);
__ vshr(NeonS32, tmp2, src, 31);
// Set the i-th bit of lane i in the mask. ANDing it with tmp2 leaves
// 1 << i in each lane whose input was negative and 0 elsewhere.
__ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001}));
__ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004}));
__ vand(tmp2, mask, tmp2);
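// Fold the four lane values (each 1 << i or 0) into lane 0; pairing with
// the zero register in the second vpadd clears the unused upper lane.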
__ vpadd(Neon32, tmp2.low(), tmp2.low(), tmp2.high());
__ vpadd(Neon32, tmp2.low(), tmp2.low(), kDoubleRegZero);
__ VmovLow(dst, tmp2.low());
break;
}
case kArmI32x4DotI16x8S: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
Simd128Register tmp1 = i.TempSimd128Register(0);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
__ vmull(NeonS16, tmp1, lhs.low(), rhs.low());
__ vmull(NeonS16, scratch, lhs.high(), rhs.high());
__ vpadd(Neon32, dst.low(), tmp1.low(), tmp1.high());
__ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
break;
}
case kArmI16x8Splat: {
__ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
break;
}
case kArmI16x8ExtractLaneU: {
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
i.InputInt8(1));
break;
}
case kArmI16x8ExtractLaneS: {
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
i.InputInt8(1));
break;
}
case kArmI16x8ReplaceLane: {
__ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(2), NeonS16, i.InputInt8(1));
break;
}
case kArmI16x8SConvertI8x16Low: {
__ vmovl(NeonS8, i.OutputSimd128Register(),
i.InputSimd128Register(0).low());
break;
}
case kArmI16x8SConvertI8x16High: {
__ vmovl(NeonS8, i.OutputSimd128Register(),
i.InputSimd128Register(0).high());
break;
}
case kArmI16x8Neg: {
__ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmI16x8Shl: {
ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 4, Neon16, NeonS16);
break;
}
case kArmI16x8ShrS: {
ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonS16);
break;
}
case kArmI16x8SConvertI32x4:
ASSEMBLE_NEON_NARROWING_OP(NeonS16, NeonS16);
break;
case kArmI16x8Add: {
__ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8AddSatS: {
__ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8AddHoriz:
ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);
break;
case kArmI16x8Sub: {
__ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8SubSatS: {
__ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8Mul: {
__ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8MinS: {
__ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8MaxS: {
__ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8Eq: {
__ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8Ne: {
Simd128Register dst = i.OutputSimd128Register();
__ vceq(Neon16, dst, i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ vmvn(dst, dst);
break;
}
case kArmI16x8GtS: {
__ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8GeS: {
__ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8UConvertI8x16Low: {
__ vmovl(NeonU8, i.OutputSimd128Register(),
i.InputSimd128Register(0).low());
break;
}
case kArmI16x8UConvertI8x16High: {
__ vmovl(NeonU8, i.OutputSimd128Register(),
i.InputSimd128Register(0).high());
break;
}
case kArmI16x8ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonU16);
break;
}
case kArmI16x8UConvertI32x4:
ASSEMBLE_NEON_NARROWING_OP(NeonU16, NeonS16);
break;
case kArmI16x8AddSatU: {
__ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8SubSatU: {
__ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8MinU: {
__ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8MaxU: {
__ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8GtU: {
__ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8GeU: {
__ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8RoundingAverageU: {
__ vrhadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI16x8Abs: {
__ vabs(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmI16x8BitMask: {
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
Simd128Register tmp2 = i.TempSimd128Register(0);
Simd128Register mask = i.TempSimd128Register(1);
__ vshr(NeonS16, tmp2, src, 15);
// Set the i-th bit of lane i in the mask. ANDing it with tmp2 leaves
// 1 << i in each lane whose input was negative and 0 elsewhere.
__ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001}));
__ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010}));
__ vand(tmp2, mask, tmp2);
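// Three pairwise adds fold the eight lane values into lane 0, producing
// the 8-bit mask.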
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
__ vmov(NeonU16, dst, tmp2.low(), 0);
break;
}
case kArmI8x16Splat: {
__ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
break;
}
case kArmI8x16ExtractLaneU: {
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
i.InputInt8(1));
break;
}
case kArmI8x16ExtractLaneS: {
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
i.InputInt8(1));
break;
}
case kArmI8x16ReplaceLane: {
__ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(2), NeonS8, i.InputInt8(1));
break;
}
case kArmI8x16Neg: {
__ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmI8x16Shl: {
ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 3, Neon8, NeonS8);
break;
}
case kArmI8x16ShrS: {
ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonS8);
break;
}
case kArmI8x16SConvertI16x8:
ASSEMBLE_NEON_NARROWING_OP(NeonS8, NeonS8);
break;
case kArmI8x16Add: {
__ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16AddSatS: {
__ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16Sub: {
__ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16SubSatS: {
__ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16Mul: {
__ vmul(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16MinS: {
__ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16MaxS: {
__ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16Eq: {
__ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16Ne: {
Simd128Register dst = i.OutputSimd128Register();
__ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
__ vmvn(dst, dst);
break;
}
case kArmI8x16GtS: {
__ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16GeS: {
__ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonU8);
break;
}
case kArmI8x16UConvertI16x8:
ASSEMBLE_NEON_NARROWING_OP(NeonU8, NeonS8);
break;
case kArmI8x16AddSatU: {
__ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16SubSatU: {
__ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16MinU: {
__ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16MaxU: {
__ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16GtU: {
__ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16GeU: {
__ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16RoundingAverageU: {
__ vrhadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16Abs: {
__ vabs(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmI8x16BitMask: {
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
Simd128Register tmp2 = i.TempSimd128Register(0);
Simd128Register mask = i.TempSimd128Register(1);
__ vshr(NeonS8, tmp2, src, 7);
// Set bit (i mod 8) of byte lane i in the mask. ANDing it with tmp2 leaves
// that bit in each byte whose input was negative and 0 elsewhere.
__ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201}));
__ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201}));
__ vand(tmp2, mask, tmp2);
__ vext(mask, tmp2, tmp2, 8);
__ vzip(Neon8, mask, tmp2);
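// After the vext/vzip pair, 16-bit lane k of tmp2 holds the byte-k
// contribution of the low half in its low byte and of the high half in
// its high byte, so the pairwise adds below sum to the full 16-bit mask.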
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
__ vmov(NeonU16, dst, tmp2.low(), 0);
break;
}
case kArmS128Const: {
QwNeonRegister dst = i.OutputSimd128Register();
uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
__ vmov(dst.low(), Double(imm1));
__ vmov(dst.high(), Double(imm2));
break;
}
case kArmS128Zero: {
__ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
i.OutputSimd128Register());
break;
}
case kArmS128AllOnes: {
__ vmov(i.OutputSimd128Register(), uint64_t{0xffff'ffff'ffff'ffff});
break;
}
case kArmS128Dup: {
NeonSize size = static_cast<NeonSize>(i.InputInt32(1));
int lanes = kSimd128Size >> size;
int index = i.InputInt32(2);
DCHECK_LT(index, lanes);
int d_lanes = lanes / 2;
int src_d_index = index & (d_lanes - 1);
int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
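// vdup replicates a single d-register lane, so split the q-register lane
// {index} into a d-register half and a lane index within that half.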
__ vdup(size, i.OutputSimd128Register(),
DwVfpRegister::from_code(src_d_code), src_d_index);
break;
}
case kArmS128And: {
__ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmS128Or: {
__ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmS128Xor: {
__ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmS128Not: {
__ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS128Select: {
Simd128Register dst = i.OutputSimd128Register();
DCHECK(dst == i.InputSimd128Register(0));
__ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kArmS128AndNot: {
__ vbic(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmS32x4ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
__ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5]
__ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5]
break;
}
case kArmS32x4ZipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
__ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7]
__ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7]
break;
}
case kArmS32x4UnzipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst == i.InputSimd128Register(0));
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
__ vmov(scratch, src1);
__ vuzp(Neon32, dst, scratch); // dst = [0, 2, 4, 6]
break;
}
case kArmS32x4UnzipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst == i.InputSimd128Register(0));
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
__ vmov(scratch, src1);
__ vuzp(Neon32, scratch, dst); // dst = [1, 3, 5, 7]
break;
}
case kArmS32x4TransposeLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst == i.InputSimd128Register(0));
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
__ vmov(scratch, src1);
__ vtrn(Neon32, dst, scratch); // dst = [0, 4, 2, 6]
break;
}
case kArmS32x4Shuffle: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
// Perform shuffle as a vmov per lane.
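// Each byte of the 32-bit {shuffle} immediate selects one of eight source
// s registers: values 0-3 index a lane of src0, values 4-7 a lane of src1.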
int dst_code = dst.code() * 4;
int src0_code = src0.code() * 4;
int src1_code = src1.code() * 4;
int32_t shuffle = i.InputInt32(2);
for (int i = 0; i < 4; i++) {
int lane = shuffle & 0x7;
int src_code = src0_code;
if (lane >= 4) {
src_code = src1_code;
lane &= 0x3;
}
__ VmovExtended(dst_code + i, src_code + lane);
shuffle >>= 8;
}
break;
}
case kArmS32x4TransposeRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
__ vmov(scratch, src1);
__ vtrn(Neon32, scratch, dst); // dst = [1, 5, 3, 7]
break;
}
case kArmS16x8ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
DCHECK(dst == i.InputSimd128Register(0));
__ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11]
__ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11]
break;
}
case kArmS16x8ZipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
__ vmov(dst.low(), src1.high());
__ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15]
break;
}
case kArmS16x8UnzipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
__ vmov(scratch, src1);
__ vuzp(Neon16, dst, scratch); // dst = [0, 2, 4, 6, ... 14]
break;
}
case kArmS16x8UnzipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
__ vmov(scratch, src1);
__ vuzp(Neon16, scratch, dst); // dst = [1, 3, 5, 7, ... 15]
break;
}
case kArmS16x8TransposeLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
__ vmov(scratch, src1);
__ vtrn(Neon16, dst, scratch); // dst = [0, 8, 2, 10, ... 14]
break;
}
case kArmS16x8TransposeRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
__ vmov(scratch, src1);
__ vtrn(Neon16, scratch, dst); // dst = [1, 9, 3, 11, ... 15]
break;
}
case kArmS8x16ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
__ vmov(dst.high(), src1.low());
__ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
break;
}
case kArmS8x16ZipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
__ vmov(dst.low(), src1.high());
__ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31]
break;
}
case kArmS8x16UnzipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
__ vmov(scratch, src1);
__ vuzp(Neon8, dst, scratch); // dst = [0, 2, 4, 6, ... 30]
break;
}
case kArmS8x16UnzipRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
__ vmov(scratch, src1);
__ vuzp(Neon8, scratch, dst); // dst = [1, 3, 5, 7, ... 31]
break;
}
case kArmS8x16TransposeLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
__ vmov(scratch, src1);
__ vtrn(Neon8, dst, scratch); // dst = [0, 16, 2, 18, ... 30]
break;
}
case kArmS8x16TransposeRight: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
DCHECK(dst == i.InputSimd128Register(0));
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
__ vmov(scratch, src1);
__ vtrn(Neon8, scratch, dst); // dst = [1, 17, 3, 19, ... 31]
break;
}
case kArmS8x16Concat: {
__ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputInt4(2));
break;
}
case kArmI8x16Swizzle: {
Simd128Register dst = i.OutputSimd128Register(),
tbl = i.InputSimd128Register(0),
src = i.InputSimd128Register(1);
NeonListOperand table(tbl);
__ vtbl(dst.low(), table, src.low());
__ vtbl(dst.high(), table, src.high());
break;
}
case kArmI8x16Shuffle: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
DwVfpRegister table_base = src0.low();
UseScratchRegisterScope temps(tasm());
Simd128Register scratch = temps.AcquireQ();
// For a unary shuffle the table is src0 (two d registers); otherwise it is
// src0 followed by src1, and the two must be consecutive registers.
int table_size = src0 == src1 ? 2 : 4;
DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
// The shuffle lane mask is a byte mask; materialize it in scratch.
int scratch_s_base = scratch.code() * 4;
for (int j = 0; j < 4; j++) {
uint32_t four_lanes = i.InputUint32(2 + j);
DCHECK_EQ(0, four_lanes & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
__ vmov(SwVfpRegister::from_code(scratch_s_base + j),
Float32::FromBits(four_lanes));
}
NeonListOperand table(table_base, table_size);
if (dst != src0 && dst != src1) {
__ vtbl(dst.low(), table, scratch.low());
__ vtbl(dst.high(), table, scratch.high());
} else {
__ vtbl(scratch.low(), table, scratch.low());
__ vtbl(scratch.high(), table, scratch.high());
__ vmov(dst, scratch);
}
break;
}
case kArmS32x2Reverse: {
__ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS16x4Reverse: {
__ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS16x2Reverse: {
__ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS8x8Reverse: {
__ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS8x4Reverse: {
__ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmS8x2Reverse: {
__ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmV32x4AnyTrue:
case kArmV16x8AnyTrue:
case kArmV8x16AnyTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
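// vpmax folds the lanes pairwise; after two rounds, lane 0 of scratch is
// nonzero iff any input lane was nonzero, which cmp/mov turns into 0 or 1.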
__ vpmax(NeonU32, scratch, src.low(), src.high());
__ vpmax(NeonU32, scratch, scratch, scratch);
__ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
__ cmp(i.OutputRegister(), Operand(0));
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
case kArmV32x4AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
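// vpmin folds the lanes pairwise; lane 0 of scratch ends up nonzero iff
// every input lane was nonzero.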
__ vpmin(NeonU32, scratch, src.low(), src.high());
__ vpmin(NeonU32, scratch, scratch, scratch);
__ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
__ cmp(i.OutputRegister(), Operand(0));
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
case kArmV16x8AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
__ vpmin(NeonU16, scratch, src.low(), src.high());
__ vpmin(NeonU16, scratch, scratch, scratch);
__ vpmin(NeonU16, scratch, scratch, scratch);
__ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
__ cmp(i.OutputRegister(), Operand(0));
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
case kArmV8x16AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
__ vpmin(NeonU8, scratch, src.low(), src.high());
__ vpmin(NeonU8, scratch, scratch, scratch);
__ vpmin(NeonU8, scratch, scratch, scratch);
__ vpmin(NeonU8, scratch, scratch, scratch);
__ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
__ cmp(i.OutputRegister(), Operand(0));
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
case kArmS128Load8Splat: {
__ vld1r(Neon8, NeonListOperand(i.OutputSimd128Register()),
i.NeonInputOperand(0));
break;
}
case kArmS128Load16Splat: {
__ vld1r(Neon16, NeonListOperand(i.OutputSimd128Register()),
i.NeonInputOperand(0));
break;
}
case kArmS128Load32Splat: {
__ vld1r(Neon32, NeonListOperand(i.OutputSimd128Register()),
i.NeonInputOperand(0));
break;
}
case kArmS128Load64Splat: {
Simd128Register dst = i.OutputSimd128Register();
__ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
__ Move(dst.high(), dst.low());
break;
}
case kArmS128Load8x8S: {
Simd128Register dst = i.OutputSimd128Register();
__ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
__ vmovl(NeonS8, dst, dst.low());
break;
}
case kArmS128Load8x8U: {
Simd128Register dst = i.OutputSimd128Register();
__ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
__ vmovl(NeonU8, dst, dst.low());
break;
}
case kArmS128Load16x4S: {
Simd128Register dst = i.OutputSimd128Register();
__ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
__ vmovl(NeonS16, dst, dst.low());
break;
}
case kArmS128Load16x4U: {
Simd128Register dst = i.OutputSimd128Register();
__ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
__ vmovl(NeonU16, dst, dst.low());
break;
}
case kArmS128Load32x2S: {
Simd128Register dst = i.OutputSimd128Register();
__ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
__ vmovl(NeonS32, dst, dst.low());
break;
}
case kArmS128Load32x2U: {
Simd128Register dst = i.OutputSimd128Register();
__ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
__ vmovl(NeonU32, dst, dst.low());
break;
}
case kArmS128Load32Zero: {
Simd128Register dst = i.OutputSimd128Register();
__ vmov(dst, 0);
__ vld1s(Neon32, NeonListOperand(dst.low()), 0, i.NeonInputOperand(0));
break;
}
case kArmS128Load64Zero: {
Simd128Register dst = i.OutputSimd128Register();
__ vmov(dst.high(), 0);
__ vld1(Neon64, NeonListOperand(dst.low()), i.NeonInputOperand(0));
break;
}
case kWord32AtomicLoadInt8:
ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb);
break;
case kWord32AtomicLoadUint8:
ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb);
break;
case kWord32AtomicLoadInt16:
ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh);
break;
case kWord32AtomicLoadUint16:
ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh);
break;
case kWord32AtomicLoadWord32:
ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr);
break;
case kWord32AtomicStoreWord8:
ASSEMBLE_ATOMIC_STORE_INTEGER(strb);
break;
case kWord32AtomicStoreWord16:
ASSEMBLE_ATOMIC_STORE_INTEGER(strh);
break;
case kWord32AtomicStoreWord32:
ASSEMBLE_ATOMIC_STORE_INTEGER(str);
break;
case kWord32AtomicExchangeInt8:
ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
__ sxtb(i.OutputRegister(0), i.OutputRegister(0));
break;
case kWord32AtomicExchangeUint8:
ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
break;
case kWord32AtomicExchangeInt16:
ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
__ sxth(i.OutputRegister(0), i.OutputRegister(0));
break;
case kWord32AtomicExchangeUint16:
ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
break;
case kWord32AtomicExchangeWord32:
ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex);
break;
case kWord32AtomicCompareExchangeInt8:
__ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
__ uxtb(i.TempRegister(2), i.InputRegister(2));
ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
i.TempRegister(2));
__ sxtb(i.OutputRegister(0), i.OutputRegister(0));
break;
case kWord32AtomicCompareExchangeUint8:
__ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
__ uxtb(i.TempRegister(2), i.InputRegister(2));
ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
i.TempRegister(2));
break;
case kWord32AtomicCompareExchangeInt16:
__ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
__ uxth(i.TempRegister(2), i.InputRegister(2));
ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
i.TempRegister(2));
__ sxth(i.OutputRegister(0), i.OutputRegister(0));
break;
case kWord32AtomicCompareExchangeUint16:
__ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
__ uxth(i.TempRegister(2), i.InputRegister(2));
ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
i.TempRegister(2));
break;
case kWord32AtomicCompareExchangeWord32:
__ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex,
i.InputRegister(2));
break;
#define ATOMIC_BINOP_CASE(op, inst) \
case kWord32Atomic##op##Int8: \
ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
__ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
break; \
case kWord32Atomic##op##Uint8: \
ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
break; \
case kWord32Atomic##op##Int16: \
ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
__ sxth(i.OutputRegister(0), i.OutputRegister(0)); \
break; \
case kWord32Atomic##op##Uint16: \
ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
break; \
case kWord32Atomic##op##Word32: \
ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst); \
break;
ATOMIC_BINOP_CASE(Add, add)
ATOMIC_BINOP_CASE(Sub, sub)
ATOMIC_BINOP_CASE(And, and_)
ATOMIC_BINOP_CASE(Or, orr)
ATOMIC_BINOP_CASE(Xor, eor)
#undef ATOMIC_BINOP_CASE
case kArmWord32AtomicPairLoad: {
if (instr->OutputCount() == 2) {
DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r0, r1));
__ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
__ ldrexd(r0, r1, i.TempRegister(0));
__ dmb(ISH);
} else {
// A special case of this instruction: even though this is a pair load,
// we only need one of the two words. We emit a normal atomic load.
DCHECK_EQ(instr->OutputCount(), 1);
Register base = i.InputRegister(0);
Register offset = i.InputRegister(1);
DCHECK(instr->InputAt(2)->IsImmediate());
int32_t offset_imm = i.InputInt32(2);
if (offset_imm != 0) {
Register temp = i.TempRegister(0);
__ add(temp, offset, Operand(offset_imm));
offset = temp;
}
__ ldr(i.OutputRegister(), MemOperand(base, offset));
__ dmb(ISH);
}
break;
}
case kArmWord32AtomicPairStore: {
Label store;
Register base = i.InputRegister(0);
Register offset = i.InputRegister(1);
Register value_low = i.InputRegister(2);
Register value_high = i.InputRegister(3);
Register actual_addr = i.TempRegister(0);
// The {ldrexd} instruction needs two temp registers. We do not need the
// result of {ldrexd}, but {strexd} fails unless a preceding {ldrexd} has
// claimed exclusive access to the address.
Register tmp1 = i.TempRegister(1);
Register tmp2 = i.TempRegister(2);
// Reuse one of the temp registers for the result of {strexd}.
Register store_result = tmp1;
__ add(actual_addr, base, offset);
__ dmb(ISH);
__ bind(&store);
// Add this {ldrexd} instruction here so that {strexd} below can succeed.
// We don't need the result of {ldrexd} itself.
__ ldrexd(tmp1, tmp2, actual_addr);
__ strexd(store_result, value_low, value_high, actual_addr);
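// {strexd} writes 0 to {store_result} on success and 1 if exclusive
// access was lost, in which case we retry the store.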
__ cmp(store_result, Operand(0));
__ b(ne, &store);
__ dmb(ISH);
break;
}
#define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2) \
case kArmWord32AtomicPair##op: { \
DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2); \
break; \
}
ATOMIC_ARITH_BINOP_CASE(Add, add, adc)
ATOMIC_ARITH_BINOP_CASE(Sub, sub, sbc)
#undef ATOMIC_ARITH_BINOP_CASE
#define ATOMIC_LOGIC_BINOP_CASE(op, instr1) \
case kArmWord32AtomicPair##op: { \
DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr1); \
break; \
}
ATOMIC_LOGIC_BINOP_CASE(And, and_)
ATOMIC_LOGIC_BINOP_CASE(Or, orr)
ATOMIC_LOGIC_BINOP_CASE(Xor, eor)
#undef ATOMIC_LOGIC_BINOP_CASE
case kArmWord32AtomicPairExchange: {
DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r6, r7));
Label exchange;
__ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
__ dmb(ISH);
__ bind(&exchange);
__ ldrexd(r6, r7, i.TempRegister(0));
__ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
i.TempRegister(0));
__ teq(i.TempRegister(1), Operand(0));
__ b(ne, &exchange);
__ dmb(ISH);
break;
}
case kArmWord32AtomicPairCompareExchange: {
DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));
__ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
Label compareExchange;
Label exit;
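// {ldrexd} loads the current value into r2/r3; inputs 0 and 1 hold the
// expected low and high words, inputs 2 and 3 the replacement. On any
// mismatch we exit, leaving the old value in r2/r3.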
__ dmb(ISH);
__ bind(&compareExchange);
__ ldrexd(r2, r3, i.TempRegister(0));
__ teq(i.InputRegister(0), Operand(r2));
__ b(ne, &exit);
__ teq(i.InputRegister(1), Operand(r3));
__ b(ne, &exit);
__ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
i.TempRegister(0));
__ teq(i.TempRegister(1), Operand(0));
__ b(ne, &compareExchange);
__ bind(&exit);
__ dmb(ISH);
break;
}
#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
#undef ASSEMBLE_ATOMIC_STORE_INTEGER
#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_ATOMIC64_ARITH_BINOP
#undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_NEON_NARROWING_OP
#undef ASSEMBLE_NEON_PAIRWISE_OP
#undef ASSEMBLE_SIMD_SHIFT_LEFT
#undef ASSEMBLE_SIMD_SHIFT_RIGHT
}
return kSuccess;
} // NOLINT(readability/fn_size)
// Assembles branches after an instruction.
void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
ArmOperandConverter i(this, instr);
Label* tlabel = branch->true_label;
Label* flabel = branch->false_label;
Condition cc = FlagsConditionToCondition(branch->condition);
__ b(cc, tlabel);
if (!branch->fallthru) __ b(flabel); // no fallthru to flabel.
}
void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
Instruction* instr) {
// TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
return;
}
condition = NegateFlagsCondition(condition);
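// {x eor x} is 0, so the conditional eor below clears the poison register
// exactly when the negated condition holds; csdb then acts as a barrier
// against speculative execution past this point.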
__ eor(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
Operand(kSpeculationPoisonRegister), SBit::LeaveCC,
FlagsConditionToCondition(condition));
__ csdb();
}
void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
BranchInfo* branch) {
AssembleArchBranch(instr, branch);
}
void CodeGenerator::AssembleArchJump(RpoNumber target) {
if (!IsNextInAssemblyOrder(target)) __ b(GetLabel(target));
}
void CodeGenerator::AssembleArchTrap(Instruction* instr,
FlagsCondition condition) {
class OutOfLineTrap final : public OutOfLineCode {
public:
OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
: OutOfLineCode(gen), instr_(instr), gen_(gen) {}
void Generate() final {
ArmOperandConverter i(gen_, instr_);
TrapId trap_id =
static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
GenerateCallToTrap(trap_id);
}
private:
void GenerateCallToTrap(TrapId trap_id) {
if (trap_id == TrapId::kInvalid) {
// We cannot test calls to the runtime in cctest/test-run-wasm.
// Therefore we emit a call to C here instead of a call to the runtime.
// We use the context register as the scratch register, because we do
// not have a context here.
__ PrepareCallCFunction(0, 0);
__ CallCFunction(
ExternalReference::wasm_call_trap_callback_for_testing(), 0);
__ LeaveFrame(StackFrame::WASM);
auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
int pop_count =
static_cast<int>(call_descriptor->StackParameterCount());
__ Drop(pop_count);
__ Ret();
} else {
gen_->AssembleSourcePosition(instr_);
// A direct call to a wasm runtime stub defined in this module.
// Just encode the stub index. This will be patched when the code
// is added to the native module and copied into wasm code space.
__ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
ReferenceMap* reference_map =
gen_->zone()->New<ReferenceMap>(gen_->zone());
gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
if (FLAG_debug_code) {
__ stop();
}
}
}
Instruction* instr_;
CodeGenerator* gen_;
};
auto ool = zone()->New<OutOfLineTrap>(this, instr);
Label* tlabel = ool->entry();
Condition cc = FlagsConditionToCondition(condition);
__ b(cc, tlabel);
}
// Assembles boolean materializations after an instruction.
void CodeGenerator::AssembleArchBoolean(Instruction* instr,
FlagsCondition condition) {
ArmOperandConverter i(this, instr);
// Materialize a full 32-bit 1 or 0 value. The result register is always the
// last output of the instruction.
DCHECK_NE(0u, instr->OutputCount());
Register reg = i.OutputRegister(instr->OutputCount() - 1);
Condition cc = FlagsConditionToCondition(condition);
__ mov(reg, Operand(0));
__ mov(reg, Operand(1), LeaveCC, cc);
}
void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
ArmOperandConverter i(this, instr);
Register input = i.InputRegister(0);
std::vector<std::pair<int32_t, Label*>> cases;
for (size_t index = 2; index < instr->InputCount(); index += 2) {
cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
}
AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
cases.data() + cases.size());
}
void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
ArmOperandConverter i(this, instr);
Register input = i.InputRegister(0);
size_t const case_count = instr->InputCount() - 2;
// This {cmp} might still emit a constant pool entry.
__ cmp(input, Operand(case_count));
// Make sure any pending constant pool is emitted first, so that it cannot
// end up inside the branch table below.
__ CheckConstPool(true, true);
__ BlockConstPoolFor(case_count + 2);
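// Reading {pc} yields the address of the current instruction plus 8, so
// the add below lands on entry {input} of the branch table that follows
// the default branch; when {input} is out of range (hs) the add is
// skipped and we fall through to the default branch.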
__ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
__ b(GetLabel(i.InputRpo(1)));
for (size_t index = 0; index < case_count; ++index) {
__ b(GetLabel(i.InputRpo(index + 2)));
}
}
void CodeGenerator::FinishFrame(Frame* frame) {
auto call_descriptor = linkage()->GetIncomingDescriptor();
const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
if (saves_fp != 0) {
frame->AlignSavedCalleeRegisterSlots();
}
if (saves_fp != 0) {
// Save callee-saved FP registers.
STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
frame->AllocateSavedCalleeRegisterSlots((last - first + 1) *
(kDoubleSize / kSystemPointerSize));
}
const RegList saves = call_descriptor->CalleeSavedRegisters();
if (saves != 0) {
// Save callee-saved registers.
frame->AllocateSavedCalleeRegisterSlots(base::bits::CountPopulation(saves));
}
}
void CodeGenerator::AssembleConstructFrame() {
auto call_descriptor = linkage()->GetIncomingDescriptor();
if (frame_access_state()->has_frame()) {
if (call_descriptor->IsCFunctionCall()) {
if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
__ StubPrologue(StackFrame::C_WASM_ENTRY);
// Reserve stack space for saving the c_entry_fp later.
__ AllocateStackSpace(kSystemPointerSize);
} else {
__ Push(lr, fp);
__ mov(fp, sp);
}
} else if (call_descriptor->IsJSFunctionCall()) {
__ Prologue();
} else {
__ StubPrologue(info()->GetOutputStackFrameType());
if (call_descriptor->IsWasmFunctionCall()) {
__ Push(kWasmInstanceRegister);
} else if (call_descriptor->IsWasmImportWrapper() ||
call_descriptor->IsWasmCapiFunction()) {
// Wasm import wrappers are passed a tuple in place of the instance.
// Unpack the tuple into the instance and the target callable.
// This must be done here in the codegen because it cannot be expressed
// properly in the graph.
__ ldr(kJSFunctionRegister,
FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
__ ldr(kWasmInstanceRegister,
FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
__ Push(kWasmInstanceRegister);
if (call_descriptor->IsWasmCapiFunction()) {
// Reserve space for saving the PC later.
__ AllocateStackSpace(kSystemPointerSize);
}
}
}
unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
}
int required_slots =
frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
if (info()->is_osr()) {
// TurboFan OSR-compiled functions cannot be entered directly.
__ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
// Unoptimized code jumps directly to this entrypoint while the unoptimized
// frame is still on the stack. Optimized code uses OSR values directly from
// the unoptimized frame. Thus, all that needs to be done is to allocate the
// remaining stack slots.
if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
osr_pc_offset_ = __ pc_offset();
required_slots -= osr_helper()->UnoptimizedFrameSlots();
ResetSpeculationPoison();
}
const RegList saves = call_descriptor->CalleeSavedRegisters();
const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
if (required_slots > 0) {
DCHECK(frame_access_state()->has_frame());
if (info()->IsWasm() && required_slots > 128) {
// For WebAssembly functions with big frames we have to do the stack
// overflow check before we construct the frame. Otherwise there may not
// be enough stack space left to call the runtime once the overflow is
// detected.
Label done;
// If the frame is bigger than the whole stack, throw the stack overflow
// exception unconditionally. This also removes the need to guard against
// integer overflow in the comparison below.
if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
__ ldr(scratch, FieldMemOperand(
kWasmInstanceRegister,
WasmInstanceObject::kRealStackLimitAddressOffset));
__ ldr(scratch, MemOperand(scratch));
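// The check below branches to {done} if {sp >= limit + frame_size}, i.e.
// if the frame still fits above the real stack limit.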
__ add(scratch, scratch, Operand(required_slots * kSystemPointerSize));
__ cmp(sp, scratch);
__ b(cs, &done);
}
__ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
// We come from WebAssembly, so there are no references for the GC.
ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
if (FLAG_debug_code) {
__ stop();
}
__ bind(&done);
}
// Skip callee-saved and return slots, which are pushed below.
required_slots -= base::bits::CountPopulation(saves);
required_slots -= frame()->GetReturnSlotCount();
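// Each callee-saved D register occupies two pointer-sized slots.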
required_slots -= 2 * base::bits::CountPopulation(saves_fp);
if (required_slots > 0) {
__ AllocateStackSpace(required_slots * kSystemPointerSize);
}
}
if (saves_fp != 0) {
// Save callee-saved FP registers.
STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
uint32_t last = 31 - base::bits::CountLeadingZeros32(saves_fp);  // MSB index.
uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
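// {vstm db_w} decrements {sp} before the stores and writes it back, i.e.
// this pushes d{first}..d{last} as one contiguous block.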
__ vstm(db_w, sp, DwVfpRegister::from_code(first),
DwVfpRegister::from_code(last));
}
if (saves != 0) {
// Save callee-saved registers.
__ stm(db_w, sp, saves);
}
const int returns = frame()->GetReturnSlotCount();
if (returns != 0) {
// Reserve space for return slots.
__ AllocateStackSpace(returns * kSystemPointerSize);
}
}
void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
auto call_descriptor = linkage()->GetIncomingDescriptor();
const int returns = frame()->GetReturnSlotCount();
if (returns != 0) {
// Free the space reserved for return slots.
__ add(sp, sp, Operand(returns * kSystemPointerSize));
}
// Restore registers.
const RegList saves = call_descriptor->CalleeSavedRegisters();
if (saves != 0) {
__ ldm(ia_w, sp, saves);
}
// Restore FP registers.
const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
if (saves_fp != 0) {
STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
uint32_t last = 31 - base::bits::CountLeadingZeros32(saves_fp);  // MSB index.
uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
__ vldm(ia_w, sp, DwVfpRegister::from_code(first),
DwVfpRegister::from_code(last));
}
unwinding_info_writer_.MarkBlockWillExit();
// We might need r3 for scratch.
DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & r3.bit());
ArmOperandConverter g(this, nullptr);
const int parameter_count =
static_cast<int>(call_descriptor->StackParameterCount());
// {additional_pop_count} is only greater than zero if {parameter_count} == 0.
// See RawMachineAssembler::PopAndReturn.
if (parameter_count != 0) {
if (additional_pop_count->IsImmediate()) {
DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
} else if (__ emit_debug_code()) {
__ cmp(g.ToRegister(additional_pop_count), Operand(0));
__ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
}
}
Register argc_reg = r3;
#ifdef V8_NO_ARGUMENTS_ADAPTOR
// Functions with JS linkage have at least one parameter (the receiver).
// If {parameter_count} == 0, it means it is a builtin with
// kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
// itself.
const bool drop_jsargs = frame_access_state()->has_frame() &&
call_descriptor->IsJSFunctionCall() &&
parameter_count != 0;
#else
const bool drop_jsargs = false;
#endif
if (call_descriptor->IsCFunctionCall()) {
AssembleDeconstructFrame();
} else if (frame_access_state()->has_frame()) {
// Canonicalize JSFunction return sites for now, unless they have a variable
// number of stack slot pops.
if (additional_pop_count->IsImmediate() &&
g.ToConstant(additional_pop_count).ToInt32() == 0) {
if (return_label_.is_bound()) {
__ b(&return_label_);
return;
} else {
__ bind(&return_label_);
}
}
if (drop_jsargs) {
// Get the actual argument count.
__ ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
}
AssembleDeconstructFrame();
}
if (drop_jsargs) {
// We must pop all arguments from the stack (including the receiver). This
// number of arguments is given by max(1 + argc_reg, parameter_count).
__ add(argc_reg, argc_reg, Operand(1)); // Also pop the receiver.
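// The conditional {mov} below (executed when {argc_reg} < {parameter_count})
// materializes that max in {argc_reg}.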
if (parameter_count > 1) {
__ cmp(argc_reg, Operand(parameter_count));
__ mov(argc_reg, Operand(parameter_count), LeaveCC, lt);
}
__ Drop(argc_reg);
} else if (additional_pop_count->IsImmediate()) {
DCHECK_EQ(Constant::kInt32, g.ToConstant(additional_pop_count).type());
int additional_count = g.ToConstant(additional_pop_count).ToInt32();
__ Drop(parameter_count + additional_count);
} else if (parameter_count == 0) {
__ Drop(g.ToRegister(additional_pop_count));
} else {
// {additional_pop_count} is guaranteed to be zero if {parameter_count} != 0.
// See RawMachineAssembler::PopAndReturn.
__ Drop(parameter_count);
}
__ Ret();
}
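// Flush any constant pool entries that are still pending before the code is
// finalized.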
void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); }
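// Flush the constant pool before emitting the deoptimization exits, so that
// no pool entry can end up between them and their layout stays predictable.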
void CodeGenerator::PrepareForDeoptimizationExits(
ZoneDeque<DeoptimizationExit*>* exits) {
__ CheckConstPool(true, false);
}
void CodeGenerator::AssembleMove(InstructionOperand* source,
InstructionOperand* destination) {
ArmOperandConverter g(this, nullptr);
// Helper function to write the given constant to the dst register.
auto MoveConstantToRegister = [&](Register dst, Constant src) {
if (src.type() == Constant::kHeapObject) {
Handle<HeapObject> src_object = src.ToHeapObject();
RootIndex index;
if (IsMaterializableFromRoot(src_object, &index)) {
__ LoadRoot(dst, index);
} else {
__ Move(dst, src_object);
}
} else if (src.type() == Constant::kExternalReference) {
__ Move(dst, src.ToExternalReference());
} else {
__ mov(dst, g.ToImmediate(source));
}
};
switch (MoveType::InferMove(source, destination)) {
case MoveType::kRegisterToRegister:
if (source->IsRegister()) {
__ mov(g.ToRegister(destination), g.ToRegister(source));
} else if (source->IsFloatRegister()) {
DCHECK(destination->IsFloatRegister());
// GapResolver may give us reg codes that don't map to actual
// s-registers. Generate code to work around those cases.
int src_code = LocationOperand::cast(source)->register_code();
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(dst_code, src_code);
} else if (source->IsDoubleRegister()) {
__ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
} else {
__ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source));
}
return;
case MoveType::kRegisterToStack: {
MemOperand dst = g.ToMemOperand(destination);
if (source->IsRegister()) {
__ str(g.ToRegister(source), dst);
} else if (source->IsFloatRegister()) {
// GapResolver may give us reg codes that don't map to actual
// s-registers. Generate code to work around those cases.
int src_code = LocationOperand::cast(source)->register_code();
__ VmovExtended(dst, src_code);
} else if (source->IsDoubleRegister()) {
__ vstr(g.ToDoubleRegister(source), dst);
} else {
UseScratchRegisterScope temps(tasm());
Register temp = temps.Acquire();
QwNeonRegister src = g.ToSimd128Register(source);
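// NEON loads/stores have no immediate-offset addressing mode, so
// materialize the slot address in {temp}; the Q register is transferred as
// its two D halves.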
__ add(temp, dst.rn(), Operand(dst.offset()));
__ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
}
return;
}
case MoveType::kStackToRegister: {
MemOperand src = g.ToMemOperand(source);
if (source->IsStackSlot()) {
__ ldr(g.ToRegister(destination), src);
} else if (source->IsFloatStackSlot()) {
DCHECK(destination->IsFloatRegister());
// GapResolver may give us reg codes that don't map to actual
// s-registers. Generate code to work around those cases.
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(dst_code, src);
} else if (source->IsDoubleStackSlot()) {
__ vldr(g.ToDoubleRegister(destination), src);
} else {
UseScratchRegisterScope temps(tasm());
Register temp = temps.Acquire();
QwNeonRegister dst = g.ToSimd128Register(destination);
__ add(temp, src.rn(), Operand(src.offset()));
__ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp));
}
return;
}
case MoveType::kStackToStack: {
MemOperand src = g.ToMemOperand(source);
MemOperand dst = g.ToMemOperand(destination);
UseScratchRegisterScope temps(tasm());
if (source->IsStackSlot() || source->IsFloatStackSlot()) {
SwVfpRegister temp = temps.AcquireS();
__ vldr(temp, src);
__ vstr(temp, dst);
} else if (source->IsDoubleStackSlot()) {
DwVfpRegister temp = temps.AcquireD();
__ vldr(temp, src);
__ vstr(temp, dst);
} else {
DCHECK(source->IsSimd128StackSlot());
Register temp = temps.Acquire();
QwNeonRegister temp_q = temps.AcquireQ();
__ add(temp, src.rn(), Operand(src.offset()));
__ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
__ add(temp, dst.rn(), Operand(dst.offset()));
__ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
}
return;
}
case MoveType::kConstantToRegister: {
Constant src = g.ToConstant(source);
if (destination->IsRegister()) {
MoveConstantToRegister(g.ToRegister(destination), src);
} else if (destination->IsFloatRegister()) {
__ vmov(g.ToFloatRegister(destination),
Float32::FromBits(src.ToFloat32AsInt()));
} else {
// TODO(arm): Look into optimizing this further if possible. Supporting
// the NEON version of VMOV may help.
__ vmov(g.ToDoubleRegister(destination), src.ToFloat64());
}
return;
}
case MoveType::kConstantToStack: {
Constant src = g.ToConstant(source);
MemOperand dst = g.ToMemOperand(destination);
if (destination->IsStackSlot()) {
UseScratchRegisterScope temps(tasm());
// Acquire an S register instead of a general purpose register in case
// `vstr` needs one to compute the address of `dst`.
SwVfpRegister s_temp = temps.AcquireS();
{
// TODO(arm): This sequence could be optimized further if necessary by
// writing the constant directly into `s_temp`.
UseScratchRegisterScope temps(tasm());
Register temp = temps.Acquire();
MoveConstantToRegister(temp, src);
__ vmov(s_temp, temp);
}
__ vstr(s_temp, dst);
} else if (destination->IsFloatStackSlot()) {
UseScratchRegisterScope temps(tasm());
SwVfpRegister temp = temps.AcquireS();
__ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
__ vstr(temp, dst);
} else {
DCHECK(destination->IsDoubleStackSlot());
UseScratchRegisterScope temps(tasm());
DwVfpRegister temp = temps.AcquireD();
// TODO(arm): Look into optimizing this further if possible. Supporting
// the NEON version of VMOV may help.
__ vmov(temp, src.ToFloat64());
__ vstr(temp, g.ToMemOperand(destination));
}
return;
}
}
UNREACHABLE();
}
void CodeGenerator::AssembleSwap(InstructionOperand* source,
InstructionOperand* destination) {
ArmOperandConverter g(this, nullptr);
switch (MoveType::InferSwap(source, destination)) {
case MoveType::kRegisterToRegister:
if (source->IsRegister()) {
__ Swap(g.ToRegister(source), g.ToRegister(destination));
} else if (source->IsFloatRegister()) {
DCHECK(destination->IsFloatRegister());
// GapResolver may give us reg codes that don't map to actual
// s-registers. Generate code to work around those cases.
UseScratchRegisterScope temps(tasm());
LowDwVfpRegister temp = temps.AcquireLowD();
int src_code = LocationOperand::cast(source)->register_code();
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(temp.low().code(), src_code);
__ VmovExtended(src_code, dst_code);
__ VmovExtended(dst_code, temp.low().code());
} else if (source->IsDoubleRegister()) {
__ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination));
} else {
__ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination));
}
return;
case MoveType::kRegisterToStack: {
MemOperand dst = g.ToMemOperand(destination);
if (source->IsRegister()) {
Register src = g.ToRegister(source);
UseScratchRegisterScope temps(tasm());
SwVfpRegister temp = temps.AcquireS();
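// Stash the GP value in an S register; this keeps the GP scratch register
// available for materializing the memory operand addresses below.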
__ vmov(temp, src);
__ ldr(src, dst);
__ vstr(temp, dst);
} else if (source->IsFloatRegister()) {
int src_code = LocationOperand::cast(source)->register_code();
UseScratchRegisterScope temps(tasm());
LowDwVfpRegister temp = temps.AcquireLowD();
__ VmovExtended(temp.low().code(), src_code);
__ VmovExtended(src_code, dst);
__ vstr(temp.low(), dst);
} else if (source->IsDoubleRegister()) {
UseScratchRegisterScope temps(tasm());
DwVfpRegister temp = temps.AcquireD();
DwVfpRegister src = g.ToDoubleRegister(source);
__ Move(temp, src);
__ vldr(src, dst);
__ vstr(temp, dst);
} else {
QwNeonRegister src = g.ToSimd128Register(source);
UseScratchRegisterScope temps(tasm());
Register temp = temps.Acquire();
QwNeonRegister temp_q = temps.AcquireQ();
__ Move(temp_q, src);
__ add(temp, dst.rn(), Operand(dst.offset()));
__ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
__ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
}
return;
}
case MoveType::kStackToStack: {
MemOperand src = g.ToMemOperand(source);
MemOperand dst = g.ToMemOperand(destination);
if (source->IsStackSlot() || source->IsFloatStackSlot()) {
UseScratchRegisterScope temps(tasm());
SwVfpRegister temp_0 = temps.AcquireS();
SwVfpRegister temp_1 = temps.AcquireS();
__ vldr(temp_0, dst);
__ vldr(temp_1, src);
__ vstr(temp_0, src);
__ vstr(temp_1, dst);
} else if (source->IsDoubleStackSlot()) {
UseScratchRegisterScope temps(tasm());
LowDwVfpRegister temp = temps.AcquireLowD();
if (temps.CanAcquireD()) {
DwVfpRegister temp_0 = temp;
DwVfpRegister temp_1 = temps.AcquireD();
__ vldr(temp_0, dst);
__ vldr(temp_1, src);
__ vstr(temp_0, src);
__ vstr(temp_1, dst);
} else {
// We only have a single D register available. However, we can split
// it into 2 S registers and swap the slots 32 bits at a time.
MemOperand src0 = src;
MemOperand dst0 = dst;
MemOperand src1(src.rn(), src.offset() + kFloatSize);
MemOperand dst1(dst.rn(), dst.offset() + kFloatSize);
SwVfpRegister temp_0 = temp.low();
SwVfpRegister temp_1 = temp.high();
__ vldr(temp_0, dst0);
__ vldr(temp_1, src0);
__ vstr(temp_0, src0);
__ vstr(temp_1, dst0);
__ vldr(temp_0, dst1);
__ vldr(temp_1, src1);
__ vstr(temp_0, src1);
__ vstr(temp_1, dst1);
}
} else {
DCHECK(source->IsSimd128StackSlot());
MemOperand src0 = src;
MemOperand dst0 = dst;
MemOperand src1(src.rn(), src.offset() + kDoubleSize);
MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
UseScratchRegisterScope temps(tasm());
DwVfpRegister temp_0 = temps.AcquireD();
DwVfpRegister temp_1 = temps.AcquireD();
__ vldr(temp_0, dst0);
__ vldr(temp_1, src0);
__ vstr(temp_0, src0);
__ vstr(temp_1, dst0);
__ vldr(temp_0, dst1);
__ vldr(temp_1, src1);
__ vstr(temp_0, src1);
__ vstr(temp_1, dst1);
}
return;
}
default:
UNREACHABLE();
}
}
void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
// On 32-bit ARM we emit the jump tables inline.
UNREACHABLE();
}
#undef __
} // namespace compiler
} // namespace internal
} // namespace v8