| // Copyright 2014 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/codegen/assembler-inl.h" |
| #include "src/codegen/macro-assembler.h" |
| #include "src/codegen/optimized-compilation-info.h" |
| #include "src/compiler/backend/code-generator-impl.h" |
| #include "src/compiler/backend/code-generator.h" |
| #include "src/compiler/backend/gap-resolver.h" |
| #include "src/compiler/backend/instruction-codes.h" |
| #include "src/compiler/node-matchers.h" |
| #include "src/compiler/osr.h" |
| #include "src/heap/memory-chunk.h" |
| #include "src/numbers/double.h" |
| #include "src/utils/boxed-float.h" |
| #include "src/wasm/wasm-code-manager.h" |
| #include "src/wasm/wasm-objects.h" |
| |
| namespace v8 { |
| namespace internal { |
| namespace compiler { |
| |
| #define __ tasm()-> |
| |
| // Adds Arm-specific methods to convert InstructionOperands. |
| class ArmOperandConverter final : public InstructionOperandConverter { |
| public: |
| ArmOperandConverter(CodeGenerator* gen, Instruction* instr) |
| : InstructionOperandConverter(gen, instr) {} |
| |
| SBit OutputSBit() const { |
| switch (instr_->flags_mode()) { |
| case kFlags_branch: |
| case kFlags_branch_and_poison: |
| case kFlags_deoptimize: |
| case kFlags_deoptimize_and_poison: |
| case kFlags_set: |
| case kFlags_trap: |
| return SetCC; |
| case kFlags_none: |
| return LeaveCC; |
| } |
| UNREACHABLE(); |
| } |
| |
| Operand InputImmediate(size_t index) const { |
| return ToImmediate(instr_->InputAt(index)); |
| } |
| |
| Operand InputOperand2(size_t first_index) { |
| const size_t index = first_index; |
| switch (AddressingModeField::decode(instr_->opcode())) { |
| case kMode_None: |
| case kMode_Offset_RI: |
| case kMode_Offset_RR: |
| case kMode_Root: |
| break; |
| case kMode_Operand2_I: |
| return InputImmediate(index + 0); |
| case kMode_Operand2_R: |
| return Operand(InputRegister(index + 0)); |
| case kMode_Operand2_R_ASR_I: |
| return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1)); |
| case kMode_Operand2_R_ASR_R: |
| return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1)); |
| case kMode_Operand2_R_LSL_I: |
| return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1)); |
| case kMode_Operand2_R_LSL_R: |
| return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1)); |
| case kMode_Operand2_R_LSR_I: |
| return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1)); |
| case kMode_Operand2_R_LSR_R: |
| return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1)); |
| case kMode_Operand2_R_ROR_I: |
| return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1)); |
| case kMode_Operand2_R_ROR_R: |
| return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1)); |
| } |
| UNREACHABLE(); |
| } |
| |
| MemOperand InputOffset(size_t* first_index) { |
| const size_t index = *first_index; |
| switch (AddressingModeField::decode(instr_->opcode())) { |
| case kMode_None: |
| case kMode_Operand2_I: |
| case kMode_Operand2_R: |
| case kMode_Operand2_R_ASR_I: |
| case kMode_Operand2_R_ASR_R: |
| case kMode_Operand2_R_LSL_R: |
| case kMode_Operand2_R_LSR_I: |
| case kMode_Operand2_R_LSR_R: |
| case kMode_Operand2_R_ROR_I: |
| case kMode_Operand2_R_ROR_R: |
| break; |
| case kMode_Operand2_R_LSL_I: |
| *first_index += 3; |
| return MemOperand(InputRegister(index + 0), InputRegister(index + 1), |
| LSL, InputInt32(index + 2)); |
| case kMode_Offset_RI: |
| *first_index += 2; |
| return MemOperand(InputRegister(index + 0), InputInt32(index + 1)); |
| case kMode_Offset_RR: |
| *first_index += 2; |
| return MemOperand(InputRegister(index + 0), InputRegister(index + 1)); |
| case kMode_Root: |
| *first_index += 1; |
| return MemOperand(kRootRegister, InputInt32(index)); |
| } |
| UNREACHABLE(); |
| } |
| |
| MemOperand InputOffset(size_t first_index = 0) { |
| return InputOffset(&first_index); |
| } |
| |
| Operand ToImmediate(InstructionOperand* operand) const { |
| Constant constant = ToConstant(operand); |
| switch (constant.type()) { |
| case Constant::kInt32: |
| if (RelocInfo::IsWasmReference(constant.rmode())) { |
| return Operand(constant.ToInt32(), constant.rmode()); |
| } else { |
| return Operand(constant.ToInt32()); |
| } |
| case Constant::kFloat32: |
| return Operand::EmbeddedNumber(constant.ToFloat32()); |
| case Constant::kFloat64: |
| return Operand::EmbeddedNumber(constant.ToFloat64().value()); |
| case Constant::kExternalReference: |
| return Operand(constant.ToExternalReference()); |
| case Constant::kDelayedStringConstant: |
| return Operand::EmbeddedStringConstant( |
| constant.ToDelayedStringConstant()); |
| case Constant::kInt64: |
| case Constant::kCompressedHeapObject: |
| case Constant::kHeapObject: |
| // TODO(dcarney): loading RPO constants on arm. |
| case Constant::kRpoNumber: |
| break; |
| } |
| UNREACHABLE(); |
| } |
| |
| MemOperand ToMemOperand(InstructionOperand* op) const { |
| DCHECK_NOT_NULL(op); |
| DCHECK(op->IsStackSlot() || op->IsFPStackSlot()); |
| return SlotToMemOperand(AllocatedOperand::cast(op)->index()); |
| } |
| |
| MemOperand SlotToMemOperand(int slot) const { |
| FrameOffset offset = frame_access_state()->GetFrameOffset(slot); |
| return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset()); |
| } |
| |
| NeonMemOperand NeonInputOperand(size_t first_index) { |
| const size_t index = first_index; |
| switch (AddressingModeField::decode(instr_->opcode())) { |
| case kMode_Operand2_R: |
| return NeonMemOperand(InputRegister(index + 0)); |
| default: |
| break; |
| } |
| UNREACHABLE(); |
| } |
| }; |
| |
| namespace { |
| |
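| // Out-of-line portion of the write barrier: skips the barrier if the |
| // stored value is a Smi (when the mode allows) or lives on a page whose |
| // incoming pointers are not tracked, and otherwise calls the appropriate |
| // record write stub. lr is saved and restored here if the frame was |
| // elided, since the stub call clobbers it. |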
| class OutOfLineRecordWrite final : public OutOfLineCode { |
| public: |
| OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset, |
| Register value, RecordWriteMode mode, |
| StubCallMode stub_mode, |
| UnwindingInfoWriter* unwinding_info_writer) |
| : OutOfLineCode(gen), |
| object_(object), |
| offset_(offset), |
| value_(value), |
| mode_(mode), |
| stub_mode_(stub_mode), |
| must_save_lr_(!gen->frame_access_state()->has_frame()), |
| unwinding_info_writer_(unwinding_info_writer), |
| zone_(gen->zone()) {} |
| |
| void Generate() final { |
| if (mode_ > RecordWriteMode::kValueIsPointer) { |
| __ JumpIfSmi(value_, exit()); |
| } |
| __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, eq, |
| exit()); |
| RememberedSetAction const remembered_set_action = |
| mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET |
| : OMIT_REMEMBERED_SET; |
| SaveFPRegsMode const save_fp_mode = |
| frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs; |
| if (must_save_lr_) { |
| // We need to save and restore lr if the frame was elided. |
| __ Push(lr); |
| unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset()); |
| } |
| if (mode_ == RecordWriteMode::kValueIsEphemeronKey) { |
| __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode); |
| } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) { |
| __ CallRecordWriteStub(object_, offset_, remembered_set_action, |
| save_fp_mode, wasm::WasmCode::kRecordWrite); |
| } else { |
| __ CallRecordWriteStub(object_, offset_, remembered_set_action, |
| save_fp_mode); |
| } |
| if (must_save_lr_) { |
| __ Pop(lr); |
| unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset()); |
| } |
| } |
| |
| private: |
| Register const object_; |
| Operand const offset_; |
| Register const value_; |
| RecordWriteMode const mode_; |
| StubCallMode stub_mode_; |
| bool must_save_lr_; |
| UnwindingInfoWriter* const unwinding_info_writer_; |
| Zone* zone_; |
| }; |
| |
| template <typename T> |
| class OutOfLineFloatMin final : public OutOfLineCode { |
| public: |
| OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right) |
| : OutOfLineCode(gen), result_(result), left_(left), right_(right) {} |
| |
| void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); } |
| |
| private: |
| T const result_; |
| T const left_; |
| T const right_; |
| }; |
| using OutOfLineFloat32Min = OutOfLineFloatMin<SwVfpRegister>; |
| using OutOfLineFloat64Min = OutOfLineFloatMin<DwVfpRegister>; |
| |
| template <typename T> |
| class OutOfLineFloatMax final : public OutOfLineCode { |
| public: |
| OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right) |
| : OutOfLineCode(gen), result_(result), left_(left), right_(right) {} |
| |
| void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); } |
| |
| private: |
| T const result_; |
| T const left_; |
| T const right_; |
| }; |
| using OutOfLineFloat32Max = OutOfLineFloatMax<SwVfpRegister>; |
| using OutOfLineFloat64Max = OutOfLineFloatMax<DwVfpRegister>; |
| |
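| // Maps a code-generator flags condition to an ARM condition code. Note |
| // that the floating-point mappings rely on how vcmp sets the NZCV flags: |
| // an unordered result sets C and V, so that, for example, lt (N != V) is |
| // taken both for "less than" and for "unordered". |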
| Condition FlagsConditionToCondition(FlagsCondition condition) { |
| switch (condition) { |
| case kEqual: |
| return eq; |
| case kNotEqual: |
| return ne; |
| case kSignedLessThan: |
| return lt; |
| case kSignedGreaterThanOrEqual: |
| return ge; |
| case kSignedLessThanOrEqual: |
| return le; |
| case kSignedGreaterThan: |
| return gt; |
| case kUnsignedLessThan: |
| return lo; |
| case kUnsignedGreaterThanOrEqual: |
| return hs; |
| case kUnsignedLessThanOrEqual: |
| return ls; |
| case kUnsignedGreaterThan: |
| return hi; |
| case kFloatLessThanOrUnordered: |
| return lt; |
| case kFloatGreaterThanOrEqual: |
| return ge; |
| case kFloatLessThanOrEqual: |
| return ls; |
| case kFloatGreaterThanOrUnordered: |
| return hi; |
| case kFloatLessThan: |
| return lo; |
| case kFloatGreaterThanOrEqualOrUnordered: |
| return hs; |
| case kFloatLessThanOrEqualOrUnordered: |
| return le; |
| case kFloatGreaterThan: |
| return gt; |
| case kOverflow: |
| return vs; |
| case kNotOverflow: |
| return vc; |
| case kPositiveOrZero: |
| return pl; |
| case kNegative: |
| return mi; |
| default: |
| break; |
| } |
| UNREACHABLE(); |
| } |
| |
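| // Spectre mitigation helpers: a poisoned load masks either the loaded |
| // value or the address it loads from with kSpeculationPoisonRegister, |
| // which holds all ones on the architecturally taken path and all zeros |
| // under misspeculation (see |
| // CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister below). |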
| void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, |
| InstructionCode opcode, |
| ArmOperandConverter const& i) { |
| const MemoryAccessMode access_mode = |
| static_cast<MemoryAccessMode>(MiscField::decode(opcode)); |
| if (access_mode == kMemoryAccessPoisoned) { |
| Register value = i.OutputRegister(); |
| codegen->tasm()->and_(value, value, Operand(kSpeculationPoisonRegister)); |
| } |
| } |
| |
| void ComputePoisonedAddressForLoad(CodeGenerator* codegen, |
| InstructionCode opcode, |
| ArmOperandConverter const& i, |
| Register address) { |
| DCHECK_EQ(kMemoryAccessPoisoned, |
| static_cast<MemoryAccessMode>(MiscField::decode(opcode))); |
| switch (AddressingModeField::decode(opcode)) { |
| case kMode_Offset_RI: |
| codegen->tasm()->mov(address, i.InputImmediate(1)); |
| codegen->tasm()->add(address, address, i.InputRegister(0)); |
| break; |
| case kMode_Offset_RR: |
| codegen->tasm()->add(address, i.InputRegister(0), i.InputRegister(1)); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| codegen->tasm()->and_(address, address, Operand(kSpeculationPoisonRegister)); |
| } |
| |
| } // namespace |
| |
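| // The atomic macros below implement sequentially consistent accesses: |
| // loads are followed and stores are surrounded by dmb(ISH) barriers, and |
| // read-modify-write operations retry a ldrex/strex loop until the |
| // exclusive store succeeds (the status register written by strex is 0 on |
| // success). |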
| #define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \ |
| do { \ |
| __ asm_instr(i.OutputRegister(), \ |
| MemOperand(i.InputRegister(0), i.InputRegister(1))); \ |
| __ dmb(ISH); \ |
| } while (0) |
| |
| #define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr) \ |
| do { \ |
| __ dmb(ISH); \ |
| __ asm_instr(i.InputRegister(2), \ |
| MemOperand(i.InputRegister(0), i.InputRegister(1))); \ |
| __ dmb(ISH); \ |
| } while (0) |
| |
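| // As an illustration, with ldrex/strex substituted for load_instr and |
| // store_instr, the exchange macro below expands to roughly: |
| //   add   temp1, base, index   ; compute the address |
| //   dmb   ish |
| // exchange: |
| //   ldrex out, [temp1]         ; exclusive load of the old value |
| //   strex temp0, new, [temp1]  ; try to store the new value |
| //   teq   temp0, #0            ; 0 means the exclusive store succeeded |
| //   bne   exchange             ; otherwise retry |
| //   dmb   ish |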
| #define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr) \ |
| do { \ |
| Label exchange; \ |
| __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \ |
| __ dmb(ISH); \ |
| __ bind(&exchange); \ |
| __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \ |
| __ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \ |
| __ teq(i.TempRegister(0), Operand(0)); \ |
| __ b(ne, &exchange); \ |
| __ dmb(ISH); \ |
| } while (0) |
| |
| #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, \ |
| cmp_reg) \ |
| do { \ |
| Label compareExchange; \ |
| Label exit; \ |
| __ dmb(ISH); \ |
| __ bind(&compareExchange); \ |
| __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \ |
| __ teq(cmp_reg, Operand(i.OutputRegister(0))); \ |
| __ b(ne, &exit); \ |
| __ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \ |
| __ teq(i.TempRegister(0), Operand(0)); \ |
| __ b(ne, &compareExchange); \ |
| __ bind(&exit); \ |
| __ dmb(ISH); \ |
| } while (0) |
| |
| #define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr) \ |
| do { \ |
| Label binop; \ |
| __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \ |
| __ dmb(ISH); \ |
| __ bind(&binop); \ |
| __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \ |
| __ bin_instr(i.TempRegister(0), i.OutputRegister(0), \ |
| Operand(i.InputRegister(2))); \ |
| __ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \ |
| __ teq(i.TempRegister(2), Operand(0)); \ |
| __ b(ne, &binop); \ |
| __ dmb(ISH); \ |
| } while (0) |
| |
| #define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2) \ |
| do { \ |
| Label binop; \ |
| __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \ |
| __ dmb(ISH); \ |
| __ bind(&binop); \ |
| __ ldrexd(r2, r3, i.TempRegister(0)); \ |
| __ instr1(i.TempRegister(1), r2, i.InputRegister(0), SBit::SetCC); \ |
| __ instr2(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \ |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); \ |
| __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \ |
| i.TempRegister(0)); \ |
| __ teq(i.TempRegister(3), Operand(0)); \ |
| __ b(ne, &binop); \ |
| __ dmb(ISH); \ |
| } while (0) |
| |
| #define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr) \ |
| do { \ |
| Label binop; \ |
| __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \ |
| __ dmb(ISH); \ |
| __ bind(&binop); \ |
| __ ldrexd(r2, r3, i.TempRegister(0)); \ |
| __ instr(i.TempRegister(1), r2, Operand(i.InputRegister(0))); \ |
| __ instr(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \ |
| __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \ |
| i.TempRegister(0)); \ |
| __ teq(i.TempRegister(3), Operand(0)); \ |
| __ b(ne, &binop); \ |
| __ dmb(ISH); \ |
| } while (0) |
| |
| #define ASSEMBLE_IEEE754_BINOP(name) \ |
| do { \ |
| /* TODO(bmeurer): We should really get rid of this special instruction, */ \ |
| /* and generate a CallAddress instruction instead. */ \ |
| FrameScope scope(tasm(), StackFrame::MANUAL); \ |
| __ PrepareCallCFunction(0, 2); \ |
| __ MovToFloatParameters(i.InputDoubleRegister(0), \ |
| i.InputDoubleRegister(1)); \ |
| __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \ |
|     /* Move the result into the double result register. */ \ |
| __ MovFromFloatResult(i.OutputDoubleRegister()); \ |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); \ |
| } while (0) |
| |
| #define ASSEMBLE_IEEE754_UNOP(name) \ |
| do { \ |
| /* TODO(bmeurer): We should really get rid of this special instruction, */ \ |
| /* and generate a CallAddress instruction instead. */ \ |
| FrameScope scope(tasm(), StackFrame::MANUAL); \ |
| __ PrepareCallCFunction(0, 1); \ |
| __ MovToFloatParameter(i.InputDoubleRegister(0)); \ |
| __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \ |
|     /* Move the result into the double result register. */ \ |
| __ MovFromFloatResult(i.OutputDoubleRegister()); \ |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); \ |
| } while (0) |
| |
| #define ASSEMBLE_NEON_NARROWING_OP(dt, sdt) \ |
| do { \ |
| Simd128Register dst = i.OutputSimd128Register(), \ |
| src0 = i.InputSimd128Register(0), \ |
| src1 = i.InputSimd128Register(1); \ |
| if (dst == src0 && dst == src1) { \ |
| __ vqmovn(dt, sdt, dst.low(), src0); \ |
| __ vmov(dst.high(), dst.low()); \ |
| } else if (dst == src0) { \ |
| __ vqmovn(dt, sdt, dst.low(), src0); \ |
| __ vqmovn(dt, sdt, dst.high(), src1); \ |
| } else { \ |
| __ vqmovn(dt, sdt, dst.high(), src1); \ |
| __ vqmovn(dt, sdt, dst.low(), src0); \ |
| } \ |
| } while (0) |
| |
| #define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \ |
| do { \ |
| Simd128Register dst = i.OutputSimd128Register(), \ |
| src0 = i.InputSimd128Register(0), \ |
| src1 = i.InputSimd128Register(1); \ |
| if (dst == src0) { \ |
| __ op(size, dst.low(), src0.low(), src0.high()); \ |
| if (dst == src1) { \ |
| __ vmov(dst.high(), dst.low()); \ |
| } else { \ |
| __ op(size, dst.high(), src1.low(), src1.high()); \ |
| } \ |
| } else { \ |
| __ op(size, dst.high(), src1.low(), src1.high()); \ |
| __ op(size, dst.low(), src0.low(), src0.high()); \ |
| } \ |
| } while (0) |
| |
| #define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op) \ |
| do { \ |
| __ op(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low(), \ |
| i.InputSimd128Register(1).low()); \ |
| __ op(i.OutputSimd128Register().high(), i.InputSimd128Register(0).high(), \ |
| i.InputSimd128Register(1).high()); \ |
| } while (0) |
| |
| // If the shift value is an immediate, we can call asm_imm, taking the shift |
| // value modulo 2^width. Otherwise, emit code to perform the modulus |
| // operation and call vshl. |
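| // For example (illustrative only), for NeonS8 lanes (width == 3) the |
| // register-shift case emits roughly: |
| //   and     shift, amount, #7  ; shift amount modulo the lane width |
| //   vdup.8  tmp, shift         ; broadcast it to every lane |
| //   vshl.s8 dst, src, tmp      ; per-lane left shift |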
| #define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, sz, dt) \ |
| do { \ |
| QwNeonRegister dst = i.OutputSimd128Register(); \ |
| QwNeonRegister src = i.InputSimd128Register(0); \ |
| if (instr->InputAt(1)->IsImmediate()) { \ |
| __ asm_imm(dt, dst, src, i.InputInt##width(1)); \ |
| } else { \ |
| QwNeonRegister tmp = i.TempSimd128Register(0); \ |
| Register shift = i.TempRegister(1); \ |
| constexpr int mask = (1 << width) - 1; \ |
| __ and_(shift, i.InputRegister(1), Operand(mask)); \ |
| __ vdup(sz, tmp, shift); \ |
| __ vshl(dt, dst, src, tmp); \ |
| } \ |
| } while (0) |
| |
| // If the shift value is an immediate, we can call asm_imm, taking the shift |
| // value modulo 2^width. Otherwise, emit code to perform the modulus |
| // operation and call vshl with the negated shift value (a negative shift |
| // amount is treated as a right shift). |
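| // For example (illustrative only), the NeonS8 register-shift case emits |
| // the same mask-and-broadcast sequence as above, followed by |
| // vneg.s8 tmp, tmp so that vshl shifts every lane right by the amount. |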
| #define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, sz, dt) \ |
| do { \ |
| QwNeonRegister dst = i.OutputSimd128Register(); \ |
| QwNeonRegister src = i.InputSimd128Register(0); \ |
| if (instr->InputAt(1)->IsImmediate()) { \ |
| __ asm_imm(dt, dst, src, i.InputInt##width(1)); \ |
| } else { \ |
| QwNeonRegister tmp = i.TempSimd128Register(0); \ |
| Register shift = i.TempRegister(1); \ |
| constexpr int mask = (1 << width) - 1; \ |
| __ and_(shift, i.InputRegister(1), Operand(mask)); \ |
| __ vdup(sz, tmp, shift); \ |
| __ vneg(sz, tmp, tmp); \ |
| __ vshl(dt, dst, src, tmp); \ |
| } \ |
| } while (0) |
| |
| void CodeGenerator::AssembleDeconstructFrame() { |
| __ LeaveFrame(StackFrame::MANUAL); |
| unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset()); |
| } |
| |
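| // If a frame has been set up, reload the caller's fp and lr from it (they |
| // sit at [fp] and [fp + 4]); afterwards, frame slots are addressed |
| // relative to sp in preparation for the tail call. |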
| void CodeGenerator::AssemblePrepareTailCall() { |
| if (frame_access_state()->has_frame()) { |
| __ ldm(ia, fp, lr.bit() | fp.bit()); |
| } |
| frame_access_state()->SetFrameAccessToSP(); |
| } |
| |
| void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg, |
| Register scratch1, |
| Register scratch2, |
| Register scratch3) { |
| DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3)); |
| Label done; |
| |
| // Check if current frame is an arguments adaptor frame. |
| __ ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset)); |
| __ cmp(scratch1, |
| Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); |
| __ b(ne, &done); |
| |
|   // Load the arguments count from the current arguments adaptor frame |
|   // (note that it does not include the receiver). |
| Register caller_args_count_reg = scratch1; |
| __ ldr(caller_args_count_reg, |
| MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset)); |
| __ SmiUntag(caller_args_count_reg); |
| |
| __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3); |
| __ bind(&done); |
| } |
| |
| namespace { |
| |
| void FlushPendingPushRegisters(TurboAssembler* tasm, |
| FrameAccessState* frame_access_state, |
| ZoneVector<Register>* pending_pushes) { |
| switch (pending_pushes->size()) { |
| case 0: |
| break; |
| case 1: |
| tasm->push((*pending_pushes)[0]); |
| break; |
| case 2: |
| tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]); |
| break; |
| case 3: |
| tasm->Push((*pending_pushes)[0], (*pending_pushes)[1], |
| (*pending_pushes)[2]); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| frame_access_state->IncreaseSPDelta(pending_pushes->size()); |
| pending_pushes->clear(); |
| } |
| |
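| // Grows or (optionally) shrinks the stack until the slot just above sp |
| // has index new_slot_above_sp, flushing any pending pushes first so that |
| // they land at the correct offsets. |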
| void AdjustStackPointerForTailCall( |
| TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp, |
| ZoneVector<Register>* pending_pushes = nullptr, |
| bool allow_shrinkage = true) { |
| int current_sp_offset = state->GetSPToFPSlotCount() + |
| StandardFrameConstants::kFixedSlotCountAboveFp; |
| int stack_slot_delta = new_slot_above_sp - current_sp_offset; |
| if (stack_slot_delta > 0) { |
| if (pending_pushes != nullptr) { |
| FlushPendingPushRegisters(tasm, state, pending_pushes); |
| } |
| tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize); |
| state->IncreaseSPDelta(stack_slot_delta); |
| } else if (allow_shrinkage && stack_slot_delta < 0) { |
| if (pending_pushes != nullptr) { |
| FlushPendingPushRegisters(tasm, state, pending_pushes); |
| } |
| tasm->add(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize)); |
| state->IncreaseSPDelta(stack_slot_delta); |
| } |
| } |
| |
| #if DEBUG |
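| // Checks that an atomic pair instruction delivers its low and high result |
| // words in the expected registers, whether they were allocated as outputs |
| // or, when one or both results are unused, as the last temp registers. |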
| bool VerifyOutputOfAtomicPairInstr(ArmOperandConverter* converter, |
| const Instruction* instr, Register low, |
| Register high) { |
| DCHECK_GE(instr->OutputCount() + instr->TempCount(), 2); |
| if (instr->OutputCount() == 2) { |
| return (converter->OutputRegister(0) == low && |
| converter->OutputRegister(1) == high); |
| } |
| if (instr->OutputCount() == 1) { |
| return (converter->OutputRegister(0) == low && |
| converter->TempRegister(instr->TempCount() - 1) == high) || |
| (converter->OutputRegister(0) == high && |
| converter->TempRegister(instr->TempCount() - 1) == low); |
| } |
| DCHECK_EQ(instr->OutputCount(), 0); |
| return (converter->TempRegister(instr->TempCount() - 2) == low && |
| converter->TempRegister(instr->TempCount() - 1) == high); |
| } |
| #endif |
| |
| } // namespace |
| |
| void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr, |
| int first_unused_stack_slot) { |
| ZoneVector<MoveOperands*> pushes(zone()); |
| GetPushCompatibleMoves(instr, kRegisterPush, &pushes); |
| |
| if (!pushes.empty() && |
| (LocationOperand::cast(pushes.back()->destination()).index() + 1 == |
| first_unused_stack_slot)) { |
| ArmOperandConverter g(this, instr); |
| ZoneVector<Register> pending_pushes(zone()); |
| for (auto move : pushes) { |
| LocationOperand destination_location( |
| LocationOperand::cast(move->destination())); |
| InstructionOperand source(move->source()); |
| AdjustStackPointerForTailCall( |
| tasm(), frame_access_state(), |
| destination_location.index() - pending_pushes.size(), |
| &pending_pushes); |
| // Pushes of non-register data types are not supported. |
| DCHECK(source.IsRegister()); |
| LocationOperand source_location(LocationOperand::cast(source)); |
| pending_pushes.push_back(source_location.GetRegister()); |
| // TODO(arm): We can push more than 3 registers at once. Add support in |
| // the macro-assembler for pushing a list of registers. |
| if (pending_pushes.size() == 3) { |
| FlushPendingPushRegisters(tasm(), frame_access_state(), |
| &pending_pushes); |
| } |
| move->Eliminate(); |
| } |
| FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes); |
| } |
| AdjustStackPointerForTailCall(tasm(), frame_access_state(), |
| first_unused_stack_slot, nullptr, false); |
| } |
| |
| void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr, |
| int first_unused_stack_slot) { |
| AdjustStackPointerForTailCall(tasm(), frame_access_state(), |
| first_unused_stack_slot); |
| } |
| |
| // Check that {kJavaScriptCallCodeStartRegister} is correct. |
| void CodeGenerator::AssembleCodeStartRegisterCheck() { |
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
| __ ComputeCodeStartAddress(scratch); |
| __ cmp(scratch, kJavaScriptCallCodeStartRegister); |
| __ Assert(eq, AbortReason::kWrongFunctionCodeStart); |
| } |
| |
| // Check whether the code object is marked for deoptimization. If it is, |
| // jump to the CompileLazyDeoptimizedCode builtin. In order to do this we |
| // need to: |
| //    1. read from memory the word that contains the marked-for-deopt bit, |
| //       which can be found in the flags of the referenced |
| //       {CodeDataContainer} object; |
| //    2. test kMarkedForDeoptimizationBit in those flags; and |
| //    3. if it is set, jump to the builtin. |
| void CodeGenerator::BailoutIfDeoptimized() { |
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
| int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize; |
| __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset)); |
| __ ldr(scratch, |
| FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset)); |
| __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit)); |
| __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode), |
| RelocInfo::CODE_TARGET, ne); |
| } |
| |
| void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() { |
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
| |
| // Set a mask which has all bits set in the normal case, but has all |
| // bits cleared if we are speculatively executing the wrong PC. |
| __ ComputeCodeStartAddress(scratch); |
| __ cmp(kJavaScriptCallCodeStartRegister, scratch); |
| __ mov(kSpeculationPoisonRegister, Operand(-1), SBit::LeaveCC, eq); |
| __ mov(kSpeculationPoisonRegister, Operand(0), SBit::LeaveCC, ne); |
| __ csdb(); |
| } |
| |
| void CodeGenerator::AssembleRegisterArgumentPoisoning() { |
| __ and_(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister); |
| __ and_(kContextRegister, kContextRegister, kSpeculationPoisonRegister); |
| __ and_(sp, sp, kSpeculationPoisonRegister); |
| } |
| |
| // Assembles an instruction after register allocation, producing machine code. |
| CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| Instruction* instr) { |
| ArmOperandConverter i(this, instr); |
| |
| __ MaybeCheckConstPool(); |
| InstructionCode opcode = instr->opcode(); |
| ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode); |
| switch (arch_opcode) { |
| case kArchCallCodeObject: { |
| if (instr->InputAt(0)->IsImmediate()) { |
| __ Call(i.InputCode(0), RelocInfo::CODE_TARGET); |
| } else { |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| __ CallCodeObject(reg); |
| } |
| RecordCallPosition(instr); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchCallBuiltinPointer: { |
| DCHECK(!instr->InputAt(0)->IsImmediate()); |
| Register builtin_index = i.InputRegister(0); |
| __ CallBuiltinByIndex(builtin_index); |
| RecordCallPosition(instr); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchCallWasmFunction: { |
| if (instr->InputAt(0)->IsImmediate()) { |
| Constant constant = i.ToConstant(instr->InputAt(0)); |
| Address wasm_code = static_cast<Address>(constant.ToInt32()); |
| __ Call(wasm_code, constant.rmode()); |
| } else { |
| __ Call(i.InputRegister(0)); |
| } |
| RecordCallPosition(instr); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchTailCallCodeObjectFromJSFunction: |
| case kArchTailCallCodeObject: { |
| if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { |
| AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, |
| i.TempRegister(0), i.TempRegister(1), |
| i.TempRegister(2)); |
| } |
| if (instr->InputAt(0)->IsImmediate()) { |
| __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET); |
| } else { |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| __ JumpCodeObject(reg); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| unwinding_info_writer_.MarkBlockWillExit(); |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchTailCallWasm: { |
| if (instr->InputAt(0)->IsImmediate()) { |
| Constant constant = i.ToConstant(instr->InputAt(0)); |
| Address wasm_code = static_cast<Address>(constant.ToInt32()); |
| __ Jump(wasm_code, constant.rmode()); |
| } else { |
| __ Jump(i.InputRegister(0)); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| unwinding_info_writer_.MarkBlockWillExit(); |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchTailCallAddress: { |
| CHECK(!instr->InputAt(0)->IsImmediate()); |
| Register reg = i.InputRegister(0); |
| DCHECK_IMPLIES( |
| instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister), |
| reg == kJavaScriptCallCodeStartRegister); |
| __ Jump(reg); |
| unwinding_info_writer_.MarkBlockWillExit(); |
| frame_access_state()->ClearSPDelta(); |
| frame_access_state()->SetFrameAccessToDefault(); |
| break; |
| } |
| case kArchCallJSFunction: { |
| Register func = i.InputRegister(0); |
| if (FLAG_debug_code) { |
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
| // Check the function's context matches the context argument. |
| __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset)); |
| __ cmp(cp, scratch); |
| __ Assert(eq, AbortReason::kWrongFunctionContext); |
| } |
| static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch"); |
| __ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset)); |
| __ CallCodeObject(r2); |
| RecordCallPosition(instr); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| frame_access_state()->ClearSPDelta(); |
| break; |
| } |
| case kArchPrepareCallCFunction: { |
| int const num_parameters = MiscField::decode(instr->opcode()); |
| __ PrepareCallCFunction(num_parameters); |
| // Frame alignment requires using FP-relative frame addressing. |
| frame_access_state()->SetFrameAccessToFP(); |
| break; |
| } |
| case kArchSaveCallerRegisters: { |
| fp_mode_ = |
| static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())); |
| DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); |
| // kReturnRegister0 should have been saved before entering the stub. |
| int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0); |
| DCHECK(IsAligned(bytes, kSystemPointerSize)); |
| DCHECK_EQ(0, frame_access_state()->sp_delta()); |
| frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); |
| DCHECK(!caller_registers_saved_); |
| caller_registers_saved_ = true; |
| break; |
| } |
| case kArchRestoreCallerRegisters: { |
| DCHECK(fp_mode_ == |
| static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()))); |
| DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); |
| // Don't overwrite the returned value. |
| int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0); |
| frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize)); |
| DCHECK_EQ(0, frame_access_state()->sp_delta()); |
| DCHECK(caller_registers_saved_); |
| caller_registers_saved_ = false; |
| break; |
| } |
| case kArchPrepareTailCall: |
| AssemblePrepareTailCall(); |
| break; |
| case kArchCallCFunction: { |
| int const num_parameters = MiscField::decode(instr->opcode()); |
| if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) { |
| // Put the return address in a stack slot. |
| __ str(pc, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); |
| } |
| if (instr->InputAt(0)->IsImmediate()) { |
| ExternalReference ref = i.InputExternalReference(0); |
| __ CallCFunction(ref, num_parameters); |
| } else { |
| Register func = i.InputRegister(0); |
| __ CallCFunction(func, num_parameters); |
| } |
| if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) { |
| RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt); |
| } |
| frame_access_state()->SetFrameAccessToDefault(); |
|       // Ideally, we should decrement SP delta to match the change of stack |
|       // pointer in CallCFunction. However, for certain architectures (e.g. |
|       // ARM), there may be a stricter alignment requirement, causing the |
|       // old SP to be saved on the stack. In those cases, we cannot |
|       // calculate the SP delta statically. |
| frame_access_state()->ClearSPDelta(); |
| if (caller_registers_saved_) { |
| // Need to re-sync SP delta introduced in kArchSaveCallerRegisters. |
| // Here, we assume the sequence to be: |
| // kArchSaveCallerRegisters; |
| // kArchCallCFunction; |
| // kArchRestoreCallerRegisters; |
| int bytes = |
| __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0); |
| frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); |
| } |
| break; |
| } |
| case kArchJmp: |
| AssembleArchJump(i.InputRpo(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArchBinarySearchSwitch: |
| AssembleArchBinarySearchSwitch(instr); |
| break; |
| case kArchTableSwitch: |
| AssembleArchTableSwitch(instr); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArchAbortCSAAssert: |
| DCHECK(i.InputRegister(0) == r1); |
| { |
| // We don't actually want to generate a pile of code for this, so just |
| // claim there is a stack frame, without generating one. |
| FrameScope scope(tasm(), StackFrame::NONE); |
| __ Call( |
| isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert), |
| RelocInfo::CODE_TARGET); |
| } |
| __ stop(); |
| unwinding_info_writer_.MarkBlockWillExit(); |
| break; |
| case kArchDebugBreak: |
| __ DebugBreak(); |
| break; |
| case kArchComment: |
| __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0))); |
| break; |
| case kArchThrowTerminator: |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| unwinding_info_writer_.MarkBlockWillExit(); |
| break; |
| case kArchNop: |
| // don't emit code for nops. |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArchDeoptimize: { |
| DeoptimizationExit* exit = |
| BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); |
| __ b(exit->label()); |
| break; |
| } |
| case kArchRet: |
| AssembleReturn(instr->InputAt(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArchFramePointer: |
| __ mov(i.OutputRegister(), fp); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArchParentFramePointer: |
| if (frame_access_state()->has_frame()) { |
| __ ldr(i.OutputRegister(), MemOperand(fp, 0)); |
| } else { |
| __ mov(i.OutputRegister(), fp); |
| } |
| break; |
| case kArchStackPointerGreaterThan: { |
|       // Potentially apply an offset to the current stack pointer before |
|       // the comparison, to account for the size difference between an |
|       // optimized frame and the unoptimized frames it contains. |
| |
| Register lhs_register = sp; |
| uint32_t offset; |
| |
| if (ShouldApplyOffsetToStackCheck(instr, &offset)) { |
| lhs_register = i.TempRegister(0); |
| __ sub(lhs_register, sp, Operand(offset)); |
| } |
| |
| constexpr size_t kValueIndex = 0; |
| DCHECK(instr->InputAt(kValueIndex)->IsRegister()); |
| __ cmp(lhs_register, i.InputRegister(kValueIndex)); |
| break; |
| } |
| case kArchStackCheckOffset: |
| __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset())); |
| break; |
| case kArchTruncateDoubleToI: |
| __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(), |
| i.InputDoubleRegister(0), DetermineStubCallMode()); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArchStoreWithWriteBarrier: { |
| RecordWriteMode mode = |
| static_cast<RecordWriteMode>(MiscField::decode(instr->opcode())); |
| Register object = i.InputRegister(0); |
| Register value = i.InputRegister(2); |
| |
| AddressingMode addressing_mode = |
| AddressingModeField::decode(instr->opcode()); |
| Operand offset(0); |
| if (addressing_mode == kMode_Offset_RI) { |
| int32_t immediate = i.InputInt32(1); |
| offset = Operand(immediate); |
| __ str(value, MemOperand(object, immediate)); |
| } else { |
| DCHECK_EQ(kMode_Offset_RR, addressing_mode); |
| Register reg = i.InputRegister(1); |
| offset = Operand(reg); |
| __ str(value, MemOperand(object, reg)); |
| } |
| auto ool = zone()->New<OutOfLineRecordWrite>( |
| this, object, offset, value, mode, DetermineStubCallMode(), |
| &unwinding_info_writer_); |
| __ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask, |
| ne, ool->entry()); |
| __ bind(ool->exit()); |
| break; |
| } |
| case kArchStackSlot: { |
| FrameOffset offset = |
| frame_access_state()->GetFrameOffset(i.InputInt32(0)); |
| Register base = offset.from_stack_pointer() ? sp : fp; |
| __ add(i.OutputRegister(0), base, Operand(offset.offset())); |
| break; |
| } |
| case kIeee754Float64Acos: |
| ASSEMBLE_IEEE754_UNOP(acos); |
| break; |
| case kIeee754Float64Acosh: |
| ASSEMBLE_IEEE754_UNOP(acosh); |
| break; |
| case kIeee754Float64Asin: |
| ASSEMBLE_IEEE754_UNOP(asin); |
| break; |
| case kIeee754Float64Asinh: |
| ASSEMBLE_IEEE754_UNOP(asinh); |
| break; |
| case kIeee754Float64Atan: |
| ASSEMBLE_IEEE754_UNOP(atan); |
| break; |
| case kIeee754Float64Atanh: |
| ASSEMBLE_IEEE754_UNOP(atanh); |
| break; |
| case kIeee754Float64Atan2: |
| ASSEMBLE_IEEE754_BINOP(atan2); |
| break; |
| case kIeee754Float64Cbrt: |
| ASSEMBLE_IEEE754_UNOP(cbrt); |
| break; |
| case kIeee754Float64Cos: |
| ASSEMBLE_IEEE754_UNOP(cos); |
| break; |
| case kIeee754Float64Cosh: |
| ASSEMBLE_IEEE754_UNOP(cosh); |
| break; |
| case kIeee754Float64Exp: |
| ASSEMBLE_IEEE754_UNOP(exp); |
| break; |
| case kIeee754Float64Expm1: |
| ASSEMBLE_IEEE754_UNOP(expm1); |
| break; |
| case kIeee754Float64Log: |
| ASSEMBLE_IEEE754_UNOP(log); |
| break; |
| case kIeee754Float64Log1p: |
| ASSEMBLE_IEEE754_UNOP(log1p); |
| break; |
| case kIeee754Float64Log2: |
| ASSEMBLE_IEEE754_UNOP(log2); |
| break; |
| case kIeee754Float64Log10: |
| ASSEMBLE_IEEE754_UNOP(log10); |
| break; |
| case kIeee754Float64Pow: |
| ASSEMBLE_IEEE754_BINOP(pow); |
| break; |
| case kIeee754Float64Sin: |
| ASSEMBLE_IEEE754_UNOP(sin); |
| break; |
| case kIeee754Float64Sinh: |
| ASSEMBLE_IEEE754_UNOP(sinh); |
| break; |
| case kIeee754Float64Tan: |
| ASSEMBLE_IEEE754_UNOP(tan); |
| break; |
| case kIeee754Float64Tanh: |
| ASSEMBLE_IEEE754_UNOP(tanh); |
| break; |
| case kArmAdd: |
| __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1), |
| i.OutputSBit()); |
| break; |
| case kArmAnd: |
| __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1), |
| i.OutputSBit()); |
| break; |
| case kArmBic: |
| __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1), |
| i.OutputSBit()); |
| break; |
| case kArmMul: |
| __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), |
| i.OutputSBit()); |
| break; |
| case kArmMla: |
| __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), |
| i.InputRegister(2), i.OutputSBit()); |
| break; |
| case kArmMls: { |
| CpuFeatureScope scope(tasm(), ARMv7); |
| __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), |
| i.InputRegister(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmSmull: |
| __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0), |
| i.InputRegister(1)); |
| break; |
| case kArmSmmul: |
| __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmSmmla: |
| __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), |
| i.InputRegister(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmUmull: |
| __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0), |
| i.InputRegister(1), i.OutputSBit()); |
| break; |
| case kArmSdiv: { |
| CpuFeatureScope scope(tasm(), SUDIV); |
| __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmUdiv: { |
| CpuFeatureScope scope(tasm(), SUDIV); |
| __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmMov: |
| __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit()); |
| break; |
| case kArmMvn: |
| __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit()); |
| break; |
| case kArmOrr: |
| __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1), |
| i.OutputSBit()); |
| break; |
| case kArmEor: |
| __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1), |
| i.OutputSBit()); |
| break; |
| case kArmSub: |
| __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1), |
| i.OutputSBit()); |
| break; |
| case kArmRsb: |
| __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1), |
| i.OutputSBit()); |
| break; |
| case kArmBfc: { |
| CpuFeatureScope scope(tasm(), ARMv7); |
| __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmUbfx: { |
| CpuFeatureScope scope(tasm(), ARMv7); |
| __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1), |
| i.InputInt8(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmSbfx: { |
| CpuFeatureScope scope(tasm(), ARMv7); |
| __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1), |
| i.InputInt8(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmSxtb: |
| __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmSxth: |
| __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmSxtab: |
| __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), |
| i.InputInt32(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmSxtah: |
| __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), |
| i.InputInt32(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmUxtb: |
| __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmUxth: |
| __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmUxtab: |
| __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), |
| i.InputInt32(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmUxtah: |
| __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), |
| i.InputInt32(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmRbit: { |
| CpuFeatureScope scope(tasm(), ARMv7); |
| __ rbit(i.OutputRegister(), i.InputRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmRev: |
| __ rev(i.OutputRegister(), i.InputRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmClz: |
| __ clz(i.OutputRegister(), i.InputRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmCmp: |
| __ cmp(i.InputRegister(0), i.InputOperand2(1)); |
| DCHECK_EQ(SetCC, i.OutputSBit()); |
| break; |
| case kArmCmn: |
| __ cmn(i.InputRegister(0), i.InputOperand2(1)); |
| DCHECK_EQ(SetCC, i.OutputSBit()); |
| break; |
| case kArmTst: |
| __ tst(i.InputRegister(0), i.InputOperand2(1)); |
| DCHECK_EQ(SetCC, i.OutputSBit()); |
| break; |
| case kArmTeq: |
| __ teq(i.InputRegister(0), i.InputOperand2(1)); |
| DCHECK_EQ(SetCC, i.OutputSBit()); |
| break; |
| case kArmAddPair: |
| // i.InputRegister(0) ... left low word. |
| // i.InputRegister(1) ... left high word. |
| // i.InputRegister(2) ... right low word. |
| // i.InputRegister(3) ... right high word. |
| __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2), |
| SBit::SetCC); |
| __ adc(i.OutputRegister(1), i.InputRegister(1), |
| Operand(i.InputRegister(3))); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmSubPair: |
| // i.InputRegister(0) ... left low word. |
| // i.InputRegister(1) ... left high word. |
| // i.InputRegister(2) ... right low word. |
| // i.InputRegister(3) ... right high word. |
| __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2), |
| SBit::SetCC); |
| __ sbc(i.OutputRegister(1), i.InputRegister(1), |
| Operand(i.InputRegister(3))); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmMulPair: |
| // i.InputRegister(0) ... left low word. |
| // i.InputRegister(1) ... left high word. |
| // i.InputRegister(2) ... right low word. |
| // i.InputRegister(3) ... right high word. |
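|       // The low 64 bits of the result are |
|       //   left_low * right_low |
|       //   + ((left_low * right_high + left_high * right_low) << 32), |
|       // computed below as one umull plus two mla into the high word. |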
| __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0), |
| i.InputRegister(2)); |
| __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3), |
| i.OutputRegister(1)); |
| __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1), |
| i.OutputRegister(1)); |
| break; |
| case kArmLslPair: { |
| Register second_output = |
| instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0); |
| if (instr->InputAt(2)->IsImmediate()) { |
| __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0), |
| i.InputRegister(1), i.InputInt32(2)); |
| } else { |
| __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0), |
| i.InputRegister(1), i.InputRegister(2)); |
| } |
| break; |
| } |
| case kArmLsrPair: { |
| Register second_output = |
| instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0); |
| if (instr->InputAt(2)->IsImmediate()) { |
| __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0), |
| i.InputRegister(1), i.InputInt32(2)); |
| } else { |
| __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0), |
| i.InputRegister(1), i.InputRegister(2)); |
| } |
| break; |
| } |
| case kArmAsrPair: { |
| Register second_output = |
| instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0); |
| if (instr->InputAt(2)->IsImmediate()) { |
| __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0), |
| i.InputRegister(1), i.InputInt32(2)); |
| } else { |
| __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0), |
| i.InputRegister(1), i.InputRegister(2)); |
| } |
| break; |
| } |
| case kArmVcmpF32: |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ VFPCompareAndSetFlags(i.InputFloatRegister(0), |
| i.InputFloatRegister(1)); |
| } else { |
| DCHECK(instr->InputAt(1)->IsImmediate()); |
| // 0.0 is the only immediate supported by vcmp instructions. |
| DCHECK_EQ(0.0f, i.InputFloat32(1)); |
| __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1)); |
| } |
| DCHECK_EQ(SetCC, i.OutputSBit()); |
| break; |
| case kArmVaddF32: |
| __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0), |
| i.InputFloatRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVsubF32: |
| __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0), |
| i.InputFloatRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmulF32: |
| __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0), |
| i.InputFloatRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmlaF32: |
| __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1), |
| i.InputFloatRegister(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmlsF32: |
| __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1), |
| i.InputFloatRegister(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVdivF32: |
| __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0), |
| i.InputFloatRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVsqrtF32: |
| __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0)); |
| break; |
| case kArmVabsF32: |
| __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0)); |
| break; |
| case kArmVnegF32: |
| __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0)); |
| break; |
| case kArmVcmpF64: |
| if (instr->InputAt(1)->IsFPRegister()) { |
| __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), |
| i.InputDoubleRegister(1)); |
| } else { |
| DCHECK(instr->InputAt(1)->IsImmediate()); |
| // 0.0 is the only immediate supported by vcmp instructions. |
| DCHECK_EQ(0.0, i.InputDouble(1)); |
| __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1)); |
| } |
| DCHECK_EQ(SetCC, i.OutputSBit()); |
| break; |
| case kArmVaddF64: |
| __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputDoubleRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVsubF64: |
| __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputDoubleRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmulF64: |
| __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputDoubleRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmlaF64: |
| __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1), |
| i.InputDoubleRegister(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmlsF64: |
| __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1), |
| i.InputDoubleRegister(2)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVdivF64: |
| __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0), |
| i.InputDoubleRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmodF64: { |
| // TODO(bmeurer): We should really get rid of this special instruction, |
| // and generate a CallAddress instruction instead. |
| FrameScope scope(tasm(), StackFrame::MANUAL); |
| __ PrepareCallCFunction(0, 2); |
| __ MovToFloatParameters(i.InputDoubleRegister(0), |
| i.InputDoubleRegister(1)); |
| __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2); |
|       // Move the result into the double result register. |
| __ MovFromFloatResult(i.OutputDoubleRegister()); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVsqrtF64: |
| __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| break; |
| case kArmVabsF64: |
| __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| break; |
| case kArmVnegF64: |
| __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| break; |
| case kArmVrintmF32: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| if (instr->InputAt(0)->IsSimd128Register()) { |
| __ vrintm(NeonS32, i.OutputSimd128Register(), |
| i.InputSimd128Register(0)); |
| } else { |
| __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0)); |
| } |
| break; |
| } |
| case kArmVrintmF64: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| break; |
| } |
| case kArmVrintpF32: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| if (instr->InputAt(0)->IsSimd128Register()) { |
| __ vrintp(NeonS32, i.OutputSimd128Register(), |
| i.InputSimd128Register(0)); |
| } else { |
| __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0)); |
| } |
| break; |
| } |
| case kArmVrintpF64: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| break; |
| } |
| case kArmVrintzF32: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| if (instr->InputAt(0)->IsSimd128Register()) { |
| __ vrintz(NeonS32, i.OutputSimd128Register(), |
| i.InputSimd128Register(0)); |
| } else { |
| __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0)); |
| } |
| break; |
| } |
| case kArmVrintzF64: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| break; |
| } |
| case kArmVrintaF64: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| break; |
| } |
| case kArmVrintnF32: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| if (instr->InputAt(0)->IsSimd128Register()) { |
| __ vrintn(NeonS32, i.OutputSimd128Register(), |
| i.InputSimd128Register(0)); |
| } else { |
| __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0)); |
| } |
| break; |
| } |
| case kArmVrintnF64: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); |
| break; |
| } |
| case kArmVcvtF32F64: { |
| __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtF64F32: { |
| __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtF32S32: { |
| UseScratchRegisterScope temps(tasm()); |
| SwVfpRegister scratch = temps.AcquireS(); |
| __ vmov(scratch, i.InputRegister(0)); |
| __ vcvt_f32_s32(i.OutputFloatRegister(), scratch); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtF32U32: { |
| UseScratchRegisterScope temps(tasm()); |
| SwVfpRegister scratch = temps.AcquireS(); |
| __ vmov(scratch, i.InputRegister(0)); |
| __ vcvt_f32_u32(i.OutputFloatRegister(), scratch); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtF64S32: { |
| UseScratchRegisterScope temps(tasm()); |
| SwVfpRegister scratch = temps.AcquireS(); |
| __ vmov(scratch, i.InputRegister(0)); |
| __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtF64U32: { |
| UseScratchRegisterScope temps(tasm()); |
| SwVfpRegister scratch = temps.AcquireS(); |
| __ vmov(scratch, i.InputRegister(0)); |
| __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtS32F32: { |
| UseScratchRegisterScope temps(tasm()); |
| SwVfpRegister scratch = temps.AcquireS(); |
| __ vcvt_s32_f32(scratch, i.InputFloatRegister(0)); |
| __ vmov(i.OutputRegister(), scratch); |
| bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode()); |
| if (set_overflow_to_min_i32) { |
| // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead, |
| // because INT32_MIN allows easier out-of-bounds detection. |
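|         // cmn(out, 1) sets the overflow flag exactly when the result is |
|         // INT32_MAX, i.e. when the preceding vcvt saturated upwards; |
|         // negative saturation already produces INT32_MIN. |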
| __ cmn(i.OutputRegister(), Operand(1)); |
| __ mov(i.OutputRegister(), Operand(INT32_MIN), SBit::LeaveCC, vs); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtU32F32: { |
| UseScratchRegisterScope temps(tasm()); |
| SwVfpRegister scratch = temps.AcquireS(); |
| __ vcvt_u32_f32(scratch, i.InputFloatRegister(0)); |
| __ vmov(i.OutputRegister(), scratch); |
| bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode()); |
| if (set_overflow_to_min_u32) { |
| // Avoid UINT32_MAX as an overflow indicator and use 0 instead, |
| // because 0 allows easier out-of-bounds detection. |
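|         // cmn(out, 1) sets the carry flag exactly when the result is |
|         // UINT32_MAX; the adc below then wraps such a result around to 0. |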
| __ cmn(i.OutputRegister(), Operand(1)); |
| __ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtS32F64: { |
| UseScratchRegisterScope temps(tasm()); |
| SwVfpRegister scratch = temps.AcquireS(); |
| __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0)); |
| __ vmov(i.OutputRegister(), scratch); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVcvtU32F64: { |
| UseScratchRegisterScope temps(tasm()); |
| SwVfpRegister scratch = temps.AcquireS(); |
| __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0)); |
| __ vmov(i.OutputRegister(), scratch); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVmovU32F32: |
| __ vmov(i.OutputRegister(), i.InputFloatRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmovF32U32: |
| __ vmov(i.OutputFloatRegister(), i.InputRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmovLowU32F64: |
| __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmovLowF64U32: |
| __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmovHighU32F64: |
| __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmovHighF64U32: |
| __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmovF64U32U32: |
| __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVmovU32U32F64: |
| __ vmov(i.OutputRegister(0), i.OutputRegister(1), |
| i.InputDoubleRegister(0)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmLdrb: |
| __ ldrb(i.OutputRegister(), i.InputOffset()); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, i); |
| break; |
| case kArmLdrsb: |
| __ ldrsb(i.OutputRegister(), i.InputOffset()); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, i); |
| break; |
| case kArmStrb: |
| __ strb(i.InputRegister(0), i.InputOffset(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmLdrh: |
| __ ldrh(i.OutputRegister(), i.InputOffset()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, i); |
| break; |
| case kArmLdrsh: |
| __ ldrsh(i.OutputRegister(), i.InputOffset()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, i); |
| break; |
| case kArmStrh: |
| __ strh(i.InputRegister(0), i.InputOffset(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmLdr: |
| __ ldr(i.OutputRegister(), i.InputOffset()); |
| EmitWordLoadPoisoningIfNeeded(this, opcode, i); |
| break; |
| case kArmStr: |
| __ str(i.InputRegister(0), i.InputOffset(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVldrF32: { |
| const MemoryAccessMode access_mode = |
| static_cast<MemoryAccessMode>(MiscField::decode(opcode)); |
| if (access_mode == kMemoryAccessPoisoned) { |
| UseScratchRegisterScope temps(tasm()); |
| Register address = temps.Acquire(); |
| ComputePoisonedAddressForLoad(this, opcode, i, address); |
| __ vldr(i.OutputFloatRegister(), address, 0); |
| } else { |
| __ vldr(i.OutputFloatRegister(), i.InputOffset()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVstrF32: |
| __ vstr(i.InputFloatRegister(0), i.InputOffset(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmVld1F64: { |
| __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()), |
| i.NeonInputOperand(0)); |
| break; |
| } |
| case kArmVst1F64: { |
| __ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)), |
| i.NeonInputOperand(1)); |
| break; |
| } |
| case kArmVld1S128: { |
| __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()), |
| i.NeonInputOperand(0)); |
| break; |
| } |
| case kArmVst1S128: { |
| __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)), |
| i.NeonInputOperand(1)); |
| break; |
| } |
| case kArmVldrF64: { |
| const MemoryAccessMode access_mode = |
| static_cast<MemoryAccessMode>(MiscField::decode(opcode)); |
| if (access_mode == kMemoryAccessPoisoned) { |
| UseScratchRegisterScope temps(tasm()); |
| Register address = temps.Acquire(); |
| ComputePoisonedAddressForLoad(this, opcode, i, address); |
| __ vldr(i.OutputDoubleRegister(), address, 0); |
| } else { |
| __ vldr(i.OutputDoubleRegister(), i.InputOffset()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmVstrF64: |
| __ vstr(i.InputDoubleRegister(0), i.InputOffset(1)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmFloat32Max: { |
| SwVfpRegister result = i.OutputFloatRegister(); |
| SwVfpRegister left = i.InputFloatRegister(0); |
| SwVfpRegister right = i.InputFloatRegister(1); |
| if (left == right) { |
| __ Move(result, left); |
| } else { |
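| // FloatMax resolves the easy orderings inline and branches to the |
| // out-of-line path for the inputs a plain compare cannot decide, in |
| // particular NaNs (and zeros of opposite sign on pre-ARMv8 cores). |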
| auto ool = zone()->New<OutOfLineFloat32Max>(this, result, left, right); |
| __ FloatMax(result, left, right, ool->entry()); |
| __ bind(ool->exit()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmFloat64Max: { |
| DwVfpRegister result = i.OutputDoubleRegister(); |
| DwVfpRegister left = i.InputDoubleRegister(0); |
| DwVfpRegister right = i.InputDoubleRegister(1); |
| if (left == right) { |
| __ Move(result, left); |
| } else { |
| auto ool = zone()->New<OutOfLineFloat64Max>(this, result, left, right); |
| __ FloatMax(result, left, right, ool->entry()); |
| __ bind(ool->exit()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmFloat32Min: { |
| SwVfpRegister result = i.OutputFloatRegister(); |
| SwVfpRegister left = i.InputFloatRegister(0); |
| SwVfpRegister right = i.InputFloatRegister(1); |
| if (left == right) { |
| __ Move(result, left); |
| } else { |
| auto ool = zone()->New<OutOfLineFloat32Min>(this, result, left, right); |
| __ FloatMin(result, left, right, ool->entry()); |
| __ bind(ool->exit()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmFloat64Min: { |
| DwVfpRegister result = i.OutputDoubleRegister(); |
| DwVfpRegister left = i.InputDoubleRegister(0); |
| DwVfpRegister right = i.InputDoubleRegister(1); |
| if (left == right) { |
| __ Move(result, left); |
| } else { |
| auto ool = zone()->New<OutOfLineFloat64Min>(this, result, left, right); |
| __ FloatMin(result, left, right, ool->entry()); |
| __ bind(ool->exit()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmFloat64SilenceNaN: { |
| DwVfpRegister value = i.InputDoubleRegister(0); |
| DwVfpRegister result = i.OutputDoubleRegister(); |
| __ VFPCanonicalizeNaN(result, value); |
| break; |
| } |
| case kArmPush: |
| if (instr->InputAt(0)->IsFPRegister()) { |
| LocationOperand* op = LocationOperand::cast(instr->InputAt(0)); |
| switch (op->representation()) { |
| case MachineRepresentation::kFloat32: |
| __ vpush(i.InputFloatRegister(0)); |
| frame_access_state()->IncreaseSPDelta(1); |
| break; |
| case MachineRepresentation::kFloat64: |
| __ vpush(i.InputDoubleRegister(0)); |
| frame_access_state()->IncreaseSPDelta(kDoubleSize / |
| kSystemPointerSize); |
| break; |
| case MachineRepresentation::kSimd128: { |
| __ vpush(i.InputSimd128Register(0)); |
| frame_access_state()->IncreaseSPDelta(kSimd128Size / |
| kSystemPointerSize); |
| break; |
| } |
| default: |
| UNREACHABLE(); |
| break; |
| } |
| } else { |
| __ push(i.InputRegister(0)); |
| frame_access_state()->IncreaseSPDelta(1); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| case kArmPoke: { |
| int const slot = MiscField::decode(instr->opcode()); |
| __ str(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize)); |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmPeek: { |
| int reverse_slot = i.InputInt32(0); |
| int offset = |
| FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot); |
| if (instr->OutputAt(0)->IsFPRegister()) { |
| LocationOperand* op = LocationOperand::cast(instr->OutputAt(0)); |
| if (op->representation() == MachineRepresentation::kFloat64) { |
| __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset)); |
| } else if (op->representation() == MachineRepresentation::kFloat32) { |
| __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset)); |
| } else { |
| DCHECK_EQ(MachineRepresentation::kSimd128, op->representation()); |
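| // vld1 only accepts a plain register address, so materialize |
| // fp + offset in a scratch register first. |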
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
| __ add(scratch, fp, Operand(offset)); |
| __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()), |
| NeonMemOperand(scratch)); |
| } |
| } else { |
| __ ldr(i.OutputRegister(), MemOperand(fp, offset)); |
| } |
| break; |
| } |
| case kArmDmbIsh: { |
| __ dmb(ISH); |
| break; |
| } |
| case kArmDsbIsb: { |
| __ dsb(SY); |
| __ isb(SY); |
| break; |
| } |
| case kArchWordPoisonOnSpeculation: |
| __ and_(i.OutputRegister(0), i.InputRegister(0), |
| Operand(kSpeculationPoisonRegister)); |
| break; |
| case kArmF64x2Splat: { |
| Simd128Register dst = i.OutputSimd128Register(); |
| DoubleRegister src = i.InputDoubleRegister(0); |
| __ Move(dst.low(), src); |
| __ Move(dst.high(), src); |
| break; |
| } |
| case kArmF64x2ExtractLane: { |
| __ ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0), |
| i.InputInt8(1)); |
| break; |
| } |
| case kArmF64x2ReplaceLane: { |
| __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputDoubleRegister(2), i.InputInt8(1)); |
| break; |
| } |
| case kArmF64x2Abs: { |
| __ vabs(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low()); |
| __ vabs(i.OutputSimd128Register().high(), |
| i.InputSimd128Register(0).high()); |
| break; |
| } |
| case kArmF64x2Neg: { |
| __ vneg(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low()); |
| __ vneg(i.OutputSimd128Register().high(), |
| i.InputSimd128Register(0).high()); |
| break; |
| } |
| case kArmF64x2Sqrt: { |
| __ vsqrt(i.OutputSimd128Register().low(), |
| i.InputSimd128Register(0).low()); |
| __ vsqrt(i.OutputSimd128Register().high(), |
| i.InputSimd128Register(0).high()); |
| break; |
| } |
| case kArmF64x2Add: { |
| ASSEMBLE_F64X2_ARITHMETIC_BINOP(vadd); |
| break; |
| } |
| case kArmF64x2Sub: { |
| ASSEMBLE_F64X2_ARITHMETIC_BINOP(vsub); |
| break; |
| } |
| case kArmF64x2Mul: { |
| ASSEMBLE_F64X2_ARITHMETIC_BINOP(vmul); |
| break; |
| } |
| case kArmF64x2Div: { |
| ASSEMBLE_F64X2_ARITHMETIC_BINOP(vdiv); |
| break; |
| } |
| case kArmF64x2Min: { |
| Simd128Register result = i.OutputSimd128Register(); |
| Simd128Register left = i.InputSimd128Register(0); |
| Simd128Register right = i.InputSimd128Register(1); |
| if (left == right) { |
| __ Move(result, left); |
| } else { |
| auto ool_low = zone()->New<OutOfLineFloat64Min>( |
| this, result.low(), left.low(), right.low()); |
| auto ool_high = zone()->New<OutOfLineFloat64Min>( |
| this, result.high(), left.high(), right.high()); |
| __ FloatMin(result.low(), left.low(), right.low(), ool_low->entry()); |
| __ bind(ool_low->exit()); |
| __ FloatMin(result.high(), left.high(), right.high(), |
| ool_high->entry()); |
| __ bind(ool_high->exit()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| case kArmF64x2Max: { |
| Simd128Register result = i.OutputSimd128Register(); |
| Simd128Register left = i.InputSimd128Register(0); |
| Simd128Register right = i.InputSimd128Register(1); |
| if (left == right) { |
| __ Move(result, left); |
| } else { |
| auto ool_low = zone()->New<OutOfLineFloat64Max>( |
| this, result.low(), left.low(), right.low()); |
| auto ool_high = zone()->New<OutOfLineFloat64Max>( |
| this, result.high(), left.high(), right.high()); |
| __ FloatMax(result.low(), left.low(), right.low(), ool_low->entry()); |
| __ bind(ool_low->exit()); |
| __ FloatMax(result.high(), left.high(), right.high(), |
| ool_high->entry()); |
| __ bind(ool_high->exit()); |
| } |
| DCHECK_EQ(LeaveCC, i.OutputSBit()); |
| break; |
| } |
| #undef ASSEMBLE_F64X2_ARITHMETIC_BINOP |
| case kArmF64x2Eq: { |
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
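| // ARMv7 NEON has no f64x2 compare, so compare each half with VFP, |
| // materialize 0 / -1 in a core register, and copy it into both words |
| // of the corresponding output half. |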
| __ mov(scratch, Operand(0)); |
| __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(), |
| i.InputSimd128Register(1).low()); |
| __ mov(scratch, Operand(-1), LeaveCC, eq); |
| __ vmov(i.OutputSimd128Register().low(), scratch, scratch); |
| |
| __ mov(scratch, Operand(0)); |
| __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(), |
| i.InputSimd128Register(1).high()); |
| __ mov(scratch, Operand(-1), LeaveCC, eq); |
| __ vmov(i.OutputSimd128Register().high(), scratch, scratch); |
| break; |
| } |
| case kArmF64x2Ne: { |
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
| __ mov(scratch, Operand(0)); |
| __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(), |
| i.InputSimd128Register(1).low()); |
| __ mov(scratch, Operand(-1), LeaveCC, ne); |
| __ vmov(i.OutputSimd128Register().low(), scratch, scratch); |
| |
| __ mov(scratch, Operand(0)); |
| __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(), |
| i.InputSimd128Register(1).high()); |
| __ mov(scratch, Operand(-1), LeaveCC, ne); |
| __ vmov(i.OutputSimd128Register().high(), scratch, scratch); |
| break; |
| } |
| case kArmF64x2Lt: { |
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
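| // After a VFP compare, mi means less-than while cs (carry set) covers |
| // equal, greater, and unordered, so the two conditional movs together |
| // write the lane for every possible input, NaNs included. |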
| __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(), |
| i.InputSimd128Register(1).low()); |
| __ mov(scratch, Operand(0), LeaveCC, cs); |
| __ mov(scratch, Operand(-1), LeaveCC, mi); |
| __ vmov(i.OutputSimd128Register().low(), scratch, scratch); |
| |
| __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(), |
| i.InputSimd128Register(1).high()); |
| __ mov(scratch, Operand(0), LeaveCC, cs); |
| __ mov(scratch, Operand(-1), LeaveCC, mi); |
| __ vmov(i.OutputSimd128Register().high(), scratch, scratch); |
| break; |
| } |
| case kArmF64x2Le: { |
| UseScratchRegisterScope temps(tasm()); |
| Register scratch = temps.Acquire(); |
| __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(), |
| i.InputSimd128Register(1).low()); |
| __ mov(scratch, Operand(0), LeaveCC, hi); |
| __ mov(scratch, Operand(-1), LeaveCC, ls); |
| __ vmov(i.OutputSimd128Register().low(), scratch, scratch); |
| |
| __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(), |
| i.InputSimd128Register(1).high()); |
| __ mov(scratch, Operand(0), LeaveCC, hi); |
| __ mov(scratch, Operand(-1), LeaveCC, ls); |
| __ vmov(i.OutputSimd128Register().high(), scratch, scratch); |
| break; |
| } |
| case kArmF64x2Pmin: { |
| Simd128Register dst = i.OutputSimd128Register(); |
| Simd128Register lhs = i.InputSimd128Register(0); |
| Simd128Register rhs = i.InputSimd128Register(1); |
| DCHECK_EQ(dst, lhs); |
| |
| // Move rhs into dst only when rhs is strictly less than lhs (mi). |
| __ VFPCompareAndSetFlags(rhs.low(), lhs.low()); |
| __ vmov(dst.low(), rhs.low(), mi); |
| __ VFPCompareAndSetFlags(rhs.high(), lhs.high()); |
| __ vmov(dst.high(), rhs.high(), mi); |
| break; |
| } |
| case kArmF64x2Pmax: { |
| Simd128Register dst = i.OutputSimd128Register(); |
| Simd128Register lhs = i.InputSimd128Register(0); |
| Simd128Register rhs = i.InputSimd128Register(1); |
| DCHECK_EQ(dst, lhs); |
| |
| // Move rhs into dst only when rhs is strictly greater than lhs (gt). |
| __ VFPCompareAndSetFlags(rhs.low(), lhs.low()); |
| __ vmov(dst.low(), rhs.low(), gt); |
| __ VFPCompareAndSetFlags(rhs.high(), lhs.high()); |
| __ vmov(dst.high(), rhs.high(), gt); |
| break; |
| } |
| case kArmF64x2Ceil: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| Simd128Register dst = i.OutputSimd128Register(); |
| Simd128Register src = i.InputSimd128Register(0); |
| __ vrintp(dst.low(), src.low()); |
| __ vrintp(dst.high(), src.high()); |
| break; |
| } |
| case kArmF64x2Floor: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| Simd128Register dst = i.OutputSimd128Register(); |
| Simd128Register src = i.InputSimd128Register(0); |
| __ vrintm(dst.low(), src.low()); |
| __ vrintm(dst.high(), src.high()); |
| break; |
| } |
| case kArmF64x2Trunc: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| Simd128Register dst = i.OutputSimd128Register(); |
| Simd128Register src = i.InputSimd128Register(0); |
| __ vrintz(dst.low(), src.low()); |
| __ vrintz(dst.high(), src.high()); |
| break; |
| } |
| case kArmF64x2NearestInt: { |
| CpuFeatureScope scope(tasm(), ARMv8); |
| Simd128Register dst = i.OutputSimd128Register(); |
| Simd128Register src = i.InputSimd128Register(0); |
| __ vrintn(dst.low(), src.low()); |
| __ vrintn(dst.high(), src.high()); |
| break; |
| } |
| case kArmI64x2SplatI32Pair: { |
| Simd128Register dst = i.OutputSimd128Register(); |
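| // Splat the low word into all four 32-bit lanes, then overwrite the |
| // odd lanes with the high word, yielding the 64-bit pair in both |
| // halves. |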
| __ vdup(Neon32, dst, i.InputRegister(0)); |
| __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 1); |
| __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 3); |
| break; |
| } |
| case kArmI64x2ReplaceLaneI32Pair: { |
| Simd128Register dst = i.OutputSimd128Register(); |
| int8_t lane = i.InputInt8(1); |
| __ ReplaceLane(dst, dst, i.InputRegister(2), NeonS32, lane * 2); |
| __ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1); |
| break; |
| } |
| case kArmI64x2Add: { |
| __ vadd(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI64x2Sub: { |
| __ vsub(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI64x2Mul: { |
| QwNeonRegister dst = i.OutputSimd128Register(); |
| QwNeonRegister left = i.InputSimd128Register(0); |
| QwNeonRegister right = i.InputSimd128Register(1); |
| QwNeonRegister tmp1 = i.TempSimd128Register(0); |
| QwNeonRegister tmp2 = i.TempSimd128Register(1); |
| |
| // This algorithm uses vector operations to perform 64-bit integer |
| // multiplication by splitting each operand into high and low 32-bit |
| // halves. The tricky part is getting the halves into the right places |
| // inside the NEON registers, so that we need as few vmull and vmlal |
| // instructions as possible. |
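| // |
| // Per 64-bit lane, with a = a_hi:a_lo and b = b_hi:b_lo, this computes |
| //   a * b mod 2^64 |
| //     = ((a_lo * b_hi + a_hi * b_lo) << 32) + a_lo * b_lo, |
| // since the a_hi * b_hi term only contributes at bit 64 and above. |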
| |
| // Move left and right into temporaries, they will be modified by vtrn. |
| __ vmov(tmp1, left); |
| __ vmov(tmp2, right); |
| |
| // This diagram shows how the 64-bit integers fit into NEON registers. |
| // |
| // [q.high()| q.low()] |
| // left/tmp1: [ a3, a2 | a1, a0 ] |
| // right/tmp2: [ b3, b2 | b1, b0 ] |
| // |
| // We want to multiply the low 32 bits of left with the high 32 bits of |
| // right, for each lane, i.e. a2 * b3, a0 * b1. However, vmull takes two |
| // input d registers and multiplies the corresponding 32-bit halves, |
| // giving the 64-bit products a1 * b1, a0 * b0. To make this work we |
| // transpose the vectors, so that the low 32 bits of each 64-bit integer |
| // end up in the same lane, and similarly for the high 32 bits. |
| __ vtrn(Neon32, tmp1.low(), tmp1.high()); |
| // tmp1: [ a3, a1 | a2, a0 ] |
| __ vtrn(Neon32, tmp2.low(), tmp2.high()); |
| // tmp2: [ b3, b1 | b2, b0 ] |
| |
| __ vmull(NeonU32, dst, tmp1.low(), tmp2.high()); |
| // dst: [ a2*b3 | a0*b1 ] |
| __ vmlal(NeonU32, dst, tmp1.high(), tmp2.low()); |
| // dst: [ a2*b3 + a3*b2 | a0*b1 + a1*b0 ] |
| __ vshl(NeonU64, dst, dst, 32); |
| // dst: [ (a2*b3 + a3*b2) << 32 | (a0*b1 + a1*b0) << 32 ] |
| |
| __ vmlal(NeonU32, dst, tmp1.low(), tmp2.low()); |
| // dst: [ (a2*b3 + a3*b2)<<32 + (a2*b2) | (a0*b1 + a1*b0)<<32 + (a0*b0) ] |
| break; |
| } |
| case kArmI64x2Neg: { |
| Simd128Register dst = i.OutputSimd128Register(); |
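| // NEON has no 64-bit integer vneg, so compute 0 - src instead. |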
| __ vmov(dst, uint64_t{0}); |
| __ vsub(Neon64, dst, dst, i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmI64x2Shl: { |
| ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 6, Neon32, NeonS64); |
| break; |
| } |
| case kArmI64x2ShrS: { |
| // Only the least significant byte of each lane of the shift vector is |
| // used, so we can use Neon32 as the size. |
| ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonS64); |
| break; |
| } |
| case kArmI64x2ShrU: { |
| // Only the least significant byte of each lane of the shift vector is |
| // used, so we can use Neon32 as the size. |
| ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonU64); |
| break; |
| } |
| case kArmF32x4Splat: { |
| int src_code = i.InputFloatRegister(0).code(); |
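| // Each S register aliases half of a D register (s<2n+i> is lane i of |
| // d<n>), so locate the D register and lane holding the input scalar |
| // and duplicate that 32-bit lane. |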
| __ vdup(Neon32, i.OutputSimd128Register(), |
| DwVfpRegister::from_code(src_code / 2), src_code % 2); |
| break; |
| } |
| case kArmF32x4ExtractLane: { |
| __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0), |
| i.InputInt8(1)); |
| break; |
| } |
| case kArmF32x4ReplaceLane: { |
| __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputFloatRegister(2), i.InputInt8(1)); |
| break; |
| } |
| case kArmF32x4SConvertI32x4: { |
| __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmF32x4UConvertI32x4: { |
| __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmF32x4Abs: { |
| __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmF32x4Neg: { |
| __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmF32x4Sqrt: { |
| QwNeonRegister dst = i.OutputSimd128Register(); |
| QwNeonRegister src1 = i.InputSimd128Register(0); |
| DCHECK_EQ(dst, q0); |
| DCHECK_EQ(src1, q0); |
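| // NEON only offers an approximate reciprocal square root (vrsqrte), |
| // so take the exact square root lane by lane in scalar VFP. Only q0-q7 |
| // alias the S register file, hence the register DCHECKs above. |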
| #define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane) |
| __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0)); |
| __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1)); |
| __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2)); |
| __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3)); |
| #undef S_FROM_Q |
| break; |
| } |
| case kArmF32x4RecipApprox: { |
| __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmF32x4RecipSqrtApprox: { |
| __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmF32x4Add: { |
| __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmF32x4AddHoriz: { |
| Simd128Register dst = i.OutputSimd128Register(), |
| src0 = i.InputSimd128Register(0), |
| src1 = i.InputSimd128Register(1); |
| // Make sure we don't overwrite source data before it's used. |
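| // vpadd(dst, n, m) writes the sum of n's two 32-bit lanes to dst's |
| // lane 0 and the sum of m's two lanes to dst's lane 1. |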
| if (dst == src0) { |
| __ vpadd(dst.low(), src0.low(), src0.high()); |
| if (dst == src1) { |
| __ vmov(dst.high(), dst.low()); |
| } else { |
| __ vpadd(dst.high(), src1.low(), src1.high()); |
| } |
| } else { |
| __ vpadd(dst.high(), src1.low(), src1.high()); |
| __ vpadd(dst.low(), src0.low(), src0.high()); |
| } |
| break; |
| } |
| case kArmF32x4Sub: { |
| __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmF32x4Mul: { |
| __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmF32x4Div: { |
| QwNeonRegister dst = i.OutputSimd128Register(); |
| QwNeonRegister src1 = i.InputSimd128Register(0); |
| QwNeonRegister src2 = i.InputSimd128Register(1); |
| DCHECK_EQ(dst, q0); |
| DCHECK_EQ(src1, q0); |
| DCHECK_EQ(src2, q1); |
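| // NEON has no vector divide, so divide lane by lane in scalar VFP; |
| // the fixed q0/q1 operands guarantee the S register aliases exist. |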
| #define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane) |
| __ vdiv(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0), S_FROM_Q(src2, 0)); |
| __ vdiv(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1), S_FROM_Q(src2, 1)); |
| __ vdiv(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2), S_FROM_Q(src2, 2)); |
| __ vdiv(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3), S_FROM_Q(src2, 3)); |
| #undef S_FROM_Q |
| break; |
| } |
| case kArmF32x4Min: { |
| __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmF32x4Max: { |
| __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmF32x4Eq: { |
| __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmF32x4Ne: { |
| Simd128Register dst = i.OutputSimd128Register(); |
| __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1)); |
| __ vmvn(dst, dst); |
| break; |
| } |
| case kArmF32x4Lt: { |
| __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1), |
| i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmF32x4Le: { |
| __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1), |
| i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmF32x4Pmin: { |
| Simd128Register dst = i.OutputSimd128Register(); |
| Simd128Register lhs = i.InputSimd128Register(0); |
| Simd128Register rhs = i.InputSimd128Register(1); |
| DCHECK_NE(dst, lhs); |
| DCHECK_NE(dst, rhs); |
| |
| // f32x4.pmin(lhs, rhs) |
| // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs)) |
| // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs)) |
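| // vbsl uses dst as the mask: dst = (dst & rhs) | (~dst & lhs), so |
| // lanes where lhs > rhs take rhs and all other lanes keep lhs. |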
| __ vcgt(dst, lhs, rhs); |
| __ vbsl(dst, rhs, lhs); |
| break; |
| } |
| case kArmF32x4Pmax: { |
| Simd128Register dst = i.OutputSimd128Register(); |
| Simd128Register lhs = i.InputSimd128Register(0); |
| Simd128Register rhs = i.InputSimd128Register(1); |
| DCHECK_NE(dst, lhs); |
| DCHECK_NE(dst, rhs); |
| |
| // f32x4.pmax(lhs, rhs) |
| // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs)) |
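| // Same selection trick as pmin above: the rhs > lhs mask picks the |
| // rhs lanes. |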
| __ vcgt(dst, rhs, lhs); |
| __ vbsl(dst, rhs, lhs); |
| break; |
| } |
| case kArmI32x4Splat: { |
| __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0)); |
| break; |
| } |
| case kArmI32x4ExtractLane: { |
| __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32, |
| i.InputInt8(1)); |
| break; |
| } |
| case kArmI32x4ReplaceLane: { |
| __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputRegister(2), NeonS32, i.InputInt8(1)); |
| break; |
| } |
| case kArmI32x4SConvertF32x4: { |
| __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmI32x4SConvertI16x8Low: { |
| __ vmovl(NeonS16, i.OutputSimd128Register(), |
| i.InputSimd128Register(0).low()); |
| break; |
| } |
| case kArmI32x4SConvertI16x8High: { |
| __ vmovl(NeonS16, i.OutputSimd128Register(), |
| i.InputSimd128Register(0).high()); |
| break; |
| } |
| case kArmI32x4Neg: { |
| __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| case kArmI32x4Shl: { |
| ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 5, Neon32, NeonS32); |
| break; |
| } |
| case kArmI32x4ShrS: { |
| ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonS32); |
| break; |
| } |
| case kArmI32x4Add: { |
| __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI32x4AddHoriz: |
| ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32); |
| break; |
| case kArmI32x4Sub: { |
| __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI32x4Mul: { |
| __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI32x4MinS: { |
| __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI32x4MaxS: { |
| __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI32x4Eq: { |
| __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI32x4Ne: { |
| Simd128Register dst = i.OutputSimd128Register(); |
| __ vceq(Neon32, dst, i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| __ vmvn(dst, dst); |
| break; |
| } |
| case kArmI32x4GtS: { |
| __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| case kArmI32x4GeS: { |
| __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register |