| // Copyright 2017 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_ |
| #define V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_ |
| |
| #include "src/heap/memory-chunk.h" |
| #include "src/wasm/baseline/liftoff-assembler.h" |
| #include "src/wasm/baseline/liftoff-register.h" |
| |
| namespace v8 { |
| namespace internal { |
| namespace wasm { |
| |
| namespace liftoff { |
| |
| // half |
| // slot Frame |
| // -----+--------------------+--------------------------- |
| // n+3 | parameter n | |
| // ... | ... | |
| // 4 | parameter 1 | or parameter 2 |
| // 3 | parameter 0 | or parameter 1 |
| // 2 | (result address) | or parameter 0 |
| // -----+--------------------+--------------------------- |
| // 1 | return addr (lr) | |
| // 0 | previous frame (fp)| |
| // -----+--------------------+ <-- frame ptr (fp) |
| // -1 | 0xa: WASM | |
| // -2 | instance | |
| // -----+--------------------+--------------------------- |
| // -3 | slot 0 (high) | ^ |
| // -4 | slot 0 (low) | | |
| // -5 | slot 1 (high) | Frame slots |
| // -6 | slot 1 (low) | | |
| // | | v |
| // -----+--------------------+ <-- stack ptr (sp) |
| // |
| static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize, |
| "Slot size should be twice the size of the 32 bit pointer."); |
| constexpr int kInstanceOffset = 2 * kSystemPointerSize; |
// kPatchInstructionsRequired is an upper bound on the number of instructions
// that PatchPrepareStackFrame will use to grow the stack. Up to three
// instructions are needed to subtract a large constant: movw + movt + sub.
| constexpr int32_t kPatchInstructionsRequired = 3; |
| constexpr int kHalfStackSlotSize = LiftoffAssembler::kStackSlotSize >> 1; |
| |
| inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); } |
| |
| inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) { |
| int32_t half_offset = |
| half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2; |
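  // Illustrative: for {offset} == 16, kLowWord maps to [fp - 16] and
  // kHighWord to [fp - 12], i.e. the low word sits at the lower address,
  // matching the little-endian slot layout in the diagram above.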
| return MemOperand(offset > 0 ? fp : sp, -offset + half_offset); |
| } |
| |
| inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); } |
| |
| inline MemOperand GetMemOp(LiftoffAssembler* assm, |
| UseScratchRegisterScope* temps, Register addr, |
| Register offset, int32_t offset_imm) { |
| if (offset != no_reg) { |
| if (offset_imm == 0) return MemOperand(addr, offset); |
| Register tmp = temps->Acquire(); |
| assm->add(tmp, offset, Operand(offset_imm)); |
| return MemOperand(addr, tmp); |
| } |
| return MemOperand(addr, offset_imm); |
| } |
| |
| inline Register CalculateActualAddress(LiftoffAssembler* assm, |
| UseScratchRegisterScope* temps, |
| Register addr_reg, Register offset_reg, |
| int32_t offset_imm, |
| Register result_reg = no_reg) { |
| if (offset_reg == no_reg && offset_imm == 0) { |
| if (result_reg == no_reg) { |
| return addr_reg; |
| } else { |
| assm->mov(result_reg, addr_reg); |
| return result_reg; |
| } |
| } |
| Register actual_addr_reg = |
| result_reg != no_reg ? result_reg : temps->Acquire(); |
| if (offset_reg == no_reg) { |
| assm->add(actual_addr_reg, addr_reg, Operand(offset_imm)); |
| } else { |
| assm->add(actual_addr_reg, addr_reg, Operand(offset_reg)); |
| if (offset_imm != 0) { |
| assm->add(actual_addr_reg, actual_addr_reg, Operand(offset_imm)); |
| } |
| } |
| return actual_addr_reg; |
| } |
| |
| inline Condition MakeUnsigned(Condition cond) { |
| switch (cond) { |
| case kSignedLessThan: |
| return kUnsignedLessThan; |
| case kSignedLessEqual: |
| return kUnsignedLessEqual; |
| case kSignedGreaterThan: |
| return kUnsignedGreaterThan; |
| case kSignedGreaterEqual: |
| return kUnsignedGreaterEqual; |
| case kEqual: |
| case kUnequal: |
| case kUnsignedLessThan: |
| case kUnsignedLessEqual: |
| case kUnsignedGreaterThan: |
| case kUnsignedGreaterEqual: |
| return cond; |
| default: |
| UNREACHABLE(); |
| } |
| } |
| |
| template <void (Assembler::*op)(Register, Register, Register, SBit, Condition), |
| void (Assembler::*op_with_carry)(Register, Register, const Operand&, |
| SBit, Condition)> |
| inline void I64Binop(LiftoffAssembler* assm, LiftoffRegister dst, |
| LiftoffRegister lhs, LiftoffRegister rhs) { |
| Register dst_low = dst.low_gp(); |
| if (dst_low == lhs.high_gp() || dst_low == rhs.high_gp()) { |
| dst_low = assm->GetUnusedRegister( |
| kGpReg, LiftoffRegList::ForRegs(lhs, rhs, dst.high_gp())) |
| .gp(); |
| } |
| (assm->*op)(dst_low, lhs.low_gp(), rhs.low_gp(), SetCC, al); |
| (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(rhs.high_gp()), |
| LeaveCC, al); |
| if (dst_low != dst.low_gp()) assm->mov(dst.low_gp(), dst_low); |
| } |
| |
| template <void (Assembler::*op)(Register, Register, const Operand&, SBit, |
| Condition), |
| void (Assembler::*op_with_carry)(Register, Register, const Operand&, |
| SBit, Condition)> |
| inline void I64BinopI(LiftoffAssembler* assm, LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t imm) { |
| // The compiler allocated registers such that either {dst == lhs} or there is |
| // no overlap between the two. |
| DCHECK_NE(dst.low_gp(), lhs.high_gp()); |
| (assm->*op)(dst.low_gp(), lhs.low_gp(), Operand(imm), SetCC, al); |
  // The top half of the immediate is its sign extension: either 0 or -1.
| int32_t sign_extend = imm < 0 ? -1 : 0; |
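  // Illustrative: for {imm} == -5 the low-word op uses 0xFFFFFFFB and the
  // high-word op uses the sign extension -1, so e.g. an i64 add computes
  // lhs - 5 as intended.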
| (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(sign_extend), |
| LeaveCC, al); |
| } |
| |
| template <void (TurboAssembler::*op)(Register, Register, Register, Register, |
| Register), |
| bool is_left_shift> |
| inline void I64Shiftop(LiftoffAssembler* assm, LiftoffRegister dst, |
| LiftoffRegister src, Register amount) { |
| Register src_low = src.low_gp(); |
| Register src_high = src.high_gp(); |
| Register dst_low = dst.low_gp(); |
| Register dst_high = dst.high_gp(); |
| // Left shift writes {dst_high} then {dst_low}, right shifts write {dst_low} |
| // then {dst_high}. |
| Register clobbered_dst_reg = is_left_shift ? dst_high : dst_low; |
| LiftoffRegList pinned = LiftoffRegList::ForRegs(clobbered_dst_reg, src); |
| Register amount_capped = |
| pinned.set(assm->GetUnusedRegister(kGpReg, pinned)).gp(); |
| assm->and_(amount_capped, amount, Operand(0x3F)); |
| |
| // Ensure that writing the first half of {dst} does not overwrite the still |
| // needed half of {src}. |
| Register* later_src_reg = is_left_shift ? &src_low : &src_high; |
| if (*later_src_reg == clobbered_dst_reg) { |
| *later_src_reg = assm->GetUnusedRegister(kGpReg, pinned).gp(); |
| assm->TurboAssembler::Move(*later_src_reg, clobbered_dst_reg); |
| } |
| |
| (assm->*op)(dst_low, dst_high, src_low, src_high, amount_capped); |
| } |
| |
| inline FloatRegister GetFloatRegister(DoubleRegister reg) { |
| DCHECK_LT(reg.code(), kDoubleCode_d16); |
| return LowDwVfpRegister::from_code(reg.code()).low(); |
| } |
| |
| inline Simd128Register GetSimd128Register(DoubleRegister reg) { |
| return QwNeonRegister::from_code(reg.code() / 2); |
| } |
| |
| inline Simd128Register GetSimd128Register(LiftoffRegister reg) { |
| return liftoff::GetSimd128Register(reg.low_fp()); |
| } |
| |
| enum class MinOrMax : uint8_t { kMin, kMax }; |
| template <typename RegisterType> |
| inline void EmitFloatMinOrMax(LiftoffAssembler* assm, RegisterType dst, |
| RegisterType lhs, RegisterType rhs, |
| MinOrMax min_or_max) { |
| DCHECK(RegisterType::kSizeInBytes == 4 || RegisterType::kSizeInBytes == 8); |
| if (lhs == rhs) { |
| assm->TurboAssembler::Move(dst, lhs); |
| return; |
| } |
| Label done, is_nan; |
| if (min_or_max == MinOrMax::kMin) { |
| assm->TurboAssembler::FloatMin(dst, lhs, rhs, &is_nan); |
| } else { |
| assm->TurboAssembler::FloatMax(dst, lhs, rhs, &is_nan); |
| } |
| assm->b(&done); |
| assm->bind(&is_nan); |
| // Create a NaN output. |
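  // When either input is NaN, {vadd} produces a quiet NaN, which is the
  // result wasm f32/f64 min/max require for NaN inputs.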
| assm->vadd(dst, lhs, rhs); |
| assm->bind(&done); |
| } |
| |
| inline Register EnsureNoAlias(Assembler* assm, Register reg, |
| Register must_not_alias, |
| UseScratchRegisterScope* temps) { |
| if (reg != must_not_alias) return reg; |
| Register tmp = temps->Acquire(); |
| DCHECK_NE(reg, tmp); |
| assm->mov(tmp, reg); |
| return tmp; |
| } |
| |
| inline void S128NarrowOp(LiftoffAssembler* assm, NeonDataType dt, |
| NeonDataType sdt, LiftoffRegister dst, |
| LiftoffRegister lhs, LiftoffRegister rhs) { |
| if (dst == lhs) { |
| assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs)); |
| assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs)); |
| } else { |
| assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs)); |
| assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs)); |
| } |
| } |
| |
| inline void F64x2Compare(LiftoffAssembler* assm, LiftoffRegister dst, |
| LiftoffRegister lhs, LiftoffRegister rhs, |
| Condition cond) { |
| DCHECK(cond == eq || cond == ne || cond == lt || cond == le); |
| |
| QwNeonRegister dest = liftoff::GetSimd128Register(dst); |
| QwNeonRegister left = liftoff::GetSimd128Register(lhs); |
| QwNeonRegister right = liftoff::GetSimd128Register(rhs); |
| UseScratchRegisterScope temps(assm); |
| Register scratch = temps.Acquire(); |
| |
| assm->mov(scratch, Operand(0)); |
| assm->VFPCompareAndSetFlags(left.low(), right.low()); |
| assm->mov(scratch, Operand(-1), LeaveCC, cond); |
| if (cond == lt || cond == le) { |
| // Check for NaN. |
| assm->mov(scratch, Operand(0), LeaveCC, vs); |
| } |
| assm->vmov(dest.low(), scratch, scratch); |
| |
| assm->mov(scratch, Operand(0)); |
| assm->VFPCompareAndSetFlags(left.high(), right.high()); |
| assm->mov(scratch, Operand(-1), LeaveCC, cond); |
| if (cond == lt || cond == le) { |
| // Check for NaN. |
| assm->mov(scratch, Operand(0), LeaveCC, vs); |
| } |
| assm->vmov(dest.high(), scratch, scratch); |
| } |
| |
| inline void Store(LiftoffAssembler* assm, LiftoffRegister src, MemOperand dst, |
| ValueType type) { |
| #ifdef DEBUG |
| // The {str} instruction needs a temp register when the immediate in the |
| // provided MemOperand does not fit into 12 bits. This happens for large stack |
| // frames. This DCHECK checks that the temp register is available when needed. |
| DCHECK(UseScratchRegisterScope{assm}.CanAcquire()); |
| #endif |
| switch (type.kind()) { |
| case ValueType::kI32: |
| case ValueType::kOptRef: |
| case ValueType::kRef: |
| assm->str(src.gp(), dst); |
| break; |
| case ValueType::kI64: |
| // Positive offsets should be lowered to kI32. |
| assm->str(src.low_gp(), MemOperand(dst.rn(), dst.offset())); |
| assm->str( |
| src.high_gp(), |
| MemOperand(dst.rn(), dst.offset() + liftoff::kHalfStackSlotSize)); |
| break; |
| case ValueType::kF32: |
| assm->vstr(liftoff::GetFloatRegister(src.fp()), dst); |
| break; |
| case ValueType::kF64: |
| assm->vstr(src.fp(), dst); |
| break; |
| case ValueType::kS128: { |
| UseScratchRegisterScope temps(assm); |
| Register addr = liftoff::CalculateActualAddress(assm, &temps, dst.rn(), |
| no_reg, dst.offset()); |
| assm->vst1(Neon8, NeonListOperand(src.low_fp(), 2), NeonMemOperand(addr)); |
| break; |
| } |
| default: |
| UNREACHABLE(); |
| } |
| } |
| |
| inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src, |
| ValueType type) { |
| switch (type.kind()) { |
| case ValueType::kI32: |
| case ValueType::kOptRef: |
| case ValueType::kRef: |
| assm->ldr(dst.gp(), src); |
| break; |
| case ValueType::kI64: |
| assm->ldr(dst.low_gp(), MemOperand(src.rn(), src.offset())); |
| assm->ldr( |
| dst.high_gp(), |
| MemOperand(src.rn(), src.offset() + liftoff::kHalfStackSlotSize)); |
| break; |
| case ValueType::kF32: |
| assm->vldr(liftoff::GetFloatRegister(dst.fp()), src); |
| break; |
| case ValueType::kF64: |
| assm->vldr(dst.fp(), src); |
| break; |
| case ValueType::kS128: { |
| // Get memory address of slot to fill from. |
| UseScratchRegisterScope temps(assm); |
| Register addr = liftoff::CalculateActualAddress(assm, &temps, src.rn(), |
| no_reg, src.offset()); |
| assm->vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr)); |
| break; |
| } |
| default: |
| UNREACHABLE(); |
| } |
| } |
| |
| constexpr int MaskFromNeonDataType(NeonDataType dt) { |
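  // The mask implements wasm's modular shift semantics: shift counts are
  // taken modulo the lane width, so e.g. an i32x4 shift by 33 behaves like a
  // shift by 1.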
| switch (dt) { |
| case NeonS8: |
| case NeonU8: |
| return 7; |
| case NeonS16: |
| case NeonU16: |
| return 15; |
| case NeonS32: |
| case NeonU32: |
| return 31; |
| case NeonS64: |
| case NeonU64: |
| return 63; |
| } |
| } |
| |
| enum ShiftDirection { kLeft, kRight }; |
| |
| template <ShiftDirection dir = kLeft, NeonDataType dt, NeonSize sz> |
| inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst, |
| LiftoffRegister lhs, LiftoffRegister rhs) { |
| constexpr int mask = MaskFromNeonDataType(dt); |
| UseScratchRegisterScope temps(assm); |
| QwNeonRegister tmp = temps.AcquireQ(); |
| Register shift = temps.Acquire(); |
| assm->and_(shift, rhs.gp(), Operand(mask)); |
| assm->vdup(sz, tmp, shift); |
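  // NEON has no separate variable right-shift: {vshl} shifts right when the
  // per-lane shift amount is negative, so right shifts negate the duplicated
  // amount.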
| if (dir == kRight) { |
| assm->vneg(sz, tmp, tmp); |
| } |
| assm->vshl(dt, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), tmp); |
| } |
| |
| template <ShiftDirection dir, NeonDataType dt> |
| inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| // vshr by 0 is not allowed, so check for it, and only move if dst != lhs. |
| int32_t shift = rhs & MaskFromNeonDataType(dt); |
| if (shift) { |
| if (dir == kLeft) { |
| assm->vshl(dt, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), shift); |
| } else { |
| assm->vshr(dt, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), shift); |
| } |
| } else if (dst != lhs) { |
| assm->vmov(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs)); |
| } |
| } |
| |
| inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst, |
| LiftoffRegister src) { |
| UseScratchRegisterScope temps(assm); |
| DwVfpRegister scratch = temps.AcquireD(); |
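  // Two pairwise maxima fold the four unsigned 32-bit lanes into one value
  // that is non-zero iff any lane of {src} is non-zero; the scalar compare
  // below then normalizes it to 0 or 1.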
| assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp()); |
| assm->vpmax(NeonU32, scratch, scratch, scratch); |
| assm->ExtractLane(dst.gp(), scratch, NeonS32, 0); |
| assm->cmp(dst.gp(), Operand(0)); |
| assm->mov(dst.gp(), Operand(1), LeaveCC, ne); |
| } |
| |
| } // namespace liftoff |
| |
| int LiftoffAssembler::PrepareStackFrame() { |
| if (!CpuFeatures::IsSupported(ARMv7)) { |
| bailout(kUnsupportedArchitecture, "Armv6 not supported"); |
| return 0; |
| } |
| uint32_t offset = static_cast<uint32_t>(pc_offset()); |
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately. Additional nops are reserved because encoding the
  // frame-size operand might require extra move instructions.
| for (int i = 0; i < liftoff::kPatchInstructionsRequired; i++) { |
| nop(); |
| } |
| DCHECK_EQ(offset + liftoff::kPatchInstructionsRequired * kInstrSize, |
| pc_offset()); |
| return offset; |
| } |
| |
| void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params, |
| int stack_param_delta) { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| |
| // Push the return address and frame pointer to complete the stack frame. |
| sub(sp, sp, Operand(8)); |
| ldr(scratch, MemOperand(fp, 4)); |
| str(scratch, MemOperand(sp, 4)); |
| ldr(scratch, MemOperand(fp, 0)); |
| str(scratch, MemOperand(sp, 0)); |
| |
| // Shift the whole frame upwards. |
| int slot_count = num_callee_stack_params + 2; |
| for (int i = slot_count - 1; i >= 0; --i) { |
| ldr(scratch, MemOperand(sp, i * 4)); |
| str(scratch, MemOperand(fp, (i - stack_param_delta) * 4)); |
| } |
| |
| // Set the new stack and frame pointer. |
| sub(sp, fp, Operand(stack_param_delta * 4)); |
| Pop(lr, fp); |
| } |
| |
| void LiftoffAssembler::PatchPrepareStackFrame(int offset, int frame_size) { |
| #ifdef USE_SIMULATOR |
  // When running on the simulator, work around Liftoff allocating the stack
  // before checking it by limiting the frame size.
  // TODO(arm): Remove this when the stack check mechanism is updated.
| if (frame_size > KB / 2) { |
| bailout(kOtherReason, |
| "Stack limited to 512 bytes to avoid a bug in StackCheck"); |
| return; |
| } |
| #endif |
| PatchingAssembler patching_assembler(AssemblerOptions{}, |
| buffer_start_ + offset, |
| liftoff::kPatchInstructionsRequired); |
| #if V8_OS_WIN |
| if (frame_size > kStackPageSize) { |
| // Generate OOL code (at the end of the function, where the current |
| // assembler is pointing) to do the explicit stack limit check (see |
| // https://docs.microsoft.com/en-us/previous-versions/visualstudio/ |
| // visual-studio-6.0/aa227153(v=vs.60)). |
| // At the function start, emit a jump to that OOL code (from {offset} to |
| // {pc_offset()}). |
| int ool_offset = pc_offset() - offset; |
| patching_assembler.b(ool_offset - Instruction::kPcLoadDelta); |
| patching_assembler.PadWithNops(); |
| |
| // Now generate the OOL code. |
| AllocateStackSpace(frame_size); |
| // Jump back to the start of the function (from {pc_offset()} to {offset + |
| // liftoff::kPatchInstructionsRequired * kInstrSize}). |
| int func_start_offset = |
| offset + liftoff::kPatchInstructionsRequired * kInstrSize - pc_offset(); |
| b(func_start_offset - Instruction::kPcLoadDelta); |
| return; |
| } |
| #endif |
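  // Patch the reserved nops with the stack allocation. For a large
  // {frame_size} the {sub} below can expand to the full three-instruction
  // movw/movt/sub sequence; {PadWithNops} fills whatever patch space remains.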
| patching_assembler.sub(sp, sp, Operand(frame_size)); |
| patching_assembler.PadWithNops(); |
| } |
| |
| void LiftoffAssembler::FinishCode() { CheckConstPool(true, false); } |
| |
| void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); } |
| |
| // static |
| constexpr int LiftoffAssembler::StaticStackFrameSize() { |
| return liftoff::kInstanceOffset; |
| } |
| |
| int LiftoffAssembler::SlotSizeForType(ValueType type) { |
| switch (type.kind()) { |
| case ValueType::kS128: |
| return type.element_size_bytes(); |
| default: |
| return kStackSlotSize; |
| } |
| } |
| |
| bool LiftoffAssembler::NeedsAlignment(ValueType type) { |
| return (type.kind() == ValueType::kS128 || type.is_reference_type()); |
| } |
| |
| void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value, |
| RelocInfo::Mode rmode) { |
| switch (value.type().kind()) { |
| case ValueType::kI32: |
| TurboAssembler::Move(reg.gp(), Operand(value.to_i32(), rmode)); |
| break; |
| case ValueType::kI64: { |
| DCHECK(RelocInfo::IsNone(rmode)); |
| int32_t low_word = value.to_i64(); |
| int32_t high_word = value.to_i64() >> 32; |
| TurboAssembler::Move(reg.low_gp(), Operand(low_word)); |
| TurboAssembler::Move(reg.high_gp(), Operand(high_word)); |
| break; |
| } |
| case ValueType::kF32: |
| vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed()); |
| break; |
| case ValueType::kF64: { |
| Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp(); |
| vmov(reg.fp(), Double(value.to_f64_boxed().get_bits()), extra_scratch); |
| break; |
| } |
| default: |
| UNREACHABLE(); |
| } |
| } |
| |
| void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) { |
| DCHECK_LE(0, offset); |
| DCHECK_EQ(4, size); |
| ldr(dst, liftoff::GetInstanceOperand()); |
| ldr(dst, MemOperand(dst, offset)); |
| } |
| |
| void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, int offset) { |
| LoadFromInstance(dst, offset, kTaggedSize); |
| } |
| |
| void LiftoffAssembler::SpillInstance(Register instance) { |
| str(instance, liftoff::GetInstanceOperand()); |
| } |
| |
| void LiftoffAssembler::FillInstanceInto(Register dst) { |
| ldr(dst, liftoff::GetInstanceOperand()); |
| } |
| |
| namespace liftoff { |
| #define __ lasm-> |
| inline void LoadInternal(LiftoffAssembler* lasm, LiftoffRegister dst, |
| Register src_addr, Register offset_reg, |
| int32_t offset_imm, LoadType type, |
| LiftoffRegList pinned, |
| uint32_t* protected_load_pc = nullptr, |
| bool is_load_mem = false) { |
| DCHECK_IMPLIES(type.value_type() == kWasmI64, dst.is_gp_pair()); |
| UseScratchRegisterScope temps(lasm); |
| if (type.value() == LoadType::kF64Load || |
| type.value() == LoadType::kF32Load || |
| type.value() == LoadType::kS128Load) { |
| Register actual_src_addr = liftoff::CalculateActualAddress( |
| lasm, &temps, src_addr, offset_reg, offset_imm); |
| if (type.value() == LoadType::kF64Load) { |
| // Armv6 is not supported so Neon can be used to avoid alignment issues. |
| CpuFeatureScope scope(lasm, NEON); |
| __ vld1(Neon64, NeonListOperand(dst.fp()), |
| NeonMemOperand(actual_src_addr)); |
| } else if (type.value() == LoadType::kF32Load) { |
| // TODO(arm): Use vld1 for f32 when implemented in simulator as used for |
| // f64. It supports unaligned access. |
| Register scratch = |
| (actual_src_addr == src_addr) ? temps.Acquire() : actual_src_addr; |
| __ ldr(scratch, MemOperand(actual_src_addr)); |
| __ vmov(liftoff::GetFloatRegister(dst.fp()), scratch); |
| } else { |
| // Armv6 is not supported so Neon can be used to avoid alignment issues. |
| CpuFeatureScope scope(lasm, NEON); |
| __ vld1(Neon8, NeonListOperand(dst.low_fp(), 2), |
| NeonMemOperand(actual_src_addr)); |
| } |
| } else { |
| MemOperand src_op = |
| liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg, offset_imm); |
| if (protected_load_pc) *protected_load_pc = __ pc_offset(); |
| switch (type.value()) { |
| case LoadType::kI32Load8U: |
| __ ldrb(dst.gp(), src_op); |
| break; |
| case LoadType::kI64Load8U: |
| __ ldrb(dst.low_gp(), src_op); |
| __ mov(dst.high_gp(), Operand(0)); |
| break; |
| case LoadType::kI32Load8S: |
| __ ldrsb(dst.gp(), src_op); |
| break; |
| case LoadType::kI64Load8S: |
| __ ldrsb(dst.low_gp(), src_op); |
| __ asr(dst.high_gp(), dst.low_gp(), Operand(31)); |
| break; |
| case LoadType::kI32Load16U: |
| __ ldrh(dst.gp(), src_op); |
| break; |
| case LoadType::kI64Load16U: |
| __ ldrh(dst.low_gp(), src_op); |
| __ mov(dst.high_gp(), Operand(0)); |
| break; |
| case LoadType::kI32Load16S: |
| __ ldrsh(dst.gp(), src_op); |
| break; |
| case LoadType::kI32Load: |
| __ ldr(dst.gp(), src_op); |
| break; |
| case LoadType::kI64Load16S: |
| __ ldrsh(dst.low_gp(), src_op); |
| __ asr(dst.high_gp(), dst.low_gp(), Operand(31)); |
| break; |
| case LoadType::kI64Load32U: |
| __ ldr(dst.low_gp(), src_op); |
| __ mov(dst.high_gp(), Operand(0)); |
| break; |
| case LoadType::kI64Load32S: |
| __ ldr(dst.low_gp(), src_op); |
| __ asr(dst.high_gp(), dst.low_gp(), Operand(31)); |
| break; |
| case LoadType::kI64Load: |
| __ ldr(dst.low_gp(), src_op); |
        // GetMemOp may use a scratch register as the offset register, in
        // which case calling GetMemOp again would fail because the assembler
        // has run out of scratch registers.
| if (temps.CanAcquire()) { |
| src_op = liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg, |
| offset_imm + kSystemPointerSize); |
| } else { |
| __ add(src_op.rm(), src_op.rm(), Operand(kSystemPointerSize)); |
| } |
| __ ldr(dst.high_gp(), src_op); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| } |
| } |
| #undef __ |
| } // namespace liftoff |
| |
| void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr, |
| Register offset_reg, |
| int32_t offset_imm, |
| LiftoffRegList pinned) { |
| STATIC_ASSERT(kTaggedSize == kInt32Size); |
| liftoff::LoadInternal(this, LiftoffRegister(dst), src_addr, offset_reg, |
| offset_imm, LoadType::kI32Load, pinned); |
| } |
| |
| void LiftoffAssembler::StoreTaggedPointer(Register dst_addr, |
| int32_t offset_imm, |
| LiftoffRegister src, |
| LiftoffRegList pinned) { |
| STATIC_ASSERT(kTaggedSize == kInt32Size); |
| // Store the value. |
| MemOperand dst_op(dst_addr, offset_imm); |
| str(src.gp(), dst_op); |
| // The write barrier. |
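  // The stub call is skipped when the destination page does not track
  // outgoing pointers, when {src} is a Smi, or when the page holding {src}
  // does not track incoming pointers; only interesting heap-pointer stores
  // reach CallRecordWriteStub.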
| Label write_barrier; |
| Label exit; |
| CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, ne, |
| &write_barrier); |
| b(&exit); |
| bind(&write_barrier); |
| JumpIfSmi(src.gp(), &exit); |
| CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, eq, |
| &exit); |
| CallRecordWriteStub(dst_addr, Operand(offset_imm), EMIT_REMEMBERED_SET, |
| kSaveFPRegs, wasm::WasmCode::kRecordWrite); |
| bind(&exit); |
| } |
| |
| void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, |
| Register offset_reg, uint32_t offset_imm, |
| LoadType type, LiftoffRegList pinned, |
| uint32_t* protected_load_pc, bool is_load_mem) { |
  // If {offset_imm} cannot safely be converted to int32, abort; a separate
  // bounds check should guarantee that this code is never executed.
  // TODO(7881): Support when >2GB is required.
| if (!is_uint31(offset_imm)) { |
| TurboAssembler::Abort(AbortReason::kOffsetOutOfRange); |
| return; |
| } |
| liftoff::LoadInternal(this, dst, src_addr, offset_reg, |
| static_cast<int32_t>(offset_imm), type, pinned, |
| protected_load_pc, is_load_mem); |
| } |
| |
| void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, LiftoffRegister src, |
| StoreType type, LiftoffRegList pinned, |
| uint32_t* protected_store_pc, bool is_store_mem) { |
  // If {offset_imm} cannot safely be converted to int32, abort; a separate
  // bounds check should guarantee that this code is never executed.
  // TODO(7881): Support when >2GB is required.
| if (!is_uint31(offset_imm)) { |
| TurboAssembler::Abort(AbortReason::kOffsetOutOfRange); |
| return; |
| } |
| UseScratchRegisterScope temps(this); |
| if (type.value() == StoreType::kF64Store) { |
| Register actual_dst_addr = liftoff::CalculateActualAddress( |
| this, &temps, dst_addr, offset_reg, offset_imm); |
| // Armv6 is not supported so Neon can be used to avoid alignment issues. |
| CpuFeatureScope scope(this, NEON); |
| vst1(Neon64, NeonListOperand(src.fp()), NeonMemOperand(actual_dst_addr)); |
| } else if (type.value() == StoreType::kS128Store) { |
| Register actual_dst_addr = liftoff::CalculateActualAddress( |
| this, &temps, dst_addr, offset_reg, offset_imm); |
| // Armv6 is not supported so Neon can be used to avoid alignment issues. |
| CpuFeatureScope scope(this, NEON); |
| vst1(Neon8, NeonListOperand(src.low_fp(), 2), |
| NeonMemOperand(actual_dst_addr)); |
| } else if (type.value() == StoreType::kF32Store) { |
| // TODO(arm): Use vst1 for f32 when implemented in simulator as used for |
| // f64. It supports unaligned access. |
    // CalculateActualAddress only leaves the assembler scratch register
    // untouched if {offset_reg} is {no_reg} and {offset_imm} is zero; in all
    // other cases it consumes the scratch, so a Liftoff cache register must
    // be used for the value instead.
| Register scratch = (offset_reg == no_reg && offset_imm == 0) |
| ? temps.Acquire() |
| : GetUnusedRegister(kGpReg, pinned).gp(); |
| Register actual_dst_addr = liftoff::CalculateActualAddress( |
| this, &temps, dst_addr, offset_reg, offset_imm); |
| vmov(scratch, liftoff::GetFloatRegister(src.fp())); |
| str(scratch, MemOperand(actual_dst_addr)); |
| } else { |
| MemOperand dst_op = |
| liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm); |
| if (protected_store_pc) *protected_store_pc = pc_offset(); |
| switch (type.value()) { |
| case StoreType::kI64Store8: |
| src = src.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store8: |
| strb(src.gp(), dst_op); |
| break; |
| case StoreType::kI64Store16: |
| src = src.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store16: |
| strh(src.gp(), dst_op); |
| break; |
| case StoreType::kI64Store32: |
| src = src.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store: |
| str(src.gp(), dst_op); |
| break; |
| case StoreType::kI64Store: |
| str(src.low_gp(), dst_op); |
        // GetMemOp may use a scratch register as the offset register, in
        // which case calling GetMemOp again would fail because the assembler
        // has run out of scratch registers.
| if (temps.CanAcquire()) { |
| dst_op = liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, |
| offset_imm + kSystemPointerSize); |
| } else { |
| add(dst_op.rm(), dst_op.rm(), Operand(kSystemPointerSize)); |
| } |
| str(src.high_gp(), dst_op); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| } |
| } |
| |
| namespace liftoff { |
| #define __ lasm-> |
| |
| inline void AtomicOp32( |
| LiftoffAssembler* lasm, Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result, |
| LiftoffRegList pinned, |
| void (Assembler::*load)(Register, Register, Condition), |
| void (Assembler::*store)(Register, Register, Register, Condition), |
| void (*op)(LiftoffAssembler*, Register, Register, Register)) { |
| Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp(); |
| |
| // Allocate an additional {temp} register to hold the result that should be |
| // stored to memory. Note that {temp} and {store_result} are not allowed to be |
| // the same register. |
| Register temp = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp(); |
| |
| // Make sure that {result} is unique. |
| Register result_reg = result.gp(); |
| if (result_reg == value.gp() || result_reg == dst_addr || |
| result_reg == offset_reg) { |
| result_reg = __ GetUnusedRegister(kGpReg, pinned).gp(); |
| } |
| |
| UseScratchRegisterScope temps(lasm); |
| Register actual_addr = liftoff::CalculateActualAddress( |
| lasm, &temps, dst_addr, offset_reg, offset_imm); |
| |
| __ dmb(ISH); |
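  // Load-linked/store-conditional retry loop: the exclusive store writes 0
  // into {store_result} on success and 1 on failure, so we retry until the
  // read-modify-write has been applied atomically. The barriers before and
  // after make the operation sequentially consistent.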
| Label retry; |
| __ bind(&retry); |
| (lasm->*load)(result_reg, actual_addr, al); |
| op(lasm, temp, result_reg, value.gp()); |
| (lasm->*store)(store_result, temp, actual_addr, al); |
| __ cmp(store_result, Operand(0)); |
| __ b(ne, &retry); |
| __ dmb(ISH); |
| if (result_reg != result.gp()) { |
| __ mov(result.gp(), result_reg); |
| } |
| } |
| |
| inline void Add(LiftoffAssembler* lasm, Register dst, Register lhs, |
| Register rhs) { |
| __ add(dst, lhs, rhs); |
| } |
| |
| inline void Sub(LiftoffAssembler* lasm, Register dst, Register lhs, |
| Register rhs) { |
| __ sub(dst, lhs, rhs); |
| } |
| |
| inline void And(LiftoffAssembler* lasm, Register dst, Register lhs, |
| Register rhs) { |
| __ and_(dst, lhs, rhs); |
| } |
| |
| inline void Or(LiftoffAssembler* lasm, Register dst, Register lhs, |
| Register rhs) { |
| __ orr(dst, lhs, rhs); |
| } |
| |
| inline void Xor(LiftoffAssembler* lasm, Register dst, Register lhs, |
| Register rhs) { |
| __ eor(dst, lhs, rhs); |
| } |
| |
| inline void Exchange(LiftoffAssembler* lasm, Register dst, Register lhs, |
| Register rhs) { |
| __ mov(dst, rhs); |
| } |
| |
| inline void AtomicBinop32(LiftoffAssembler* lasm, Register dst_addr, |
| Register offset_reg, uint32_t offset_imm, |
| LiftoffRegister value, LiftoffRegister result, |
| StoreType type, |
| void (*op)(LiftoffAssembler*, Register, Register, |
| Register)) { |
| LiftoffRegList pinned = |
| LiftoffRegList::ForRegs(dst_addr, offset_reg, value, result); |
| switch (type.value()) { |
| case StoreType::kI64Store8: |
| __ LoadConstant(result.high(), WasmValue(0)); |
| result = result.low(); |
| value = value.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store8: |
| liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result, |
| pinned, &Assembler::ldrexb, &Assembler::strexb, op); |
| return; |
| case StoreType::kI64Store16: |
| __ LoadConstant(result.high(), WasmValue(0)); |
| result = result.low(); |
| value = value.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store16: |
| liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result, |
| pinned, &Assembler::ldrexh, &Assembler::strexh, op); |
| return; |
| case StoreType::kI64Store32: |
| __ LoadConstant(result.high(), WasmValue(0)); |
| result = result.low(); |
| value = value.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store: |
| liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result, |
| pinned, &Assembler::ldrex, &Assembler::strex, op); |
| return; |
| default: |
| UNREACHABLE(); |
| } |
| } |
| |
| inline void AtomicOp64(LiftoffAssembler* lasm, Register dst_addr, |
| Register offset_reg, uint32_t offset_imm, |
| LiftoffRegister value, |
| base::Optional<LiftoffRegister> result, |
| void (*op)(LiftoffAssembler*, LiftoffRegister, |
| LiftoffRegister, LiftoffRegister)) { |
  // strexd stores a 64-bit word from two registers. The first register needs
  // to have an even index, e.g. r8, and the second register needs to be the
  // one with the next higher index, e.g. r9 if the first register is r8. In
  // the following code we use the fixed register pair r8/r9 to make the code
  // here simpler, even though other register pairs would also be possible.
| constexpr Register dst_low = r8; |
| constexpr Register dst_high = r9; |
| |
| // Make sure {dst_low} and {dst_high} are not occupied by any other value. |
| Register value_low = value.low_gp(); |
| Register value_high = value.high_gp(); |
| LiftoffRegList pinned = LiftoffRegList::ForRegs( |
| dst_addr, offset_reg, value_low, value_high, dst_low, dst_high); |
| __ ClearRegister(dst_low, {&dst_addr, &offset_reg, &value_low, &value_high}, |
| pinned); |
| pinned = pinned | |
| LiftoffRegList::ForRegs(dst_addr, offset_reg, value_low, value_high); |
| __ ClearRegister(dst_high, {&dst_addr, &offset_reg, &value_low, &value_high}, |
| pinned); |
| pinned = pinned | |
| LiftoffRegList::ForRegs(dst_addr, offset_reg, value_low, value_high); |
| |
| // Make sure that {result}, if it exists, also does not overlap with |
| // {dst_low} and {dst_high}. We don't have to transfer the value stored in |
| // {result}. |
| Register result_low = no_reg; |
| Register result_high = no_reg; |
| if (result.has_value()) { |
| result_low = result.value().low_gp(); |
| if (pinned.has(result_low)) { |
| result_low = __ GetUnusedRegister(kGpReg, pinned).gp(); |
| } |
| pinned.set(result_low); |
| |
| result_high = result.value().high_gp(); |
| if (pinned.has(result_high)) { |
| result_high = __ GetUnusedRegister(kGpReg, pinned).gp(); |
| } |
| pinned.set(result_high); |
| } |
| |
| Register store_result = __ GetUnusedRegister(kGpReg, pinned).gp(); |
| |
| UseScratchRegisterScope temps(lasm); |
| Register actual_addr = liftoff::CalculateActualAddress( |
| lasm, &temps, dst_addr, offset_reg, offset_imm); |
| |
| __ dmb(ISH); |
| Label retry; |
| __ bind(&retry); |
| // {ldrexd} is needed here so that the {strexd} instruction below can |
| // succeed. We don't need the value we are reading. We use {dst_low} and |
| // {dst_high} as the destination registers because {ldrexd} has the same |
| // restrictions on registers as {strexd}, see the comment above. |
| __ ldrexd(dst_low, dst_high, actual_addr); |
| if (result.has_value()) { |
| __ mov(result_low, dst_low); |
| __ mov(result_high, dst_high); |
| } |
| op(lasm, LiftoffRegister::ForPair(dst_low, dst_high), |
| LiftoffRegister::ForPair(dst_low, dst_high), |
| LiftoffRegister::ForPair(value_low, value_high)); |
| __ strexd(store_result, dst_low, dst_high, actual_addr); |
| __ cmp(store_result, Operand(0)); |
| __ b(ne, &retry); |
| __ dmb(ISH); |
| |
| if (result.has_value()) { |
| if (result_low != result.value().low_gp()) { |
| __ mov(result.value().low_gp(), result_low); |
| } |
| if (result_high != result.value().high_gp()) { |
| __ mov(result.value().high_gp(), result_high); |
| } |
| } |
| } |
| |
| inline void I64Store(LiftoffAssembler* lasm, LiftoffRegister dst, |
| LiftoffRegister, LiftoffRegister src) { |
| __ mov(dst.low_gp(), src.low_gp()); |
| __ mov(dst.high_gp(), src.high_gp()); |
| } |
| |
| #undef __ |
| } // namespace liftoff |
| |
| void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr, |
| Register offset_reg, uint32_t offset_imm, |
| LoadType type, LiftoffRegList pinned) { |
| if (type.value() != LoadType::kI64Load) { |
| Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true); |
| dmb(ISH); |
| return; |
| } |
| // ldrexd loads a 64 bit word into two registers. The first register needs to |
| // have an even index, e.g. r8, the second register needs to be the one with |
| // the next higher index, e.g. r9 if the first register is r8. In the |
| // following code we use the fixed register pair r8/r9 to make the code here |
| // simpler, even though other register pairs would also be possible. |
| constexpr Register dst_low = r8; |
| constexpr Register dst_high = r9; |
| if (cache_state()->is_used(LiftoffRegister(dst_low))) { |
| SpillRegister(LiftoffRegister(dst_low)); |
| } |
| if (cache_state()->is_used(LiftoffRegister(dst_high))) { |
| SpillRegister(LiftoffRegister(dst_high)); |
| } |
| { |
| UseScratchRegisterScope temps(this); |
| Register actual_addr = liftoff::CalculateActualAddress( |
| this, &temps, src_addr, offset_reg, offset_imm); |
| ldrexd(dst_low, dst_high, actual_addr); |
| dmb(ISH); |
| } |
| |
| ParallelRegisterMove( |
| {{dst, LiftoffRegister::ForPair(dst_low, dst_high), kWasmI64}}); |
| } |
| |
| void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, LiftoffRegister src, |
| StoreType type, LiftoffRegList pinned) { |
| if (type.value() == StoreType::kI64Store) { |
| liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, src, {}, |
| liftoff::I64Store); |
| return; |
| } |
| |
| dmb(ISH); |
| Store(dst_addr, offset_reg, offset_imm, src, type, pinned, nullptr, true); |
| dmb(ISH); |
| return; |
| } |
| |
| void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, LiftoffRegister value, |
| LiftoffRegister result, StoreType type) { |
| if (type.value() == StoreType::kI64Store) { |
| liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, |
| liftoff::I64Binop<&Assembler::add, &Assembler::adc>); |
| return; |
| } |
| liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, |
| type, &liftoff::Add); |
| } |
| |
| void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, LiftoffRegister value, |
| LiftoffRegister result, StoreType type) { |
| if (type.value() == StoreType::kI64Store) { |
| liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, |
| liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>); |
| return; |
| } |
| liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, |
| type, &liftoff::Sub); |
| } |
| |
| void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, LiftoffRegister value, |
| LiftoffRegister result, StoreType type) { |
| if (type.value() == StoreType::kI64Store) { |
| liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, |
| liftoff::I64Binop<&Assembler::and_, &Assembler::and_>); |
| return; |
| } |
| liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, |
| type, &liftoff::And); |
| } |
| |
| void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, LiftoffRegister value, |
| LiftoffRegister result, StoreType type) { |
| if (type.value() == StoreType::kI64Store) { |
| liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, |
| liftoff::I64Binop<&Assembler::orr, &Assembler::orr>); |
| return; |
| } |
| liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, |
| type, &liftoff::Or); |
| } |
| |
| void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, LiftoffRegister value, |
| LiftoffRegister result, StoreType type) { |
| if (type.value() == StoreType::kI64Store) { |
| liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, |
| liftoff::I64Binop<&Assembler::eor, &Assembler::eor>); |
| return; |
| } |
| liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, |
| type, &liftoff::Xor); |
| } |
| |
| void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg, |
| uint32_t offset_imm, |
| LiftoffRegister value, |
| LiftoffRegister result, StoreType type) { |
| if (type.value() == StoreType::kI64Store) { |
| liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result}, |
| liftoff::I64Store); |
| return; |
| } |
| liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result, |
| type, &liftoff::Exchange); |
| } |
| |
| namespace liftoff { |
| #define __ lasm-> |
| |
| inline void AtomicI64CompareExchange(LiftoffAssembler* lasm, |
| Register dst_addr_reg, Register offset_reg, |
| uint32_t offset_imm, |
| LiftoffRegister expected, |
| LiftoffRegister new_value, |
| LiftoffRegister result) { |
  // To implement I64AtomicCompareExchange we need nearly all registers, and
  // some of them have special constraints: for {new_value} and {result} the
  // low-word register has to have an even register code, and the high word
  // has to be in the next higher register. To avoid complicated register
  // allocation code here, we just assign fixed registers to all values and
  // then move each value into its correct register.
| Register dst_addr = r0; |
| Register offset = r1; |
| Register result_low = r4; |
| Register result_high = r5; |
| Register new_value_low = r2; |
| Register new_value_high = r3; |
| Register store_result = r6; |
| Register expected_low = r8; |
| Register expected_high = r9; |
| |
| // We spill all registers, so that we can re-assign them afterwards. |
| __ SpillRegisters(dst_addr, offset, result_low, result_high, new_value_low, |
| new_value_high, store_result, expected_low, expected_high); |
| |
| __ ParallelRegisterMove( |
| {{LiftoffRegister::ForPair(new_value_low, new_value_high), new_value, |
| kWasmI64}, |
| {LiftoffRegister::ForPair(expected_low, expected_high), expected, |
| kWasmI64}, |
| {dst_addr, dst_addr_reg, kWasmI32}, |
| {offset, offset_reg != no_reg ? offset_reg : offset, kWasmI32}}); |
| |
| { |
| UseScratchRegisterScope temps(lasm); |
| Register temp = liftoff::CalculateActualAddress( |
| lasm, &temps, dst_addr, offset_reg == no_reg ? no_reg : offset, |
| offset_imm, dst_addr); |
| // Make sure the actual address is stored in the right register. |
| DCHECK_EQ(dst_addr, temp); |
| USE(temp); |
| } |
| |
| Label retry; |
| Label done; |
| __ dmb(ISH); |
| __ bind(&retry); |
| __ ldrexd(result_low, result_high, dst_addr); |
| __ cmp(result_low, expected_low); |
| __ b(ne, &done); |
| __ cmp(result_high, expected_high); |
| __ b(ne, &done); |
| __ strexd(store_result, new_value_low, new_value_high, dst_addr); |
| __ cmp(store_result, Operand(0)); |
| __ b(ne, &retry); |
| __ dmb(ISH); |
| __ bind(&done); |
| |
| __ ParallelRegisterMove( |
| {{result, LiftoffRegister::ForPair(result_low, result_high), kWasmI64}}); |
| } |
| #undef __ |
| } // namespace liftoff |
| |
| void LiftoffAssembler::AtomicCompareExchange( |
| Register dst_addr, Register offset_reg, uint32_t offset_imm, |
| LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result, |
| StoreType type) { |
| if (type.value() == StoreType::kI64Store) { |
| liftoff::AtomicI64CompareExchange(this, dst_addr, offset_reg, offset_imm, |
| expected, new_value, result); |
| return; |
| } |
| |
| // The other versions of CompareExchange can share code, but need special load |
| // and store instructions. |
| void (Assembler::*load)(Register, Register, Condition) = nullptr; |
| void (Assembler::*store)(Register, Register, Register, Condition) = nullptr; |
| |
| LiftoffRegList pinned = LiftoffRegList::ForRegs(dst_addr, offset_reg); |
  // We need to remember the high word of {result} so that we can set it to
  // zero at the end if necessary.
| Register result_high = no_reg; |
| switch (type.value()) { |
| case StoreType::kI64Store8: |
| result_high = result.high_gp(); |
| result = result.low(); |
| new_value = new_value.low(); |
| expected = expected.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store8: |
| load = &Assembler::ldrexb; |
| store = &Assembler::strexb; |
| // We have to clear the high bits of {expected}, as we can only do a |
| // 32-bit comparison. If the {expected} register is used, we spill it |
| // first. |
| if (cache_state()->is_used(expected)) { |
| SpillRegister(expected); |
| } |
| uxtb(expected.gp(), expected.gp()); |
| break; |
| case StoreType::kI64Store16: |
| result_high = result.high_gp(); |
| result = result.low(); |
| new_value = new_value.low(); |
| expected = expected.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store16: |
| load = &Assembler::ldrexh; |
| store = &Assembler::strexh; |
| // We have to clear the high bits of {expected}, as we can only do a |
| // 32-bit comparison. If the {expected} register is used, we spill it |
| // first. |
| if (cache_state()->is_used(expected)) { |
| SpillRegister(expected); |
| } |
| uxth(expected.gp(), expected.gp()); |
| break; |
| case StoreType::kI64Store32: |
| result_high = result.high_gp(); |
| result = result.low(); |
| new_value = new_value.low(); |
| expected = expected.low(); |
| V8_FALLTHROUGH; |
| case StoreType::kI32Store: |
| load = &Assembler::ldrex; |
| store = &Assembler::strex; |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| pinned.set(new_value); |
| pinned.set(expected); |
| |
| Register result_reg = result.gp(); |
| if (pinned.has(result)) { |
| result_reg = GetUnusedRegister(kGpReg, pinned).gp(); |
| } |
  pinned.set(LiftoffRegister(result_reg));
| Register store_result = GetUnusedRegister(kGpReg, pinned).gp(); |
| |
| UseScratchRegisterScope temps(this); |
| Register actual_addr = liftoff::CalculateActualAddress( |
| this, &temps, dst_addr, offset_reg, offset_imm); |
| |
| Label retry; |
| Label done; |
| dmb(ISH); |
| bind(&retry); |
  (this->*load)(result_reg, actual_addr, al);
  cmp(result_reg, expected.gp());
| b(ne, &done); |
| (this->*store)(store_result, new_value.gp(), actual_addr, al); |
| cmp(store_result, Operand(0)); |
| b(ne, &retry); |
| dmb(ISH); |
| bind(&done); |
| |
| if (result.gp() != result_reg) { |
| mov(result.gp(), result_reg); |
| } |
| if (result_high != no_reg) { |
| LoadConstant(LiftoffRegister(result_high), WasmValue(0)); |
| } |
| } |
| |
| void LiftoffAssembler::AtomicFence() { dmb(ISH); } |
| |
| void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst, |
| uint32_t caller_slot_idx, |
| ValueType type) { |
| MemOperand src(fp, (caller_slot_idx + 1) * kSystemPointerSize); |
| liftoff::Load(this, dst, src, type); |
| } |
| |
| void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src, |
| uint32_t caller_slot_idx, |
| ValueType type) { |
| MemOperand dst(fp, (caller_slot_idx + 1) * kSystemPointerSize); |
| liftoff::Store(this, src, dst, type); |
| } |
| |
| void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset, |
| ValueType type) { |
| MemOperand src(sp, offset); |
| liftoff::Load(this, dst, src, type); |
| } |
| |
| void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset, |
| ValueType type) { |
| DCHECK_NE(dst_offset, src_offset); |
| LiftoffRegister reg = GetUnusedRegister(reg_class_for(type), {}); |
| Fill(reg, src_offset, type); |
| Spill(dst_offset, reg, type); |
| } |
| |
| void LiftoffAssembler::Move(Register dst, Register src, ValueType type) { |
| DCHECK_NE(dst, src); |
| DCHECK(type == kWasmI32 || type.is_reference_type()); |
| TurboAssembler::Move(dst, src); |
| } |
| |
| void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src, |
| ValueType type) { |
| DCHECK_NE(dst, src); |
| if (type == kWasmF32) { |
| vmov(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); |
| } else if (type == kWasmF64) { |
| vmov(dst, src); |
| } else { |
| DCHECK_EQ(kWasmS128, type); |
| vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); |
| } |
| } |
| |
| void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueType type) { |
| // The {str} instruction needs a temp register when the immediate in the |
| // provided MemOperand does not fit into 12 bits. This happens for large stack |
| // frames. This DCHECK checks that the temp register is available when needed. |
| DCHECK(UseScratchRegisterScope{this}.CanAcquire()); |
| DCHECK_LT(0, offset); |
| RecordUsedSpillOffset(offset); |
| MemOperand dst(fp, -offset); |
| liftoff::Store(this, reg, dst, type); |
| } |
| |
| void LiftoffAssembler::Spill(int offset, WasmValue value) { |
| RecordUsedSpillOffset(offset); |
| MemOperand dst = liftoff::GetStackSlot(offset); |
| UseScratchRegisterScope temps(this); |
| Register src = no_reg; |
  // If the offset needs more than one instruction to encode, {str} itself
  // will require the assembler scratch register, so we cannot use it for the
  // value in that case.
| if (!ImmediateFitsAddrMode2Instruction(dst.offset())) { |
| src = GetUnusedRegister(kGpReg, {}).gp(); |
| } else { |
| src = temps.Acquire(); |
| } |
| switch (value.type().kind()) { |
| case ValueType::kI32: |
| mov(src, Operand(value.to_i32())); |
| str(src, dst); |
| break; |
| case ValueType::kI64: { |
| int32_t low_word = value.to_i64(); |
| mov(src, Operand(low_word)); |
| str(src, liftoff::GetHalfStackSlot(offset, kLowWord)); |
| int32_t high_word = value.to_i64() >> 32; |
| mov(src, Operand(high_word)); |
| str(src, liftoff::GetHalfStackSlot(offset, kHighWord)); |
| break; |
| } |
| default: |
| // We do not track f32 and f64 constants, hence they are unreachable. |
| UNREACHABLE(); |
| } |
| } |
| |
| void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueType type) { |
| liftoff::Load(this, reg, liftoff::GetStackSlot(offset), type); |
| } |
| |
| void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) { |
| ldr(reg, liftoff::GetHalfStackSlot(offset, half)); |
| } |
| |
| void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) { |
| DCHECK_LT(0, size); |
| DCHECK_EQ(0, size % 4); |
| RecordUsedSpillOffset(start + size); |
| |
| // We need a zero reg. Always use r0 for that, and push it before to restore |
| // its value afterwards. |
| push(r0); |
| mov(r0, Operand(0)); |
| |
| if (size <= 36) { |
| // Special straight-line code for up to 9 words. Generates one |
| // instruction per word. |
| for (int offset = 4; offset <= size; offset += 4) { |
| str(r0, liftoff::GetHalfStackSlot(start + offset, kLowWord)); |
| } |
| } else { |
| // General case for bigger counts (9 instructions). |
| // Use r1 for start address (inclusive), r2 for end address (exclusive). |
| push(r1); |
| push(r2); |
| sub(r1, fp, Operand(start + size)); |
| sub(r2, fp, Operand(start)); |
| |
| Label loop; |
| bind(&loop); |
| str(r0, MemOperand(r1, /* offset */ kSystemPointerSize, PostIndex)); |
| cmp(r1, r2); |
| b(&loop, ne); |
| |
| pop(r2); |
| pop(r1); |
| } |
| |
| pop(r0); |
| } |
| |
| #define I32_BINOP(name, instruction) \ |
| void LiftoffAssembler::emit_##name(Register dst, Register lhs, \ |
| Register rhs) { \ |
| instruction(dst, lhs, rhs); \ |
| } |
| #define I32_BINOP_I(name, instruction) \ |
| I32_BINOP(name, instruction) \ |
| void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \ |
| int32_t imm) { \ |
| instruction(dst, lhs, Operand(imm)); \ |
| } |
| #define I32_SHIFTOP(name, instruction) \ |
| void LiftoffAssembler::emit_##name(Register dst, Register src, \ |
| Register amount) { \ |
| UseScratchRegisterScope temps(this); \ |
| Register scratch = temps.Acquire(); \ |
| and_(scratch, amount, Operand(0x1f)); \ |
| instruction(dst, src, Operand(scratch)); \ |
| } \ |
| void LiftoffAssembler::emit_##name##i(Register dst, Register src, \ |
| int32_t amount) { \ |
| if (V8_LIKELY((amount & 31) != 0)) { \ |
| instruction(dst, src, Operand(amount & 31)); \ |
| } else if (dst != src) { \ |
| mov(dst, src); \ |
| } \ |
| } |
| #define FP32_UNOP(name, instruction) \ |
| void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \ |
| instruction(liftoff::GetFloatRegister(dst), \ |
| liftoff::GetFloatRegister(src)); \ |
| } |
| #define FP32_BINOP(name, instruction) \ |
| void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \ |
| DoubleRegister rhs) { \ |
| instruction(liftoff::GetFloatRegister(dst), \ |
| liftoff::GetFloatRegister(lhs), \ |
| liftoff::GetFloatRegister(rhs)); \ |
| } |
| #define FP64_UNOP(name, instruction) \ |
| void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \ |
| instruction(dst, src); \ |
| } |
| #define FP64_BINOP(name, instruction) \ |
| void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \ |
| DoubleRegister rhs) { \ |
| instruction(dst, lhs, rhs); \ |
| } |
| |
| I32_BINOP_I(i32_add, add) |
| I32_BINOP(i32_sub, sub) |
| I32_BINOP(i32_mul, mul) |
| I32_BINOP_I(i32_and, and_) |
| I32_BINOP_I(i32_or, orr) |
| I32_BINOP_I(i32_xor, eor) |
| I32_SHIFTOP(i32_shl, lsl) |
| I32_SHIFTOP(i32_sar, asr) |
| I32_SHIFTOP(i32_shr, lsr) |
| FP32_BINOP(f32_add, vadd) |
| FP32_BINOP(f32_sub, vsub) |
| FP32_BINOP(f32_mul, vmul) |
| FP32_BINOP(f32_div, vdiv) |
| FP32_UNOP(f32_abs, vabs) |
| FP32_UNOP(f32_neg, vneg) |
| FP32_UNOP(f32_sqrt, vsqrt) |
| FP64_BINOP(f64_add, vadd) |
| FP64_BINOP(f64_sub, vsub) |
| FP64_BINOP(f64_mul, vmul) |
| FP64_BINOP(f64_div, vdiv) |
| FP64_UNOP(f64_abs, vabs) |
| FP64_UNOP(f64_neg, vneg) |
| FP64_UNOP(f64_sqrt, vsqrt) |
| |
#undef I32_BINOP
#undef I32_BINOP_I
#undef I32_SHIFTOP
#undef FP32_UNOP
#undef FP32_BINOP
#undef FP64_UNOP
#undef FP64_BINOP
| |
| void LiftoffAssembler::emit_i32_clz(Register dst, Register src) { |
| clz(dst, src); |
| } |
| |
| void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) { |
| rbit(dst, src); |
| clz(dst, dst); |
| } |
| |
| namespace liftoff { |
| inline void GeneratePopCnt(Assembler* assm, Register dst, Register src, |
| Register scratch1, Register scratch2) { |
| DCHECK(!AreAliased(dst, scratch1, scratch2)); |
| if (src == scratch1) std::swap(scratch1, scratch2); |
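  // Parallel bit count in five steps. Worked example (illustrative), for
  // src == 0xF0F0F0F0: the intermediate values are 0xA0A0A0A0 (2-bit sums),
  // 0x40404040 (4-bit sums), and 0x04040404 (byte sums); the final shifted
  // adds and mask yield 16.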
| // x = x - ((x & (0x55555555 << 1)) >> 1) |
| assm->and_(scratch1, src, Operand(0xaaaaaaaa)); |
| assm->sub(dst, src, Operand(scratch1, LSR, 1)); |
| // x = (x & 0x33333333) + ((x & (0x33333333 << 2)) >> 2) |
| assm->mov(scratch1, Operand(0x33333333)); |
| assm->and_(scratch2, dst, Operand(scratch1, LSL, 2)); |
| assm->and_(scratch1, dst, scratch1); |
| assm->add(dst, scratch1, Operand(scratch2, LSR, 2)); |
| // x = (x + (x >> 4)) & 0x0F0F0F0F |
| assm->add(dst, dst, Operand(dst, LSR, 4)); |
| assm->and_(dst, dst, Operand(0x0f0f0f0f)); |
| // x = x + (x >> 8) |
| assm->add(dst, dst, Operand(dst, LSR, 8)); |
| // x = x + (x >> 16) |
| assm->add(dst, dst, Operand(dst, LSR, 16)); |
| // x = x & 0x3F |
| assm->and_(dst, dst, Operand(0x3f)); |
| } |
| } // namespace liftoff |
| |
| bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) { |
| LiftoffRegList pinned = LiftoffRegList::ForRegs(dst); |
| Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp(); |
| Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp(); |
| liftoff::GeneratePopCnt(this, dst, src, scratch1, scratch2); |
| return true; |
| } |
| |
| void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs, |
| Label* trap_div_by_zero, |
| Label* trap_div_unrepresentable) { |
| if (!CpuFeatures::IsSupported(SUDIV)) { |
| bailout(kMissingCPUFeature, "i32_divs"); |
| return; |
| } |
| CpuFeatureScope scope(this, SUDIV); |
| // Issue division early so we can perform the trapping checks whilst it |
| // completes. |
| bool speculative_sdiv = dst != lhs && dst != rhs; |
| if (speculative_sdiv) { |
| sdiv(dst, lhs, rhs); |
| } |
| Label noTrap; |
| // Check for division by zero. |
| cmp(rhs, Operand(0)); |
| b(trap_div_by_zero, eq); |
| // Check for kMinInt / -1. This is unrepresentable. |
| cmp(rhs, Operand(-1)); |
| b(&noTrap, ne); |
| cmp(lhs, Operand(kMinInt)); |
| b(trap_div_unrepresentable, eq); |
| bind(&noTrap); |
| if (!speculative_sdiv) { |
| sdiv(dst, lhs, rhs); |
| } |
| } |
| |
| void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs, |
| Label* trap_div_by_zero) { |
| if (!CpuFeatures::IsSupported(SUDIV)) { |
| bailout(kMissingCPUFeature, "i32_divu"); |
| return; |
| } |
| CpuFeatureScope scope(this, SUDIV); |
| // Check for division by zero. |
| cmp(rhs, Operand(0)); |
| b(trap_div_by_zero, eq); |
| udiv(dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs, |
| Label* trap_div_by_zero) { |
| if (!CpuFeatures::IsSupported(SUDIV)) { |
| // When this case is handled, a check for ARMv7 is required to use mls. |
| // Mls support is implied with SUDIV support. |
| bailout(kMissingCPUFeature, "i32_rems"); |
| return; |
| } |
| CpuFeatureScope scope(this, SUDIV); |
  // No need to check kMinInt / -1: sdiv yields kMinInt for it, and
  // kMinInt * -1 wraps back to kMinInt, so the mls result is 0 as required.
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| sdiv(scratch, lhs, rhs); |
| // Check for division by zero. |
| cmp(rhs, Operand(0)); |
| b(trap_div_by_zero, eq); |
| // Compute remainder. |
| mls(dst, scratch, rhs, lhs); |
| } |
| |
| void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs, |
| Label* trap_div_by_zero) { |
| if (!CpuFeatures::IsSupported(SUDIV)) { |
    // If the non-SUDIV path is ever implemented, it will need an explicit
    // ARMv7 check in order to use mls; with SUDIV, mls support is implied.
| bailout(kMissingCPUFeature, "i32_remu"); |
| return; |
| } |
| CpuFeatureScope scope(this, SUDIV); |
  // Unsigned division has no kMinInt / -1 corner case, so no extra check is
  // needed before computing the remainder.
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| udiv(scratch, lhs, rhs); |
| // Check for division by zero. |
| cmp(rhs, Operand(0)); |
| b(trap_div_by_zero, eq); |
| // Compute remainder. |
| mls(dst, scratch, rhs, lhs); |
| } |
| |
| void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::I64Binop<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs, |
| int32_t imm) { |
| liftoff::I64BinopI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm); |
| } |
| |
| void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| // Idea: |
| // [ lhs_hi | lhs_lo ] * [ rhs_hi | rhs_lo ] |
| // = [ lhs_hi * rhs_lo | ] (32 bit mul, shift 32) |
| // + [ lhs_lo * rhs_hi | ] (32 bit mul, shift 32) |
| // + [ lhs_lo * rhs_lo ] (32x32->64 mul, shift 0) |
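  // This works since, modulo 2^64,
  //   (2^32 * lhs_hi + lhs_lo) * (2^32 * rhs_hi + rhs_lo)
  //     = 2^32 * (lhs_hi * rhs_lo + lhs_lo * rhs_hi) + lhs_lo * rhs_lo,
  // so the lhs_hi * rhs_hi term can be dropped entirely.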
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| // scratch = lhs_hi * rhs_lo |
| mul(scratch, lhs.high_gp(), rhs.low_gp()); |
| // scratch += lhs_lo * rhs_hi |
| mla(scratch, lhs.low_gp(), rhs.high_gp(), scratch); |
| // TODO(arm): use umlal once implemented correctly in the simulator. |
| // [dst_hi|dst_lo] = lhs_lo * rhs_lo |
| umull(dst.low_gp(), dst.high_gp(), lhs.low_gp(), rhs.low_gp()); |
| // dst_hi += scratch |
| add(dst.high_gp(), dst.high_gp(), scratch); |
| } |
| |
| bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs, |
| Label* trap_div_by_zero, |
| Label* trap_div_unrepresentable) { |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs, |
| Label* trap_div_by_zero) { |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs, |
| Label* trap_div_by_zero) { |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs, |
| Label* trap_div_by_zero) { |
| return false; |
| } |
| |
| void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src, |
| Register amount) { |
| liftoff::I64Shiftop<&TurboAssembler::LslPair, true>(this, dst, src, amount); |
| } |
| |
| void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src, |
| int32_t amount) { |
| UseScratchRegisterScope temps(this); |
| // {src.low_gp()} will still be needed after writing {dst.high_gp()}. |
| Register src_low = |
| liftoff::EnsureNoAlias(this, src.low_gp(), dst.high_gp(), &temps); |
| |
| LslPair(dst.low_gp(), dst.high_gp(), src_low, src.high_gp(), amount & 63); |
| } |
| |
| void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src, |
| Register amount) { |
| liftoff::I64Shiftop<&TurboAssembler::AsrPair, false>(this, dst, src, amount); |
| } |
| |
| void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src, |
| int32_t amount) { |
| UseScratchRegisterScope temps(this); |
| // {src.high_gp()} will still be needed after writing {dst.low_gp()}. |
| Register src_high = |
| liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps); |
| |
| AsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63); |
| } |
| |
| void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, |
| Register amount) { |
| liftoff::I64Shiftop<&TurboAssembler::LsrPair, false>(this, dst, src, amount); |
| } |
| |
| void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src, |
| int32_t amount) { |
| UseScratchRegisterScope temps(this); |
| // {src.high_gp()} will still be needed after writing {dst.low_gp()}. |
| Register src_high = |
| liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps); |
| |
| LsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63); |
| } |
| |
| void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) { |
| // return high == 0 ? 32 + CLZ32(low) : CLZ32(high); |
| Label done; |
| Label high_is_zero; |
| cmp(src.high_gp(), Operand(0)); |
| b(&high_is_zero, eq); |
| |
| clz(dst.low_gp(), src.high_gp()); |
| jmp(&done); |
| |
| bind(&high_is_zero); |
| clz(dst.low_gp(), src.low_gp()); |
| add(dst.low_gp(), dst.low_gp(), Operand(32)); |
| |
| bind(&done); |
| mov(dst.high_gp(), Operand(0)); // High word of result is always 0. |
| } |
| |
| void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) { |
| // return low == 0 ? 32 + CTZ32(high) : CTZ32(low); |
| // CTZ32(x) = CLZ(RBIT(x)) |
| Label done; |
| Label low_is_zero; |
| cmp(src.low_gp(), Operand(0)); |
| b(&low_is_zero, eq); |
| |
| rbit(dst.low_gp(), src.low_gp()); |
| clz(dst.low_gp(), dst.low_gp()); |
| jmp(&done); |
| |
| bind(&low_is_zero); |
| rbit(dst.low_gp(), src.high_gp()); |
| clz(dst.low_gp(), dst.low_gp()); |
| add(dst.low_gp(), dst.low_gp(), Operand(32)); |
| |
| bind(&done); |
| mov(dst.high_gp(), Operand(0)); // High word of result is always 0. |
| } |
| |
| bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst, |
| LiftoffRegister src) { |
| // Produce partial popcnts in the two dst registers, making sure not to |
| // overwrite the second src register before using it. |
| Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp(); |
| Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp(); |
| LiftoffRegList pinned = LiftoffRegList::ForRegs(dst, src2); |
| Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp(); |
| Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp(); |
| liftoff::GeneratePopCnt(this, dst.low_gp(), src1, scratch1, scratch2); |
| liftoff::GeneratePopCnt(this, dst.high_gp(), src2, scratch1, scratch2); |
| // Now add the two into the lower dst reg and clear the higher dst reg. |
| add(dst.low_gp(), dst.low_gp(), dst.high_gp()); |
| mov(dst.high_gp(), Operand(0)); |
| return true; |
| } |
| |
| bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) { |
| if (CpuFeatures::IsSupported(ARMv8)) { |
| CpuFeatureScope scope(this, ARMv8); |
| vrintp(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); |
| return true; |
| } |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) { |
| if (CpuFeatures::IsSupported(ARMv8)) { |
| CpuFeatureScope scope(this, ARMv8); |
| vrintm(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); |
| return true; |
| } |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) { |
| if (CpuFeatures::IsSupported(ARMv8)) { |
| CpuFeatureScope scope(this, ARMv8); |
| vrintz(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); |
| return true; |
| } |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst, |
| DoubleRegister src) { |
| if (CpuFeatures::IsSupported(ARMv8)) { |
| CpuFeatureScope scope(this, ARMv8); |
| vrintn(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src)); |
| return true; |
| } |
| return false; |
| } |
| |
| void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs, |
| DoubleRegister rhs) { |
| liftoff::EmitFloatMinOrMax( |
| this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs), |
| liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMin); |
| } |
| |
| void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs, |
| DoubleRegister rhs) { |
| liftoff::EmitFloatMinOrMax( |
| this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs), |
| liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMax); |
| } |
| |
| bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) { |
| if (CpuFeatures::IsSupported(ARMv8)) { |
| CpuFeatureScope scope(this, ARMv8); |
| vrintp(dst, src); |
| return true; |
| } |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) { |
| if (CpuFeatures::IsSupported(ARMv8)) { |
| CpuFeatureScope scope(this, ARMv8); |
| vrintm(dst, src); |
| return true; |
| } |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) { |
| if (CpuFeatures::IsSupported(ARMv8)) { |
| CpuFeatureScope scope(this, ARMv8); |
| vrintz(dst, src); |
| return true; |
| } |
| return false; |
| } |
| |
| bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst, |
| DoubleRegister src) { |
| if (CpuFeatures::IsSupported(ARMv8)) { |
| CpuFeatureScope scope(this, ARMv8); |
| vrintn(dst, src); |
| return true; |
| } |
| return false; |
| } |
| |
| void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs, |
| DoubleRegister rhs) { |
| liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMin); |
| } |
| |
| void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs, |
| DoubleRegister rhs) { |
| liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMax); |
| } |
| |
| void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) { |
| // This is a nop on arm. |
| } |
| |
| void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs, |
| DoubleRegister rhs) { |
| constexpr uint32_t kF32SignBit = uint32_t{1} << 31; |
| UseScratchRegisterScope temps(this); |
| Register scratch = GetUnusedRegister(kGpReg, {}).gp(); |
| Register scratch2 = temps.Acquire(); |
| VmovLow(scratch, lhs); |
| // Clear sign bit in {scratch}. |
| bic(scratch, scratch, Operand(kF32SignBit)); |
| VmovLow(scratch2, rhs); |
| // Isolate sign bit in {scratch2}. |
| and_(scratch2, scratch2, Operand(kF32SignBit)); |
| // Combine {scratch2} into {scratch}. |
| orr(scratch, scratch, scratch2); |
| VmovLow(dst, scratch); |
| } |
| |
| void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, |
| DoubleRegister rhs) { |
| constexpr uint32_t kF64SignBitHighWord = uint32_t{1} << 31; |
  // On arm, we cannot hold the whole f64 value in a gp register, so we just
  // operate on the upper half, which contains the sign bit.
| UseScratchRegisterScope temps(this); |
| Register scratch = GetUnusedRegister(kGpReg, {}).gp(); |
| Register scratch2 = temps.Acquire(); |
| VmovHigh(scratch, lhs); |
| // Clear sign bit in {scratch}. |
| bic(scratch, scratch, Operand(kF64SignBitHighWord)); |
| VmovHigh(scratch2, rhs); |
| // Isolate sign bit in {scratch2}. |
| and_(scratch2, scratch2, Operand(kF64SignBitHighWord)); |
| // Combine {scratch2} into {scratch}. |
| orr(scratch, scratch, scratch2); |
| vmov(dst, lhs); |
| VmovHigh(dst, scratch); |
| } |
| |
| bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, |
| LiftoffRegister dst, |
| LiftoffRegister src, Label* trap) { |
| switch (opcode) { |
| case kExprI32ConvertI64: |
| TurboAssembler::Move(dst.gp(), src.low_gp()); |
| return true; |
| case kExprI32SConvertF32: { |
| UseScratchRegisterScope temps(this); |
| SwVfpRegister scratch_f = temps.AcquireS(); |
| vcvt_s32_f32( |
| scratch_f, |
| liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. |
| vmov(dst.gp(), scratch_f); |
      // Check underflow and NaN (an unordered compare also takes lt).
| vmov(scratch_f, Float32(static_cast<float>(INT32_MIN))); |
| VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f); |
| b(trap, lt); |
      // Check overflow: the conversion saturates to kMaxInt, so dst + 1
      // sets the overflow flag.
| cmp(dst.gp(), Operand(-1)); |
| b(trap, vs); |
| return true; |
| } |
| case kExprI32UConvertF32: { |
| UseScratchRegisterScope temps(this); |
| SwVfpRegister scratch_f = temps.AcquireS(); |
| vcvt_u32_f32( |
| scratch_f, |
| liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. |
| vmov(dst.gp(), scratch_f); |
      // Check underflow and NaN (an unordered compare also takes le).
| vmov(scratch_f, Float32(-1.0f)); |
| VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f); |
| b(trap, le); |
      // Check overflow: the conversion saturates to 0xFFFFFFFF, which no
      // in-range f32 input can produce.
| cmp(dst.gp(), Operand(-1)); |
| b(trap, eq); |
| return true; |
| } |
| case kExprI32SConvertF64: { |
| UseScratchRegisterScope temps(this); |
| SwVfpRegister scratch_f = temps.AcquireS(); |
| vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. |
| vmov(dst.gp(), scratch_f); |
| // Check underflow and NaN. |
| DwVfpRegister scratch_d = temps.AcquireD(); |
| vmov(scratch_d, Double(static_cast<double>(INT32_MIN - 1.0))); |
| VFPCompareAndSetFlags(src.fp(), scratch_d); |
| b(trap, le); |
| // Check overflow. |
| vmov(scratch_d, Double(static_cast<double>(INT32_MAX + 1.0))); |
| VFPCompareAndSetFlags(src.fp(), scratch_d); |
| b(trap, ge); |
| return true; |
| } |
| case kExprI32UConvertF64: { |
| UseScratchRegisterScope temps(this); |
| SwVfpRegister scratch_f = temps.AcquireS(); |
| vcvt_u32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. |
| vmov(dst.gp(), scratch_f); |
| // Check underflow and NaN. |
| DwVfpRegister scratch_d = temps.AcquireD(); |
| vmov(scratch_d, Double(static_cast<double>(-1.0))); |
| VFPCompareAndSetFlags(src.fp(), scratch_d); |
| b(trap, le); |
| // Check overflow. |
| vmov(scratch_d, Double(static_cast<double>(UINT32_MAX + 1.0))); |
| VFPCompareAndSetFlags(src.fp(), scratch_d); |
| b(trap, ge); |
| return true; |
| } |
| case kExprI32SConvertSatF32: { |
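      // On arm, vcvt saturates out-of-range inputs and converts NaN to 0,
      // which matches Wasm's saturating conversion semantics, so no explicit
      // checks are needed (this also holds for the three cases below).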
| UseScratchRegisterScope temps(this); |
| SwVfpRegister scratch_f = temps.AcquireS(); |
| vcvt_s32_f32( |
| scratch_f, |
| liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. |
| vmov(dst.gp(), scratch_f); |
| return true; |
| } |
| case kExprI32UConvertSatF32: { |
| UseScratchRegisterScope temps(this); |
| SwVfpRegister scratch_f = temps.AcquireS(); |
| vcvt_u32_f32( |
| scratch_f, |
| liftoff::GetFloatRegister(src.fp())); // f32 -> u32 round to zero. |
| vmov(dst.gp(), scratch_f); |
| return true; |
| } |
| case kExprI32SConvertSatF64: { |
| UseScratchRegisterScope temps(this); |
| SwVfpRegister scratch_f = temps.AcquireS(); |
| vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. |
| vmov(dst.gp(), scratch_f); |
| return true; |
| } |
| case kExprI32UConvertSatF64: { |
| UseScratchRegisterScope temps(this); |
| SwVfpRegister scratch_f = temps.AcquireS(); |
| vcvt_u32_f64(scratch_f, src.fp()); // f64 -> u32 round to zero. |
| vmov(dst.gp(), scratch_f); |
| return true; |
| } |
| case kExprI32ReinterpretF32: |
| vmov(dst.gp(), liftoff::GetFloatRegister(src.fp())); |
| return true; |
| case kExprI64SConvertI32: |
| if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp()); |
| mov(dst.high_gp(), Operand(src.gp(), ASR, 31)); |
| return true; |
| case kExprI64UConvertI32: |
| if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp()); |
| mov(dst.high_gp(), Operand(0)); |
| return true; |
| case kExprI64ReinterpretF64: |
| vmov(dst.low_gp(), dst.high_gp(), src.fp()); |
| return true; |
| case kExprF32SConvertI32: { |
| SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp()); |
| vmov(dst_float, src.gp()); |
| vcvt_f32_s32(dst_float, dst_float); |
| return true; |
| } |
| case kExprF32UConvertI32: { |
| SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp()); |
| vmov(dst_float, src.gp()); |
| vcvt_f32_u32(dst_float, dst_float); |
| return true; |
| } |
| case kExprF32ConvertF64: |
| vcvt_f32_f64(liftoff::GetFloatRegister(dst.fp()), src.fp()); |
| return true; |
| case kExprF32ReinterpretI32: |
| vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); |
| return true; |
| case kExprF64SConvertI32: { |
| vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); |
| vcvt_f64_s32(dst.fp(), liftoff::GetFloatRegister(dst.fp())); |
| return true; |
| } |
| case kExprF64UConvertI32: { |
| vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); |
| vcvt_f64_u32(dst.fp(), liftoff::GetFloatRegister(dst.fp())); |
| return true; |
| } |
| case kExprF64ConvertF32: |
| vcvt_f64_f32(dst.fp(), liftoff::GetFloatRegister(src.fp())); |
| return true; |
| case kExprF64ReinterpretI64: |
| vmov(dst.fp(), src.low_gp(), src.high_gp()); |
| return true; |
| case kExprF64SConvertI64: |
| case kExprF64UConvertI64: |
| case kExprI64SConvertF32: |
| case kExprI64UConvertF32: |
| case kExprI64SConvertSatF32: |
| case kExprI64UConvertSatF32: |
| case kExprF32SConvertI64: |
| case kExprF32UConvertI64: |
| case kExprI64SConvertF64: |
| case kExprI64UConvertF64: |
| case kExprI64SConvertSatF64: |
| case kExprI64UConvertSatF64: |
| // These cases can be handled by the C fallback function. |
| return false; |
| default: |
| UNREACHABLE(); |
| } |
| } |
| |
| void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) { |
| sxtb(dst, src); |
| } |
| |
| void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) { |
| sxth(dst, src); |
| } |
| |
| void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst, |
| LiftoffRegister src) { |
| emit_i32_signextend_i8(dst.low_gp(), src.low_gp()); |
| mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31)); |
| } |
| |
| void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst, |
| LiftoffRegister src) { |
| emit_i32_signextend_i16(dst.low_gp(), src.low_gp()); |
| mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31)); |
| } |
| |
| void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst, |
| LiftoffRegister src) { |
| TurboAssembler::Move(dst.low_gp(), src.low_gp()); |
| mov(dst.high_gp(), Operand(src.low_gp(), ASR, 31)); |
| } |
| |
| void LiftoffAssembler::emit_jump(Label* label) { b(label); } |
| |
| void LiftoffAssembler::emit_jump(Register target) { bx(target); } |
| |
| void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label, |
| ValueType type, Register lhs, |
| Register rhs) { |
| DCHECK_EQ(type, kWasmI32); |
| if (rhs == no_reg) { |
| cmp(lhs, Operand(0)); |
| } else { |
| cmp(lhs, rhs); |
| } |
| b(label, cond); |
| } |
| |
| void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) { |
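  // clz yields 32 (0b100000) only for a zero input, so shifting the count
  // right by log2(32) = 5 bits produces exactly the eqz result.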
| clz(dst, src); |
| mov(dst, Operand(dst, LSR, kRegSizeInBitsLog2)); |
| } |
| |
| void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst, |
| Register lhs, Register rhs) { |
| cmp(lhs, rhs); |
| mov(dst, Operand(0), LeaveCC); |
| mov(dst, Operand(1), LeaveCC, cond); |
| } |
| |
| void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) { |
| orr(dst, src.low_gp(), src.high_gp()); |
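  // Same clz trick as in emit_i32_eqz, applied to the or of both halves.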
| clz(dst, dst); |
| mov(dst, Operand(dst, LSR, 5)); |
| } |
| |
| void LiftoffAssembler::emit_i64_set_cond(Condition cond, Register dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| // For signed i64 comparisons, we still need to use unsigned comparison for |
| // the low word (the only bit carrying signedness information is the MSB in |
| // the high word). |
| Condition unsigned_cond = liftoff::MakeUnsigned(cond); |
| Label set_cond; |
| Label cont; |
| LiftoffRegister dest = LiftoffRegister(dst); |
| bool speculative_move = !dest.overlaps(lhs) && !dest.overlaps(rhs); |
| if (speculative_move) { |
| mov(dst, Operand(0)); |
| } |
| // Compare high word first. If it differs, use it for the set_cond. If it's |
| // equal, compare the low word and use that for set_cond. |
| cmp(lhs.high_gp(), rhs.high_gp()); |
| if (unsigned_cond == cond) { |
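    // This cmp is conditional on eq: it only executes if the high words were
    // equal; otherwise the flags of the high-word cmp survive into the
    // conditional mov below.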
| cmp(lhs.low_gp(), rhs.low_gp(), kEqual); |
| if (!speculative_move) { |
| mov(dst, Operand(0)); |
| } |
| mov(dst, Operand(1), LeaveCC, cond); |
| } else { |
| // If the condition predicate for the low differs from that for the high |
| // word, the conditional move instructions must be separated. |
| b(ne, &set_cond); |
| cmp(lhs.low_gp(), rhs.low_gp()); |
| if (!speculative_move) { |
| mov(dst, Operand(0)); |
| } |
| mov(dst, Operand(1), LeaveCC, unsigned_cond); |
| b(&cont); |
| bind(&set_cond); |
| if (!speculative_move) { |
| mov(dst, Operand(0)); |
| } |
| mov(dst, Operand(1), LeaveCC, cond); |
| bind(&cont); |
| } |
| } |
| |
| void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst, |
| DoubleRegister lhs, |
| DoubleRegister rhs) { |
| VFPCompareAndSetFlags(liftoff::GetFloatRegister(lhs), |
| liftoff::GetFloatRegister(rhs)); |
| mov(dst, Operand(0), LeaveCC); |
| mov(dst, Operand(1), LeaveCC, cond); |
| if (cond != ne) { |
    // If the V flag is set, at least one of the arguments was NaN -> false.
| mov(dst, Operand(0), LeaveCC, vs); |
| } |
| } |
| |
| void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, |
| DoubleRegister lhs, |
| DoubleRegister rhs) { |
| VFPCompareAndSetFlags(lhs, rhs); |
| mov(dst, Operand(0), LeaveCC); |
| mov(dst, Operand(1), LeaveCC, cond); |
| if (cond != ne) { |
    // If the V flag is set, at least one of the arguments was NaN -> false.
| mov(dst, Operand(0), LeaveCC, vs); |
| } |
| } |
| |
| bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition, |
| LiftoffRegister true_value, |
| LiftoffRegister false_value, |
| ValueType type) { |
| return false; |
| } |
| |
| void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, |
| Register offset_reg, uint32_t offset_imm, |
| LoadType type, |
| LoadTransformationKind transform, |
| uint32_t* protected_load_pc) { |
| UseScratchRegisterScope temps(this); |
| Register actual_src_addr = liftoff::CalculateActualAddress( |
| this, &temps, src_addr, offset_reg, offset_imm); |
| *protected_load_pc = pc_offset(); |
| MachineType memtype = type.mem_type(); |
| |
| if (transform == LoadTransformationKind::kExtend) { |
| if (memtype == MachineType::Int8()) { |
| vld1(Neon8, NeonListOperand(dst.low_fp()), |
| NeonMemOperand(actual_src_addr)); |
| vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp()); |
| } else if (memtype == MachineType::Uint8()) { |
| vld1(Neon8, NeonListOperand(dst.low_fp()), |
| NeonMemOperand(actual_src_addr)); |
| vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp()); |
| } else if (memtype == MachineType::Int16()) { |
| vld1(Neon16, NeonListOperand(dst.low_fp()), |
| NeonMemOperand(actual_src_addr)); |
| vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp()); |
| } else if (memtype == MachineType::Uint16()) { |
| vld1(Neon16, NeonListOperand(dst.low_fp()), |
| NeonMemOperand(actual_src_addr)); |
| vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp()); |
| } else if (memtype == MachineType::Int32()) { |
| vld1(Neon32, NeonListOperand(dst.low_fp()), |
| NeonMemOperand(actual_src_addr)); |
| vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp()); |
| } else if (memtype == MachineType::Uint32()) { |
| vld1(Neon32, NeonListOperand(dst.low_fp()), |
| NeonMemOperand(actual_src_addr)); |
| vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp()); |
| } |
| } else if (transform == LoadTransformationKind::kZeroExtend) { |
| Simd128Register dest = liftoff::GetSimd128Register(dst); |
| if (memtype == MachineType::Int32()) { |
| vmov(dest, 0); |
| vld1s(Neon32, NeonListOperand(dst.low_fp()), 0, |
| NeonMemOperand(actual_src_addr)); |
| } else { |
| DCHECK_EQ(MachineType::Int64(), memtype); |
| vmov(dest.high(), 0); |
| vld1(Neon64, NeonListOperand(dest.low()), |
| NeonMemOperand(actual_src_addr)); |
| } |
| } else { |
| DCHECK_EQ(LoadTransformationKind::kSplat, transform); |
| if (memtype == MachineType::Int8()) { |
| vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)), |
| NeonMemOperand(actual_src_addr)); |
| } else if (memtype == MachineType::Int16()) { |
| vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)), |
| NeonMemOperand(actual_src_addr)); |
| } else if (memtype == MachineType::Int32()) { |
| vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)), |
| NeonMemOperand(actual_src_addr)); |
| } else if (memtype == MachineType::Int64()) { |
| vld1(Neon32, NeonListOperand(dst.low_fp()), |
| NeonMemOperand(actual_src_addr)); |
| TurboAssembler::Move(dst.high_fp(), dst.low_fp()); |
| } |
| } |
| } |
| |
| void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| UseScratchRegisterScope temps(this); |
| |
| NeonListOperand table(liftoff::GetSimd128Register(lhs)); |
| if (dst == lhs) { |
| // dst will be overwritten, so keep the table somewhere else. |
| QwNeonRegister tbl = temps.AcquireQ(); |
| TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs)); |
| table = NeonListOperand(tbl); |
| } |
| |
| vtbl(dst.low_fp(), table, rhs.low_fp()); |
| vtbl(dst.high_fp(), table, rhs.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, |
| LiftoffRegister src) { |
| TurboAssembler::Move(dst.low_fp(), src.fp()); |
| TurboAssembler::Move(dst.high_fp(), src.fp()); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| uint8_t imm_lane_idx) { |
| ExtractLane(dst.fp(), liftoff::GetSimd128Register(lhs), imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst, |
| LiftoffRegister src1, |
| LiftoffRegister src2, |
| uint8_t imm_lane_idx) { |
| ReplaceLane(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src1), src2.fp(), imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vabs(dst.low_fp(), src.low_fp()); |
| vabs(dst.high_fp(), src.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vneg(dst.low_fp(), src.low_fp()); |
| vneg(dst.high_fp(), src.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vsqrt(dst.low_fp(), src.low_fp()); |
| vsqrt(dst.high_fp(), src.high_fp()); |
| } |
| |
| bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst, |
| LiftoffRegister src) { |
| if (!CpuFeatures::IsSupported(ARMv8)) { |
| return false; |
| } |
| |
| CpuFeatureScope scope(this, ARMv8); |
| vrintp(dst.low_fp(), src.low_fp()); |
| vrintp(dst.high_fp(), src.high_fp()); |
| return true; |
| } |
| |
| bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst, |
| LiftoffRegister src) { |
| if (!CpuFeatures::IsSupported(ARMv8)) { |
| return false; |
| } |
| |
| CpuFeatureScope scope(this, ARMv8); |
| vrintm(dst.low_fp(), src.low_fp()); |
| vrintm(dst.high_fp(), src.high_fp()); |
| return true; |
| } |
| |
| bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst, |
| LiftoffRegister src) { |
| if (!CpuFeatures::IsSupported(ARMv8)) { |
| return false; |
| } |
| |
| CpuFeatureScope scope(this, ARMv8); |
| vrintz(dst.low_fp(), src.low_fp()); |
| vrintz(dst.high_fp(), src.high_fp()); |
| return true; |
| } |
| |
| bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst, |
| LiftoffRegister src) { |
| if (!CpuFeatures::IsSupported(ARMv8)) { |
| return false; |
| } |
| |
| CpuFeatureScope scope(this, ARMv8); |
| vrintn(dst.low_fp(), src.low_fp()); |
| vrintn(dst.high_fp(), src.high_fp()); |
| return true; |
| } |
| |
| void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); |
| vadd(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vsub(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); |
| vsub(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmul(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); |
| vmul(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vdiv(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); |
| vdiv(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| Simd128Register dest = liftoff::GetSimd128Register(dst); |
| Simd128Register left = liftoff::GetSimd128Register(lhs); |
| Simd128Register right = liftoff::GetSimd128Register(rhs); |
| |
| liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(), |
| liftoff::MinOrMax::kMin); |
| liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(), |
| liftoff::MinOrMax::kMin); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| Simd128Register dest = liftoff::GetSimd128Register(dst); |
| Simd128Register left = liftoff::GetSimd128Register(lhs); |
| Simd128Register right = liftoff::GetSimd128Register(rhs); |
| |
| liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(), |
| liftoff::MinOrMax::kMax); |
| liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(), |
| liftoff::MinOrMax::kMax); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| QwNeonRegister dest = liftoff::GetSimd128Register(dst); |
| QwNeonRegister left = liftoff::GetSimd128Register(lhs); |
| QwNeonRegister right = liftoff::GetSimd128Register(rhs); |
| |
| if (dst != rhs) { |
| vmov(dest, left); |
| } |
| |
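  // Select right where right < left. mi is false for an unordered compare,
  // so a NaN in either operand keeps left, matching Wasm pmin (b < a ? b : a).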
| VFPCompareAndSetFlags(right.low(), left.low()); |
| vmov(dest.low(), right.low(), mi); |
| VFPCompareAndSetFlags(right.high(), left.high()); |
| vmov(dest.high(), right.high(), mi); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| QwNeonRegister dest = liftoff::GetSimd128Register(dst); |
| QwNeonRegister left = liftoff::GetSimd128Register(lhs); |
| QwNeonRegister right = liftoff::GetSimd128Register(rhs); |
| |
| if (dst != rhs) { |
| vmov(dest, left); |
| } |
| |
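  // As in pmin, but select right where right > left; gt is also false for an
  // unordered compare, so a NaN keeps left (Wasm pmax: a < b ? b : a).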
| VFPCompareAndSetFlags(right.low(), left.low()); |
| vmov(dest.low(), right.low(), gt); |
| VFPCompareAndSetFlags(right.high(), left.high()); |
| vmov(dest.high(), right.high(), gt); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| uint8_t imm_lane_idx) { |
| ExtractLane(liftoff::GetFloatRegister(dst.fp()), |
| liftoff::GetSimd128Register(lhs), imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst, |
| LiftoffRegister src1, |
| LiftoffRegister src2, |
| uint8_t imm_lane_idx) { |
| ReplaceLane(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src1), |
| liftoff::GetFloatRegister(src2.fp()), imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vabs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vneg(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst, |
| LiftoffRegister src) { |
  // The d registers available to us are d0 to d15, each of which always
  // maps to 2 s registers.
| LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code()); |
| LowDwVfpRegister src_low = LowDwVfpRegister::from_code(src.low_fp().code()); |
| |
| LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code()); |
| LowDwVfpRegister src_high = LowDwVfpRegister::from_code(src.high_fp().code()); |
| |
| vsqrt(dst_low.low(), src_low.low()); |
| vsqrt(dst_low.high(), src_low.high()); |
| vsqrt(dst_high.low(), src_high.low()); |
| vsqrt(dst_high.high(), src_high.high()); |
| } |
| |
| bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst, |
| LiftoffRegister src) { |
| if (!CpuFeatures::IsSupported(ARMv8)) { |
| return false; |
| } |
| |
| CpuFeatureScope scope(this, ARMv8); |
| vrintp(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| return true; |
| } |
| |
| bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst, |
| LiftoffRegister src) { |
| if (!CpuFeatures::IsSupported(ARMv8)) { |
| return false; |
| } |
| |
| CpuFeatureScope scope(this, ARMv8); |
| vrintm(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| return true; |
| } |
| |
| bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst, |
| LiftoffRegister src) { |
| if (!CpuFeatures::IsSupported(ARMv8)) { |
| return false; |
| } |
| |
| CpuFeatureScope scope(this, ARMv8); |
| vrintz(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| return true; |
| } |
| |
| bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst, |
| LiftoffRegister src) { |
| if (!CpuFeatures::IsSupported(ARMv8)) { |
| return false; |
| } |
| |
| CpuFeatureScope scope(this, ARMv8); |
| vrintn(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| return true; |
| } |
| |
| void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vadd(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vsub(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmul(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
  // The d registers available to us are d0 to d15, each of which always
  // maps to 2 s registers.
| LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code()); |
| LowDwVfpRegister lhs_low = LowDwVfpRegister::from_code(lhs.low_fp().code()); |
| LowDwVfpRegister rhs_low = LowDwVfpRegister::from_code(rhs.low_fp().code()); |
| |
| LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code()); |
| LowDwVfpRegister lhs_high = LowDwVfpRegister::from_code(lhs.high_fp().code()); |
| LowDwVfpRegister rhs_high = LowDwVfpRegister::from_code(rhs.high_fp().code()); |
| |
| vdiv(dst_low.low(), lhs_low.low(), rhs_low.low()); |
| vdiv(dst_low.high(), lhs_low.high(), rhs_low.high()); |
| vdiv(dst_high.low(), lhs_high.low(), rhs_high.low()); |
| vdiv(dst_high.high(), lhs_high.high(), rhs_high.high()); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmin(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmax(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| UseScratchRegisterScope temps(this); |
| |
| QwNeonRegister tmp = liftoff::GetSimd128Register(dst); |
| if (dst == lhs || dst == rhs) { |
| tmp = temps.AcquireQ(); |
| } |
| |
| QwNeonRegister left = liftoff::GetSimd128Register(lhs); |
| QwNeonRegister right = liftoff::GetSimd128Register(rhs); |
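  // vcgt sets each lane of tmp to all ones where left > right; vbsl then
  // selects right where tmp is set: dest = (left > right) ? right : left.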
| vcgt(tmp, left, right); |
| vbsl(tmp, right, left); |
| |
| if (dst == lhs || dst == rhs) { |
| vmov(liftoff::GetSimd128Register(dst), tmp); |
| } |
| } |
| |
| void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| UseScratchRegisterScope temps(this); |
| |
| QwNeonRegister tmp = liftoff::GetSimd128Register(dst); |
| if (dst == lhs || dst == rhs) { |
| tmp = temps.AcquireQ(); |
| } |
| |
| QwNeonRegister left = liftoff::GetSimd128Register(lhs); |
| QwNeonRegister right = liftoff::GetSimd128Register(rhs); |
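  // Same selection trick as in pmin, with the comparison reversed:
  // dest = (right > left) ? right : left.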
| vcgt(tmp, right, left); |
| vbsl(tmp, right, left); |
| |
| if (dst == lhs || dst == rhs) { |
| vmov(liftoff::GetSimd128Register(dst), tmp); |
| } |
| } |
| |
| void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst, |
| LiftoffRegister src) { |
| Simd128Register dst_simd = liftoff::GetSimd128Register(dst); |
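  // Broadcast the low word to all four 32-bit lanes, then patch the high
  // word into the odd lanes so that both 64-bit lanes hold the full value.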
| vdup(Neon32, dst_simd, src.low_gp()); |
| ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 1); |
| ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 3); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| uint8_t imm_lane_idx) { |
| ExtractLane(dst.low_gp(), liftoff::GetSimd128Register(lhs), NeonS32, |
| imm_lane_idx * 2); |
| ExtractLane(dst.high_gp(), liftoff::GetSimd128Register(lhs), NeonS32, |
| imm_lane_idx * 2 + 1); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst, |
| LiftoffRegister src1, |
| LiftoffRegister src2, |
| uint8_t imm_lane_idx) { |
| Simd128Register dst_simd = liftoff::GetSimd128Register(dst); |
| Simd128Register src1_simd = liftoff::GetSimd128Register(src1); |
| ReplaceLane(dst_simd, src1_simd, src2.low_gp(), NeonS32, imm_lane_idx * 2); |
| ReplaceLane(dst_simd, dst_simd, src2.high_gp(), NeonS32, |
| imm_lane_idx * 2 + 1); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst, |
| LiftoffRegister src) { |
| UseScratchRegisterScope temps(this); |
| QwNeonRegister zero = |
| dst == src ? temps.AcquireQ() : liftoff::GetSimd128Register(dst); |
| vmov(zero, uint64_t{0}); |
| vsub(Neon64, liftoff::GetSimd128Register(dst), zero, |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kLeft, NeonS64, Neon32>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs, |
| int32_t rhs) { |
| vshl(NeonS64, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), rhs & 63); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kRight, NeonS64, Neon32>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS64>(this, dst, lhs, |
| rhs); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kRight, NeonU64, Neon32>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU64>(this, dst, lhs, |
| rhs); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vadd(Neon64, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vsub(Neon64, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| UseScratchRegisterScope temps(this); |
| |
| QwNeonRegister dst_neon = liftoff::GetSimd128Register(dst); |
| QwNeonRegister left = liftoff::GetSimd128Register(lhs); |
| QwNeonRegister right = liftoff::GetSimd128Register(rhs); |
| |
  // These temporary registers will be modified. We can directly modify lhs
  // and rhs if they are not used, saving on temporaries.
| QwNeonRegister tmp1 = left; |
| QwNeonRegister tmp2 = right; |
| |
| LiftoffRegList used_plus_dst = |
| cache_state()->used_registers | LiftoffRegList::ForRegs(dst); |
| |
| if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) { |
| tmp1 = temps.AcquireQ(); |
| // We only have 1 scratch Q register, so acquire another ourselves. |
| LiftoffRegList pinned = LiftoffRegList::ForRegs(dst); |
| LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); |
| tmp2 = liftoff::GetSimd128Register(unused_pair); |
| } else if (used_plus_dst.has(lhs)) { |
| tmp1 = temps.AcquireQ(); |
| } else if (used_plus_dst.has(rhs)) { |
| tmp2 = temps.AcquireQ(); |
| } |
| |
| // Algorithm from code-generator-arm.cc, refer to comments there for details. |
| if (tmp1 != left) { |
| vmov(tmp1, left); |
| } |
| if (tmp2 != right) { |
| vmov(tmp2, right); |
| } |
| |
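  // The vtrns split each 64-bit lane into its 32-bit halves: afterwards,
  // tmp1.low()/tmp2.low() hold the two low words and the high halves the two
  // high words. The multiplies below then compute, per 64-bit lane,
  // ((lo * hi') + (hi * lo')) << 32 + lo * lo', i.e. the product mod 2^64.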
| vtrn(Neon32, tmp1.low(), tmp1.high()); |
| vtrn(Neon32, tmp2.low(), tmp2.high()); |
| |
| vmull(NeonU32, dst_neon, tmp1.low(), tmp2.high()); |
| vmlal(NeonU32, dst_neon, tmp1.high(), tmp2.low()); |
| vshl(NeonU64, dst_neon, dst_neon, 32); |
| |
| vmlal(NeonU32, dst_neon, tmp1.low(), tmp2.low()); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp()); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| uint8_t imm_lane_idx) { |
| ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS32, |
| imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst, |
| LiftoffRegister src1, |
| LiftoffRegister src2, |
| uint8_t imm_lane_idx) { |
| ReplaceLane(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src1), src2.gp(), NeonS32, |
| imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vneg(Neon32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, |
| LiftoffRegister src) { |
| liftoff::EmitAnyTrue(this, dst, src); |
| } |
| |
| void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, |
| LiftoffRegister src) { |
| UseScratchRegisterScope temps(this); |
| DwVfpRegister scratch = temps.AcquireD(); |
| vpmin(NeonU32, scratch, src.low_fp(), src.high_fp()); |
| vpmin(NeonU32, scratch, scratch, scratch); |
| ExtractLane(dst.gp(), scratch, NeonS32, 0); |
| cmp(dst.gp(), Operand(0)); |
| mov(dst.gp(), Operand(1), LeaveCC, ne); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, |
| LiftoffRegister src) { |
| UseScratchRegisterScope temps(this); |
| Simd128Register tmp = liftoff::GetSimd128Register(src); |
| Simd128Register mask = temps.AcquireQ(); |
| |
| if (cache_state()->is_used(src)) { |
| // We only have 1 scratch Q register, so try and reuse src. |
| LiftoffRegList pinned = LiftoffRegList::ForRegs(src); |
| LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); |
| mask = liftoff::GetSimd128Register(unused_pair); |
| } |
| |
| vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31); |
  // Set the i-th bit in lane i. ANDing with tmp (all ones for negative
  // lanes, zero otherwise) leaves 2^i in lane i iff its sign bit was set.
| vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001)); |
| vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004)); |
| vand(tmp, mask, tmp); |
| vpadd(Neon32, tmp.low(), tmp.low(), tmp.high()); |
| vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero); |
| VmovLow(dst.gp(), tmp.low()); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kLeft, NeonS32, Neon32>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, |
| int32_t rhs) { |
| vshl(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), rhs & 31); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kRight, NeonS32, Neon32>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS32>(this, dst, lhs, |
| rhs); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kRight, NeonU32, Neon32>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU32>(this, dst, lhs, |
| rhs); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vadd(Neon32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vsub(Neon32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmul(Neon32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmin(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmin(NeonU32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmax(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmax(NeonU32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| QwNeonRegister dest = liftoff::GetSimd128Register(dst); |
| QwNeonRegister left = liftoff::GetSimd128Register(lhs); |
| QwNeonRegister right = liftoff::GetSimd128Register(rhs); |
| |
| UseScratchRegisterScope temps(this); |
| Simd128Register scratch = temps.AcquireQ(); |
| |
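  // vmull widens each 16-bit product to 32 bits; each vpadd then sums
  // adjacent products, yielding the four pairwise dot products.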
| vmull(NeonS16, scratch, left.low(), right.low()); |
| vpadd(Neon32, dest.low(), scratch.low(), scratch.high()); |
| |
| vmull(NeonS16, scratch, left.high(), right.high()); |
| vpadd(Neon32, dest.high(), scratch.low(), scratch.high()); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vdup(Neon16, liftoff::GetSimd128Register(dst), src.gp()); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vneg(Neon16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, |
| LiftoffRegister src) { |
| liftoff::EmitAnyTrue(this, dst, src); |
| } |
| |
| void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, |
| LiftoffRegister src) { |
| UseScratchRegisterScope temps(this); |
| DwVfpRegister scratch = temps.AcquireD(); |
| vpmin(NeonU16, scratch, src.low_fp(), src.high_fp()); |
| vpmin(NeonU16, scratch, scratch, scratch); |
| vpmin(NeonU16, scratch, scratch, scratch); |
| ExtractLane(dst.gp(), scratch, NeonS16, 0); |
| cmp(dst.gp(), Operand(0)); |
| mov(dst.gp(), Operand(1), LeaveCC, ne); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, |
| LiftoffRegister src) { |
| UseScratchRegisterScope temps(this); |
| Simd128Register tmp = liftoff::GetSimd128Register(src); |
| Simd128Register mask = temps.AcquireQ(); |
| |
| if (cache_state()->is_used(src)) { |
| // We only have 1 scratch Q register, so try and reuse src. |
| LiftoffRegList pinned = LiftoffRegList::ForRegs(src); |
| LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); |
| mask = liftoff::GetSimd128Register(unused_pair); |
| } |
| |
| vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15); |
  // Set the i-th bit in lane i. ANDing with tmp (all ones for negative
  // lanes, zero otherwise) leaves 2^i in lane i iff its sign bit was set.
| vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001)); |
| vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010)); |
| vand(tmp, mask, tmp); |
| vpadd(Neon16, tmp.low(), tmp.low(), tmp.high()); |
| vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); |
| vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); |
| vmov(NeonU16, dst.gp(), tmp.low(), 0); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kLeft, NeonS16, Neon16>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, |
| int32_t rhs) { |
| vshl(NeonS16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), rhs & 15); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kRight, NeonS16, Neon16>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS16>(this, dst, lhs, |
| rhs); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kRight, NeonU16, Neon16>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU16>(this, dst, lhs, |
| rhs); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vadd(Neon16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vqadd(NeonS16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vsub(Neon16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vqsub(NeonS16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vqsub(NeonU16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmul(Neon16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vqadd(NeonU16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmin(NeonS16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmin(NeonU16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmax(NeonS16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmax(NeonU16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| uint8_t imm_lane_idx) { |
| ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU16, |
| imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| uint8_t imm_lane_idx) { |
| ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS16, |
| imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst, |
| LiftoffRegister src1, |
| LiftoffRegister src2, |
| uint8_t imm_lane_idx) { |
| ReplaceLane(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src1), src2.gp(), NeonS16, |
| imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs, |
| const uint8_t shuffle[16], |
| bool is_swizzle) { |
| Simd128Register dest = liftoff::GetSimd128Register(dst); |
| Simd128Register src1 = liftoff::GetSimd128Register(lhs); |
| Simd128Register src2 = liftoff::GetSimd128Register(rhs); |
| UseScratchRegisterScope temps(this); |
| Simd128Register scratch = temps.AcquireQ(); |
| if ((src1 != src2) && src1.code() + 1 != src2.code()) { |
    // vtbl requires the table operands to be consecutive registers or the
    // same register. If src1 and src2 are neither the same nor consecutive,
    // move them to q14 and q15, which are safe to clobber because they are
    // not allocatable in Liftoff. If they are the same, we build a smaller
    // list operand below (table_size = 2).
| static_assert(!(kLiftoffAssemblerFpCacheRegs & |
| (d28.bit() | d29.bit() | d30.bit() | d31.bit())), |
| "This only works if q14-q15 (d28-d31) are not used."); |
| vmov(q14, src1); |
| src1 = q14; |
| vmov(q15, src2); |
| src2 = q15; |
| } |
| |
| int table_size = src1 == src2 ? 2 : 4; |
| |
| int scratch_s_base = scratch.code() * 4; |
| for (int j = 0; j < 4; j++) { |
| uint32_t imm = 0; |
| for (int i = 3; i >= 0; i--) { |
| imm = (imm << 8) | shuffle[j * 4 + i]; |
| } |
    // Indices must be in [0, 15] if table_size is 2, or in [0, 31] if it
    // is 4.
    DCHECK_EQ(0, imm & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
| vmov(SwVfpRegister::from_code(scratch_s_base + j), Float32::FromBits(imm)); |
| } |
| |
| DwVfpRegister table_base = src1.low(); |
| NeonListOperand table(table_base, table_size); |
| |
| if (dest != src1 && dest != src2) { |
| vtbl(dest.low(), table, scratch.low()); |
| vtbl(dest.high(), table, scratch.high()); |
| } else { |
| vtbl(scratch.low(), table, scratch.low()); |
| vtbl(scratch.high(), table, scratch.high()); |
| vmov(dest, scratch); |
| } |
| } |
| |
| void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp()); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| uint8_t imm_lane_idx) { |
| ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU8, imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| uint8_t imm_lane_idx) { |
| ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS8, imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst, |
| LiftoffRegister src1, |
| LiftoffRegister src2, |
| uint8_t imm_lane_idx) { |
| ReplaceLane(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src1), src2.gp(), NeonS8, |
| imm_lane_idx); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vneg(Neon8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, |
| LiftoffRegister src) { |
| liftoff::EmitAnyTrue(this, dst, src); |
| } |
| |
| void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, |
| LiftoffRegister src) { |
| UseScratchRegisterScope temps(this); |
| DwVfpRegister scratch = temps.AcquireD(); |
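  // Fold all 16 byte lanes down to a single lane via successive pairwise
  // unsigned minimums; the final minimum is non-zero iff every lane was
  // non-zero.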
| vpmin(NeonU8, scratch, src.low_fp(), src.high_fp()); |
| vpmin(NeonU8, scratch, scratch, scratch); |
| vpmin(NeonU8, scratch, scratch, scratch); |
| vpmin(NeonU8, scratch, scratch, scratch); |
| ExtractLane(dst.gp(), scratch, NeonS8, 0); |
| cmp(dst.gp(), Operand(0)); |
| mov(dst.gp(), Operand(1), LeaveCC, ne); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, |
| LiftoffRegister src) { |
| UseScratchRegisterScope temps(this); |
| Simd128Register tmp = liftoff::GetSimd128Register(src); |
| Simd128Register mask = temps.AcquireQ(); |
| |
  if (cache_state()->is_used(src)) {
    // There is only one scratch Q register, so by default src is reused as
    // the working register. If src is still live, pick an unused register
    // pair for the working register instead, so src is not clobbered.
    LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
    tmp = liftoff::GetSimd128Register(unused_pair);
  }
| |
| vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7); |
  // Set the i-th bit of each lane i. ANDing with tmp then leaves the i-th
  // bit set in every lane whose sign bit was set, and 0 in all other lanes.
  vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201}));
  vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201}));
| vand(tmp, mask, tmp); |
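  // Rotate the 16 bytes by 8 and zip them so that each 16-bit element of tmp
  // pairs a byte from the low half (lanes 0-7) with the corresponding byte
  // from the high half (lanes 8-15), then pairwise-add three times to sum
  // everything into element 0. The byte sums never carry, since each bit
  // position is set at most once, so element 0 ends up holding the 16-bit
  // lane mask.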
| vext(mask, tmp, tmp, 8); |
| vzip(Neon8, mask, tmp); |
| vpadd(Neon16, tmp.low(), tmp.low(), tmp.high()); |
| vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); |
| vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); |
| vmov(NeonU16, dst.gp(), tmp.low(), 0); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kLeft, NeonS8, Neon8>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, |
| int32_t rhs) { |
| vshl(NeonS8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), rhs & 7); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kRight, NeonS8, Neon8>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS8>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::EmitSimdShift<liftoff::kRight, NeonU8, Neon8>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, |
| LiftoffRegister lhs, int32_t rhs) { |
| liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU8>(this, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vadd(Neon8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vqadd(NeonS8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vsub(Neon8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vqsub(NeonS8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vqsub(NeonU8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmul(Neon8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vqadd(NeonU8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmin(NeonS8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmin(NeonU8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmax(NeonS8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vmax(NeonU8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vceq(Neon8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
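  // NEON has no "not equal" compare; test for equality and invert the
  // result. The same pattern is used for the other integer and f32x4 "ne"
  // operations below.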
| vceq(Neon8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcgt(NeonS8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcgt(NeonU8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcge(NeonS8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcge(NeonU8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vceq(Neon16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vceq(Neon16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcgt(NeonS16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcgt(NeonU16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcge(NeonS16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcge(NeonU16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vceq(Neon32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vceq(Neon32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcgt(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcgt(NeonU32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcge(NeonS32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcge(NeonU32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
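  // lhs < rhs is computed as rhs > lhs, since NEON only provides gt/ge.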
| vcgt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs), |
| liftoff::GetSimd128Register(lhs)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vcge(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs), |
| liftoff::GetSimd128Register(lhs)); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::F64x2Compare(this, dst, lhs, rhs, eq); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::F64x2Compare(this, dst, lhs, rhs, ne); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::F64x2Compare(this, dst, lhs, rhs, lt); |
| } |
| |
| void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::F64x2Compare(this, dst, lhs, rhs, le); |
| } |
| |
| void LiftoffAssembler::emit_s128_const(LiftoffRegister dst, |
| const uint8_t imms[16]) { |
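  // Load the 128-bit immediate via two 64-bit integer moves.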
| uint64_t vals[2]; |
| memcpy(vals, imms, sizeof(vals)); |
| vmov(dst.low_fp(), Double(vals[0])); |
| vmov(dst.high_fp(), Double(vals[1])); |
| } |
| |
| void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) { |
| vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vand(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vorr(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| veor(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_s128_select(LiftoffRegister dst, |
| LiftoffRegister src1, |
| LiftoffRegister src2, |
| LiftoffRegister mask) { |
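  // vbsl selects bits from src1 where the corresponding mask bit is set and
  // from src2 where it is clear; it both reads and writes its first operand,
  // so the mask must be moved into dst first.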
| if (dst != mask) { |
| vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(mask)); |
| } |
| vbsl(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1), |
| liftoff::GetSimd128Register(src2)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vcvt_s32_f32(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vcvt_u32_f32(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vcvt_f32_s32(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vcvt_f32_u32(liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
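  // Narrow the two i16x8 inputs into one i8x16 with signed saturation; the
  // unsigned variants below saturate the signed inputs to unsigned lanes.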
| liftoff::S128NarrowOp(this, NeonS8, NeonS8, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::S128NarrowOp(this, NeonU8, NeonS8, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::S128NarrowOp(this, NeonS16, NeonS16, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| liftoff::S128NarrowOp(this, NeonU16, NeonS16, dst, lhs, rhs); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst, |
| LiftoffRegister src) { |
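  // vmovl widens the eight low byte lanes to i16 with sign extension; the
  // _high variants below widen the upper half, and the _u variants
  // zero-extend instead.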
| vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.low_fp()); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.low_fp()); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.low_fp()); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.low_fp()); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.high_fp()); |
| } |
| |
| void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
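  // vbic computes lhs & ~rhs, which matches the s128.andnot semantics.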
| vbic(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), |
| liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vrhadd(NeonU8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst, |
| LiftoffRegister lhs, |
| LiftoffRegister rhs) { |
| vrhadd(NeonU16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); |
| } |
| |
| void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vabs(Neon8, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vabs(Neon16, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst, |
| LiftoffRegister src) { |
| vabs(Neon32, liftoff::GetSimd128Register(dst), |
| liftoff::GetSimd128Register(src)); |
| } |
| |
| void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { |
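  // The register holds the address of the stack limit cell; load the limit,
  // compare sp against it (unsigned), and branch to the out-of-line code
  // when sp is at or below the limit.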
| ldr(limit_address, MemOperand(limit_address)); |
| cmp(sp, limit_address); |
| b(ool_code, ls); |
| } |
| |
| void LiftoffAssembler::CallTrapCallbackForTesting() { |
| PrepareCallCFunction(0, 0); |
| CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0); |
| } |
| |
| void LiftoffAssembler::AssertUnreachable(AbortReason reason) { |
| // Asserts unreachable within the wasm code. |
| TurboAssembler::AssertUnreachable(reason); |
| } |
| |
| void LiftoffAssembler::PushRegisters(LiftoffRegList regs) { |
| RegList core_regs = regs.GetGpList(); |
| if (core_regs != 0) { |
| stm(db_w, sp, core_regs); |
| } |
| LiftoffRegList fp_regs = regs & kFpCacheRegList; |
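  // Store the FP registers in maximal runs of consecutive registers, since
  // vstm only accepts a contiguous range of up to 16 D registers.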
| while (!fp_regs.is_empty()) { |
| LiftoffRegister reg = fp_regs.GetFirstRegSet(); |
| DoubleRegister first = reg.fp(); |
| DoubleRegister last = first; |
| fp_regs.clear(reg); |
| while (!fp_regs.is_empty()) { |
| LiftoffRegister reg = fp_regs.GetFirstRegSet(); |
| int code = reg.fp().code(); |
      // vstm cannot store more than 16 registers, and the range must be
      // consecutive; stop extending the run when either condition would be
      // violated.
      if ((code != last.code() + 1) || ((code - first.code() + 1) > 16)) break;
| last = reg.fp(); |
| fp_regs.clear(reg); |
| } |
| vstm(db_w, sp, first, last); |
| } |
| } |
| |
| void LiftoffAssembler::PopRegisters(LiftoffRegList regs) { |
| LiftoffRegList fp_regs = regs & kFpCacheRegList; |
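  // Mirror PushRegisters: reload the FP registers in consecutive runs,
  // scanning from the highest register downwards so the loads match the
  // order in which the runs were pushed.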
| while (!fp_regs.is_empty()) { |
| LiftoffRegister reg = fp_regs.GetLastRegSet(); |
| DoubleRegister last = reg.fp(); |
| DoubleRegister first = last; |
| fp_regs.clear(reg); |
| while (!fp_regs.is_empty()) { |
| LiftoffRegister reg = fp_regs.GetLastRegSet(); |
| int code = reg.fp().code(); |
| if ((code != first.code() - 1) || ((last.code() - code + 1) > 16)) break; |
| first = reg.fp(); |
| fp_regs.clear(reg); |
| } |
| vldm(ia_w, sp, first, last); |
| } |
| RegList core_regs = regs.GetGpList(); |
| if (core_regs != 0) { |
| ldm(ia_w, sp, core_regs); |
| } |
| } |
| |
| void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) { |
| Drop(num_stack_slots); |
| Ret(); |
| } |
| |
| void LiftoffAssembler::CallC(const wasm::FunctionSig* sig, |
| const LiftoffRegister* args, |
| const LiftoffRegister* rets, |
| ValueType out_argument_type, int stack_bytes, |
| ExternalReference ext_ref) { |
| // Arguments are passed by pushing them all to the stack and then passing |
| // a pointer to them. |
| DCHECK(IsAligned(stack_bytes, kSystemPointerSize)); |
| // Reserve space in the stack. |
| AllocateStackSpace(stack_bytes); |
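  // Store each argument into the buffer at its offset; an i64 occupies two
  // stack words and an s128 four.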
| |
| int arg_bytes = 0; |
| for (ValueType param_type : sig->parameters()) { |
| switch (param_type.kind()) { |
| case ValueType::kI32: |
| str(args->gp(), MemOperand(sp, arg_bytes)); |
| break; |
| case ValueType::kI64: |
| str(args->low_gp(), MemOperand(sp, arg_bytes)); |
| str(args->high_gp(), MemOperand(sp, arg_bytes + kSystemPointerSize)); |
| break; |
| case ValueType::kF32: |
| vstr(liftoff::GetFloatRegister(args->fp()), MemOperand(sp, arg_bytes)); |
| break; |
| case ValueType::kF64: |
| vstr(args->fp(), MemOperand(sp, arg_bytes)); |
| break; |
| case ValueType::kS128: |
| vstr(args->low_fp(), MemOperand(sp, arg_bytes)); |
| vstr(args->high_fp(), |
| MemOperand(sp, arg_bytes + 2 * kSystemPointerSize)); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| args++; |
| arg_bytes += param_type.element_size_bytes(); |
| } |
| DCHECK_LE(arg_bytes, stack_bytes); |
| |
| // Pass a pointer to the buffer with the arguments to the C function. |
| mov(r0, sp); |
| |
| // Now call the C function. |
| constexpr int kNumCCallArgs = 1; |
| PrepareCallCFunction(kNumCCallArgs); |
| CallCFunction(ext_ref, kNumCCallArgs); |
| |
| // Move return value to the right register. |
| const LiftoffRegister* result_reg = rets; |
| if (sig->return_count() > 0) { |
| DCHECK_EQ(1, sig->return_count()); |
| constexpr Register kReturnReg = r0; |
| if (kReturnReg != rets->gp()) { |
| Move(*rets, LiftoffRegister(kReturnReg), sig->GetReturn(0)); |
| } |
| result_reg++; |
| } |
| |
| // Load potential output value from the buffer on the stack. |
| if (out_argument_type != kWasmStmt) { |
| switch (out_argument_type.kind()) { |
| case ValueType::kI32: |
| ldr(result_reg->gp(), MemOperand(sp)); |
| break; |
| case ValueType::kI64: |
| ldr(result_reg->low_gp(), MemOperand(sp)); |
| ldr(result_reg->high_gp(), MemOperand(sp, kSystemPointerSize)); |
| break; |
| case ValueType::kF32: |
| vldr(liftoff::GetFloatRegister(result_reg->fp()), MemOperand(sp)); |
| break; |
| case ValueType::kF64: |
| vldr(result_reg->fp(), MemOperand(sp)); |
| break; |
| case ValueType::kS128: |
| vld1(Neon8, NeonListOperand(result_reg->low_fp(), 2), |
| NeonMemOperand(sp)); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| } |
| add(sp, sp, Operand(stack_bytes)); |
| } |
| |
| void LiftoffAssembler::CallNativeWasmCode(Address addr) { |
| Call(addr, RelocInfo::WASM_CALL); |
| } |
| |
| void LiftoffAssembler::TailCallNativeWasmCode(Address addr) { |
| Jump(addr, RelocInfo::WASM_CALL); |
| } |
| |
| void LiftoffAssembler::CallIndirect(const wasm::FunctionSig* sig, |
| compiler::CallDescriptor* call_descriptor, |
| Register target) { |
| DCHECK(target != no_reg); |
| Call(target); |
| } |
| |
| void LiftoffAssembler::TailCallIndirect(Register target) { |
| DCHECK(target != no_reg); |
| Jump(target); |
| } |
| |
| void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) { |
| // A direct call to a wasm runtime stub defined in this module. |
| // Just encode the stub index. This will be patched at relocation. |
| Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL); |
| } |
| |
| void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) { |
| AllocateStackSpace(size); |
| mov(addr, sp); |
| } |
| |
| void LiftoffAssembler::DeallocateStackSlot(uint32_t size) { |
| add(sp, sp, Operand(size)); |
| } |
| |
| void LiftoffStackSlots::Construct() { |
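  // Materialize each recorded argument slot on the machine stack, from a
  // spilled stack slot, a register, or an i32 constant, one word (or one
  // register) at a time.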
| for (auto& slot : slots_) { |
| const LiftoffAssembler::VarState& src = slot.src_; |
| switch (src.loc()) { |
| case LiftoffAssembler::VarState::kStack: { |
| switch (src.type().kind()) { |
          // i32 and i64 are handled alike here: an i64 has already been
          // split into two i32 half-slots.
| case ValueType::kI32: |
| case ValueType::kI64: |
| case ValueType::kF32: { |
| UseScratchRegisterScope temps(asm_); |
| Register scratch = temps.Acquire(); |
| asm_->ldr(scratch, |
| liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_)); |
| asm_->Push(scratch); |
| } break; |
| case ValueType::kF64: { |
| UseScratchRegisterScope temps(asm_); |
| DwVfpRegister scratch = temps.AcquireD(); |
| asm_->vldr(scratch, liftoff::GetStackSlot(slot.src_offset_)); |
| asm_->vpush(scratch); |
| } break; |
| case ValueType::kS128: { |
| MemOperand mem_op = liftoff::GetStackSlot(slot.src_offset_); |
| UseScratchRegisterScope temps(asm_); |
| Register addr = liftoff::CalculateActualAddress( |
| asm_, &temps, mem_op.rn(), no_reg, mem_op.offset()); |
| QwNeonRegister scratch = temps.AcquireQ(); |
| asm_->vld1(Neon8, NeonListOperand(scratch), NeonMemOperand(addr)); |
| asm_->vpush(scratch); |
| break; |
| } |
| default: |
| UNREACHABLE(); |
| } |
| break; |
| } |
| case LiftoffAssembler::VarState::kRegister: |
| switch (src.type().kind()) { |
| case ValueType::kI64: { |
| LiftoffRegister reg = |
| slot.half_ == kLowWord ? src.reg().low() : src.reg().high(); |
| asm_->push(reg.gp()); |
| } break; |
| case ValueType::kI32: |
| asm_->push(src.reg().gp()); |
| break; |
| case ValueType::kF32: |
| asm_->vpush(liftoff::GetFloatRegister(src.reg().fp())); |
| break; |
| case ValueType::kF64: |
| asm_->vpush(src.reg().fp()); |
| break; |
| case ValueType::kS128: |
| asm_->vpush(liftoff::GetSimd128Register(src.reg())); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| break; |
| case LiftoffAssembler::VarState::kIntConst: { |
| DCHECK(src.type() == kWasmI32 || src.type() == kWasmI64); |
| UseScratchRegisterScope temps(asm_); |
| Register scratch = temps.Acquire(); |
| // The high word is the sign extension of the low word. |
| asm_->mov(scratch, |
| Operand(slot.half_ == kLowWord ? src.i32_const() |
| : src.i32_const() >> 31)); |
| asm_->push(scratch); |
| break; |
| } |
| } |
| } |
| } |
| |
| } // namespace wasm |
| } // namespace internal |
| } // namespace v8 |
| |
| #endif // V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_ |