| /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
| * vim: set ts=8 sts=4 et sw=4 tw=99: |
| * This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
| #include "jit/x86-shared/CodeGenerator-x86-shared.h" |
| |
| #include "mozilla/DebugOnly.h" |
| #include "mozilla/MathAlgorithms.h" |
| |
| #include "jsmath.h" |
| |
| #include "jit/JitCompartment.h" |
| #include "jit/JitFrames.h" |
| #include "jit/Linker.h" |
| #include "jit/RangeAnalysis.h" |
| #include "vm/TraceLogging.h" |
| |
| #include "jit/MacroAssembler-inl.h" |
| #include "jit/shared/CodeGenerator-shared-inl.h" |
| |
| using namespace js; |
| using namespace js::jit; |
| |
| using mozilla::Abs; |
| using mozilla::FloatingPoint; |
| using mozilla::FloorLog2; |
| using mozilla::NegativeInfinity; |
| using mozilla::SpecificNaN; |
| |
| using JS::GenericNaN; |
| |
| namespace js { |
| namespace jit { |
| |
| CodeGeneratorX86Shared::CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm) |
| : CodeGeneratorShared(gen, graph, masm) |
| { |
| } |
| |
| void |
| OutOfLineBailout::accept(CodeGeneratorX86Shared* codegen) |
| { |
| codegen->visitOutOfLineBailout(this); |
| } |
| |
| void |
| CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond, MBasicBlock* mirTrue, |
| MBasicBlock* mirFalse, Assembler::NaNCond ifNaN) |
| { |
| if (ifNaN == Assembler::NaN_IsFalse) |
| jumpToBlock(mirFalse, Assembler::Parity); |
| else if (ifNaN == Assembler::NaN_IsTrue) |
| jumpToBlock(mirTrue, Assembler::Parity); |
| |
| if (isNextBlock(mirFalse->lir())) { |
| jumpToBlock(mirTrue, cond); |
| } else { |
| jumpToBlock(mirFalse, Assembler::InvertCondition(cond)); |
| jumpToBlock(mirTrue); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitDouble(LDouble* ins) |
| { |
| const LDefinition* out = ins->getDef(0); |
| masm.loadConstantDouble(ins->getDouble(), ToFloatRegister(out)); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitFloat32(LFloat32* ins) |
| { |
| const LDefinition* out = ins->getDef(0); |
| masm.loadConstantFloat32(ins->getFloat(), ToFloatRegister(out)); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitTestIAndBranch(LTestIAndBranch* test) |
| { |
| const LAllocation* opd = test->input(); |
| |
| // Test the operand |
| masm.test32(ToRegister(opd), ToRegister(opd)); |
| emitBranch(Assembler::NonZero, test->ifTrue(), test->ifFalse()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitTestDAndBranch(LTestDAndBranch* test) |
| { |
| const LAllocation* opd = test->input(); |
| |
| // vucomisd flags: |
| //        Z  P  C |
| //       --------- |
| //  NaN   1  1  1 |
| //    >   0  0  0 |
| //    <   0  0  1 |
| //    =   1  0  0 |
| // |
| // NaN is falsey, so comparing against 0 and then using the Z flag is |
| // enough to determine which branch to take. |
| ScratchDoubleScope scratch(masm); |
| masm.zeroDouble(scratch); |
| masm.vucomisd(scratch, ToFloatRegister(opd)); |
| emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitTestFAndBranch(LTestFAndBranch* test) |
| { |
| const LAllocation* opd = test->input(); |
| // vucomiss flags are the same as doubles; see comment above |
| { |
| ScratchFloat32Scope scratch(masm); |
| masm.zeroFloat32(scratch); |
| masm.vucomiss(scratch, ToFloatRegister(opd)); |
| } |
| emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitBitAndAndBranch(LBitAndAndBranch* baab) |
| { |
| if (baab->right()->isConstant()) |
| masm.test32(ToRegister(baab->left()), Imm32(ToInt32(baab->right()))); |
| else |
| masm.test32(ToRegister(baab->left()), ToRegister(baab->right())); |
| emitBranch(Assembler::NonZero, baab->ifTrue(), baab->ifFalse()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::emitCompare(MCompare::CompareType type, const LAllocation* left, const LAllocation* right) |
| { |
| #ifdef JS_CODEGEN_X64 |
| if (type == MCompare::Compare_Object) { |
| masm.cmpPtr(ToRegister(left), ToOperand(right)); |
| return; |
| } |
| #endif |
| |
| if (right->isConstant()) |
| masm.cmp32(ToRegister(left), Imm32(ToInt32(right))); |
| else |
| masm.cmp32(ToRegister(left), ToOperand(right)); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCompare(LCompare* comp) |
| { |
| MCompare* mir = comp->mir(); |
| emitCompare(mir->compareType(), comp->left(), comp->right()); |
| masm.emitSet(JSOpToCondition(mir->compareType(), comp->jsop()), ToRegister(comp->output())); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCompareAndBranch(LCompareAndBranch* comp) |
| { |
| MCompare* mir = comp->cmpMir(); |
| emitCompare(mir->compareType(), comp->left(), comp->right()); |
| Assembler::Condition cond = JSOpToCondition(mir->compareType(), comp->jsop()); |
| emitBranch(cond, comp->ifTrue(), comp->ifFalse()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCompareD(LCompareD* comp) |
| { |
| FloatRegister lhs = ToFloatRegister(comp->left()); |
| FloatRegister rhs = ToFloatRegister(comp->right()); |
| |
| Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); |
| |
| Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); |
| if (comp->mir()->operandsAreNeverNaN()) |
| nanCond = Assembler::NaN_HandledByCond; |
| |
| masm.compareDouble(cond, lhs, rhs); |
| masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCompareF(LCompareF* comp) |
| { |
| FloatRegister lhs = ToFloatRegister(comp->left()); |
| FloatRegister rhs = ToFloatRegister(comp->right()); |
| |
| Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); |
| |
| Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); |
| if (comp->mir()->operandsAreNeverNaN()) |
| nanCond = Assembler::NaN_HandledByCond; |
| |
| masm.compareFloat(cond, lhs, rhs); |
| masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitNotI(LNotI* ins) |
| { |
| masm.cmp32(ToRegister(ins->input()), Imm32(0)); |
| masm.emitSet(Assembler::Equal, ToRegister(ins->output())); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitNotD(LNotD* ins) |
| { |
| FloatRegister opd = ToFloatRegister(ins->input()); |
| |
| // Not returns true if the input is a NaN. We don't have to worry about |
| // it if we know the input is never NaN though. |
| Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; |
| if (ins->mir()->operandIsNeverNaN()) |
| nanCond = Assembler::NaN_HandledByCond; |
| |
| ScratchDoubleScope scratch(masm); |
| masm.zeroDouble(scratch); |
| masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch); |
| masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitNotF(LNotF* ins) |
| { |
| FloatRegister opd = ToFloatRegister(ins->input()); |
| |
| // Not returns true if the input is a NaN. We don't have to worry about |
| // it if we know the input is never NaN though. |
| Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; |
| if (ins->mir()->operandIsNeverNaN()) |
| nanCond = Assembler::NaN_HandledByCond; |
| |
| ScratchFloat32Scope scratch(masm); |
| masm.zeroFloat32(scratch); |
| masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch); |
| masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCompareDAndBranch(LCompareDAndBranch* comp) |
| { |
| FloatRegister lhs = ToFloatRegister(comp->left()); |
| FloatRegister rhs = ToFloatRegister(comp->right()); |
| |
| Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop()); |
| |
| Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); |
| if (comp->cmpMir()->operandsAreNeverNaN()) |
| nanCond = Assembler::NaN_HandledByCond; |
| |
| masm.compareDouble(cond, lhs, rhs); |
| emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCompareFAndBranch(LCompareFAndBranch* comp) |
| { |
| FloatRegister lhs = ToFloatRegister(comp->left()); |
| FloatRegister rhs = ToFloatRegister(comp->right()); |
| |
| Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop()); |
| |
| Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); |
| if (comp->cmpMir()->operandsAreNeverNaN()) |
| nanCond = Assembler::NaN_HandledByCond; |
| |
| masm.compareFloat(cond, lhs, rhs); |
| emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitAsmJSPassStackArg(LAsmJSPassStackArg* ins) |
| { |
| const MAsmJSPassStackArg* mir = ins->mir(); |
| Address dst(StackPointer, mir->spOffset()); |
| if (ins->arg()->isConstant()) { |
| masm.storePtr(ImmWord(ToInt32(ins->arg())), dst); |
| } else { |
| if (ins->arg()->isGeneralReg()) { |
| masm.storePtr(ToRegister(ins->arg()), dst); |
| } else { |
| switch (mir->input()->type()) { |
| case MIRType_Double: |
| case MIRType_Float32: |
| masm.storeDouble(ToFloatRegister(ins->arg()), dst); |
| return; |
| // StackPointer is SIMD-aligned and ABIArgGenerator guarantees |
| // stack offsets are SIMD-aligned. |
| case MIRType_Int32x4: |
| masm.storeAlignedInt32x4(ToFloatRegister(ins->arg()), dst); |
| return; |
| case MIRType_Float32x4: |
| masm.storeAlignedFloat32x4(ToFloatRegister(ins->arg()), dst); |
| return; |
| default: break; |
| } |
| MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("unexpected mir type in AsmJSPassStackArg"); |
| } |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds* ool) |
| { |
| switch (ool->viewType()) { |
| case Scalar::Float32x4: |
| case Scalar::Int32x4: |
| case Scalar::MaxTypedArrayViewType: |
| MOZ_CRASH("unexpected array type"); |
| case Scalar::Float32: |
| masm.loadConstantFloat32(float(GenericNaN()), ool->dest().fpu()); |
| break; |
| case Scalar::Float64: |
| masm.loadConstantDouble(GenericNaN(), ool->dest().fpu()); |
| break; |
| case Scalar::Int8: |
| case Scalar::Uint8: |
| case Scalar::Int16: |
| case Scalar::Uint16: |
| case Scalar::Int32: |
| case Scalar::Uint32: |
| case Scalar::Uint8Clamped: |
| Register destReg = ool->dest().gpr(); |
| masm.mov(ImmWord(0), destReg); |
| break; |
| } |
| masm.jmp(ool->rejoin()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitOffsetBoundsCheck(OffsetBoundsCheck* oolCheck) |
| { |
| // The access is heap[ptr + offset]. The inline code checks that |
| // ptr < heap.length - offset. We get here when that fails. We need to check |
| // for the case where ptr + offset >= 0, in which case the access is still |
| // in bounds. |
| MOZ_ASSERT(oolCheck->offset() != 0, |
| "An access without a constant offset doesn't need a separate OffsetBoundsCheck"); |
| masm.cmp32(oolCheck->ptrReg(), Imm32(-uint32_t(oolCheck->offset()))); |
| masm.j(Assembler::Below, oolCheck->outOfBounds()); |
| |
| #ifdef JS_CODEGEN_X64 |
| // In order to get the offset to wrap properly, we must sign-extend the |
| // pointer to 32-bits. We'll zero out the sign extension immediately |
| // after the access to restore asm.js invariants. |
| masm.movslq(oolCheck->ptrReg(), oolCheck->ptrReg()); |
| #endif |
| |
| masm.jmp(oolCheck->rejoin()); |
| } |
| |
| uint32_t |
| CodeGeneratorX86Shared::emitAsmJSBoundsCheckBranch(const MAsmJSHeapAccess* access, |
| const MInstruction* mir, |
| Register ptr, Label* fail) |
| { |
| // Emit a bounds-checking branch for |access|. |
| |
| MOZ_ASSERT(gen->needsAsmJSBoundsCheckBranch(access)); |
| |
| Label* pass = nullptr; |
| |
| // If we have a non-zero offset, it's possible that |ptr| itself is out of |
| // bounds, while adding the offset computes an in-bounds address. To catch |
| // this case, we need a second branch, which we emit out of line since it's |
| // unlikely to be needed in normal programs. |
| if (access->offset() != 0) { |
| OffsetBoundsCheck* oolCheck = new(alloc()) OffsetBoundsCheck(fail, ptr, access->offset()); |
| fail = oolCheck->entry(); |
| pass = oolCheck->rejoin(); |
| addOutOfLineCode(oolCheck, mir); |
| } |
| |
| // The bounds check is a comparison with an immediate value. The asm.js |
| // module linking process will add the length of the heap to the immediate |
| // field, so -access->endOffset() will turn into |
| // (heapLength - access->endOffset()), allowing us to test whether the end |
| // of the access is beyond the end of the heap. |
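| // For example (illustrative numbers only): with endOffset == 4 and a |
| // 65536-byte heap, linking patches the immediate from -4 to 65532, so the |
| // Assembler::Above branch below is taken exactly when ptr > 65532, i.e. when |
| // the 4-byte access would extend past the end of the heap. |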
| uint32_t maybeCmpOffset = masm.cmp32WithPatch(ptr, Imm32(-access->endOffset())).offset(); |
| masm.j(Assembler::Above, fail); |
| |
| if (pass) |
| masm.bind(pass); |
| |
| return maybeCmpOffset; |
| } |
| |
| void |
| CodeGeneratorX86Shared::cleanupAfterAsmJSBoundsCheckBranch(const MAsmJSHeapAccess* access, |
| Register ptr) |
| { |
| // Clean up after performing a heap access checked by a branch. |
| |
| MOZ_ASSERT(gen->needsAsmJSBoundsCheckBranch(access)); |
| |
| #ifdef JS_CODEGEN_X64 |
| // If the offset is 0, we don't use an OffsetBoundsCheck. |
| if (access->offset() != 0) { |
| // Zero out the high 32 bits, in case the OffsetBoundsCheck code had to |
| // sign-extend (movslq) the pointer value to get wraparound to work. |
| masm.movl(ptr, ptr); |
| } |
| #endif |
| } |
| |
| bool |
| CodeGeneratorX86Shared::generateOutOfLineCode() |
| { |
| if (!CodeGeneratorShared::generateOutOfLineCode()) |
| return false; |
| |
| if (deoptLabel_.used()) { |
| // All non-table-based bailouts will go here. |
| masm.bind(&deoptLabel_); |
| |
| // Push the frame size, so the handler can recover the IonScript. |
| masm.push(Imm32(frameSize())); |
| |
| JitCode* handler = gen->jitRuntime()->getGenericBailoutHandler(); |
| masm.jmp(ImmPtr(handler->raw()), Relocation::JITCODE); |
| } |
| |
| return !masm.oom(); |
| } |
| |
| class BailoutJump { |
| Assembler::Condition cond_; |
| |
| public: |
| explicit BailoutJump(Assembler::Condition cond) : cond_(cond) |
| { } |
| #ifdef JS_CODEGEN_X86 |
| void operator()(MacroAssembler& masm, uint8_t* code) const { |
| masm.j(cond_, ImmPtr(code), Relocation::HARDCODED); |
| } |
| #endif |
| void operator()(MacroAssembler& masm, Label* label) const { |
| masm.j(cond_, label); |
| } |
| }; |
| |
| class BailoutLabel { |
| Label* label_; |
| |
| public: |
| explicit BailoutLabel(Label* label) : label_(label) |
| { } |
| #ifdef JS_CODEGEN_X86 |
| void operator()(MacroAssembler& masm, uint8_t* code) const { |
| masm.retarget(label_, ImmPtr(code), Relocation::HARDCODED); |
| } |
| #endif |
| void operator()(MacroAssembler& masm, Label* label) const { |
| masm.retarget(label_, label); |
| } |
| }; |
| |
| template <typename T> void |
| CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot) |
| { |
| encode(snapshot); |
| |
| // Though the assembler doesn't track all frame pushes, at least make sure |
| // the known value makes sense. We can't use bailout tables if the stack |
| // isn't properly aligned to the static frame size. |
| MOZ_ASSERT_IF(frameClass_ != FrameSizeClass::None() && deoptTable_, |
| frameClass_.frameSize() == masm.framePushed()); |
| |
| #ifdef JS_CODEGEN_X86 |
| // On x64, bailout tables are pointless, because 16 extra bytes are |
| // reserved per external jump, whereas it takes only 10 bytes to encode a |
| // non-table based bailout. |
| if (assignBailoutId(snapshot)) { |
| binder(masm, deoptTable_->raw() + snapshot->bailoutId() * BAILOUT_TABLE_ENTRY_SIZE); |
| return; |
| } |
| #endif |
| |
| // We could not use a jump table, either because all bailout IDs were |
| // reserved, or a jump table is not optimal for this frame size or |
| // platform. Whatever, we will generate a lazy bailout. |
| // |
| // All bailout code is associated with the bytecodeSite of the block we are |
| // bailing out from. |
| InlineScriptTree* tree = snapshot->mir()->block()->trackedTree(); |
| OutOfLineBailout* ool = new(alloc()) OutOfLineBailout(snapshot); |
| addOutOfLineCode(ool, new(alloc()) BytecodeSite(tree, tree->script()->code())); |
| |
| binder(masm, ool->entry()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition, LSnapshot* snapshot) |
| { |
| bailout(BailoutJump(condition), snapshot); |
| } |
| |
| void |
| CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition, LSnapshot* snapshot) |
| { |
| MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) == Assembler::NaN_HandledByCond); |
| bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot); |
| } |
| |
| void |
| CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot) |
| { |
| MOZ_ASSERT(label->used() && !label->bound()); |
| bailout(BailoutLabel(label), snapshot); |
| } |
| |
| void |
| CodeGeneratorX86Shared::bailout(LSnapshot* snapshot) |
| { |
| Label label; |
| masm.jump(&label); |
| bailoutFrom(&label, snapshot); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitOutOfLineBailout(OutOfLineBailout* ool) |
| { |
| masm.push(Imm32(ool->snapshot()->snapshotOffset())); |
| masm.jmp(&deoptLabel_); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD* ins) |
| { |
| FloatRegister first = ToFloatRegister(ins->first()); |
| FloatRegister second = ToFloatRegister(ins->second()); |
| #ifdef DEBUG |
| FloatRegister output = ToFloatRegister(ins->output()); |
| MOZ_ASSERT(first == output); |
| #endif |
| |
| Label done, nan, minMaxInst; |
| |
| // Do a vucomisd to catch equality and NaNs, which both require special |
| // handling. If the operands are ordered and inequal, we branch straight to |
| // the min/max instruction. If we wanted, we could also branch for less-than |
| // or greater-than here instead of using min/max; however, these conditions |
| // will sometimes be hard on the branch predictor. |
| masm.vucomisd(second, first); |
| masm.j(Assembler::NotEqual, &minMaxInst); |
| if (!ins->mir()->range() || ins->mir()->range()->canBeNaN()) |
| masm.j(Assembler::Parity, &nan); |
| |
| // Ordered and equal. The operands are bit-identical unless they are zero |
| // and negative zero. These instructions merge the sign bits in that |
| // case, and are no-ops otherwise. |
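| // For example, with first == -0.0 and second == +0.0: AND-ing the bit |
| // patterns clears the sign bit, giving +0.0 (the correct max), while OR-ing |
| // them sets the sign bit, giving -0.0 (the correct min). |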
| if (ins->mir()->isMax()) |
| masm.vandpd(second, first, first); |
| else |
| masm.vorpd(second, first, first); |
| masm.jump(&done); |
| |
| // x86's min/max are not symmetric; if either operand is a NaN, they return |
| // the read-only operand. We need to return a NaN if either operand is a |
| // NaN, so we explicitly check for a NaN in the read-write operand. |
| if (!ins->mir()->range() || ins->mir()->range()->canBeNaN()) { |
| masm.bind(&nan); |
| masm.vucomisd(first, first); |
| masm.j(Assembler::Parity, &done); |
| } |
| |
| // When the values are inequal, or second is NaN, x86's min and max will |
| // return the value we need. |
| masm.bind(&minMaxInst); |
| if (ins->mir()->isMax()) |
| masm.vmaxsd(second, first, first); |
| else |
| masm.vminsd(second, first, first); |
| |
| masm.bind(&done); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF* ins) |
| { |
| FloatRegister first = ToFloatRegister(ins->first()); |
| FloatRegister second = ToFloatRegister(ins->second()); |
| #ifdef DEBUG |
| FloatRegister output = ToFloatRegister(ins->output()); |
| MOZ_ASSERT(first == output); |
| #endif |
| |
| Label done, nan, minMaxInst; |
| |
| // Do a vucomiss to catch equality and NaNs, which both require special |
| // handling. If the operands are ordered and inequal, we branch straight to |
| // the min/max instruction. If we wanted, we could also branch for less-than |
| // or greater-than here instead of using min/max; however, these conditions |
| // will sometimes be hard on the branch predictor. |
| masm.vucomiss(second, first); |
| masm.j(Assembler::NotEqual, &minMaxInst); |
| if (!ins->mir()->range() || ins->mir()->range()->canBeNaN()) |
| masm.j(Assembler::Parity, &nan); |
| |
| // Ordered and equal. The operands are bit-identical unless they are zero |
| // and negative zero. These instructions merge the sign bits in that |
| // case, and are no-ops otherwise. |
| if (ins->mir()->isMax()) |
| masm.vandps(second, first, first); |
| else |
| masm.vorps(second, first, first); |
| masm.jump(&done); |
| |
| // x86's min/max are not symmetric; if either operand is a NaN, they return |
| // the read-only operand. We need to return a NaN if either operand is a |
| // NaN, so we explicitly check for a NaN in the read-write operand. |
| if (!ins->mir()->range() || ins->mir()->range()->canBeNaN()) { |
| masm.bind(&nan); |
| masm.vucomiss(first, first); |
| masm.j(Assembler::Parity, &done); |
| } |
| |
| // When the values are inequal, or second is NaN, x86's min and max will |
| // return the value we need. |
| masm.bind(&minMaxInst); |
| if (ins->mir()->isMax()) |
| masm.vmaxss(second, first, first); |
| else |
| masm.vminss(second, first, first); |
| |
| masm.bind(&done); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitAbsD(LAbsD* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| MOZ_ASSERT(input == ToFloatRegister(ins->output())); |
| // Load a value which is all ones except for the sign bit. |
| ScratchDoubleScope scratch(masm); |
| masm.loadConstantDouble(SpecificNaN<double>(0, FloatingPoint<double>::kSignificandBits), scratch); |
| masm.vandpd(scratch, input, input); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitAbsF(LAbsF* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| MOZ_ASSERT(input == ToFloatRegister(ins->output())); |
| // Same trick as visitAbsD above. |
| ScratchFloat32Scope scratch(masm); |
| masm.loadConstantFloat32(SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits), scratch); |
| masm.vandps(scratch, input, input); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitClzI(LClzI* ins) |
| { |
| Register input = ToRegister(ins->input()); |
| Register output = ToRegister(ins->output()); |
| |
| // bsr is undefined on 0 |
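| // For nonzero input, bsr yields the index of the highest set bit (0..31), |
| // so clz == 31 - bsr, and since bsr <= 31 this equals bsr ^ 31 (the xor |
| // below). E.g. input == 0x10: bsr == 4, and 4 ^ 31 == 27 == clz(0x10). |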
| Label done, nonzero; |
| if (!ins->mir()->operandIsNeverZero()) { |
| masm.test32(input, input); |
| masm.j(Assembler::NonZero, &nonzero); |
| masm.move32(Imm32(32), output); |
| masm.jump(&done); |
| } |
| |
| masm.bind(&nonzero); |
| masm.bsr(input, output); |
| masm.xor32(Imm32(0x1F), output); |
| masm.bind(&done); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSqrtD(LSqrtD* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| masm.vsqrtsd(input, output, output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSqrtF(LSqrtF* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| masm.vsqrtss(input, output, output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitPowHalfD(LPowHalfD* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| ScratchDoubleScope scratch(masm); |
| |
| Label done, sqrt; |
| |
| if (!ins->mir()->operandIsNeverNegativeInfinity()) { |
| // Branch if not -Infinity. |
| masm.loadConstantDouble(NegativeInfinity<double>(), scratch); |
| |
| Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered; |
| if (ins->mir()->operandIsNeverNaN()) |
| cond = Assembler::DoubleNotEqual; |
| masm.branchDouble(cond, input, scratch, &sqrt); |
| |
| // Math.pow(-Infinity, 0.5) == Infinity. |
| masm.zeroDouble(input); |
| masm.subDouble(scratch, input); |
| masm.jump(&done); |
| |
| masm.bind(&sqrt); |
| } |
| |
| if (!ins->mir()->operandIsNeverNegativeZero()) { |
| // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5). Adding 0 converts any -0 to 0. |
| masm.zeroDouble(scratch); |
| masm.addDouble(scratch, input); |
| } |
| |
| masm.vsqrtsd(input, output, output); |
| |
| masm.bind(&done); |
| } |
| |
| class OutOfLineUndoALUOperation : public OutOfLineCodeBase<CodeGeneratorX86Shared> |
| { |
| LInstruction* ins_; |
| |
| public: |
| explicit OutOfLineUndoALUOperation(LInstruction* ins) |
| : ins_(ins) |
| { } |
| |
| virtual void accept(CodeGeneratorX86Shared* codegen) { |
| codegen->visitOutOfLineUndoALUOperation(this); |
| } |
| LInstruction* ins() const { |
| return ins_; |
| } |
| }; |
| |
| void |
| CodeGeneratorX86Shared::visitAddI(LAddI* ins) |
| { |
| if (ins->rhs()->isConstant()) |
| masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); |
| else |
| masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); |
| |
| if (ins->snapshot()) { |
| if (ins->recoversInput()) { |
| OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins); |
| addOutOfLineCode(ool, ins->mir()); |
| masm.j(Assembler::Overflow, ool->entry()); |
| } else { |
| bailoutIf(Assembler::Overflow, ins->snapshot()); |
| } |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSubI(LSubI* ins) |
| { |
| if (ins->rhs()->isConstant()) |
| masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); |
| else |
| masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); |
| |
| if (ins->snapshot()) { |
| if (ins->recoversInput()) { |
| OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins); |
| addOutOfLineCode(ool, ins->mir()); |
| masm.j(Assembler::Overflow, ool->entry()); |
| } else { |
| bailoutIf(Assembler::Overflow, ins->snapshot()); |
| } |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation* ool) |
| { |
| LInstruction* ins = ool->ins(); |
| Register reg = ToRegister(ins->getDef(0)); |
| |
| mozilla::DebugOnly<LAllocation*> lhs = ins->getOperand(0); |
| LAllocation* rhs = ins->getOperand(1); |
| |
| MOZ_ASSERT(reg == ToRegister(lhs)); |
| MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs)); |
| |
| // Undo the effect of the ALU operation, which was performed on the output |
| // register and overflowed. Writing to the output register clobbered an |
| // input reg, and the original value of the input needs to be recovered |
| // to satisfy the constraint imposed by any RECOVERED_INPUT operands to |
| // the bailout snapshot. |
| |
| if (rhs->isConstant()) { |
| Imm32 constant(ToInt32(rhs)); |
| if (ins->isAddI()) |
| masm.subl(constant, reg); |
| else |
| masm.addl(constant, reg); |
| } else { |
| if (ins->isAddI()) |
| masm.subl(ToOperand(rhs), reg); |
| else |
| masm.addl(ToOperand(rhs), reg); |
| } |
| |
| bailout(ool->ins()->snapshot()); |
| } |
| |
| class MulNegativeZeroCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared> |
| { |
| LMulI* ins_; |
| |
| public: |
| explicit MulNegativeZeroCheck(LMulI* ins) |
| : ins_(ins) |
| { } |
| |
| virtual void accept(CodeGeneratorX86Shared* codegen) { |
| codegen->visitMulNegativeZeroCheck(this); |
| } |
| LMulI* ins() const { |
| return ins_; |
| } |
| }; |
| |
| void |
| CodeGeneratorX86Shared::visitMulI(LMulI* ins) |
| { |
| const LAllocation* lhs = ins->lhs(); |
| const LAllocation* rhs = ins->rhs(); |
| MMul* mul = ins->mir(); |
| MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow()); |
| |
| if (rhs->isConstant()) { |
| // Bailout on -0.0 |
| int32_t constant = ToInt32(rhs); |
| if (mul->canBeNegativeZero() && constant <= 0) { |
| Assembler::Condition bailoutCond = (constant == 0) ? Assembler::Signed : Assembler::Equal; |
| masm.test32(ToRegister(lhs), ToRegister(lhs)); |
| bailoutIf(bailoutCond, ins->snapshot()); |
| } |
| |
| switch (constant) { |
| case -1: |
| masm.negl(ToOperand(lhs)); |
| break; |
| case 0: |
| masm.xorl(ToOperand(lhs), ToRegister(lhs)); |
| return; // escape overflow check; |
| case 1: |
| // nop |
| return; // escape overflow check; |
| case 2: |
| masm.addl(ToOperand(lhs), ToRegister(lhs)); |
| break; |
| default: |
| if (!mul->canOverflow() && constant > 0) { |
| // Use shift if cannot overflow and constant is power of 2 |
| int32_t shift = FloorLog2(constant); |
| if ((1 << shift) == constant) { |
| masm.shll(Imm32(shift), ToRegister(lhs)); |
| return; |
| } |
| } |
| masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs)); |
| } |
| |
| // Bailout on overflow |
| if (mul->canOverflow()) |
| bailoutIf(Assembler::Overflow, ins->snapshot()); |
| } else { |
| masm.imull(ToOperand(rhs), ToRegister(lhs)); |
| |
| // Bailout on overflow |
| if (mul->canOverflow()) |
| bailoutIf(Assembler::Overflow, ins->snapshot()); |
| |
| if (mul->canBeNegativeZero()) { |
| // Jump to an OOL path if the result is 0. |
| MulNegativeZeroCheck* ool = new(alloc()) MulNegativeZeroCheck(ins); |
| addOutOfLineCode(ool, mul); |
| |
| masm.test32(ToRegister(lhs), ToRegister(lhs)); |
| masm.j(Assembler::Zero, ool->entry()); |
| masm.bind(ool->rejoin()); |
| } |
| } |
| } |
| |
| class ReturnZero : public OutOfLineCodeBase<CodeGeneratorX86Shared> |
| { |
| Register reg_; |
| |
| public: |
| explicit ReturnZero(Register reg) |
| : reg_(reg) |
| { } |
| |
| virtual void accept(CodeGeneratorX86Shared* codegen) { |
| codegen->visitReturnZero(this); |
| } |
| Register reg() const { |
| return reg_; |
| } |
| }; |
| |
| void |
| CodeGeneratorX86Shared::visitReturnZero(ReturnZero* ool) |
| { |
| masm.mov(ImmWord(0), ool->reg()); |
| masm.jmp(ool->rejoin()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitUDivOrMod(LUDivOrMod* ins) |
| { |
| Register lhs = ToRegister(ins->lhs()); |
| Register rhs = ToRegister(ins->rhs()); |
| Register output = ToRegister(ins->output()); |
| |
| MOZ_ASSERT_IF(lhs != rhs, rhs != eax); |
| MOZ_ASSERT(rhs != edx); |
| MOZ_ASSERT_IF(output == eax, ToRegister(ins->remainder()) == edx); |
| |
| ReturnZero* ool = nullptr; |
| |
| // Put the lhs in eax. |
| if (lhs != eax) |
| masm.mov(lhs, eax); |
| |
| // Prevent divide by zero. |
| if (ins->canBeDivideByZero()) { |
| masm.test32(rhs, rhs); |
| if (ins->mir()->isTruncated()) { |
| if (!ool) |
| ool = new(alloc()) ReturnZero(output); |
| masm.j(Assembler::Zero, ool->entry()); |
| } else { |
| bailoutIf(Assembler::Zero, ins->snapshot()); |
| } |
| } |
| |
| // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit. |
| masm.mov(ImmWord(0), edx); |
| masm.udiv(rhs); |
| |
| // If the remainder is > 0, bailout since this must be a double. |
| if (ins->mir()->isDiv() && !ins->mir()->toDiv()->canTruncateRemainder()) { |
| Register remainder = ToRegister(ins->remainder()); |
| masm.test32(remainder, remainder); |
| bailoutIf(Assembler::NonZero, ins->snapshot()); |
| } |
| |
| // Unsigned div or mod can return a value that's not a signed int32. |
| // If our users aren't expecting that, bail. |
| if (!ins->mir()->isTruncated()) { |
| masm.test32(output, output); |
| bailoutIf(Assembler::Signed, ins->snapshot()); |
| } |
| |
| if (ool) { |
| addOutOfLineCode(ool, ins->mir()); |
| masm.bind(ool->rejoin()); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitUDivOrModConstant(LUDivOrModConstant *ins) { |
| Register lhs = ToRegister(ins->numerator()); |
| Register output = ToRegister(ins->output()); |
| uint32_t d = ins->denominator(); |
| |
| // This emits the division answer into edx or the modulus answer into eax. |
| MOZ_ASSERT(output == eax || output == edx); |
| MOZ_ASSERT(lhs != eax && lhs != edx); |
| bool isDiv = (output == edx); |
| |
| if (d == 0) { |
| if (ins->mir()->isTruncated()) |
| masm.xorl(output, output); |
| else |
| bailout(ins->snapshot()); |
| |
| return; |
| } |
| |
| // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI). |
| MOZ_ASSERT((d & (d - 1)) != 0); |
| |
| ReciprocalMulConstants rmc = computeDivisionConstants(d, /* maxLog = */ 32); |
| |
| // We first compute (M * n) >> 32, where M = rmc.multiplier. |
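| // Illustrative example (the exact constants come from |
| // computeDivisionConstants; one consistent choice for d == 3 is |
| // M == 0xAAAAAAAB, shiftAmount == 1): for n == 10, umull leaves |
| // floor((M * 10) / 2^32) == 6 in edx, and 6 >> 1 == 3 == floor(10 / 3). |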
| masm.movl(Imm32(rmc.multiplier), eax); |
| masm.umull(lhs); |
| if (rmc.multiplier > UINT32_MAX) { |
| // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that |
| // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d, contradicting |
| // the proof of correctness in computeDivisionConstants. |
| MOZ_ASSERT(rmc.shiftAmount > 0); |
| MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33)); |
| |
| // We actually computed edx = ((uint32_t(M) * n) >> 32) instead. Since |
| // (M * n) >> (32 + shift) is the same as (edx + n) >> shift, we can |
| // correct for the overflow. This case is a bit trickier than the signed |
| // case, though, as the (edx + n) addition itself can overflow; however, |
| // note that (edx + n) >> shift == (((n - edx) >> 1) + edx) >> (shift - 1), |
| // which is overflow-free. See Hacker's Delight, section 10-8 for details. |
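| // Illustrative check of the identity with small numbers: take shift == 3, |
| // edx == 6, n == 10. Then (edx + n) >> 3 == 16 >> 3 == 2, and |
| // (((n - edx) >> 1) + edx) >> 2 == ((4 >> 1) + 6) >> 2 == 8 >> 2 == 2, but |
| // the second form never needs the (possibly overflowing) 33-bit sum edx + n. |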
| |
| // Compute (n - edx) >> 1 into eax. |
| masm.movl(lhs, eax); |
| masm.subl(edx, eax); |
| masm.shrl(Imm32(1), eax); |
| |
| // Finish the computation. |
| masm.addl(eax, edx); |
| masm.shrl(Imm32(rmc.shiftAmount - 1), edx); |
| } else { |
| masm.shrl(Imm32(rmc.shiftAmount), edx); |
| } |
| |
| // We now have the truncated division value in edx. If we're |
| // computing a modulus or checking whether the division resulted |
| // in an integer, we need to multiply the obtained value by d and |
| // finish the computation/check. |
| if (!isDiv) { |
| masm.imull(Imm32(d), edx, edx); |
| masm.movl(lhs, eax); |
| masm.subl(edx, eax); |
| |
| // The final result of the modulus op, just computed above by the |
| // sub instruction, can be a number in the range [2^31, 2^32). If |
| // this is the case and the modulus is not truncated, we must bail |
| // out. |
| if (!ins->mir()->isTruncated()) |
| bailoutIf(Assembler::Signed, ins->snapshot()); |
| } else if (!ins->mir()->isTruncated()) { |
| masm.imull(Imm32(d), edx, eax); |
| masm.cmpl(lhs, eax); |
| bailoutIf(Assembler::NotEqual, ins->snapshot()); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitMulNegativeZeroCheck(MulNegativeZeroCheck* ool) |
| { |
| LMulI* ins = ool->ins(); |
| Register result = ToRegister(ins->output()); |
| Operand lhsCopy = ToOperand(ins->lhsCopy()); |
| Operand rhs = ToOperand(ins->rhs()); |
| MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG, lhsCopy.reg() != result.code()); |
| |
| // Result is -0 if lhs or rhs is negative. |
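| // We only reach this path when the integer product was 0, so at least one |
| // operand was 0; the true JS result is -0 exactly when the other operand is |
| // negative, which is what the sign of (lhsCopy | rhs) detects below. |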
| masm.movl(lhsCopy, result); |
| masm.orl(rhs, result); |
| bailoutIf(Assembler::Signed, ins->snapshot()); |
| |
| masm.mov(ImmWord(0), result); |
| masm.jmp(ool->rejoin()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitDivPowTwoI(LDivPowTwoI* ins) |
| { |
| Register lhs = ToRegister(ins->numerator()); |
| mozilla::DebugOnly<Register> output = ToRegister(ins->output()); |
| |
| int32_t shift = ins->shift(); |
| bool negativeDivisor = ins->negativeDivisor(); |
| MDiv* mir = ins->mir(); |
| |
| // We use defineReuseInput so these should always be the same, which is |
| // convenient since all of our instructions here are two-address. |
| MOZ_ASSERT(lhs == output); |
| |
| if (!mir->isTruncated() && negativeDivisor) { |
| // 0 divided by a negative number must return a double. |
| masm.test32(lhs, lhs); |
| bailoutIf(Assembler::Zero, ins->snapshot()); |
| } |
| |
| if (shift != 0) { |
| if (!mir->isTruncated()) { |
| // If the remainder is != 0, bailout since this must be a double. |
| masm.test32(lhs, Imm32(UINT32_MAX >> (32 - shift))); |
| bailoutIf(Assembler::NonZero, ins->snapshot()); |
| } |
| |
| if (mir->isUnsigned()) { |
| masm.shrl(Imm32(shift), lhs); |
| } else { |
| // Adjust the value so that shifting produces a correctly |
| // rounded result when the numerator is negative. See 10-1 |
| // "Signed Division by a Known Power of 2" in Henry |
| // S. Warren, Jr.'s Hacker's Delight. |
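| // Illustrative example for shift == 2 (divide by 4) and lhs == -5: sarl(31) |
| // gives 0xFFFFFFFF, shrl(30) extracts the bias 3, adding the saved numerator |
| // gives -2, and sarl(2) then yields -1, the correctly truncated -5 / 4. |
| // Without the bias, sarl(2) on -5 would give floor(-5 / 4) == -2 instead. |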
| if (mir->canBeNegativeDividend()) { |
| Register lhsCopy = ToRegister(ins->numeratorCopy()); |
| MOZ_ASSERT(lhsCopy != lhs); |
| if (shift > 1) |
| masm.sarl(Imm32(31), lhs); |
| masm.shrl(Imm32(32 - shift), lhs); |
| masm.addl(lhsCopy, lhs); |
| } |
| masm.sarl(Imm32(shift), lhs); |
| |
| if (negativeDivisor) |
| masm.negl(lhs); |
| } |
| } else if (shift == 0) { |
| if (negativeDivisor) { |
| // INT32_MIN / -1 overflows. |
| masm.negl(lhs); |
| if (!mir->isTruncated()) |
| bailoutIf(Assembler::Overflow, ins->snapshot()); |
| } else if (mir->isUnsigned() && !mir->isTruncated()) { |
| // Unsigned division by 1 can overflow if output is not |
| // truncated. |
| masm.test32(lhs, lhs); |
| bailoutIf(Assembler::Signed, ins->snapshot()); |
| } |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitDivOrModConstantI(LDivOrModConstantI* ins) { |
| Register lhs = ToRegister(ins->numerator()); |
| Register output = ToRegister(ins->output()); |
| int32_t d = ins->denominator(); |
| |
| // This emits the division answer into edx or the modulus answer into eax. |
| MOZ_ASSERT(output == eax || output == edx); |
| MOZ_ASSERT(lhs != eax && lhs != edx); |
| bool isDiv = (output == edx); |
| |
| // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI |
| // and LModPowTwoI). |
| MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0); |
| |
| // We will first divide by Abs(d), and negate the answer if d is negative. |
| // If desired, this can be avoided by generalizing computeDivisionConstants. |
| ReciprocalMulConstants rmc = computeDivisionConstants(Abs(d), /* maxLog = */ 31); |
| |
| // We first compute (M * n) >> 32, where M = rmc.multiplier. |
| masm.movl(Imm32(rmc.multiplier), eax); |
| masm.imull(lhs); |
| if (rmc.multiplier > INT32_MAX) { |
| MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32)); |
| |
| // We actually computed edx = ((int32_t(M) * n) >> 32) instead. Since |
| // (M * n) >> 32 is the same as (edx + n), we can correct for the overflow. |
| // (edx + n) can't overflow, as n and edx have opposite signs because int32_t(M) |
| // is negative. |
| masm.addl(lhs, edx); |
| } |
| // (M * n) >> (32 + shift) is the truncated division answer if n is non-negative, |
| // as proved in the comments of computeDivisionConstants. We must add 1 later if n is |
| // negative to get the right answer in all cases. |
| masm.sarl(Imm32(rmc.shiftAmount), edx); |
| |
| // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be |
| // computed with just a sign-extending shift of 31 bits. |
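| // That is, eax below becomes (n >> 31), which is 0 for n >= 0 and -1 for |
| // n < 0, so the subl adds 1 exactly when the dividend is negative. |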
| if (ins->canBeNegativeDividend()) { |
| masm.movl(lhs, eax); |
| masm.sarl(Imm32(31), eax); |
| masm.subl(eax, edx); |
| } |
| |
| // After this, edx contains the correct truncated division result. |
| if (d < 0) |
| masm.negl(edx); |
| |
| if (!isDiv) { |
| masm.imull(Imm32(-d), edx, eax); |
| masm.addl(lhs, eax); |
| } |
| |
| if (!ins->mir()->isTruncated()) { |
| if (isDiv) { |
| // This is a division op. Multiply the obtained value by d to check if |
| // the correct answer is an integer. This cannot overflow, since |d| > 1. |
| masm.imull(Imm32(d), edx, eax); |
| masm.cmp32(lhs, eax); |
| bailoutIf(Assembler::NotEqual, ins->snapshot()); |
| |
| // If lhs is zero and the divisor is negative, the answer should have |
| // been -0. |
| if (d < 0) { |
| masm.test32(lhs, lhs); |
| bailoutIf(Assembler::Zero, ins->snapshot()); |
| } |
| } else if (ins->canBeNegativeDividend()) { |
| // This is a mod op. If the computed value is zero and lhs |
| // is negative, the answer should have been -0. |
| Label done; |
| |
| masm.cmp32(lhs, Imm32(0)); |
| masm.j(Assembler::GreaterThanOrEqual, &done); |
| |
| masm.test32(eax, eax); |
| bailoutIf(Assembler::Zero, ins->snapshot()); |
| |
| masm.bind(&done); |
| } |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitDivI(LDivI* ins) |
| { |
| Register remainder = ToRegister(ins->remainder()); |
| Register lhs = ToRegister(ins->lhs()); |
| Register rhs = ToRegister(ins->rhs()); |
| Register output = ToRegister(ins->output()); |
| |
| MDiv* mir = ins->mir(); |
| |
| MOZ_ASSERT_IF(lhs != rhs, rhs != eax); |
| MOZ_ASSERT(rhs != edx); |
| MOZ_ASSERT(remainder == edx); |
| MOZ_ASSERT(output == eax); |
| |
| Label done; |
| ReturnZero* ool = nullptr; |
| |
| // Put the lhs in eax, for either the negative overflow case or the regular |
| // divide case. |
| if (lhs != eax) |
| masm.mov(lhs, eax); |
| |
| // Handle divide by zero. |
| if (mir->canBeDivideByZero()) { |
| masm.test32(rhs, rhs); |
| if (mir->canTruncateInfinities()) { |
| // Truncated division by zero is zero (Infinity|0 == 0) |
| if (!ool) |
| ool = new(alloc()) ReturnZero(output); |
| masm.j(Assembler::Zero, ool->entry()); |
| } else { |
| MOZ_ASSERT(mir->fallible()); |
| bailoutIf(Assembler::Zero, ins->snapshot()); |
| } |
| } |
| |
| // Handle an integer overflow exception from -2147483648 / -1. |
| if (mir->canBeNegativeOverflow()) { |
| Label notmin; |
| masm.cmp32(lhs, Imm32(INT32_MIN)); |
| masm.j(Assembler::NotEqual, ¬min); |
| masm.cmp32(rhs, Imm32(-1)); |
| if (mir->canTruncateOverflow()) { |
| // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the |
| // output register (lhs == eax). |
| masm.j(Assembler::Equal, &done); |
| } else { |
| MOZ_ASSERT(mir->fallible()); |
| bailoutIf(Assembler::Equal, ins->snapshot()); |
| } |
| masm.bind(¬min); |
| } |
| |
| // Handle negative 0. |
| if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) { |
| Label nonzero; |
| masm.test32(lhs, lhs); |
| masm.j(Assembler::NonZero, &nonzero); |
| masm.cmp32(rhs, Imm32(0)); |
| bailoutIf(Assembler::LessThan, ins->snapshot()); |
| masm.bind(&nonzero); |
| } |
| |
| // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit. |
| if (lhs != eax) |
| masm.mov(lhs, eax); |
| masm.cdq(); |
| masm.idiv(rhs); |
| |
| if (!mir->canTruncateRemainder()) { |
| // If the remainder is > 0, bailout since this must be a double. |
| masm.test32(remainder, remainder); |
| bailoutIf(Assembler::NonZero, ins->snapshot()); |
| } |
| |
| masm.bind(&done); |
| |
| if (ool) { |
| addOutOfLineCode(ool, mir); |
| masm.bind(ool->rejoin()); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitModPowTwoI(LModPowTwoI* ins) |
| { |
| Register lhs = ToRegister(ins->getOperand(0)); |
| int32_t shift = ins->shift(); |
| |
| Label negative; |
| |
| if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) { |
| // Switch based on sign of the lhs. |
| // Positive numbers are just a bitmask |
| masm.branchTest32(Assembler::Signed, lhs, lhs, &negative); |
| } |
| |
| masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs); |
| |
| if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) { |
| Label done; |
| masm.jump(&done); |
| |
| // Negative numbers need a negate, bitmask, negate |
| masm.bind(&negative); |
| |
| // Unlike in the visitModI case, we are not computing the mod by means of a |
| // division. Therefore, the divisor = -1 case isn't problematic (the andl |
| // always returns 0, which is what we expect). |
| // |
| // The negl instruction overflows if lhs == INT32_MIN, but this is also not |
| // a problem: shift is at most 31, and so the andl also always returns 0. |
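| // Illustrative example for shift == 2 (mod 4) and lhs == -5: negl gives 5, |
| // the mask leaves 1, and the second negl gives -1, matching JS -5 % 4 == -1. |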
| masm.negl(lhs); |
| masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs); |
| masm.negl(lhs); |
| |
| // Since a%b has the same sign as a, and a is negative in this branch, |
| // an answer of 0 means the correct result is actually -0. Bail out. |
| if (!ins->mir()->isTruncated()) |
| bailoutIf(Assembler::Zero, ins->snapshot()); |
| masm.bind(&done); |
| } |
| } |
| |
| class ModOverflowCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared> |
| { |
| Label done_; |
| LModI* ins_; |
| Register rhs_; |
| |
| public: |
| explicit ModOverflowCheck(LModI* ins, Register rhs) |
| : ins_(ins), rhs_(rhs) |
| { } |
| |
| virtual void accept(CodeGeneratorX86Shared* codegen) { |
| codegen->visitModOverflowCheck(this); |
| } |
| Label* done() { |
| return &done_; |
| } |
| LModI* ins() const { |
| return ins_; |
| } |
| Register rhs() const { |
| return rhs_; |
| } |
| }; |
| |
| void |
| CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool) |
| { |
| masm.cmp32(ool->rhs(), Imm32(-1)); |
| if (ool->ins()->mir()->isTruncated()) { |
| masm.j(Assembler::NotEqual, ool->rejoin()); |
| masm.mov(ImmWord(0), edx); |
| masm.jmp(ool->done()); |
| } else { |
| bailoutIf(Assembler::Equal, ool->ins()->snapshot()); |
| masm.jmp(ool->rejoin()); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitModI(LModI* ins) |
| { |
| Register remainder = ToRegister(ins->remainder()); |
| Register lhs = ToRegister(ins->lhs()); |
| Register rhs = ToRegister(ins->rhs()); |
| |
| // Required to use idiv. |
| MOZ_ASSERT_IF(lhs != rhs, rhs != eax); |
| MOZ_ASSERT(rhs != edx); |
| MOZ_ASSERT(remainder == edx); |
| MOZ_ASSERT(ToRegister(ins->getTemp(0)) == eax); |
| |
| Label done; |
| ReturnZero* ool = nullptr; |
| ModOverflowCheck* overflow = nullptr; |
| |
| // Set up eax in preparation for doing a div. |
| if (lhs != eax) |
| masm.mov(lhs, eax); |
| |
| // Prevent divide by zero. |
| if (ins->mir()->canBeDivideByZero()) { |
| masm.test32(rhs, rhs); |
| if (ins->mir()->isTruncated()) { |
| if (!ool) |
| ool = new(alloc()) ReturnZero(edx); |
| masm.j(Assembler::Zero, ool->entry()); |
| } else { |
| bailoutIf(Assembler::Zero, ins->snapshot()); |
| } |
| } |
| |
| Label negative; |
| |
| // Switch based on sign of the lhs. |
| if (ins->mir()->canBeNegativeDividend()) |
| masm.branchTest32(Assembler::Signed, lhs, lhs, &negative); |
| |
| // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive. |
| { |
| // Check if rhs is a power-of-two. |
| if (ins->mir()->canBePowerOfTwoDivisor()) { |
| MOZ_ASSERT(rhs != remainder); |
| |
| // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if |
| // y is any negative number other than INT32_MIN, both y and |
| // y-1 will have the sign bit set so these are never optimized |
| // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX |
| // and because lhs >= 0 at this point, lhs & INT32_MAX returns |
| // the correct value. |
| Label notPowerOfTwo; |
| masm.mov(rhs, remainder); |
| masm.subl(Imm32(1), remainder); |
| masm.branchTest32(Assembler::NonZero, remainder, rhs, ¬PowerOfTwo); |
| { |
| masm.andl(lhs, remainder); |
| masm.jmp(&done); |
| } |
| masm.bind(¬PowerOfTwo); |
| } |
| |
| // Since lhs >= 0, the sign-extension will be 0 |
| masm.mov(ImmWord(0), edx); |
| masm.idiv(rhs); |
| } |
| |
| // Otherwise, we have to beware of two special cases: |
| if (ins->mir()->canBeNegativeDividend()) { |
| masm.jump(&done); |
| |
| masm.bind(&negative); |
| |
| // Prevent an integer overflow exception from -2147483648 % -1 |
| Label notmin; |
| masm.cmp32(lhs, Imm32(INT32_MIN)); |
| overflow = new(alloc()) ModOverflowCheck(ins, rhs); |
| masm.j(Assembler::Equal, overflow->entry()); |
| masm.bind(overflow->rejoin()); |
| masm.cdq(); |
| masm.idiv(rhs); |
| |
| if (!ins->mir()->isTruncated()) { |
| // A remainder of 0 means that the rval must be -0, which is a double. |
| masm.test32(remainder, remainder); |
| bailoutIf(Assembler::Zero, ins->snapshot()); |
| } |
| } |
| |
| masm.bind(&done); |
| |
| if (overflow) { |
| addOutOfLineCode(overflow, ins->mir()); |
| masm.bind(overflow->done()); |
| } |
| |
| if (ool) { |
| addOutOfLineCode(ool, ins->mir()); |
| masm.bind(ool->rejoin()); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitBitNotI(LBitNotI* ins) |
| { |
| const LAllocation* input = ins->getOperand(0); |
| MOZ_ASSERT(!input->isConstant()); |
| |
| masm.notl(ToOperand(input)); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitBitOpI(LBitOpI* ins) |
| { |
| const LAllocation* lhs = ins->getOperand(0); |
| const LAllocation* rhs = ins->getOperand(1); |
| |
| switch (ins->bitop()) { |
| case JSOP_BITOR: |
| if (rhs->isConstant()) |
| masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs)); |
| else |
| masm.orl(ToOperand(rhs), ToRegister(lhs)); |
| break; |
| case JSOP_BITXOR: |
| if (rhs->isConstant()) |
| masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs)); |
| else |
| masm.xorl(ToOperand(rhs), ToRegister(lhs)); |
| break; |
| case JSOP_BITAND: |
| if (rhs->isConstant()) |
| masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs)); |
| else |
| masm.andl(ToOperand(rhs), ToRegister(lhs)); |
| break; |
| default: |
| MOZ_CRASH("unexpected binary opcode"); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitShiftI(LShiftI* ins) |
| { |
| Register lhs = ToRegister(ins->lhs()); |
| const LAllocation* rhs = ins->rhs(); |
| |
| if (rhs->isConstant()) { |
| int32_t shift = ToInt32(rhs) & 0x1F; |
| switch (ins->bitop()) { |
| case JSOP_LSH: |
| if (shift) |
| masm.shll(Imm32(shift), lhs); |
| break; |
| case JSOP_RSH: |
| if (shift) |
| masm.sarl(Imm32(shift), lhs); |
| break; |
| case JSOP_URSH: |
| if (shift) { |
| masm.shrl(Imm32(shift), lhs); |
| } else if (ins->mir()->toUrsh()->fallible()) { |
| // x >>> 0 can overflow. |
| masm.test32(lhs, lhs); |
| bailoutIf(Assembler::Signed, ins->snapshot()); |
| } |
| break; |
| default: |
| MOZ_CRASH("Unexpected shift op"); |
| } |
| } else { |
| MOZ_ASSERT(ToRegister(rhs) == ecx); |
| switch (ins->bitop()) { |
| case JSOP_LSH: |
| masm.shll_cl(lhs); |
| break; |
| case JSOP_RSH: |
| masm.sarl_cl(lhs); |
| break; |
| case JSOP_URSH: |
| masm.shrl_cl(lhs); |
| if (ins->mir()->toUrsh()->fallible()) { |
| // x >>> 0 can overflow. |
| masm.test32(lhs, lhs); |
| bailoutIf(Assembler::Signed, ins->snapshot()); |
| } |
| break; |
| default: |
| MOZ_CRASH("Unexpected shift op"); |
| } |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitUrshD(LUrshD* ins) |
| { |
| Register lhs = ToRegister(ins->lhs()); |
| MOZ_ASSERT(ToRegister(ins->temp()) == lhs); |
| |
| const LAllocation* rhs = ins->rhs(); |
| FloatRegister out = ToFloatRegister(ins->output()); |
| |
| if (rhs->isConstant()) { |
| int32_t shift = ToInt32(rhs) & 0x1F; |
| if (shift) |
| masm.shrl(Imm32(shift), lhs); |
| } else { |
| MOZ_ASSERT(ToRegister(rhs) == ecx); |
| masm.shrl_cl(lhs); |
| } |
| |
| masm.convertUInt32ToDouble(lhs, out); |
| } |
| |
| MoveOperand |
| CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const |
| { |
| if (a.isGeneralReg()) |
| return MoveOperand(ToRegister(a)); |
| if (a.isFloatReg()) |
| return MoveOperand(ToFloatRegister(a)); |
| return MoveOperand(StackPointer, ToStackOffset(a)); |
| } |
| |
| class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorX86Shared> |
| { |
| MTableSwitch* mir_; |
| CodeLabel jumpLabel_; |
| |
| void accept(CodeGeneratorX86Shared* codegen) { |
| codegen->visitOutOfLineTableSwitch(this); |
| } |
| |
| public: |
| explicit OutOfLineTableSwitch(MTableSwitch* mir) |
| : mir_(mir) |
| {} |
| |
| MTableSwitch* mir() const { |
| return mir_; |
| } |
| |
| CodeLabel* jumpLabel() { |
| return &jumpLabel_; |
| } |
| }; |
| |
| void |
| CodeGeneratorX86Shared::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool) |
| { |
| MTableSwitch* mir = ool->mir(); |
| |
| masm.haltingAlign(sizeof(void*)); |
| masm.use(ool->jumpLabel()->target()); |
| masm.addCodeLabel(*ool->jumpLabel()); |
| |
| for (size_t i = 0; i < mir->numCases(); i++) { |
| LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir(); |
| Label* caseheader = caseblock->label(); |
| uint32_t caseoffset = caseheader->offset(); |
| |
| // The entries of the jump table need to be absolute addresses and thus |
| // must be patched after codegen is finished. |
| CodeLabel cl; |
| masm.writeCodePointer(cl.patchAt()); |
| cl.target()->bind(caseoffset); |
| masm.addCodeLabel(cl); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base) |
| { |
| Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label(); |
| |
| // Subtract the lowest case value so the index is zero-based. |
| if (mir->low() != 0) |
| masm.subl(Imm32(mir->low()), index); |
| |
| // Jump to default case if input is out of range |
| int32_t cases = mir->numCases(); |
| masm.cmp32(index, Imm32(cases)); |
| masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase); |
| |
| // To fill in the CodeLabels for the case entries, we need to first |
| // generate the case entries (we don't yet know their offsets in the |
| // instruction stream). |
| OutOfLineTableSwitch* ool = new(alloc()) OutOfLineTableSwitch(mir); |
| addOutOfLineCode(ool, mir); |
| |
| // Compute the position where a pointer to the right case stands. |
| masm.mov(ool->jumpLabel()->patchAt(), base); |
| Operand pointer = Operand(base, index, ScalePointer); |
| |
| // Jump to the right case |
| masm.jmp(pointer); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitMathD(LMathD* math) |
| { |
| FloatRegister lhs = ToFloatRegister(math->lhs()); |
| Operand rhs = ToOperand(math->rhs()); |
| FloatRegister output = ToFloatRegister(math->output()); |
| |
| switch (math->jsop()) { |
| case JSOP_ADD: |
| masm.vaddsd(rhs, lhs, output); |
| break; |
| case JSOP_SUB: |
| masm.vsubsd(rhs, lhs, output); |
| break; |
| case JSOP_MUL: |
| masm.vmulsd(rhs, lhs, output); |
| break; |
| case JSOP_DIV: |
| masm.vdivsd(rhs, lhs, output); |
| break; |
| default: |
| MOZ_CRASH("unexpected opcode"); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitMathF(LMathF* math) |
| { |
| FloatRegister lhs = ToFloatRegister(math->lhs()); |
| Operand rhs = ToOperand(math->rhs()); |
| FloatRegister output = ToFloatRegister(math->output()); |
| |
| switch (math->jsop()) { |
| case JSOP_ADD: |
| masm.vaddss(rhs, lhs, output); |
| break; |
| case JSOP_SUB: |
| masm.vsubss(rhs, lhs, output); |
| break; |
| case JSOP_MUL: |
| masm.vmulss(rhs, lhs, output); |
| break; |
| case JSOP_DIV: |
| masm.vdivss(rhs, lhs, output); |
| break; |
| default: |
| MOZ_CRASH("unexpected opcode"); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitFloor(LFloor* lir) |
| { |
| FloatRegister input = ToFloatRegister(lir->input()); |
| Register output = ToRegister(lir->output()); |
| |
| Label bailout; |
| |
| if (AssemblerX86Shared::HasSSE41()) { |
| // Bail on negative-zero. |
| masm.branchNegativeZero(input, output, &bailout); |
| bailoutFrom(&bailout, lir->snapshot()); |
| |
| // Round toward -Infinity. |
| { |
| ScratchDoubleScope scratch(masm); |
| masm.vroundsd(X86Encoding::RoundDown, input, scratch, scratch); |
| bailoutCvttsd2si(scratch, output, lir->snapshot()); |
| } |
| } else { |
| Label negative, end; |
| |
| // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. |
| { |
| ScratchDoubleScope scratch(masm); |
| masm.zeroDouble(scratch); |
| masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative); |
| } |
| |
| // Bail on negative-zero. |
| masm.branchNegativeZero(input, output, &bailout); |
| bailoutFrom(&bailout, lir->snapshot()); |
| |
| // Input is non-negative, so truncation correctly rounds. |
| bailoutCvttsd2si(input, output, lir->snapshot()); |
| |
| masm.jump(&end); |
| |
| // Input is negative, but isn't -0. |
| // Negative values go on a comparatively expensive path, since no |
| // native rounding mode matches JS semantics. Still better than callVM. |
| masm.bind(&negative); |
| { |
| // Truncate and round toward zero. |
| // This is off-by-one for everything but integer-valued inputs. |
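| // For example, floor(-2.5): cvttsd2si yields -2; since -2 != -2.5 the |
| // correction below subtracts 1, giving -3. Integer-valued inputs such as |
| // -3.0 convert exactly and skip the correction. |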
| bailoutCvttsd2si(input, output, lir->snapshot()); |
| |
| // Test whether the input double was integer-valued. |
| { |
| ScratchDoubleScope scratch(masm); |
| masm.convertInt32ToDouble(output, scratch); |
| masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end); |
| } |
| |
| // Input is not integer-valued, so we rounded off-by-one in the |
| // wrong direction. Correct by subtraction. |
| masm.subl(Imm32(1), output); |
| // Cannot overflow: output was already checked against INT_MIN. |
| } |
| |
| masm.bind(&end); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitFloorF(LFloorF* lir) |
| { |
| FloatRegister input = ToFloatRegister(lir->input()); |
| Register output = ToRegister(lir->output()); |
| |
| Label bailout; |
| |
| if (AssemblerX86Shared::HasSSE41()) { |
| // Bail on negative-zero. |
| masm.branchNegativeZeroFloat32(input, output, &bailout); |
| bailoutFrom(&bailout, lir->snapshot()); |
| |
| // Round toward -Infinity. |
| { |
| ScratchFloat32Scope scratch(masm); |
| masm.vroundss(X86Encoding::RoundDown, input, scratch, scratch); |
| bailoutCvttss2si(scratch, output, lir->snapshot()); |
| } |
| } else { |
| Label negative, end; |
| |
| // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. |
| { |
| ScratchFloat32Scope scratch(masm); |
| masm.zeroFloat32(scratch); |
| masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative); |
| } |
| |
| // Bail on negative-zero. |
| masm.branchNegativeZeroFloat32(input, output, &bailout); |
| bailoutFrom(&bailout, lir->snapshot()); |
| |
| // Input is non-negative, so truncation correctly rounds. |
| bailoutCvttss2si(input, output, lir->snapshot()); |
| |
| masm.jump(&end); |
| |
| // Input is negative, but isn't -0. |
| // Negative values go on a comparatively expensive path, since no |
| // native rounding mode matches JS semantics. Still better than callVM. |
| masm.bind(&negative); |
| { |
| // Truncate and round toward zero. |
| // This is off-by-one for everything but integer-valued inputs. |
| bailoutCvttss2si(input, output, lir->snapshot()); |
| |
| // Test whether the input float was integer-valued. |
| { |
| ScratchFloat32Scope scratch(masm); |
| masm.convertInt32ToFloat32(output, scratch); |
| masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end); |
| } |
| |
| // Input is not integer-valued, so we rounded off-by-one in the |
| // wrong direction. Correct by subtraction. |
| masm.subl(Imm32(1), output); |
| // Cannot overflow: output was already checked against INT_MIN. |
| } |
| |
| masm.bind(&end); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCeil(LCeil* lir) |
| { |
| FloatRegister input = ToFloatRegister(lir->input()); |
| ScratchDoubleScope scratch(masm); |
| Register output = ToRegister(lir->output()); |
| |
| Label bailout, lessThanMinusOne; |
| |
| // Bail on the ]-1; -0] range: the ceiling of any such value is -0, which |
| // cannot be represented as an int32. |
| masm.loadConstantDouble(-1, scratch); |
| masm.branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, input, |
| scratch, &lessThanMinusOne); |
| |
| // Test for remaining values with the sign bit set, i.e. ]-1; -0] |
| masm.vmovmskpd(input, output); |
| masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout); |
| bailoutFrom(&bailout, lir->snapshot()); |
| |
| if (AssemblerX86Shared::HasSSE41()) { |
| // x <= -1 or x > -0 |
| masm.bind(&lessThanMinusOne); |
| // Round toward +Infinity. |
| masm.vroundsd(X86Encoding::RoundUp, input, scratch, scratch); |
| bailoutCvttsd2si(scratch, output, lir->snapshot()); |
| return; |
| } |
| |
| // No SSE4.1 |
| Label end; |
| |
| // Since x >= 0 and x is not -0.0, we can truncate (resp. truncate and add 1) for |
| // integer (resp. non-integer) values. |
| // Will also work for values >= INT_MAX + 1, as the truncate |
| // operation will return INT_MIN and there'll be a bailout. |
| bailoutCvttsd2si(input, output, lir->snapshot()); |
| masm.convertInt32ToDouble(output, scratch); |
| masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end); |
| |
| // Input is not integer-valued, add 1 to obtain the ceiling value |
| masm.addl(Imm32(1), output); |
| // if input > INT_MAX, output == INT_MAX so adding 1 will overflow. |
| bailoutIf(Assembler::Overflow, lir->snapshot()); |
| masm.jump(&end); |
| |
| // x <= -1, truncation is the way to go. |
| masm.bind(&lessThanMinusOne); |
| bailoutCvttsd2si(input, output, lir->snapshot()); |
| |
| masm.bind(&end); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCeilF(LCeilF* lir) |
| { |
| FloatRegister input = ToFloatRegister(lir->input()); |
| ScratchFloat32Scope scratch(masm); |
| Register output = ToRegister(lir->output()); |
| |
| Label bailout, lessThanMinusOne; |
| |
| // Bail on the ]-1; -0] range: the ceiling of any such value is -0, which |
| // cannot be represented as an int32. |
| masm.loadConstantFloat32(-1.f, scratch); |
| masm.branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, input, |
| scratch, &lessThanMinusOne); |
| |
| // Test for remaining values with the sign bit set, i.e. ]-1; -0] |
| masm.vmovmskps(input, output); |
| masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout); |
| bailoutFrom(&bailout, lir->snapshot()); |
| |
| if (AssemblerX86Shared::HasSSE41()) { |
| // x <= -1 or x > -0 |
| masm.bind(&lessThanMinusOne); |
| // Round toward +Infinity. |
| masm.vroundss(X86Encoding::RoundUp, input, scratch, scratch); |
| bailoutCvttss2si(scratch, output, lir->snapshot()); |
| return; |
| } |
| |
| // No SSE4.1 |
| Label end; |
| |
| // Since x >= 0 and x is not -0.0, we can truncate (resp. truncate and add 1) for |
| // integer (resp. non-integer) values. |
| // Will also work for values >= INT_MAX + 1, as the truncate |
| // operation will return INT_MIN and there'll be a bailout. |
| bailoutCvttss2si(input, output, lir->snapshot()); |
| masm.convertInt32ToFloat32(output, scratch); |
| masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end); |
| |
| // Input is not integer-valued, add 1 to obtain the ceiling value |
| masm.addl(Imm32(1), output); |
| // if input > INT_MAX, output == INT_MAX so adding 1 will overflow. |
| bailoutIf(Assembler::Overflow, lir->snapshot()); |
| masm.jump(&end); |
| |
| // x <= -1, truncation is the way to go. |
| masm.bind(&lessThanMinusOne); |
| bailoutCvttss2si(input, output, lir->snapshot()); |
| |
| masm.bind(&end); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitRound(LRound* lir) |
| { |
| FloatRegister input = ToFloatRegister(lir->input()); |
| FloatRegister temp = ToFloatRegister(lir->temp()); |
| ScratchDoubleScope scratch(masm); |
| Register output = ToRegister(lir->output()); |
| |
| Label negativeOrZero, negative, end, bailout; |
| |
| // Branch to a slow path for non-positive inputs. Doesn't catch NaN. |
| masm.zeroDouble(scratch); |
| masm.loadConstantDouble(GetBiggestNumberLessThan(0.5), temp); |
| masm.branchDouble(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero); |
| |
| // Input is positive. Add the biggest double less than 0.5 and |
| // truncate, rounding down (because if the input is the biggest double less |
| // than 0.5, adding 0.5 would undesirably round up to 1). Note that we have |
| // to add the input to the temp register because we're not allowed to |
| // modify the input register. |
| masm.addDouble(input, temp); |
| bailoutCvttsd2si(temp, output, lir->snapshot()); |
| |
| masm.jump(&end); |
| |
| // Input is negative, +0 or -0. |
| masm.bind(&negativeOrZero); |
| // Branch on negative input. |
| masm.j(Assembler::NotEqual, &negative); |
| |
| // Bail on negative-zero. |
| masm.branchNegativeZero(input, output, &bailout, /* maybeNonZero = */ false); |
| bailoutFrom(&bailout, lir->snapshot()); |
| |
| // Input is +0 |
| masm.xor32(output, output); |
| masm.jump(&end); |
| |
| // Input is negative. |
| masm.bind(&negative); |
| |
| // Inputs in ]-0.5; 0] need 0.5 added to them; other negative inputs need |
| // the biggest double less than 0.5 added to them. |
| Label loadJoin; |
| masm.loadConstantDouble(-0.5, scratch); |
| masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &loadJoin); |
| masm.loadConstantDouble(0.5, temp); |
| masm.bind(&loadJoin); |
| |
| if (AssemblerX86Shared::HasSSE41()) { |
| // Add the adjustment loaded above (0.5 or the biggest double less than 0.5) |
| // and round toward -Infinity. The sum is stored in the temp register. |
| masm.addDouble(input, temp); |
| masm.vroundsd(X86Encoding::RoundDown, temp, scratch, scratch); |
| |
| // Truncate. |
| bailoutCvttsd2si(scratch, output, lir->snapshot()); |
| |
| // If the result is positive zero, then the actual result is -0. Bail. |
| // Otherwise, the truncation will have produced the correct negative integer. |
| masm.test32(output, output); |
| bailoutIf(Assembler::Zero, lir->snapshot()); |
| } else { |
| masm.addDouble(input, temp); |
| |
| // Round toward -Infinity without the benefit of ROUNDSD. |
| { |
| // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0. |
| masm.compareDouble(Assembler::DoubleGreaterThanOrEqual, temp, scratch); |
| bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot()); |
| |
| // Truncate and round toward zero. |
| // This is off-by-one for everything but integer-valued inputs. |
| bailoutCvttsd2si(temp, output, lir->snapshot()); |
| |
| // Test whether the truncated double was integer-valued. |
| masm.convertInt32ToDouble(output, scratch); |
| masm.branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); |
| |
| // Input is not integer-valued, so we rounded off-by-one in the |
| // wrong direction. Correct by subtraction. |
| masm.subl(Imm32(1), output); |
| // Cannot overflow: output was already checked against INT_MIN. |
| } |
| } |
| |
| masm.bind(&end); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitRoundF(LRoundF* lir) |
| { |
| FloatRegister input = ToFloatRegister(lir->input()); |
| FloatRegister temp = ToFloatRegister(lir->temp()); |
| ScratchFloat32Scope scratch(masm); |
| Register output = ToRegister(lir->output()); |
| |
| Label negativeOrZero, negative, end, bailout; |
| |
| // Branch to a slow path for non-positive inputs. Doesn't catch NaN. |
| masm.zeroFloat32(scratch); |
| masm.loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp); |
| masm.branchFloat(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero); |
| |
| // Input is positive. Add the biggest float less than 0.5 and truncate, |
| // rounding down (because if the input is the biggest float less than 0.5, |
| // adding 0.5 would undesirably round up to 1). Note that we have to add |
| // the input to the temp register because we're not allowed to modify the |
| // input register. |
| masm.addFloat32(input, temp); |
| |
| bailoutCvttss2si(temp, output, lir->snapshot()); |
| |
| masm.jump(&end); |
| |
| // Input is negative, +0 or -0. |
| masm.bind(&negativeOrZero); |
| // Branch on negative input. |
| masm.j(Assembler::NotEqual, &negative); |
| |
| // Bail on negative-zero. |
| masm.branchNegativeZeroFloat32(input, output, &bailout); |
| bailoutFrom(&bailout, lir->snapshot()); |
| |
| // Input is +0. |
| masm.xor32(output, output); |
| masm.jump(&end); |
| |
| // Input is negative. |
| masm.bind(&negative); |
| |
| // Inputs in ]-0.5; 0] need 0.5 added to them; other negative inputs need |
| // the biggest float less than 0.5 added to them. |
| Label loadJoin; |
| masm.loadConstantFloat32(-0.5f, scratch); |
| masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &loadJoin); |
| masm.loadConstantFloat32(0.5f, temp); |
| masm.bind(&loadJoin); |
| |
| if (AssemblerX86Shared::HasSSE41()) { |
| // Add the adjustment loaded above (0.5 or the biggest float less than 0.5) |
| // and round toward -Infinity. The sum is stored in the temp register. |
| masm.addFloat32(input, temp); |
| masm.vroundss(X86Encoding::RoundDown, temp, scratch, scratch); |
| |
| // Truncate. |
| bailoutCvttss2si(scratch, output, lir->snapshot()); |
| |
| // If the result is positive zero, then the actual result is -0. Bail. |
| // Otherwise, the truncation will have produced the correct negative integer. |
| masm.test32(output, output); |
| bailoutIf(Assembler::Zero, lir->snapshot()); |
| } else { |
| masm.addFloat32(input, temp); |
| // Round toward -Infinity without the benefit of ROUNDSS. |
| { |
| // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0. |
| masm.compareFloat(Assembler::DoubleGreaterThanOrEqual, temp, scratch); |
| bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot()); |
| |
| // Truncate and round toward zero. |
| // This is off-by-one for everything but integer-valued inputs. |
| bailoutCvttss2si(temp, output, lir->snapshot()); |
| |
| // Test whether the truncated float was integer-valued. |
| masm.convertInt32ToFloat32(output, scratch); |
| masm.branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); |
| |
| // Input is not integer-valued, so we rounded off-by-one in the |
| // wrong direction. Correct by subtraction. |
| masm.subl(Imm32(1), output); |
| // Cannot overflow: output was already checked against INT_MIN. |
| } |
| } |
| |
| masm.bind(&end); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitGuardShape(LGuardShape* guard) |
| { |
| Register obj = ToRegister(guard->input()); |
| masm.cmpPtr(Operand(obj, JSObject::offsetOfShape()), ImmGCPtr(guard->mir()->shape())); |
| |
| bailoutIf(Assembler::NotEqual, guard->snapshot()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitGuardObjectGroup(LGuardObjectGroup* guard) |
| { |
| Register obj = ToRegister(guard->input()); |
| |
| masm.cmpPtr(Operand(obj, JSObject::offsetOfGroup()), ImmGCPtr(guard->mir()->group())); |
| |
| Assembler::Condition cond = |
| guard->mir()->bailOnEquality() ? Assembler::Equal : Assembler::NotEqual; |
| bailoutIf(cond, guard->snapshot()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitGuardClass(LGuardClass* guard) |
| { |
| Register obj = ToRegister(guard->input()); |
| Register tmp = ToRegister(guard->tempInt()); |
| |
| masm.loadPtr(Address(obj, JSObject::offsetOfGroup()), tmp); |
| masm.cmpPtr(Operand(tmp, ObjectGroup::offsetOfClasp()), ImmPtr(guard->mir()->getClass())); |
| bailoutIf(Assembler::NotEqual, guard->snapshot()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitEffectiveAddress(LEffectiveAddress* ins) |
| { |
| const MEffectiveAddress* mir = ins->mir(); |
| Register base = ToRegister(ins->base()); |
| Register index = ToRegister(ins->index()); |
| Register output = ToRegister(ins->output()); |
| masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::generateInvalidateEpilogue() |
| { |
| // Ensure that there is enough space in the buffer for the OsiPoint |
| // patching to occur. Otherwise, we could overwrite the invalidation |
| // epilogue. |
| for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) |
| masm.nop(); |
| |
| masm.bind(&invalidate_); |
| |
| // Push the Ion script onto the stack (when we determine what that pointer is). |
| invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1))); |
| JitCode* thunk = gen->jitRuntime()->getInvalidationThunk(); |
| |
| masm.call(thunk); |
| |
| // We should never reach this point in JIT code -- the invalidation thunk should |
| // pop the invalidated JS frame and return directly to its caller. |
| masm.assumeUnreachable("Should have returned directly to its caller instead of here."); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitNegI(LNegI* ins) |
| { |
| Register input = ToRegister(ins->input()); |
| MOZ_ASSERT(input == ToRegister(ins->output())); |
| |
| masm.neg32(input); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitNegD(LNegD* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| MOZ_ASSERT(input == ToFloatRegister(ins->output())); |
| |
| masm.negateDouble(input); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitNegF(LNegF* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| MOZ_ASSERT(input == ToFloatRegister(ins->output())); |
| |
| masm.negateFloat(input); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitInt32x4(LInt32x4* ins) |
| { |
| const LDefinition* out = ins->getDef(0); |
| masm.loadConstantInt32x4(ins->getValue(), ToFloatRegister(out)); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitFloat32x4(LFloat32x4* ins) |
| { |
| const LDefinition* out = ins->getDef(0); |
| masm.loadConstantFloat32x4(ins->getValue(), ToFloatRegister(out)); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins) |
| { |
| FloatRegister in = ToFloatRegister(ins->input()); |
| FloatRegister out = ToFloatRegister(ins->output()); |
| masm.convertInt32x4ToFloat32x4(in, out); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins) |
| { |
| FloatRegister in = ToFloatRegister(ins->input()); |
| FloatRegister out = ToFloatRegister(ins->output()); |
| Register temp = ToRegister(ins->temp()); |
| |
| masm.convertFloat32x4ToInt32x4(in, out); |
| |
| OutOfLineSimdFloatToIntCheck *ool = new(alloc()) OutOfLineSimdFloatToIntCheck(temp, in, ins); |
| addOutOfLineCode(ool, ins->mir()); |
| |
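| // The packed truncating conversion above writes the x86 "integer indefinite" |
| // value 0x80000000 into every lane whose input is NaN or out of int32 range; |
| // detect such lanes and run the out-of-line check. |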
| static const SimdConstant InvalidResult = SimdConstant::SplatX4(int32_t(-2147483648)); |
| |
| ScratchSimd128Scope scratch(masm); |
| masm.loadConstantInt32x4(InvalidResult, scratch); |
| masm.packedEqualInt32x4(Operand(out), scratch); |
| // TODO (bug 1156228): If we have SSE4.1, we can use PTEST here instead of |
| // the two following instructions. |
| masm.vmovmskps(scratch, temp); |
| masm.cmp32(temp, Imm32(0)); |
| masm.j(Assembler::NotEqual, ool->entry()); |
| |
| masm.bind(ool->rejoin()); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitOutOfLineSimdFloatToIntCheck(OutOfLineSimdFloatToIntCheck *ool) |
| { |
| static const SimdConstant Int32MaxX4 = SimdConstant::SplatX4(2147483647.f); |
| static const SimdConstant Int32MinX4 = SimdConstant::SplatX4(-2147483648.f); |
| |
| Label bail; |
| Label* onConversionError = gen->compilingAsmJS() ? masm.asmOnConversionErrorLabel() : &bail; |
| |
| FloatRegister input = ool->input(); |
| Register temp = ool->temp(); |
| |
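| // The conversion is invalid for lanes that are NaN, less than INT32_MIN, or |
| // greater than or equal to INT32_MAX + 1 (2147483647.f rounds up to 2^31 as |
| // a float); the two packed compares below catch all of these cases. |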
| ScratchSimd128Scope scratch(masm); |
| masm.loadConstantFloat32x4(Int32MinX4, scratch); |
| masm.vcmpleps(Operand(input), scratch, scratch); |
| masm.vmovmskps(scratch, temp); |
| masm.cmp32(temp, Imm32(15)); |
| masm.j(Assembler::NotEqual, onConversionError); |
| |
| masm.loadConstantFloat32x4(Int32MaxX4, scratch); |
| masm.vcmpleps(Operand(input), scratch, scratch); |
| masm.vmovmskps(scratch, temp); |
| masm.cmp32(temp, Imm32(0)); |
| masm.j(Assembler::NotEqual, onConversionError); |
| |
| masm.jump(ool->rejoin()); |
| |
| if (bail.used()) { |
| masm.bind(&bail); |
| bailout(ool->ins()->snapshot()); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4* ins) |
| { |
| MOZ_ASSERT(ins->mir()->type() == MIRType_Int32x4); |
| |
| FloatRegister output = ToFloatRegister(ins->output()); |
| if (AssemblerX86Shared::HasSSE41()) { |
| masm.vmovd(ToRegister(ins->getOperand(0)), output); |
| for (size_t i = 1; i < 4; ++i) { |
| Register r = ToRegister(ins->getOperand(i)); |
| masm.vpinsrd(i, r, output, output); |
| } |
| return; |
| } |
| |
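| // Without SSE4.1's pinsrd, assemble the vector in memory: store the four |
| // scalars on the stack and reload them as one aligned 128-bit vector. |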
| masm.reserveStack(Simd128DataSize); |
| for (size_t i = 0; i < 4; ++i) { |
| Register r = ToRegister(ins->getOperand(i)); |
| masm.store32(r, Address(StackPointer, i * sizeof(int32_t))); |
| } |
| masm.loadAlignedInt32x4(Address(StackPointer, 0), output); |
| masm.freeStack(Simd128DataSize); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdValueFloat32x4(LSimdValueFloat32x4* ins) |
| { |
| MOZ_ASSERT(ins->mir()->type() == MIRType_Float32x4); |
| |
| FloatRegister r0 = ToFloatRegister(ins->getOperand(0)); |
| FloatRegister r1 = ToFloatRegister(ins->getOperand(1)); |
| FloatRegister r2 = ToFloatRegister(ins->getOperand(2)); |
| FloatRegister r3 = ToFloatRegister(ins->getOperand(3)); |
| FloatRegister tmp = ToFloatRegister(ins->getTemp(0)); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
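| // Interleave the low lanes pairwise (r0 with r2, and r1 with r3), then |
| // interleave the two partial results so the output lanes end up as |
| // (r0, r1, r2, r3). |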
| FloatRegister r0Copy = masm.reusedInputFloat32x4(r0, output); |
| FloatRegister r1Copy = masm.reusedInputFloat32x4(r1, tmp); |
| |
| masm.vunpcklps(r3, r1Copy, tmp); |
| masm.vunpcklps(r2, r0Copy, output); |
| masm.vunpcklps(tmp, output, output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4* ins) |
| { |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| MSimdSplatX4* mir = ins->mir(); |
| MOZ_ASSERT(IsSimdType(mir->type())); |
| JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t)); |
| |
| switch (mir->type()) { |
| case MIRType_Int32x4: { |
| Register r = ToRegister(ins->getOperand(0)); |
| masm.vmovd(r, output); |
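| // Shuffle mask 0 replicates lane 0 into all four lanes. |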
| masm.vpshufd(0, output, output); |
| break; |
| } |
| case MIRType_Float32x4: { |
| FloatRegister r = ToFloatRegister(ins->getOperand(0)); |
| FloatRegister rCopy = masm.reusedInputFloat32x4(r, output); |
| masm.vshufps(0, rCopy, rCopy, output); |
| break; |
| } |
| default: |
| MOZ_CRASH("Unknown SIMD kind"); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdReinterpretCast(LSimdReinterpretCast* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| if (input.aliases(output)) |
| return; |
| |
| switch (ins->mir()->type()) { |
| case MIRType_Int32x4: |
| masm.vmovdqa(input, output); |
| break; |
| case MIRType_Float32x4: |
| masm.vmovaps(input, output); |
| break; |
| default: |
| MOZ_CRASH("Unknown SIMD kind"); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| Register output = ToRegister(ins->output()); |
| |
| SimdLane lane = ins->lane(); |
| if (lane == LaneX) { |
| // The value we want to extract is in the low double-word |
| masm.moveLowInt32(input, output); |
| } else if (AssemblerX86Shared::HasSSE41()) { |
| masm.vpextrd(lane, input, output); |
| } else { |
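| // Without SSE4.1's pextrd, shuffle the requested lane into lane 0 and |
| // read it from there. |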
| uint32_t mask = MacroAssembler::ComputeShuffleMask(lane); |
| ScratchSimd128Scope scratch(masm); |
| masm.shuffleInt32(mask, input, scratch); |
| masm.moveLowInt32(scratch, output); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| SimdLane lane = ins->lane(); |
| if (lane == LaneX) { |
| // The value we want to extract is in the low double-word |
| if (input != output) |
| masm.moveFloat32(input, output); |
| } else if (lane == LaneZ) { |
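| // Move the high pair (z, w) down into the low pair so the requested lane |
| // ends up in lane 0. |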
| masm.moveHighPairToLowPairFloat32(input, output); |
| } else { |
| uint32_t mask = MacroAssembler::ComputeShuffleMask(lane); |
| masm.shuffleFloat32(mask, input, output); |
| } |
| // NaNs contained within SIMD values are not enforced to be canonical, so |
| // when we extract an element into a "regular" scalar JS value, we have to |
| // canonicalize. In asm.js code, we can skip this, as asm.js only has to |
| // canonicalize NaNs at FFI boundaries. |
| if (!gen->compilingAsmJS()) |
| masm.canonicalizeFloat(output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdInsertElementI(LSimdInsertElementI* ins) |
| { |
| FloatRegister vector = ToFloatRegister(ins->vector()); |
| Register value = ToRegister(ins->value()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| MOZ_ASSERT(vector == output); // defineReuseInput(0) |
| |
| unsigned component = unsigned(ins->lane()); |
| |
| // Note that, unlike the float32x4 case, we cannot use vmovd if the inserted |
| // value goes into the first component, as vmovd clears out the higher lanes |
| // of the output. |
| if (AssemblerX86Shared::HasSSE41()) { |
| // TODO: Teach Lowering that we don't need defineReuseInput if we have AVX. |
| masm.vpinsrd(component, value, vector, output); |
| return; |
| } |
| |
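| // Without SSE4.1's pinsrd, go through memory: spill the vector, overwrite |
| // the target lane, and reload the result. |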
| masm.reserveStack(Simd128DataSize); |
| masm.storeAlignedInt32x4(vector, Address(StackPointer, 0)); |
| masm.store32(value, Address(StackPointer, component * sizeof(int32_t))); |
| masm.loadAlignedInt32x4(Address(StackPointer, 0), output); |
| masm.freeStack(Simd128DataSize); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdInsertElementF(LSimdInsertElementF* ins) |
| { |
| FloatRegister vector = ToFloatRegister(ins->vector()); |
| FloatRegister value = ToFloatRegister(ins->value()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| MOZ_ASSERT(vector == output); // defineReuseInput(0) |
| |
| if (ins->lane() == SimdLane::LaneX) { |
| // As both operands are registers, vmovss doesn't modify the upper bits |
| // of the destination operand. |
| if (value != output) |
| masm.vmovss(value, vector, output); |
| return; |
| } |
| |
| if (AssemblerX86Shared::HasSSE41()) { |
| // The input value is in the low float32 of the 'value' FloatRegister. |
| masm.vinsertps(masm.vinsertpsMask(SimdLane::LaneX, ins->lane()), value, output, output); |
| return; |
| } |
| |
| unsigned component = unsigned(ins->lane()); |
| masm.reserveStack(Simd128DataSize); |
| masm.storeAlignedFloat32x4(vector, Address(StackPointer, 0)); |
| masm.storeFloat32(value, Address(StackPointer, component * sizeof(int32_t))); |
| masm.loadAlignedFloat32x4(Address(StackPointer, 0), output); |
| masm.freeStack(Simd128DataSize); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdSignMaskX4(LSimdSignMaskX4* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| Register output = ToRegister(ins->output()); |
| |
| // For Float32x4 and Int32x4. |
| masm.vmovmskps(input, output); |
| } |
| |
| template <class T, class Reg> void |
| CodeGeneratorX86Shared::visitSimdGeneralShuffle(LSimdGeneralShuffleBase* ins, Reg tempRegister) |
| { |
| MSimdGeneralShuffle* mir = ins->mir(); |
| unsigned numVectors = mir->numVectors(); |
| |
| Register laneTemp = ToRegister(ins->temp()); |
| |
| // This won't generate fast code, but it's fine because we expect users |
| // to have used constant indices (and thus MSimdGeneralShuffle to be folded |
| // into MSimdSwizzle/MSimdShuffle, which are fast). |
| |
| // We need stack space for the numVectors inputs and for the output vector. |
| unsigned stackSpace = Simd128DataSize * (numVectors + 1); |
| masm.reserveStack(stackSpace); |
| |
| for (unsigned i = 0; i < numVectors; i++) { |
| masm.storeAlignedVector<T>(ToFloatRegister(ins->vector(i)), |
| Address(StackPointer, Simd128DataSize * (1 + i))); |
| } |
| |
| Label bail; |
| |
| for (size_t i = 0; i < mir->numLanes(); i++) { |
| Operand lane = ToOperand(ins->lane(i)); |
| |
| masm.cmp32(lane, Imm32(numVectors * mir->numLanes() - 1)); |
| masm.j(Assembler::Above, &bail); |
| |
| if (lane.kind() == Operand::REG) { |
| masm.loadScalar<T>(Operand(StackPointer, ToRegister(ins->lane(i)), TimesFour, Simd128DataSize), |
| tempRegister); |
| } else { |
| masm.load32(lane, laneTemp); |
| masm.loadScalar<T>(Operand(StackPointer, laneTemp, TimesFour, Simd128DataSize), tempRegister); |
| } |
| |
| masm.storeScalar<T>(tempRegister, Address(StackPointer, i * sizeof(T))); |
| } |
| |
| FloatRegister output = ToFloatRegister(ins->output()); |
| masm.loadAlignedVector<T>(Address(StackPointer, 0), output); |
| |
| Label join; |
| masm.jump(&join); |
| |
| { |
| masm.bind(&bail); |
| masm.freeStack(stackSpace); |
| bailout(ins->snapshot()); |
| } |
| |
| masm.bind(&join); |
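| // The bailout path above already freed its stack reservation, which lowered |
| // the assembler's framePushed counter; restore the counter so the freeStack |
| // below keeps the bookkeeping consistent for the fall-through path. |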
| masm.setFramePushed(masm.framePushed() + stackSpace); |
| masm.freeStack(stackSpace); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdGeneralShuffleI(LSimdGeneralShuffleI* ins) |
| { |
| visitSimdGeneralShuffle<int32_t, Register>(ins, ToRegister(ins->temp())); |
| } |
| void |
| CodeGeneratorX86Shared::visitSimdGeneralShuffleF(LSimdGeneralShuffleF* ins) |
| { |
| ScratchFloat32Scope scratch(masm); |
| visitSimdGeneralShuffle<float, FloatRegister>(ins, scratch); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| uint32_t x = ins->laneX(); |
| uint32_t y = ins->laneY(); |
| uint32_t z = ins->laneZ(); |
| uint32_t w = ins->laneW(); |
| |
| uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w); |
| masm.shuffleInt32(mask, input, output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF* ins) |
| { |
| FloatRegister input = ToFloatRegister(ins->input()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| uint32_t x = ins->laneX(); |
| uint32_t y = ins->laneY(); |
| uint32_t z = ins->laneZ(); |
| uint32_t w = ins->laneW(); |
| |
| if (AssemblerX86Shared::HasSSE3()) { |
| if (ins->lanesMatch(0, 0, 2, 2)) { |
| masm.vmovsldup(input, output); |
| return; |
| } |
| if (ins->lanesMatch(1, 1, 3, 3)) { |
| masm.vmovshdup(input, output); |
| return; |
| } |
| } |
| |
| // TODO Here and below, arch specific lowering could identify this pattern |
| // and use defineReuseInput to avoid this move (bug 1084404) |
| if (ins->lanesMatch(2, 3, 2, 3)) { |
| FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); |
| masm.vmovhlps(input, inputCopy, output); |
| return; |
| } |
| |
| if (ins->lanesMatch(0, 1, 0, 1)) { |
| if (AssemblerX86Shared::HasSSE3() && !AssemblerX86Shared::HasAVX()) { |
| masm.vmovddup(input, output); |
| return; |
| } |
| FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); |
| masm.vmovlhps(input, inputCopy, output); |
| return; |
| } |
| |
| if (ins->lanesMatch(0, 0, 1, 1)) { |
| FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); |
| masm.vunpcklps(input, inputCopy, output); |
| return; |
| } |
| |
| if (ins->lanesMatch(2, 2, 3, 3)) { |
| FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output); |
| masm.vunpckhps(input, inputCopy, output); |
| return; |
| } |
| |
| uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w); |
| masm.shuffleFloat32(mask, input, output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle* ins) |
| { |
| FloatRegister lhs = ToFloatRegister(ins->lhs()); |
| Operand rhs = ToOperand(ins->rhs()); |
| FloatRegister out = ToFloatRegister(ins->output()); |
| |
| uint32_t x = ins->laneX(); |
| uint32_t y = ins->laneY(); |
| uint32_t z = ins->laneZ(); |
| uint32_t w = ins->laneW(); |
| |
| // Check that lanes come from LHS in majority: |
| unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4); |
| MOZ_ASSERT(numLanesFromLHS >= 2); |
| |
| // When reading this method, remember that vshufps fills the first two lanes |
| // of the result from the destination operand (the right operand) and the |
| // last two lanes from the source operand (the left operand). |
| // |
| // Legend for explanations: |
| // - L: LHS |
| // - R: RHS |
| // - T: temporary |
| |
| uint32_t mask; |
| |
| // If all lanes came from a single vector, we should have constructed a |
| // MSimdSwizzle instead. |
| MOZ_ASSERT(numLanesFromLHS < 4); |
| |
| // If all values stay in their lane, this is a blend. |
| if (AssemblerX86Shared::HasSSE41()) { |
| if (x % 4 == 0 && y % 4 == 1 && z % 4 == 2 && w % 4 == 3) { |
| masm.vblendps(masm.blendpsMask(x >= 4, y >= 4, z >= 4, w >= 4), rhs, lhs, out); |
| return; |
| } |
| } |
| |
| // One element comes from the second vector, all other elements from the first. |
| if (numLanesFromLHS == 3) { |
| unsigned firstMask = -1, secondMask = -1; |
| |
| // register-register vmovss preserves the high lanes. |
| if (ins->lanesMatch(4, 1, 2, 3) && rhs.kind() == Operand::FPREG) { |
| masm.vmovss(FloatRegister::FromCode(rhs.fpu()), lhs, out); |
| return; |
| } |
| |
| // SSE4.1 vinsertps can handle any single element. |
| unsigned numLanesUnchanged = (x == 0) + (y == 1) + (z == 2) + (w == 3); |
| if (AssemblerX86Shared::HasSSE41() && numLanesUnchanged == 3) { |
| SimdLane srcLane; |
| SimdLane dstLane; |
| if (x >= 4) { |
| srcLane = SimdLane(x - 4); |
| dstLane = LaneX; |
| } else if (y >= 4) { |
| srcLane = SimdLane(y - 4); |
| dstLane = LaneY; |
| } else if (z >= 4) { |
| srcLane = SimdLane(z - 4); |
| dstLane = LaneZ; |
| } else { |
| MOZ_ASSERT(w >= 4); |
| srcLane = SimdLane(w - 4); |
| dstLane = LaneW; |
| } |
| masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, lhs, out); |
| return; |
| } |
| |
| FloatRegister rhsCopy = ToFloatRegister(ins->temp()); |
| |
| if (x < 4 && y < 4) { |
| if (w >= 4) { |
| w %= 4; |
| // T = (Rw Rw Lz Lz) = vshufps(firstMask, lhs, rhs, rhsCopy) |
| firstMask = MacroAssembler::ComputeShuffleMask(w, w, z, z); |
| // (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = vshufps(secondMask, T, lhs, out) |
| secondMask = MacroAssembler::ComputeShuffleMask(x, y, LaneZ, LaneX); |
| } else { |
| MOZ_ASSERT(z >= 4); |
| z %= 4; |
| // T = (Rz Rz Lw Lw) = vshufps(firstMask, lhs, rhs, rhsCopy) |
| firstMask = MacroAssembler::ComputeShuffleMask(z, z, w, w); |
| // (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = vshufps(secondMask, T, lhs, out) |
| secondMask = MacroAssembler::ComputeShuffleMask(x, y, LaneX, LaneZ); |
| } |
| |
| masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy); |
| masm.vshufps(secondMask, rhsCopy, lhs, out); |
| return; |
| } |
| |
| MOZ_ASSERT(z < 4 && w < 4); |
| |
| if (y >= 4) { |
| y %= 4; |
| // T = (Ry Ry Lx Lx) = vshufps(firstMask, lhs, rhs, rhsCopy) |
| firstMask = MacroAssembler::ComputeShuffleMask(y, y, x, x); |
| // (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = vshufps(secondMask, lhs, T, out) |
| secondMask = MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, z, w); |
| } else { |
| MOZ_ASSERT(x >= 4); |
| x %= 4; |
| // T = (Rx Rx Ly Ly) = vshufps(firstMask, lhs, rhs, rhsCopy) |
| firstMask = MacroAssembler::ComputeShuffleMask(x, x, y, y); |
| // (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = vshufps(secondMask, lhs, T, out) |
| secondMask = MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, z, w); |
| } |
| |
| masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy); |
| if (AssemblerX86Shared::HasAVX()) { |
| masm.vshufps(secondMask, lhs, rhsCopy, out); |
| } else { |
| masm.vshufps(secondMask, lhs, rhsCopy, rhsCopy); |
| masm.moveFloat32x4(rhsCopy, out); |
| } |
| return; |
| } |
| |
| // Two elements from one vector, two other elements from the other |
| MOZ_ASSERT(numLanesFromLHS == 2); |
| |
| // TODO Here and below, the symmetric case would be handier and would avoid a |
| // move, but it can't be reached because operands would get swapped (bug 1084404). |
| if (ins->lanesMatch(2, 3, 6, 7)) { |
| ScratchSimd128Scope scratch(masm); |
| if (AssemblerX86Shared::HasAVX()) { |
| FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); |
| masm.vmovhlps(lhs, rhsCopy, out); |
| } else { |
| masm.loadAlignedFloat32x4(rhs, scratch); |
| masm.vmovhlps(lhs, scratch, scratch); |
| masm.moveFloat32x4(scratch, out); |
| } |
| return; |
| } |
| |
| if (ins->lanesMatch(0, 1, 4, 5)) { |
| FloatRegister rhsCopy; |
| ScratchSimd128Scope scratch(masm); |
| if (rhs.kind() == Operand::FPREG) { |
| // No need to make an actual copy, since the operand is already |
| // in a register, and it won't be clobbered by the vmovlhps. |
| rhsCopy = FloatRegister::FromCode(rhs.fpu()); |
| } else { |
| masm.loadAlignedFloat32x4(rhs, scratch); |
| rhsCopy = scratch; |
| } |
| masm.vmovlhps(rhsCopy, lhs, out); |
| return; |
| } |
| |
| if (ins->lanesMatch(0, 4, 1, 5)) { |
| masm.vunpcklps(rhs, lhs, out); |
| return; |
| } |
| |
| // TODO swapped case would be better (bug 1084404) |
| if (ins->lanesMatch(4, 0, 5, 1)) { |
| ScratchSimd128Scope scratch(masm); |
| if (AssemblerX86Shared::HasAVX()) { |
| FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); |
| masm.vunpcklps(lhs, rhsCopy, out); |
| } else { |
| masm.loadAlignedFloat32x4(rhs, scratch); |
| masm.vunpcklps(lhs, scratch, scratch); |
| masm.moveFloat32x4(scratch, out); |
| } |
| return; |
| } |
| |
| if (ins->lanesMatch(2, 6, 3, 7)) { |
| masm.vunpckhps(rhs, lhs, out); |
| return; |
| } |
| |
| // TODO swapped case would be better (bug 1084404) |
| if (ins->lanesMatch(6, 2, 7, 3)) { |
| ScratchSimd128Scope scratch(masm); |
| if (AssemblerX86Shared::HasAVX()) { |
| FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); |
| masm.vunpckhps(lhs, rhsCopy, out); |
| } else { |
| masm.loadAlignedFloat32x4(rhs, scratch); |
| masm.vunpckhps(lhs, scratch, scratch); |
| masm.moveFloat32x4(scratch, out); |
| } |
| return; |
| } |
| |
| // In one vshufps |
| if (x < 4 && y < 4) { |
| mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4); |
| masm.vshufps(mask, rhs, lhs, out); |
| return; |
| } |
| |
| // At creation, we should have explicitly swapped in this case. |
| MOZ_ASSERT(!(z >= 4 && w >= 4)); |
| |
| // In two vshufps, for the most generic case: |
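| // The first vshufps gathers the two required LHS lanes into the low half of |
| // lhs and the two required RHS lanes into its high half; the second vshufps |
| // then permutes those four lanes into their final positions. |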
| uint32_t firstMask[4], secondMask[4]; |
| unsigned i = 0, j = 2, k = 0; |
| |
| #define COMPUTE_MASK(lane) \ |
| if (lane >= 4) { \ |
| firstMask[j] = lane % 4; \ |
| secondMask[k++] = j++; \ |
| } else { \ |
| firstMask[i] = lane; \ |
| secondMask[k++] = i++; \ |
| } |
| |
| COMPUTE_MASK(x) |
| COMPUTE_MASK(y) |
| COMPUTE_MASK(z) |
| COMPUTE_MASK(w) |
| #undef COMPUTE_MASK |
| |
| MOZ_ASSERT(i == 2 && j == 4 && k == 4); |
| |
| mask = MacroAssembler::ComputeShuffleMask(firstMask[0], firstMask[1], |
| firstMask[2], firstMask[3]); |
| masm.vshufps(mask, rhs, lhs, lhs); |
| |
| mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1], |
| secondMask[2], secondMask[3]); |
| masm.vshufps(mask, lhs, lhs, lhs); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins) |
| { |
| static const SimdConstant allOnes = SimdConstant::SplatX4(-1); |
| |
| FloatRegister lhs = ToFloatRegister(ins->lhs()); |
| Operand rhs = ToOperand(ins->rhs()); |
| MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs); |
| |
| ScratchSimd128Scope scratch(masm); |
| |
| MSimdBinaryComp::Operation op = ins->operation(); |
| switch (op) { |
| case MSimdBinaryComp::greaterThan: |
| masm.packedGreaterThanInt32x4(rhs, lhs); |
| return; |
| case MSimdBinaryComp::equal: |
| masm.packedEqualInt32x4(rhs, lhs); |
| return; |
| case MSimdBinaryComp::lessThan: |
| // src := rhs |
| if (rhs.kind() == Operand::FPREG) |
| masm.moveInt32x4(ToFloatRegister(ins->rhs()), scratch); |
| else |
| masm.loadAlignedInt32x4(rhs, scratch); |
| |
| // src := src > lhs (i.e. lhs < rhs) |
| // Improve by doing custom lowering (rhs is tied to the output register) |
| masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch); |
| masm.moveInt32x4(scratch, lhs); |
| return; |
| case MSimdBinaryComp::notEqual: |
| // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we |
| // should invert the comparison by, e.g. swapping the arms of a select |
| // if that's what it's used in. |
| masm.loadConstantInt32x4(allOnes, scratch); |
| masm.packedEqualInt32x4(rhs, lhs); |
| masm.bitwiseXorX4(Operand(scratch), lhs); |
| return; |
| case MSimdBinaryComp::greaterThanOrEqual: |
| // src := rhs |
| if (rhs.kind() == Operand::FPREG) |
| masm.moveInt32x4(ToFloatRegister(ins->rhs()), scratch); |
| else |
| masm.loadAlignedInt32x4(rhs, scratch); |
| masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch); |
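| // lhs := ~(rhs > lhs), i.e. lhs >= rhs. |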
| masm.loadConstantInt32x4(allOnes, lhs); |
| masm.bitwiseXorX4(Operand(scratch), lhs); |
| return; |
| case MSimdBinaryComp::lessThanOrEqual: |
| // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. |
| masm.loadConstantInt32x4(allOnes, scratch); |
| masm.packedGreaterThanInt32x4(rhs, lhs); |
| masm.bitwiseXorX4(Operand(scratch), lhs); |
| return; |
| } |
| MOZ_CRASH("unexpected SIMD op"); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdBinaryCompFx4(LSimdBinaryCompFx4* ins) |
| { |
| FloatRegister lhs = ToFloatRegister(ins->lhs()); |
| Operand rhs = ToOperand(ins->rhs()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| MSimdBinaryComp::Operation op = ins->operation(); |
| switch (op) { |
| case MSimdBinaryComp::equal: |
| masm.vcmpeqps(rhs, lhs, output); |
| return; |
| case MSimdBinaryComp::lessThan: |
| masm.vcmpltps(rhs, lhs, output); |
| return; |
| case MSimdBinaryComp::lessThanOrEqual: |
| masm.vcmpleps(rhs, lhs, output); |
| return; |
| case MSimdBinaryComp::notEqual: |
| masm.vcmpneqps(rhs, lhs, output); |
| return; |
| case MSimdBinaryComp::greaterThanOrEqual: |
| case MSimdBinaryComp::greaterThan: |
| // We reverse these before register allocation so that we don't have to |
| // copy into and out of temporaries after codegen. |
| MOZ_CRASH("lowering should have reversed this"); |
| } |
| MOZ_CRASH("unexpected SIMD op"); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4* ins) |
| { |
| FloatRegister lhs = ToFloatRegister(ins->lhs()); |
| Operand rhs = ToOperand(ins->rhs()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| ScratchSimd128Scope scratch(masm); |
| |
| MSimdBinaryArith::Operation op = ins->operation(); |
| switch (op) { |
| case MSimdBinaryArith::Op_add: |
| masm.vpaddd(rhs, lhs, output); |
| return; |
| case MSimdBinaryArith::Op_sub: |
| masm.vpsubd(rhs, lhs, output); |
| return; |
| case MSimdBinaryArith::Op_mul: { |
| if (AssemblerX86Shared::HasSSE41()) { |
| masm.vpmulld(rhs, lhs, output); |
| return; |
| } |
| |
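| // Without SSE4.1's pmulld, use pmuludq, which multiplies the even-numbered |
| // lanes (0 and 2) to 64 bits: do it once for the even lanes and once for |
| // the odd lanes shuffled into even positions, then shuffle the low 32 bits |
| // of each product back into place. |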
| masm.loadAlignedInt32x4(rhs, scratch); |
| masm.vpmuludq(lhs, scratch, scratch); |
| // scratch contains (Rx, _, Rz, _) where R is the resulting vector. |
| |
| FloatRegister temp = ToFloatRegister(ins->temp()); |
| masm.vpshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), lhs, lhs); |
| masm.vpshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), rhs, temp); |
| masm.vpmuludq(temp, lhs, lhs); |
| // lhs contains (Ry, _, Rw, _) where R is the resulting vector. |
| |
| masm.vshufps(MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, LaneX, LaneZ), scratch, lhs, lhs); |
| // lhs contains (Ry, Rw, Rx, Rz) |
| masm.vshufps(MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, LaneW, LaneY), lhs, lhs, lhs); |
| return; |
| } |
| case MSimdBinaryArith::Op_div: |
| // x86 doesn't have SIMD i32 div. |
| break; |
| case MSimdBinaryArith::Op_max: |
| // we can do max with a single instruction only if we have SSE4.1 |
| // using the PMAXSD instruction. |
| break; |
| case MSimdBinaryArith::Op_min: |
| // we can do min with a single instruction only if we have SSE4.1 |
| // using the PMINSD instruction. |
| break; |
| case MSimdBinaryArith::Op_minNum: |
| case MSimdBinaryArith::Op_maxNum: |
| break; |
| } |
| MOZ_CRASH("unexpected SIMD op"); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4* ins) |
| { |
| FloatRegister lhs = ToFloatRegister(ins->lhs()); |
| Operand rhs = ToOperand(ins->rhs()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| ScratchSimd128Scope scratch(masm); |
| |
| MSimdBinaryArith::Operation op = ins->operation(); |
| switch (op) { |
| case MSimdBinaryArith::Op_add: |
| masm.vaddps(rhs, lhs, output); |
| return; |
| case MSimdBinaryArith::Op_sub: |
| masm.vsubps(rhs, lhs, output); |
| return; |
| case MSimdBinaryArith::Op_mul: |
| masm.vmulps(rhs, lhs, output); |
| return; |
| case MSimdBinaryArith::Op_div: |
| masm.vdivps(rhs, lhs, output); |
| return; |
| case MSimdBinaryArith::Op_max: { |
| FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, scratch); |
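| // scratch := all-ones in every lane where lhs or rhs is NaN (unordered); |
| // it is OR'd back in below so that those lanes produce a NaN. |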
| masm.vcmpunordps(rhs, lhsCopy, scratch); |
| |
| FloatRegister tmp = ToFloatRegister(ins->temp()); |
| FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, tmp); |
| masm.vmaxps(Operand(lhs), rhsCopy, tmp); |
| masm.vmaxps(rhs, lhs, output); |
| |
| masm.vandps(tmp, output, output); |
| masm.vorps(scratch, output, output); // or in the all-ones NaNs |
| return; |
| } |
| case MSimdBinaryArith::Op_min: { |
| FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch); |
| masm.vminps(Operand(lhs), rhsCopy, scratch); |
| masm.vminps(rhs, lhs, output); |
| masm.vorps(scratch, output, output); // NaN or'd with arbitrary bits is NaN |
| return; |
| } |
| case MSimdBinaryArith::Op_minNum: { |
| FloatRegister tmp = ToFloatRegister(ins->temp()); |
| masm.loadConstantInt32x4(SimdConstant::SplatX4(int32_t(0x80000000)), tmp); |
| |
| FloatRegister mask = scratch; |
| FloatRegister tmpCopy = masm.reusedInputFloat32x4(tmp, scratch); |
| masm.vpcmpeqd(Operand(lhs), tmpCopy, mask); |
| masm.vandps(tmp, mask, mask); |
| |
| FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp); |
| masm.vminps(rhs, lhsCopy, tmp); |
| masm.vorps(mask, tmp, tmp); |
| |
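| // mask := lanes where rhs is NaN; minNum returns the other operand (lhs) |
| // there, which the blend below selects. |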
| FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask); |
| masm.vcmpneqps(rhs, rhsCopy, mask); |
| |
| if (AssemblerX86Shared::HasAVX()) { |
| masm.vblendvps(mask, lhs, tmp, output); |
| } else { |
| // Emulate vblendvps. |
| // With SSE4.1 we could use blendvps, however it's awkward since |
| // it requires the mask to be in xmm0. |
| if (lhs != output) |
| masm.moveFloat32x4(lhs, output); |
| masm.vandps(Operand(mask), output, output); |
| masm.vandnps(Operand(tmp), mask, mask); |
| masm.vorps(Operand(mask), output, output); |
| } |
| return; |
| } |
| case MSimdBinaryArith::Op_maxNum: { |
| FloatRegister mask = scratch; |
| masm.loadConstantInt32x4(SimdConstant::SplatX4(0), mask); |
| masm.vpcmpeqd(Operand(lhs), mask, mask); |
| |
| FloatRegister tmp = ToFloatRegister(ins->temp()); |
| masm.loadConstantInt32x4(SimdConstant::SplatX4(int32_t(0x80000000)), tmp); |
| masm.vandps(tmp, mask, mask); |
| |
| FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp); |
| masm.vmaxps(rhs, lhsCopy, tmp); |
| masm.vandnps(Operand(tmp), mask, mask); |
| |
| // Ensure tmp always contains the temporary result |
| mask = tmp; |
| tmp = scratch; |
| |
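| // mask := lanes where rhs is NaN; maxNum returns the other operand (lhs) |
| // there, which the blend below selects. |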
| FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask); |
| masm.vcmpneqps(rhs, rhsCopy, mask); |
| |
| if (AssemblerX86Shared::HasAVX()) { |
| masm.vblendvps(mask, lhs, tmp, output); |
| } else { |
| // Emulate vblendvps. |
| // With SSE4.1 we could use blendvps, however it's awkward since |
| // it requires the mask to be in xmm0. |
| if (lhs != output) |
| masm.moveFloat32x4(lhs, output); |
| masm.vandps(Operand(mask), output, output); |
| masm.vandnps(Operand(tmp), mask, mask); |
| masm.vorps(Operand(mask), output, output); |
| } |
| return; |
| } |
| } |
| MOZ_CRASH("unexpected SIMD op"); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4* ins) |
| { |
| Operand in = ToOperand(ins->input()); |
| FloatRegister out = ToFloatRegister(ins->output()); |
| |
| static const SimdConstant allOnes = SimdConstant::CreateX4(-1, -1, -1, -1); |
| |
| switch (ins->operation()) { |
| case MSimdUnaryArith::neg: |
| masm.zeroInt32x4(out); |
| masm.packedSubInt32(in, out); |
| return; |
| case MSimdUnaryArith::not_: |
| masm.loadConstantInt32x4(allOnes, out); |
| masm.bitwiseXorX4(in, out); |
| return; |
| case MSimdUnaryArith::abs: |
| case MSimdUnaryArith::reciprocalApproximation: |
| case MSimdUnaryArith::reciprocalSqrtApproximation: |
| case MSimdUnaryArith::sqrt: |
| break; |
| } |
| MOZ_CRASH("unexpected SIMD op"); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdUnaryArithFx4(LSimdUnaryArithFx4* ins) |
| { |
| Operand in = ToOperand(ins->input()); |
| FloatRegister out = ToFloatRegister(ins->output()); |
| |
| // All ones but the sign bit |
| float signMask = SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits); |
| static const SimdConstant signMasks = SimdConstant::SplatX4(signMask); |
| |
| // All ones including the sign bit |
| float ones = SpecificNaN<float>(1, FloatingPoint<float>::kSignificandBits); |
| static const SimdConstant allOnes = SimdConstant::SplatX4(ones); |
| |
| // All zeros but the sign bit |
| static const SimdConstant minusZero = SimdConstant::SplatX4(-0.f); |
| |
| switch (ins->operation()) { |
| case MSimdUnaryArith::abs: |
| masm.loadConstantFloat32x4(signMasks, out); |
| masm.bitwiseAndX4(in, out); |
| return; |
| case MSimdUnaryArith::neg: |
| masm.loadConstantFloat32x4(minusZero, out); |
| masm.bitwiseXorX4(in, out); |
| return; |
| case MSimdUnaryArith::not_: |
| masm.loadConstantFloat32x4(allOnes, out); |
| masm.bitwiseXorX4(in, out); |
| return; |
| case MSimdUnaryArith::reciprocalApproximation: |
| masm.packedRcpApproximationFloat32x4(in, out); |
| return; |
| case MSimdUnaryArith::reciprocalSqrtApproximation: |
| masm.packedRcpSqrtApproximationFloat32x4(in, out); |
| return; |
| case MSimdUnaryArith::sqrt: |
| masm.packedSqrtFloat32x4(in, out); |
| return; |
| } |
| MOZ_CRASH("unexpected SIMD op"); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4* ins) |
| { |
| FloatRegister lhs = ToFloatRegister(ins->lhs()); |
| Operand rhs = ToOperand(ins->rhs()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| |
| MSimdBinaryBitwise::Operation op = ins->operation(); |
| switch (op) { |
| case MSimdBinaryBitwise::and_: |
| if (ins->type() == MIRType_Float32x4) |
| masm.vandps(rhs, lhs, output); |
| else |
| masm.vpand(rhs, lhs, output); |
| return; |
| case MSimdBinaryBitwise::or_: |
| if (ins->type() == MIRType_Float32x4) |
| masm.vorps(rhs, lhs, output); |
| else |
| masm.vpor(rhs, lhs, output); |
| return; |
| case MSimdBinaryBitwise::xor_: |
| if (ins->type() == MIRType_Float32x4) |
| masm.vxorps(rhs, lhs, output); |
| else |
| masm.vpxor(rhs, lhs, output); |
| return; |
| } |
| MOZ_CRASH("unexpected SIMD bitwise op"); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins) |
| { |
| FloatRegister out = ToFloatRegister(ins->output()); |
| MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0); |
| |
| // If the shift count is greater than 31, lsh and ursh zero all lanes, and |
| // rsh extends the sign bit to all bits, per the SIMD.js spec |
| // (as of March 19th 2015). |
| const LAllocation* val = ins->value(); |
| if (val->isConstant()) { |
| uint32_t c = uint32_t(ToInt32(val)); |
| if (c > 31) { |
| switch (ins->operation()) { |
| case MSimdShift::lsh: |
| case MSimdShift::ursh: |
| masm.zeroInt32x4(out); |
| return; |
| default: |
| c = 31; |
| break; |
| } |
| } |
| Imm32 count(c); |
| switch (ins->operation()) { |
| case MSimdShift::lsh: |
| masm.packedLeftShiftByScalar(count, out); |
| return; |
| case MSimdShift::rsh: |
| masm.packedRightShiftByScalar(count, out); |
| return; |
| case MSimdShift::ursh: |
| masm.packedUnsignedRightShiftByScalar(count, out); |
| return; |
| } |
| MOZ_CRASH("unexpected SIMD bitwise op"); |
| } |
| |
| MOZ_ASSERT(val->isRegister()); |
| ScratchFloat32Scope scratch(masm); |
| masm.vmovd(ToRegister(val), scratch); |
| |
| switch (ins->operation()) { |
| case MSimdShift::lsh: |
| masm.packedLeftShiftByScalar(scratch, out); |
| return; |
| case MSimdShift::rsh: |
| masm.packedRightShiftByScalar(scratch, out); |
| return; |
| case MSimdShift::ursh: |
| masm.packedUnsignedRightShiftByScalar(scratch, out); |
| return; |
| } |
| MOZ_CRASH("unexpected SIMD bitwise op"); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect* ins) |
| { |
| FloatRegister mask = ToFloatRegister(ins->mask()); |
| FloatRegister onTrue = ToFloatRegister(ins->lhs()); |
| FloatRegister onFalse = ToFloatRegister(ins->rhs()); |
| FloatRegister output = ToFloatRegister(ins->output()); |
| FloatRegister temp = ToFloatRegister(ins->temp()); |
| |
| if (onTrue != output) |
| masm.vmovaps(onTrue, output); |
| if (mask != temp) |
| masm.vmovaps(mask, temp); |
| |
| MSimdSelect* mir = ins->mir(); |
| if (mir->isElementWise()) { |
| if (AssemblerX86Shared::HasAVX()) { |
| masm.vblendvps(mask, onTrue, onFalse, output); |
| return; |
| } |
| |
| // SSE4.1 has plain blendvps which can do this, but it is awkward |
| // to use because it requires the mask to be in xmm0. |
| |
| // Propagate sign to all bits of mask vector, if necessary. |
| if (!mir->mask()->isSimdBinaryComp()) |
| masm.packedRightShiftByScalar(Imm32(31), temp); |
| } |
| |
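| // output = (onTrue & mask) | (onFalse & ~mask): keep the onTrue bits where |
| // the mask is set and the onFalse bits elsewhere. |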
| masm.bitwiseAndX4(Operand(temp), output); |
| masm.bitwiseAndNotX4(Operand(onFalse), temp); |
| masm.bitwiseOrX4(Operand(temp), output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir) |
| { |
| Register elements = ToRegister(lir->elements()); |
| AnyRegister output = ToAnyRegister(lir->output()); |
| Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); |
| |
| Register oldval = ToRegister(lir->oldval()); |
| Register newval = ToRegister(lir->newval()); |
| |
| Scalar::Type arrayType = lir->mir()->arrayType(); |
| int width = Scalar::byteSize(arrayType); |
| |
| if (lir->index()->isConstant()) { |
| Address dest(elements, ToInt32(lir->index()) * width); |
| masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output); |
| } else { |
| BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); |
| masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitAtomicExchangeTypedArrayElement(LAtomicExchangeTypedArrayElement* lir) |
| { |
| Register elements = ToRegister(lir->elements()); |
| AnyRegister output = ToAnyRegister(lir->output()); |
| Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); |
| |
| Register value = ToRegister(lir->value()); |
| |
| Scalar::Type arrayType = lir->mir()->arrayType(); |
| int width = Scalar::byteSize(arrayType); |
| |
| if (lir->index()->isConstant()) { |
| Address dest(elements, ToInt32(lir->index()) * width); |
| masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output); |
| } else { |
| BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); |
| masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output); |
| } |
| } |
| |
| template<typename S, typename T> |
| void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, |
| const T& mem, Register temp1, Register temp2, AnyRegister output) |
| { |
| switch (arrayType) { |
| case Scalar::Int8: |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicFetchAdd8SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicFetchSub8SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicFetchAnd8SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicFetchOr8SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicFetchXor8SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| break; |
| case Scalar::Uint8: |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicFetchAdd8ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicFetchSub8ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicFetchAnd8ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicFetchOr8ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicFetchXor8ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| break; |
| case Scalar::Int16: |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicFetchAdd16SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicFetchSub16SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicFetchAnd16SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicFetchOr16SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicFetchXor16SignExtend(value, mem, temp1, output.gpr()); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| break; |
| case Scalar::Uint16: |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicFetchAdd16ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicFetchSub16ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicFetchAnd16ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicFetchOr16ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicFetchXor16ZeroExtend(value, mem, temp1, output.gpr()); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| break; |
| case Scalar::Int32: |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicFetchAdd32(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicFetchSub32(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicFetchAnd32(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicFetchOr32(value, mem, temp1, output.gpr()); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicFetchXor32(value, mem, temp1, output.gpr()); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| break; |
| case Scalar::Uint32: |
| // At the moment, the code in MCallOptimize.cpp requires the output |
| // type to be double for uint32 arrays. See bug 1077305. |
| MOZ_ASSERT(output.isFloat()); |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicFetchAdd32(value, mem, InvalidReg, temp1); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicFetchSub32(value, mem, InvalidReg, temp1); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicFetchAnd32(value, mem, temp2, temp1); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicFetchOr32(value, mem, temp2, temp1); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicFetchXor32(value, mem, temp2, temp1); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| masm.convertUInt32ToDouble(temp1, output.fpu()); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array type"); |
| } |
| } |
| |
| template void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, |
| const Imm32& value, const Address& mem, |
| Register temp1, Register temp2, AnyRegister output); |
| template void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, |
| const Imm32& value, const BaseIndex& mem, |
| Register temp1, Register temp2, AnyRegister output); |
| template void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, |
| const Register& value, const Address& mem, |
| Register temp1, Register temp2, AnyRegister output); |
| template void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, |
| const Register& value, const BaseIndex& mem, |
| Register temp1, Register temp2, AnyRegister output); |
| |
| // Binary operation for effect, result discarded. |
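| // With no observed result, each operation reduces to a single locked |
| // read-modify-write; none of the calls below need temps or a CMPXCHG loop. |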
| template<typename S, typename T> |
| void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, |
| const T& mem) |
| { |
| switch (arrayType) { |
| case Scalar::Int8: |
| case Scalar::Uint8: |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicAdd8(value, mem); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicSub8(value, mem); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicAnd8(value, mem); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicOr8(value, mem); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicXor8(value, mem); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| break; |
| case Scalar::Int16: |
| case Scalar::Uint16: |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicAdd16(value, mem); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicSub16(value, mem); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicAnd16(value, mem); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicOr16(value, mem); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicXor16(value, mem); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| break; |
| case Scalar::Int32: |
| case Scalar::Uint32: |
| switch (op) { |
| case AtomicFetchAddOp: |
| masm.atomicAdd32(value, mem); |
| break; |
| case AtomicFetchSubOp: |
| masm.atomicSub32(value, mem); |
| break; |
| case AtomicFetchAndOp: |
| masm.atomicAnd32(value, mem); |
| break; |
| case AtomicFetchOrOp: |
| masm.atomicOr32(value, mem); |
| break; |
| case AtomicFetchXorOp: |
| masm.atomicXor32(value, mem); |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array atomic operation"); |
| } |
| break; |
| default: |
| MOZ_CRASH("Invalid typed array type"); |
| } |
| } |
| |
| template void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, |
| const Imm32& value, const Address& mem); |
| template void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, |
| const Imm32& value, const BaseIndex& mem); |
| template void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, |
| const Register& value, const Address& mem); |
| template void |
| CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, |
| const Register& value, const BaseIndex& mem); |
| |
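| // Dispatch to the Imm32 or Register overload above, depending on whether the |
| // value operand is a compile-time constant. |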
| template <typename T> |
| static inline void |
| AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op, |
| Scalar::Type arrayType, const LAllocation* value, const T& mem, |
| Register temp1, Register temp2, AnyRegister output) |
| { |
| if (value->isConstant()) |
| cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, temp1, temp2, output); |
| else |
| cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, temp1, temp2, output); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir) |
| { |
| MOZ_ASSERT(lir->mir()->hasUses()); |
| |
| AnyRegister output = ToAnyRegister(lir->output()); |
| Register elements = ToRegister(lir->elements()); |
| Register temp1 = lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1()); |
| Register temp2 = lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2()); |
| const LAllocation* value = lir->value(); |
| |
| Scalar::Type arrayType = lir->mir()->arrayType(); |
| int width = Scalar::byteSize(arrayType); |
| |
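| // A constant index folds into the displacement of an Address; otherwise a |
| // BaseIndex scaled by the element width is used. |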
| if (lir->index()->isConstant()) { |
| Address mem(elements, ToInt32(lir->index()) * width); |
| AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output); |
| } else { |
| BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); |
| AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output); |
| } |
| } |
| |
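| // Effect-only counterpart of the dispatch helper above. |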
| template <typename T> |
| static inline void |
| AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op, |
| Scalar::Type arrayType, const LAllocation* value, const T& mem) |
| { |
| if (value->isConstant()) |
| cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem); |
| else |
| cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem); |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir) |
| { |
| MOZ_ASSERT(!lir->mir()->hasUses()); |
| |
| Register elements = ToRegister(lir->elements()); |
| const LAllocation* value = lir->value(); |
| Scalar::Type arrayType = lir->mir()->arrayType(); |
| int width = Scalar::byteSize(arrayType); |
| |
| if (lir->index()->isConstant()) { |
| Address mem(elements, ToInt32(lir->index()) * width); |
| AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem); |
| } else { |
| BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width)); |
| AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem); |
| } |
| } |
| |
| void |
| CodeGeneratorX86Shared::visitMemoryBarrier(LMemoryBarrier* ins) |
| { |
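| // On x86 only StoreLoad reordering is observable, so only that barrier type |
| // requires an explicit fence; the other orderings are already guaranteed by |
| // the hardware memory model. |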
| if (ins->type() & MembarStoreLoad) |
| masm.storeLoadFence(); |
| } |
| |
| void |
| CodeGeneratorX86Shared::setReturnDoubleRegs(LiveRegisterSet* regs) |
| { |
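| // The float32, double, and SIMD128 return registers all alias xmm0 (asserted |
| // below), so add every view of the register to the set. |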
| MOZ_ASSERT(ReturnFloat32Reg.encoding() == X86Encoding::xmm0); |
| MOZ_ASSERT(ReturnDoubleReg.encoding() == X86Encoding::xmm0); |
| MOZ_ASSERT(ReturnSimd128Reg.encoding() == X86Encoding::xmm0); |
| regs->add(ReturnFloat32Reg); |
| regs->add(ReturnDoubleReg); |
| regs->add(ReturnSimd128Reg); |
| } |
| |
| } // namespace jit |
| } // namespace js |