| //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the PPCISelLowering class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "PPCISelLowering.h" |
| #include "MCTargetDesc/PPCPredicates.h" |
| #include "PPC.h" |
| #include "PPCCCState.h" |
| #include "PPCCallingConv.h" |
| #include "PPCFrameLowering.h" |
| #include "PPCInstrInfo.h" |
| #include "PPCMachineFunctionInfo.h" |
| #include "PPCPerfectShuffle.h" |
| #include "PPCRegisterInfo.h" |
| #include "PPCSubtarget.h" |
| #include "PPCTargetMachine.h" |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/None.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineJumpTableInfo.h" |
| #include "llvm/CodeGen/MachineLoopInfo.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/MachineOperand.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGNodes.h" |
| #include "llvm/CodeGen/TargetInstrInfo.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/CallSite.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugLoc.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Use.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/MC/MCExpr.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/Support/AtomicOrdering.h" |
| #include "llvm/Support/BranchProbability.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/Format.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <iterator> |
| #include <list> |
| #include <utility> |
| #include <vector> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "ppc-lowering" |
| |
| static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", |
| cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); |
| |
| static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", |
| cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); |
| |
| static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", |
| cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); |
| |
| static cl::opt<bool> DisableSCO("disable-ppc-sco", |
| cl::desc("disable sibling call optimization on ppc"), cl::Hidden); |
| |
| static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision", |
| cl::desc("enable quad precision float support on ppc"), cl::Hidden); |
| |
| STATISTIC(NumTailCalls, "Number of tail calls"); |
| STATISTIC(NumSiblingCalls, "Number of sibling calls"); |
| |
| static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); |
| |
| // FIXME: Remove this once the bug has been fixed! |
| extern cl::opt<bool> ANDIGlueBug; |
| |
| PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, |
| const PPCSubtarget &STI) |
| : TargetLowering(TM), Subtarget(STI) { |
| // Use _setjmp/_longjmp instead of setjmp/longjmp. |
| setUseUnderscoreSetJmp(true); |
| setUseUnderscoreLongJmp(true); |
| |
| // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all |
| // arguments are at least 4/8 bytes aligned. |
| bool isPPC64 = Subtarget.isPPC64(); |
| setMinStackArgumentAlignment(isPPC64 ? 8:4); |
| |
| // Set up the register classes. |
| addRegisterClass(MVT::i32, &PPC::GPRCRegClass); |
| if (!useSoftFloat()) { |
| if (hasSPE()) { |
| addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass); |
| addRegisterClass(MVT::f64, &PPC::SPERCRegClass); |
| } else { |
| addRegisterClass(MVT::f32, &PPC::F4RCRegClass); |
| addRegisterClass(MVT::f64, &PPC::F8RCRegClass); |
| } |
| } |
| |
| // Match BITREVERSE to a customized fast code sequence in the .td file. |
| setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
| setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); |
| |
| // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended. |
| setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); |
| |
| // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. |
| for (MVT VT : MVT::integer_valuetypes()) { |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); |
| } |
| |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| |
| // PowerPC has pre-increment loads and stores. |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); |
| if (!Subtarget.hasSPE()) { |
| setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); |
| setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); |
| } |
| |
| // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry. |
| const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; |
| for (MVT VT : ScalarIntVTs) { |
| setOperationAction(ISD::ADDC, VT, Legal); |
| setOperationAction(ISD::ADDE, VT, Legal); |
| setOperationAction(ISD::SUBC, VT, Legal); |
| setOperationAction(ISD::SUBE, VT, Legal); |
| } |
| |
| if (Subtarget.useCRBits()) { |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
| |
| if (isPPC64 || Subtarget.hasFPCVT()) { |
| setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); |
| AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); |
| AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, |
| isPPC64 ? MVT::i64 : MVT::i32); |
| } else { |
| setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); |
| } |
| |
| // PowerPC does not support direct load/store of condition registers. |
| setOperationAction(ISD::LOAD, MVT::i1, Custom); |
| setOperationAction(ISD::STORE, MVT::i1, Custom); |
| |
| // FIXME: Remove this once the ANDI glue bug is fixed: |
| if (ANDIGlueBug) |
| setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); |
| |
| for (MVT VT : MVT::integer_valuetypes()) { |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); |
| setTruncStoreAction(VT, MVT::i1, Expand); |
| } |
| |
| addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); |
| } |
| |
| // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on |
| // PPC (the libcall is not available). |
| setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); |
| |
| // We do not currently implement these libm ops for PowerPC. |
| setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); |
| setOperationAction(ISD::FREM, MVT::ppcf128, Expand); |
| |
| // PowerPC has no SREM/UREM instructions unless we are on P9. |
| // On P9 we may use a hardware instruction to compute the remainder. |
| // The instructions are not marked Legal because, in the cases where both |
| // the remainder and the quotient are required, it is more efficient to |
| // compute the remainder from the result of the division than to use the |
| // remainder instruction. |
| if (Subtarget.isISA3_0()) { |
| setOperationAction(ISD::SREM, MVT::i32, Custom); |
| setOperationAction(ISD::UREM, MVT::i32, Custom); |
| setOperationAction(ISD::SREM, MVT::i64, Custom); |
| setOperationAction(ISD::UREM, MVT::i64, Custom); |
| } else { |
| setOperationAction(ISD::SREM, MVT::i32, Expand); |
| setOperationAction(ISD::UREM, MVT::i32, Expand); |
| setOperationAction(ISD::SREM, MVT::i64, Expand); |
| setOperationAction(ISD::UREM, MVT::i64, Expand); |
| } |
| |
| if (Subtarget.hasP9Vector()) { |
| setOperationAction(ISD::ABS, MVT::v4i32, Legal); |
| setOperationAction(ISD::ABS, MVT::v8i16, Legal); |
| setOperationAction(ISD::ABS, MVT::v16i8, Legal); |
| } |
| |
| // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. |
| setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); |
| setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i64, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i64, Expand); |
| |
| // We don't support sin/cos/sqrt/fmod/pow |
| setOperationAction(ISD::FSIN , MVT::f64, Expand); |
| setOperationAction(ISD::FCOS , MVT::f64, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
| setOperationAction(ISD::FREM , MVT::f64, Expand); |
| setOperationAction(ISD::FPOW , MVT::f64, Expand); |
| setOperationAction(ISD::FSIN , MVT::f32, Expand); |
| setOperationAction(ISD::FCOS , MVT::f32, Expand); |
| setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
| setOperationAction(ISD::FREM , MVT::f32, Expand); |
| setOperationAction(ISD::FPOW , MVT::f32, Expand); |
| if (Subtarget.hasSPE()) { |
| setOperationAction(ISD::FMA , MVT::f64, Expand); |
| setOperationAction(ISD::FMA , MVT::f32, Expand); |
| } else { |
| setOperationAction(ISD::FMA , MVT::f64, Legal); |
| setOperationAction(ISD::FMA , MVT::f32, Legal); |
| } |
| |
| setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
| |
| // Expand FSQRT unless the target has a hardware square-root instruction, |
| // or unsafe FP math lets it be synthesized from the reciprocal estimate |
| // instructions (FRSQRTE/FRE and FRSQRTES/FRES). |
| if (!Subtarget.hasFSQRT() && |
| !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && |
| Subtarget.hasFRE())) |
| setOperationAction(ISD::FSQRT, MVT::f64, Expand); |
| |
| if (!Subtarget.hasFSQRT() && |
| !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && |
| Subtarget.hasFRES())) |
| setOperationAction(ISD::FSQRT, MVT::f32, Expand); |
| |
| if (Subtarget.hasFCPSGN()) { |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); |
| } else { |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); |
| } |
| |
| if (Subtarget.hasFPRND()) { |
| setOperationAction(ISD::FFLOOR, MVT::f64, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f64, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f64, Legal); |
| setOperationAction(ISD::FROUND, MVT::f64, Legal); |
| |
| setOperationAction(ISD::FFLOOR, MVT::f32, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f32, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::f32, Legal); |
| setOperationAction(ISD::FROUND, MVT::f32, Legal); |
| } |
| |
| // PowerPC does not have BSWAP, but on P9 we can use the vector byte-reverse |
| // instruction XXBRD to speed up scalar BSWAP64. |
| // CTPOP and CTTZ were introduced in P8 and P9, respectively. |
| setOperationAction(ISD::BSWAP, MVT::i32 , Expand); |
| if (Subtarget.isISA3_0()) { |
| setOperationAction(ISD::BSWAP, MVT::i64 , Custom); |
| setOperationAction(ISD::CTTZ , MVT::i32 , Legal); |
| setOperationAction(ISD::CTTZ , MVT::i64 , Legal); |
| } else { |
| setOperationAction(ISD::BSWAP, MVT::i64 , Expand); |
| setOperationAction(ISD::CTTZ , MVT::i32 , Expand); |
| setOperationAction(ISD::CTTZ , MVT::i64 , Expand); |
| } |
| |
| if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { |
| setOperationAction(ISD::CTPOP, MVT::i32 , Legal); |
| setOperationAction(ISD::CTPOP, MVT::i64 , Legal); |
| } else { |
| setOperationAction(ISD::CTPOP, MVT::i32 , Expand); |
| setOperationAction(ISD::CTPOP, MVT::i64 , Expand); |
| } |
| |
| // PowerPC does not have ROTR |
| setOperationAction(ISD::ROTR, MVT::i32 , Expand); |
| setOperationAction(ISD::ROTR, MVT::i64 , Expand); |
| |
| if (!Subtarget.useCRBits()) { |
| // PowerPC does not have Select |
| setOperationAction(ISD::SELECT, MVT::i32, Expand); |
| setOperationAction(ISD::SELECT, MVT::i64, Expand); |
| setOperationAction(ISD::SELECT, MVT::f32, Expand); |
| setOperationAction(ISD::SELECT, MVT::f64, Expand); |
| } |
| |
| // PowerPC wants to turn select_cc of FP into fsel when possible. |
| setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
| |
| // PowerPC wants to optimize integer setcc a bit |
| if (!Subtarget.useCRBits()) |
| setOperationAction(ISD::SETCC, MVT::i32, Custom); |
| |
| // PowerPC does not have BRCOND, which requires a SETCC. |
| if (!Subtarget.useCRBits()) |
| setOperationAction(ISD::BRCOND, MVT::Other, Expand); |
| |
| setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
| |
| if (Subtarget.hasSPE()) { |
| // SPE has built-in conversions |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); |
| } else { |
| // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| |
| // PowerPC does not have [U|S]INT_TO_FP |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); |
| } |
| |
| if (Subtarget.hasDirectMove() && isPPC64) { |
| setOperationAction(ISD::BITCAST, MVT::f32, Legal); |
| setOperationAction(ISD::BITCAST, MVT::i32, Legal); |
| setOperationAction(ISD::BITCAST, MVT::i64, Legal); |
| setOperationAction(ISD::BITCAST, MVT::f64, Legal); |
| } else { |
| setOperationAction(ISD::BITCAST, MVT::f32, Expand); |
| setOperationAction(ISD::BITCAST, MVT::i32, Expand); |
| setOperationAction(ISD::BITCAST, MVT::i64, Expand); |
| setOperationAction(ISD::BITCAST, MVT::f64, Expand); |
| } |
| |
| // We cannot sextinreg(i1). Expand to shifts. |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
| |
| // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support |
| // SjLj exception handling; it is a lightweight setjmp/longjmp replacement |
| // used for continuations, user-level threading, etc. As a result, no other |
| // SjLj exception interfaces are implemented, so please don't build your |
| // own exception handling on top of them. |
| // LLVM/Clang supports zero-cost DWARF exception handling. |
| setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
| setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
| |
| // We want to legalize GlobalAddress and ConstantPool nodes into the |
| // appropriate instructions to materialize the address. |
| setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); |
| setOperationAction(ISD::BlockAddress, MVT::i32, Custom); |
| setOperationAction(ISD::ConstantPool, MVT::i32, Custom); |
| setOperationAction(ISD::JumpTable, MVT::i32, Custom); |
| setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); |
| setOperationAction(ISD::BlockAddress, MVT::i64, Custom); |
| setOperationAction(ISD::ConstantPool, MVT::i64, Custom); |
| setOperationAction(ISD::JumpTable, MVT::i64, Custom); |
| |
| // TRAP is legal. |
| setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| |
| // TRAMPOLINE is custom lowered. |
| setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); |
| setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); |
| |
| // VASTART needs to be custom lowered to use the VarArgsFrameIndex |
| setOperationAction(ISD::VASTART , MVT::Other, Custom); |
| |
| if (Subtarget.isSVR4ABI()) { |
| if (isPPC64) { |
| // VAARG always uses double-word chunks, so promote anything smaller. |
| setOperationAction(ISD::VAARG, MVT::i1, Promote); |
| AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); |
| setOperationAction(ISD::VAARG, MVT::i8, Promote); |
| AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); |
| setOperationAction(ISD::VAARG, MVT::i16, Promote); |
| AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); |
| setOperationAction(ISD::VAARG, MVT::i32, Promote); |
| AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); |
| setOperationAction(ISD::VAARG, MVT::Other, Expand); |
| } else { |
| // VAARG is custom lowered with the 32-bit SVR4 ABI. |
| setOperationAction(ISD::VAARG, MVT::Other, Custom); |
| setOperationAction(ISD::VAARG, MVT::i64, Custom); |
| } |
| } else |
| setOperationAction(ISD::VAARG, MVT::Other, Expand); |
| |
| if (Subtarget.isSVR4ABI() && !isPPC64) |
| // VACOPY is custom lowered with the 32-bit SVR4 ABI. |
| setOperationAction(ISD::VACOPY , MVT::Other, Custom); |
| else |
| setOperationAction(ISD::VACOPY , MVT::Other, Expand); |
| |
| // Use the default implementation. |
| setOperationAction(ISD::VAEND , MVT::Other, Expand); |
| setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); |
| setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); |
| setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); |
| setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); |
| setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); |
| |
| // We want to custom lower some of our intrinsics. |
| setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| |
| // To handle counter-based loop conditions. |
| setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); |
| |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); |
| setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
| |
| // Comparisons that require checking two conditions. |
| if (Subtarget.hasSPE()) { |
| setCondCodeAction(ISD::SETO, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETO, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETUO, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETUO, MVT::f64, Expand); |
| } |
| setCondCodeAction(ISD::SETULT, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETULT, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::f32, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::f64, Expand); |
| |
| if (Subtarget.has64BitSupport()) { |
| // Subtargets with 64-bit support also have instructions for converting |
| // between i64 and FP. |
| setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); |
| // This is just the low 32 bits of a (signed) fp->i64 conversion. |
| // We cannot do this with Promote because i64 is not a legal type. |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| |
| if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| } else { |
| // PowerPC does not have FP_TO_UINT on 32-bit implementations. |
| if (Subtarget.hasSPE()) |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); |
| else |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); |
| } |
| |
| // With the instructions enabled under FPCVT, we can do everything. |
| if (Subtarget.hasFPCVT()) { |
| if (Subtarget.has64BitSupport()) { |
| setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
| } |
| |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
| } |
| |
| if (Subtarget.use64BitRegs()) { |
| // 64-bit PowerPC implementations can support i64 types directly |
| addRegisterClass(MVT::i64, &PPC::G8RCRegClass); |
| // BUILD_PAIR can't be handled natively, and should be expanded to shl/or |
| setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); |
| // 64-bit PowerPC wants to expand i128 shifts itself. |
| setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); |
| setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); |
| setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); |
| } else { |
| // 32-bit PowerPC wants to expand i64 shifts itself. |
| setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); |
| setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); |
| } |
| |
| if (Subtarget.hasAltivec()) { |
| // First, set the operation actions for all vector types to Expand. Then |
| // we will selectively turn on ones that can be effectively codegen'd. |
| for (MVT VT : MVT::vector_valuetypes()) { |
| // add/sub are legal for all supported vector VT's. |
| setOperationAction(ISD::ADD, VT, Legal); |
| setOperationAction(ISD::SUB, VT, Legal); |
| |
| // Vector instructions introduced in P8 |
| if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { |
| setOperationAction(ISD::CTPOP, VT, Legal); |
| setOperationAction(ISD::CTLZ, VT, Legal); |
| } |
| else { |
| setOperationAction(ISD::CTPOP, VT, Expand); |
| setOperationAction(ISD::CTLZ, VT, Expand); |
| } |
| |
| // Vector instructions introduced in P9 |
| if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) |
| setOperationAction(ISD::CTTZ, VT, Legal); |
| else |
| setOperationAction(ISD::CTTZ, VT, Expand); |
| |
| // We promote all shuffles to v16i8. |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); |
| AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); |
| |
| // We promote all non-typed operations to v4i32. |
| setOperationAction(ISD::AND , VT, Promote); |
| AddPromotedToType (ISD::AND , VT, MVT::v4i32); |
| setOperationAction(ISD::OR , VT, Promote); |
| AddPromotedToType (ISD::OR , VT, MVT::v4i32); |
| setOperationAction(ISD::XOR , VT, Promote); |
| AddPromotedToType (ISD::XOR , VT, MVT::v4i32); |
| setOperationAction(ISD::LOAD , VT, Promote); |
| AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); |
| setOperationAction(ISD::SELECT, VT, Promote); |
| AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); |
| setOperationAction(ISD::SELECT_CC, VT, Promote); |
| AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); |
| setOperationAction(ISD::STORE, VT, Promote); |
| AddPromotedToType (ISD::STORE, VT, MVT::v4i32); |
| |
| // No other operations are legal. |
| setOperationAction(ISD::MUL , VT, Expand); |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| setOperationAction(ISD::FDIV, VT, Expand); |
| setOperationAction(ISD::FREM, VT, Expand); |
| setOperationAction(ISD::FNEG, VT, Expand); |
| setOperationAction(ISD::FSQRT, VT, Expand); |
| setOperationAction(ISD::FLOG, VT, Expand); |
| setOperationAction(ISD::FLOG10, VT, Expand); |
| setOperationAction(ISD::FLOG2, VT, Expand); |
| setOperationAction(ISD::FEXP, VT, Expand); |
| setOperationAction(ISD::FEXP2, VT, Expand); |
| setOperationAction(ISD::FSIN, VT, Expand); |
| setOperationAction(ISD::FCOS, VT, Expand); |
| setOperationAction(ISD::FABS, VT, Expand); |
| setOperationAction(ISD::FFLOOR, VT, Expand); |
| setOperationAction(ISD::FCEIL, VT, Expand); |
| setOperationAction(ISD::FTRUNC, VT, Expand); |
| setOperationAction(ISD::FRINT, VT, Expand); |
| setOperationAction(ISD::FNEARBYINT, VT, Expand); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); |
| setOperationAction(ISD::BUILD_VECTOR, VT, Expand); |
| setOperationAction(ISD::MULHU, VT, Expand); |
| setOperationAction(ISD::MULHS, VT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| setOperationAction(ISD::UDIVREM, VT, Expand); |
| setOperationAction(ISD::SDIVREM, VT, Expand); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); |
| setOperationAction(ISD::FPOW, VT, Expand); |
| setOperationAction(ISD::BSWAP, VT, Expand); |
| setOperationAction(ISD::VSELECT, VT, Expand); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
| setOperationAction(ISD::ROTL, VT, Expand); |
| setOperationAction(ISD::ROTR, VT, Expand); |
| |
| for (MVT InnerVT : MVT::vector_valuetypes()) { |
| setTruncStoreAction(VT, InnerVT, Expand); |
| setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
| setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
| } |
| } |
| |
| // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle |
| // with merges, splats, etc. |
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); |
| |
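| // These Altivec operations map directly to instructions. |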
| setOperationAction(ISD::AND , MVT::v4i32, Legal); |
| setOperationAction(ISD::OR , MVT::v4i32, Legal); |
| setOperationAction(ISD::XOR , MVT::v4i32, Legal); |
| setOperationAction(ISD::LOAD , MVT::v4i32, Legal); |
| setOperationAction(ISD::SELECT, MVT::v4i32, |
| Subtarget.useCRBits() ? Legal : Expand); |
| setOperationAction(ISD::STORE , MVT::v4i32, Legal); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); |
| setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); |
| setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); |
| |
| addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); |
| addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); |
| addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); |
| addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); |
| |
| setOperationAction(ISD::MUL, MVT::v4f32, Legal); |
| setOperationAction(ISD::FMA, MVT::v4f32, Legal); |
| |
| if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { |
| setOperationAction(ISD::FDIV, MVT::v4f32, Legal); |
| setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); |
| } |
| |
| if (Subtarget.hasP8Altivec()) |
| setOperationAction(ISD::MUL, MVT::v4i32, Legal); |
| else |
| setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
| |
| setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
| setOperationAction(ISD::MUL, MVT::v16i8, Custom); |
| |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); |
| |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
| |
| // Altivec does not contain unordered floating-point compare instructions |
| setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); |
| setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); |
| |
| if (Subtarget.hasVSX()) { |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); |
| if (Subtarget.hasP8Vector()) { |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); |
| } |
| if (Subtarget.hasDirectMove() && isPPC64) { |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); |
| } |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); |
| |
| setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); |
| setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); |
| setOperationAction(ISD::FROUND, MVT::v2f64, Legal); |
| |
| setOperationAction(ISD::FROUND, MVT::v4f32, Legal); |
| |
| setOperationAction(ISD::MUL, MVT::v2f64, Legal); |
| setOperationAction(ISD::FMA, MVT::v2f64, Legal); |
| |
| setOperationAction(ISD::FDIV, MVT::v2f64, Legal); |
| setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); |
| |
| setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); |
| setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); |
| setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); |
| setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); |
| setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); |
| |
| // Share the Altivec comparison restrictions. |
| setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); |
| setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); |
| |
| setOperationAction(ISD::LOAD, MVT::v2f64, Legal); |
| setOperationAction(ISD::STORE, MVT::v2f64, Legal); |
| |
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); |
| |
| if (Subtarget.hasP8Vector()) |
| addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); |
| |
| addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); |
| |
| addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); |
| addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); |
| addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); |
| |
| if (Subtarget.hasP8Altivec()) { |
| setOperationAction(ISD::SHL, MVT::v2i64, Legal); |
| setOperationAction(ISD::SRA, MVT::v2i64, Legal); |
| setOperationAction(ISD::SRL, MVT::v2i64, Legal); |
| |
| // 128-bit shifts can be accomplished via 3 instructions for SHL and |
| // SRL, but not for SRA, because of the instructions available: |
| // VS{RL} and VS{RL}O. However, due to direct-move costs, it's not worth |
| // doing here. |
| setOperationAction(ISD::SHL, MVT::v1i128, Expand); |
| setOperationAction(ISD::SRL, MVT::v1i128, Expand); |
| setOperationAction(ISD::SRA, MVT::v1i128, Expand); |
| |
| setOperationAction(ISD::SETCC, MVT::v2i64, Legal); |
| } |
| else { |
| setOperationAction(ISD::SHL, MVT::v2i64, Expand); |
| setOperationAction(ISD::SRA, MVT::v2i64, Expand); |
| setOperationAction(ISD::SRL, MVT::v2i64, Expand); |
| |
| setOperationAction(ISD::SETCC, MVT::v2i64, Custom); |
| |
| // VSX v2i64 only supports non-arithmetic operations. |
| setOperationAction(ISD::ADD, MVT::v2i64, Expand); |
| setOperationAction(ISD::SUB, MVT::v2i64, Expand); |
| } |
| |
| setOperationAction(ISD::LOAD, MVT::v2i64, Promote); |
| AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); |
| setOperationAction(ISD::STORE, MVT::v2i64, Promote); |
| AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); |
| |
| setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); |
| |
| setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); |
| setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); |
| setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); |
| |
| // Vector operation legalization checks the result type of |
| // SIGN_EXTEND_INREG, while overall legalization checks the inner type. |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); |
| |
| setOperationAction(ISD::FNEG, MVT::v4f32, Legal); |
| setOperationAction(ISD::FNEG, MVT::v2f64, Legal); |
| setOperationAction(ISD::FABS, MVT::v4f32, Legal); |
| setOperationAction(ISD::FABS, MVT::v2f64, Legal); |
| |
| if (Subtarget.hasDirectMove()) |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); |
| |
| addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); |
| } |
| |
| if (Subtarget.hasP8Altivec()) { |
| addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); |
| addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); |
| } |
| |
| if (Subtarget.hasP9Vector()) { |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
| |
| // 128-bit shifts can be accomplished via 3 instructions for SHL and |
| // SRL, but not for SRA, because of the instructions available: |
| // VS{RL} and VS{RL}O. |
| setOperationAction(ISD::SHL, MVT::v1i128, Legal); |
| setOperationAction(ISD::SRL, MVT::v1i128, Legal); |
| setOperationAction(ISD::SRA, MVT::v1i128, Expand); |
| |
| if (EnableQuadPrecision) { |
| addRegisterClass(MVT::f128, &PPC::VRRCRegClass); |
| setOperationAction(ISD::FADD, MVT::f128, Legal); |
| setOperationAction(ISD::FSUB, MVT::f128, Legal); |
| setOperationAction(ISD::FDIV, MVT::f128, Legal); |
| setOperationAction(ISD::FMUL, MVT::f128, Legal); |
| setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); |
| // No extending loads to f128 on PPC. |
| for (MVT FPT : MVT::fp_valuetypes()) |
| setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); |
| setOperationAction(ISD::FMA, MVT::f128, Legal); |
| setCondCodeAction(ISD::SETULT, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETUGT, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETOGE, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETOLE, MVT::f128, Expand); |
| setCondCodeAction(ISD::SETONE, MVT::f128, Expand); |
| |
| setOperationAction(ISD::FTRUNC, MVT::f128, Legal); |
| setOperationAction(ISD::FRINT, MVT::f128, Legal); |
| setOperationAction(ISD::FFLOOR, MVT::f128, Legal); |
| setOperationAction(ISD::FCEIL, MVT::f128, Legal); |
| setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal); |
| setOperationAction(ISD::FROUND, MVT::f128, Legal); |
| |
| setOperationAction(ISD::SELECT, MVT::f128, Expand); |
| setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); |
| setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); |
| setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
| setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
| setOperationAction(ISD::BITCAST, MVT::i128, Custom); |
| // No implementation for these ops on PowerPC. |
| setOperationAction(ISD::FSIN , MVT::f128, Expand); |
| setOperationAction(ISD::FCOS , MVT::f128, Expand); |
| setOperationAction(ISD::FPOW, MVT::f128, Expand); |
| setOperationAction(ISD::FPOWI, MVT::f128, Expand); |
| setOperationAction(ISD::FREM, MVT::f128, Expand); |
| } |
| |
| } |
| |
| if (Subtarget.hasP9Altivec()) { |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); |
| } |
| } |
| |
| if (Subtarget.hasQPX()) { |
| setOperationAction(ISD::FADD, MVT::v4f64, Legal); |
| setOperationAction(ISD::FSUB, MVT::v4f64, Legal); |
| setOperationAction(ISD::FMUL, MVT::v4f64, Legal); |
| setOperationAction(ISD::FREM, MVT::v4f64, Expand); |
| |
| setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); |
| setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); |
| |
| setOperationAction(ISD::LOAD , MVT::v4f64, Custom); |
| setOperationAction(ISD::STORE , MVT::v4f64, Custom); |
| |
| setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); |
| setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); |
| |
| if (!Subtarget.useCRBits()) |
| setOperationAction(ISD::SELECT, MVT::v4f64, Expand); |
| setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); |
| |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); |
| setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); |
| setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); |
| setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); |
| |
| setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); |
| setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); |
| |
| setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); |
| setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); |
| setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); |
| |
| setOperationAction(ISD::FNEG , MVT::v4f64, Legal); |
| setOperationAction(ISD::FABS , MVT::v4f64, Legal); |
| setOperationAction(ISD::FSIN , MVT::v4f64, Expand); |
| setOperationAction(ISD::FCOS , MVT::v4f64, Expand); |
| setOperationAction(ISD::FPOW , MVT::v4f64, Expand); |
| setOperationAction(ISD::FLOG , MVT::v4f64, Expand); |
| setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); |
| setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); |
| setOperationAction(ISD::FEXP , MVT::v4f64, Expand); |
| setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); |
| |
| setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); |
| |
| setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); |
| |
| addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); |
| |
| setOperationAction(ISD::FADD, MVT::v4f32, Legal); |
| setOperationAction(ISD::FSUB, MVT::v4f32, Legal); |
| setOperationAction(ISD::FMUL, MVT::v4f32, Legal); |
| setOperationAction(ISD::FREM, MVT::v4f32, Expand); |
| |
| setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); |
| setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); |
| |
| setOperationAction(ISD::LOAD , MVT::v4f32, Custom); |
| setOperationAction(ISD::STORE , MVT::v4f32, Custom); |
| |
| if (!Subtarget.useCRBits()) |
| setOperationAction(ISD::SELECT, MVT::v4f32, Expand); |
| setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); |
| |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); |
| setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); |
| setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); |
| setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
| |
| setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); |
| setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); |
| |
| setOperationAction(ISD::FNEG , MVT::v4f32, Legal); |
| setOperationAction(ISD::FABS , MVT::v4f32, Legal); |
| setOperationAction(ISD::FSIN , MVT::v4f32, Expand); |
| setOperationAction(ISD::FCOS , MVT::v4f32, Expand); |
| setOperationAction(ISD::FPOW , MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG , MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); |
| setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); |
| setOperationAction(ISD::FEXP , MVT::v4f32, Expand); |
| setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); |
| |
| setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); |
| setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); |
| |
| setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); |
| setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); |
| |
| addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); |
| |
| setOperationAction(ISD::AND , MVT::v4i1, Legal); |
| setOperationAction(ISD::OR , MVT::v4i1, Legal); |
| setOperationAction(ISD::XOR , MVT::v4i1, Legal); |
| |
| if (!Subtarget.useCRBits()) |
| setOperationAction(ISD::SELECT, MVT::v4i1, Expand); |
| setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); |
| |
| setOperationAction(ISD::LOAD , MVT::v4i1, Custom); |
| setOperationAction(ISD::STORE , MVT::v4i1, Custom); |
| |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); |
| setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); |
| setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); |
| setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); |
| setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); |
| |
| setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); |
| |
| addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); |
| |
| setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); |
| setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); |
| setOperationAction(ISD::FROUND, MVT::v4f64, Legal); |
| |
| setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); |
| setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); |
| setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); |
| setOperationAction(ISD::FROUND, MVT::v4f32, Legal); |
| |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); |
| setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); |
| |
| // These need to set FE_INEXACT, and so cannot be vectorized here. |
| setOperationAction(ISD::FRINT, MVT::v4f64, Expand); |
| setOperationAction(ISD::FRINT, MVT::v4f32, Expand); |
| |
| if (TM.Options.UnsafeFPMath) { |
| setOperationAction(ISD::FDIV, MVT::v4f64, Legal); |
| setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); |
| |
| setOperationAction(ISD::FDIV, MVT::v4f32, Legal); |
| setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); |
| } else { |
| setOperationAction(ISD::FDIV, MVT::v4f64, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); |
| |
| setOperationAction(ISD::FDIV, MVT::v4f32, Expand); |
| setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); |
| } |
| } |
| |
| if (Subtarget.has64BitSupport()) |
| setOperationAction(ISD::PREFETCH, MVT::Other, Legal); |
| |
| setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); |
| |
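| // 64-bit atomic loads and stores are not available on 32-bit targets; |
| // expanding them typically results in runtime library calls. |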
| if (!isPPC64) { |
| setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); |
| setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); |
| } |
| |
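| // Scalar boolean results from comparisons are represented as 0 or 1. |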
| setBooleanContents(ZeroOrOneBooleanContent); |
| |
| if (Subtarget.hasAltivec()) { |
| // Altivec instructions set fields to all zeros or all ones. |
| setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
| } |
| |
| if (!isPPC64) { |
| // These libcalls are not available in 32-bit mode. |
| setLibcallName(RTLIB::SHL_I128, nullptr); |
| setLibcallName(RTLIB::SRL_I128, nullptr); |
| setLibcallName(RTLIB::SRA_I128, nullptr); |
| } |
| |
| setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); |
| |
| // We have target-specific dag combine patterns for the following nodes: |
| setTargetDAGCombine(ISD::SHL); |
| setTargetDAGCombine(ISD::SRA); |
| setTargetDAGCombine(ISD::SRL); |
| setTargetDAGCombine(ISD::SINT_TO_FP); |
| setTargetDAGCombine(ISD::BUILD_VECTOR); |
| if (Subtarget.hasFPCVT()) |
| setTargetDAGCombine(ISD::UINT_TO_FP); |
| setTargetDAGCombine(ISD::LOAD); |
| setTargetDAGCombine(ISD::STORE); |
| setTargetDAGCombine(ISD::BR_CC); |
| if (Subtarget.useCRBits()) |
| setTargetDAGCombine(ISD::BRCOND); |
| setTargetDAGCombine(ISD::BSWAP); |
| setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
| setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
| setTargetDAGCombine(ISD::INTRINSIC_VOID); |
| |
| setTargetDAGCombine(ISD::SIGN_EXTEND); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| |
| if (Subtarget.useCRBits()) { |
| setTargetDAGCombine(ISD::TRUNCATE); |
| setTargetDAGCombine(ISD::SETCC); |
| setTargetDAGCombine(ISD::SELECT_CC); |
| } |
| |
| // Use reciprocal estimates. |
| if (TM.Options.UnsafeFPMath) { |
| setTargetDAGCombine(ISD::FDIV); |
| setTargetDAGCombine(ISD::FSQRT); |
| } |
| |
| // Darwin long double math library functions have $LDBL128 appended. |
| if (Subtarget.isDarwin()) { |
| setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); |
| setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); |
| setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); |
| setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); |
| setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); |
| setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); |
| setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); |
| setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); |
| setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); |
| setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); |
| } |
| |
| if (EnableQuadPrecision) { |
| setLibcallName(RTLIB::LOG_F128, "logf128"); |
| setLibcallName(RTLIB::LOG2_F128, "log2f128"); |
| setLibcallName(RTLIB::LOG10_F128, "log10f128"); |
| setLibcallName(RTLIB::EXP_F128, "expf128"); |
| setLibcallName(RTLIB::EXP2_F128, "exp2f128"); |
| setLibcallName(RTLIB::SIN_F128, "sinf128"); |
| setLibcallName(RTLIB::COS_F128, "cosf128"); |
| setLibcallName(RTLIB::POW_F128, "powf128"); |
| setLibcallName(RTLIB::FMIN_F128, "fminf128"); |
| setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); |
| setLibcallName(RTLIB::POWI_F128, "__powikf2"); |
| setLibcallName(RTLIB::REM_F128, "fmodf128"); |
| } |
| |
| // With 32 condition bits, we don't need to sink (and duplicate) compares |
| // aggressively in CodeGenPrep. |
| if (Subtarget.useCRBits()) { |
| setHasMultipleConditionRegisters(); |
| setJumpIsExpensive(); |
| } |
| |
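| // Note: these alignment values are log2 of the byte alignment, so 2 means |
| // 4-byte and 4 means 16-byte alignment. |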
| setMinFunctionAlignment(2); |
| if (Subtarget.isDarwin()) |
| setPrefFunctionAlignment(4); |
| |
| switch (Subtarget.getDarwinDirective()) { |
| default: break; |
| case PPC::DIR_970: |
| case PPC::DIR_A2: |
| case PPC::DIR_E500: |
| case PPC::DIR_E500mc: |
| case PPC::DIR_E5500: |
| case PPC::DIR_PWR4: |
| case PPC::DIR_PWR5: |
| case PPC::DIR_PWR5X: |
| case PPC::DIR_PWR6: |
| case PPC::DIR_PWR6X: |
| case PPC::DIR_PWR7: |
| case PPC::DIR_PWR8: |
| case PPC::DIR_PWR9: |
| setPrefFunctionAlignment(4); |
| setPrefLoopAlignment(4); |
| break; |
| } |
| |
| if (Subtarget.enableMachineScheduler()) |
| setSchedulingPreference(Sched::Source); |
| else |
| setSchedulingPreference(Sched::Hybrid); |
| |
| computeRegisterProperties(STI.getRegisterInfo()); |
| |
| // The Freescale cores do better with aggressive inlining of memcpy and |
| // friends. GCC uses the same threshold of 128 bytes (= 32 word stores). |
| if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || |
| Subtarget.getDarwinDirective() == PPC::DIR_E5500) { |
| MaxStoresPerMemset = 32; |
| MaxStoresPerMemsetOptSize = 16; |
| MaxStoresPerMemcpy = 32; |
| MaxStoresPerMemcpyOptSize = 8; |
| MaxStoresPerMemmove = 32; |
| MaxStoresPerMemmoveOptSize = 8; |
| } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { |
| // The A2 also benefits from (very) aggressive inlining of memcpy and |
| // friends. The overhead of the function call, even when warm, can be |
| // over one hundred cycles. |
| MaxStoresPerMemset = 128; |
| MaxStoresPerMemcpy = 128; |
| MaxStoresPerMemmove = 128; |
| MaxLoadsPerMemcmp = 128; |
| } else { |
| MaxLoadsPerMemcmp = 8; |
| MaxLoadsPerMemcmpOptSize = 4; |
| } |
| } |
| |
| /// getMaxByValAlign - Helper for getByValTypeAlignment to determine |
| /// the desired ByVal argument alignment. |
| static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, |
| unsigned MaxMaxAlign) { |
| if (MaxAlign == MaxMaxAlign) |
| return; |
| if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
| if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) |
| MaxAlign = 32; |
| else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) |
| MaxAlign = 16; |
| } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { |
| unsigned EltAlign = 0; |
| getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); |
| if (EltAlign > MaxAlign) |
| MaxAlign = EltAlign; |
| } else if (StructType *STy = dyn_cast<StructType>(Ty)) { |
| for (auto *EltTy : STy->elements()) { |
| unsigned EltAlign = 0; |
| getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); |
| if (EltAlign > MaxAlign) |
| MaxAlign = EltAlign; |
| if (MaxAlign == MaxMaxAlign) |
| break; |
| } |
| } |
| } |
| |
| /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate |
| /// function arguments in the caller parameter area. |
| unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, |
| const DataLayout &DL) const { |
| // Darwin passes everything on a 4-byte boundary. |
| if (Subtarget.isDarwin()) |
| return 4; |
| |
| // 16-byte and wider vectors are passed on a 16-byte boundary. |
| // Everything else is passed on an 8-byte boundary on PPC64 and a 4-byte |
| // boundary on PPC32. |
| unsigned Align = Subtarget.isPPC64() ? 8 : 4; |
| if (Subtarget.hasAltivec() || Subtarget.hasQPX()) |
| getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16); |
| return Align; |
| } |
| |
| unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
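| // With SPE, an f64 value is carried in a pair of 32-bit GPRs. |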
| if (Subtarget.hasSPE() && VT == MVT::f64) |
| return 2; |
| return PPCTargetLowering::getNumRegisters(Context, VT); |
| } |
| |
| MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
| CallingConv::ID CC, |
| EVT VT) const { |
| if (Subtarget.hasSPE() && VT == MVT::f64) |
| return MVT::i32; |
| return PPCTargetLowering::getRegisterType(Context, VT); |
| } |
| |
| bool PPCTargetLowering::useSoftFloat() const { |
| return Subtarget.useSoftFloat(); |
| } |
| |
| bool PPCTargetLowering::hasSPE() const { |
| return Subtarget.hasSPE(); |
| } |
| |
| const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { |
| switch ((PPCISD::NodeType)Opcode) { |
| case PPCISD::FIRST_NUMBER: break; |
| case PPCISD::FSEL: return "PPCISD::FSEL"; |
| case PPCISD::FCFID: return "PPCISD::FCFID"; |
| case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; |
| case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; |
| case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; |
| case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; |
| case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; |
| case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; |
| case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; |
| case PPCISD::FP_TO_UINT_IN_VSR: |
| return "PPCISD::FP_TO_UINT_IN_VSR,"; |
| case PPCISD::FP_TO_SINT_IN_VSR: |
| return "PPCISD::FP_TO_SINT_IN_VSR"; |
| case PPCISD::FRE: return "PPCISD::FRE"; |
| case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; |
| case PPCISD::STFIWX: return "PPCISD::STFIWX"; |
| case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; |
| case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; |
| case PPCISD::VPERM: return "PPCISD::VPERM"; |
| case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; |
| case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; |
| case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; |
| case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; |
| case PPCISD::VECSHL: return "PPCISD::VECSHL"; |
| case PPCISD::CMPB: return "PPCISD::CMPB"; |
| case PPCISD::Hi: return "PPCISD::Hi"; |
| case PPCISD::Lo: return "PPCISD::Lo"; |
| case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; |
| case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; |
| case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; |
| case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; |
| case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; |
| case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; |
| case PPCISD::SRL: return "PPCISD::SRL"; |
| case PPCISD::SRA: return "PPCISD::SRA"; |
| case PPCISD::SHL: return "PPCISD::SHL"; |
| case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; |
| case PPCISD::CALL: return "PPCISD::CALL"; |
| case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; |
| case PPCISD::MTCTR: return "PPCISD::MTCTR"; |
| case PPCISD::BCTRL: return "PPCISD::BCTRL"; |
| case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; |
| case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; |
| case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; |
| case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; |
| case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; |
| case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; |
| case PPCISD::MFVSR: return "PPCISD::MFVSR"; |
| case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; |
| case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; |
| case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; |
| case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; |
| case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; |
| case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; |
| case PPCISD::VCMP: return "PPCISD::VCMP"; |
| case PPCISD::VCMPo: return "PPCISD::VCMPo"; |
| case PPCISD::LBRX: return "PPCISD::LBRX"; |
| case PPCISD::STBRX: return "PPCISD::STBRX"; |
| case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; |
| case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; |
| case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; |
| case PPCISD::STXSIX: return "PPCISD::STXSIX"; |
| case PPCISD::VEXTS: return "PPCISD::VEXTS"; |
| case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; |
| case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; |
| case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; |
| case PPCISD::ST_VSR_SCAL_INT: |
| return "PPCISD::ST_VSR_SCAL_INT"; |
| case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; |
| case PPCISD::BDNZ: return "PPCISD::BDNZ"; |
| case PPCISD::BDZ: return "PPCISD::BDZ"; |
| case PPCISD::MFFS: return "PPCISD::MFFS"; |
| case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; |
| case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; |
| case PPCISD::CR6SET: return "PPCISD::CR6SET"; |
| case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; |
| case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; |
| case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; |
| case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; |
| case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; |
| case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; |
| case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; |
| case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; |
| case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; |
| case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; |
| case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; |
| case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; |
| case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; |
| case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; |
| case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; |
| case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; |
| case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; |
| case PPCISD::SC: return "PPCISD::SC"; |
| case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; |
| case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; |
| case PPCISD::RFEBB: return "PPCISD::RFEBB"; |
| case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; |
| case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; |
| case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; |
| case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; |
| case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; |
| case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; |
| case PPCISD::QBFLT: return "PPCISD::QBFLT"; |
| case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; |
| case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; |
| } |
| return nullptr; |
| } |
| |
| EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, |
| EVT VT) const { |
| if (!VT.isVector()) |
| return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; |
| |
| if (Subtarget.hasQPX()) |
| return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); |
| |
| return VT.changeVectorElementTypeToInteger(); |
| } |
| |
| bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { |
| assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); |
| return true; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Node matching predicates, for use by the tblgen matching code. |
| //===----------------------------------------------------------------------===// |
| |
| /// isFloatingPointZero - Return true if this is 0.0 or -0.0. |
| static bool isFloatingPointZero(SDValue Op) { |
| if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) |
| return CFP->getValueAPF().isZero(); |
| else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { |
| // Maybe this has already been legalized into the constant pool? |
| if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) |
| if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) |
| return CFP->getValueAPF().isZero(); |
| } |
| return false; |
| } |
| |
| /// isConstantOrUndef - Op is a shuffle-mask element, either undef (negative) |
| /// or a constant index. Return true if Op is undef or if it matches Val. |
| static bool isConstantOrUndef(int Op, int Val) { |
| return Op < 0 || Op == Val; |
| } |
| |
| /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a |
| /// VPKUHUM instruction. |
| /// The ShuffleKind distinguishes between big-endian operations with |
| /// two different inputs (0), either-endian operations with two identical |
| /// inputs (1), and little-endian operations with two different inputs (2). |
| /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
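| /// For example, with ShuffleKind 0 (big-endian, two distinct inputs) the |
| /// expected mask is <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>, i.e. the |
| /// low-order byte of each big-endian halfword of both input vectors. |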
| bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
| SelectionDAG &DAG) { |
| bool IsLE = DAG.getDataLayout().isLittleEndian(); |
| if (ShuffleKind == 0) { |
| if (IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; ++i) |
| if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) |
| return false; |
| } else if (ShuffleKind == 2) { |
| if (!IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; ++i) |
| if (!isConstantOrUndef(N->getMaskElt(i), i*2)) |
| return false; |
| } else if (ShuffleKind == 1) { |
| unsigned j = IsLE ? 0 : 1; |
| for (unsigned i = 0; i != 8; ++i) |
| if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a |
| /// VPKUWUM instruction. |
| /// The ShuffleKind distinguishes between big-endian operations with |
| /// two different inputs (0), either-endian operations with two identical |
| /// inputs (1), and little-endian operations with two different inputs (2). |
| /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
| bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
| SelectionDAG &DAG) { |
| bool IsLE = DAG.getDataLayout().isLittleEndian(); |
| if (ShuffleKind == 0) { |
| if (IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; i += 2) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) |
| return false; |
| } else if (ShuffleKind == 2) { |
| if (!IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; i += 2) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) |
| return false; |
| } else if (ShuffleKind == 1) { |
| unsigned j = IsLE ? 0 : 2; |
| for (unsigned i = 0; i != 8; i += 2) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || |
| !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a |
| /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the |
| /// current subtarget. |
| /// |
| /// The ShuffleKind distinguishes between big-endian operations with |
| /// two different inputs (0), either-endian operations with two identical |
| /// inputs (1), and little-endian operations with two different inputs (2). |
| /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
| bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
| SelectionDAG &DAG) { |
| const PPCSubtarget& Subtarget = |
| static_cast<const PPCSubtarget&>(DAG.getSubtarget()); |
| if (!Subtarget.hasP8Vector()) |
| return false; |
| |
| bool IsLE = DAG.getDataLayout().isLittleEndian(); |
| if (ShuffleKind == 0) { |
| if (IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; i += 4) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || |
| !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || |
| !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) |
| return false; |
| } else if (ShuffleKind == 2) { |
| if (!IsLE) |
| return false; |
| for (unsigned i = 0; i != 16; i += 4) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || |
| !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || |
| !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) |
| return false; |
| } else if (ShuffleKind == 1) { |
| unsigned j = IsLE ? 0 : 4; |
| for (unsigned i = 0; i != 8; i += 4) |
| if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || |
| !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || |
| !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || |
| !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || |
| !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || |
| !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || |
| !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// isVMerge - Common function, used to match vmrg* shuffles. |
| /// |
| static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, |
| unsigned LHSStart, unsigned RHSStart) { |
| if (N->getValueType(0) != MVT::v16i8) |
| return false; |
| assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && |
| "Unsupported merge size!"); |
| |
| for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units |
| for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit |
| if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), |
| LHSStart+j+i*UnitSize) || |
| !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), |
| RHSStart+j+i*UnitSize)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for |
| /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). |
| /// The ShuffleKind distinguishes between big-endian merges with two |
| /// different inputs (0), either-endian merges with two identical inputs (1), |
| /// and little-endian merges with two different inputs (2). For the latter, |
| /// the input operands are swapped (see PPCInstrAltivec.td). |
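| /// For example, a big-endian vmrglw-style merge of two distinct inputs |
| /// (ShuffleKind 0, UnitSize 4) expects the mask |
| /// <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>. |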
| bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, |
| unsigned ShuffleKind, SelectionDAG &DAG) { |
| if (DAG.getDataLayout().isLittleEndian()) { |
| if (ShuffleKind == 1) // unary |
| return isVMerge(N, UnitSize, 0, 0); |
| else if (ShuffleKind == 2) // swapped |
| return isVMerge(N, UnitSize, 0, 16); |
| else |
| return false; |
| } else { |
| if (ShuffleKind == 1) // unary |
| return isVMerge(N, UnitSize, 8, 8); |
| else if (ShuffleKind == 0) // normal |
| return isVMerge(N, UnitSize, 8, 24); |
| else |
| return false; |
| } |
| } |
| |
| /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for |
| /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). |
| /// The ShuffleKind distinguishes between big-endian merges with two |
| /// different inputs (0), either-endian merges with two identical inputs (1), |
| /// and little-endian merges with two different inputs (2). For the latter, |
| /// the input operands are swapped (see PPCInstrAltivec.td). |
| bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, |
| unsigned ShuffleKind, SelectionDAG &DAG) { |
| if (DAG.getDataLayout().isLittleEndian()) { |
| if (ShuffleKind == 1) // unary |
| return isVMerge(N, UnitSize, 8, 8); |
| else if (ShuffleKind == 2) // swapped |
| return isVMerge(N, UnitSize, 8, 24); |
| else |
| return false; |
| } else { |
| if (ShuffleKind == 1) // unary |
| return isVMerge(N, UnitSize, 0, 0); |
| else if (ShuffleKind == 0) // normal |
| return isVMerge(N, UnitSize, 0, 16); |
| else |
| return false; |
| } |
| } |
| |
| /** |
| * Common function used to match vmrgew and vmrgow shuffles |
| * |
| * The indexOffset determines whether to look for even or odd words in |
| * the shuffle mask. This is based on the endianness of the target machine. |
| * - Little Endian: |
| * - Use offset of 0 to check for odd elements |
| * - Use offset of 4 to check for even elements |
| * - Big Endian: |
| * - Use offset of 0 to check for even elements |
| * - Use offset of 4 to check for odd elements |
| * A detailed description of the vector element ordering for little endian and |
| * big endian can be found at |
| * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html |
| * Targeting your applications - what little endian and big endian IBM XL C/C++ |
| * compiler differences mean to you |
| * |
| * The mask to the shuffle vector instruction specifies the indices of the |
| * elements from the two input vectors to place in the result. The elements are |
| * numbered in array-access order, starting with the first vector. These vectors |
| * are always of type v16i8, so each vector contains 16 elements of 8 bits |
| * each. More info on the shuffle vector can be found in the |
| * http://llvm.org/docs/LangRef.html#shufflevector-instruction |
| * Language Reference. |
| * |
| * The RHSStartValue indicates whether the same input vectors are used (unary) |
| * or two different input vectors are used, based on the following: |
| * - If the instruction uses the same vector for both inputs, the range of the |
| * indices will be 0 to 15. In this case, the RHSStart value passed should |
| * be 0. |
| * - If the instruction has two different vectors then the range of the |
| * indices will be 0 to 31. In this case, the RHSStart value passed should |
| * be 16 (indices 0-15 specify elements in the first vector while indices 16 |
| * to 31 specify elements in the second vector). |
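| * |
| * As a concrete example (derived from the checks below), a big-endian |
| * vmrgew of two different inputs (IndexOffset 0, RHSStartValue 16) expects |
| * the mask <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>. |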
| * |
| * \param[in] N The shuffle vector SD Node to analyze |
| * \param[in] IndexOffset Specifies whether to look for even or odd elements |
| * \param[in] RHSStartValue Specifies the starting index for the righthand input |
| * vector to the shuffle_vector instruction |
| * \return true iff this shuffle vector represents an even or odd word merge |
| */ |
| static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, |
| unsigned RHSStartValue) { |
| if (N->getValueType(0) != MVT::v16i8) |
| return false; |
| |
| for (unsigned i = 0; i < 2; ++i) |
| for (unsigned j = 0; j < 4; ++j) |
| if (!isConstantOrUndef(N->getMaskElt(i*4+j), |
| i*RHSStartValue+j+IndexOffset) || |
| !isConstantOrUndef(N->getMaskElt(i*4+j+8), |
| i*RHSStartValue+j+IndexOffset+8)) |
| return false; |
| return true; |
| } |
| |
| /** |
| * Determine if the specified shuffle mask is suitable for the vmrgew or |
| * vmrgow instructions. |
| * |
| * \param[in] N The shuffle vector SD Node to analyze |
| * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) |
| * \param[in] ShuffleKind Identify the type of merge: |
| * - 0 = big-endian merge with two different inputs; |
| * - 1 = either-endian merge with two identical inputs; |
| * - 2 = little-endian merge with two different inputs (inputs are swapped for |
| * little-endian merges). |
| * \param[in] DAG The current SelectionDAG |
| * \return true iff this shuffle mask represents an even or odd word merge |
| */ |
| bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, |
| unsigned ShuffleKind, SelectionDAG &DAG) { |
| if (DAG.getDataLayout().isLittleEndian()) { |
| unsigned indexOffset = CheckEven ? 4 : 0; |
| if (ShuffleKind == 1) // Unary |
| return isVMerge(N, indexOffset, 0); |
| else if (ShuffleKind == 2) // swapped |
| return isVMerge(N, indexOffset, 16); |
| else |
| return false; |
| } |
| else { |
| unsigned indexOffset = CheckEven ? 0 : 4; |
| if (ShuffleKind == 1) // Unary |
| return isVMerge(N, indexOffset, 0); |
| else if (ShuffleKind == 0) // Normal |
| return isVMerge(N, indexOffset, 16); |
| else |
| return false; |
| } |
| return false; |
| } |
| |
| /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift |
| /// amount, otherwise return -1. |
| /// The ShuffleKind distinguishes between big-endian operations with two |
| /// different inputs (0), either-endian operations with two identical inputs |
| /// (1), and little-endian operations with two different inputs (2). For the |
| /// latter, the input operands are swapped (see PPCInstrAltivec.td). |
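| /// For example, the mask <3,4,5,...,18> yields a shift amount of 3 for a |
| /// big-endian ShuffleKind-0 shuffle; for the corresponding little-endian |
| /// case (ShuffleKind 2) the value returned below is 16 - 3 = 13. |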
| int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, |
| SelectionDAG &DAG) { |
| if (N->getValueType(0) != MVT::v16i8) |
| return -1; |
| |
| ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
| |
| // Find the first non-undef value in the shuffle mask. |
| unsigned i; |
| for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) |
| /*search*/; |
| |
| if (i == 16) return -1; // all undef. |
| |
| // Otherwise, check to see if the rest of the elements are consecutively |
| // numbered from this value. |
| unsigned ShiftAmt = SVOp->getMaskElt(i); |
| if (ShiftAmt < i) return -1; |
| |
| ShiftAmt -= i; |
| bool isLE = DAG.getDataLayout().isLittleEndian(); |
| |
| if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { |
| // Check the rest of the elements to see if they are consecutive. |
| for (++i; i != 16; ++i) |
| if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) |
| return -1; |
| } else if (ShuffleKind == 1) { |
| // Check the rest of the elements to see if they are consecutive. |
| for (++i; i != 16; ++i) |
| if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) |
| return -1; |
| } else |
| return -1; |
| |
| if (isLE) |
| ShiftAmt = 16 - ShiftAmt; |
| |
| return ShiftAmt; |
| } |
| |
| /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand |
| /// specifies a splat of a single element that is suitable for input to |
| /// VSPLTB/VSPLTH/VSPLTW. |
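| /// For example, with EltSize == 4 the mask <4,5,6,7> repeated four times is |
| /// a splat of word element 1. |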
| bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { |
| assert(N->getValueType(0) == MVT::v16i8 && |
| (EltSize == 1 || EltSize == 2 || EltSize == 4)); |
| |
| // The consecutive indices need to specify an element, not part of two |
| // different elements. So abandon ship early if this isn't the case. |
| if (N->getMaskElt(0) % EltSize != 0) |
| return false; |
| |
| // This is a splat operation if each element of the permute is the same, and |
| // if the value doesn't reference the second vector. |
| unsigned ElementBase = N->getMaskElt(0); |
| |
| // FIXME: Handle UNDEF elements too! |
| if (ElementBase >= 16) |
| return false; |
| |
| // Check that the indices are consecutive, in the case of a multi-byte element |
| // splatted with a v16i8 mask. |
| for (unsigned i = 1; i != EltSize; ++i) |
| if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) |
| return false; |
| |
| for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { |
| if (N->getMaskElt(i) < 0) continue; |
| for (unsigned j = 0; j != EltSize; ++j) |
| if (N->getMaskElt(i+j) != N->getMaskElt(j)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// Check that the mask is shuffling N byte elements. Within each N byte |
| /// element of the mask, the indices could be either in increasing or |
| /// decreasing order as long as they are consecutive. |
| /// \param[in] N the shuffle vector SD Node to analyze |
| /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ |
| /// Word/DoubleWord/QuadWord). |
| /// \param[in] StepLen the step between consecutive indices within an element: |
| /// 1 if the mask is increasing, -1 if it is decreasing. |
| /// \return true iff the mask is shuffling N byte elements. |
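| /// For example, with Width == 4 and StepLen == 1 the mask |
| /// <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11> qualifies: each word's bytes |
| /// are word-aligned and increase consecutively. |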
| static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, |
| int StepLen) { |
| assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && |
| "Unexpected element width."); |
| assert((StepLen == 1 || StepLen == -1) && "Unexpected step length."); |
| |
| unsigned NumOfElem = 16 / Width; |
| unsigned MaskVal[16]; // Width is never greater than 16 |
| for (unsigned i = 0; i < NumOfElem; ++i) { |
| MaskVal[0] = N->getMaskElt(i * Width); |
| if ((StepLen == 1) && (MaskVal[0] % Width)) { |
| return false; |
| } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { |
| return false; |
| } |
| |
| for (unsigned int j = 1; j < Width; ++j) { |
| MaskVal[j] = N->getMaskElt(i * Width + j); |
| if (MaskVal[j] != MaskVal[j-1] + StepLen) { |
| return false; |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
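| /// Check whether the shuffle node \p N can be lowered to a single XXINSERTW: |
| /// one word taken from one input is inserted into the other input, whose |
| /// remaining words stay in their original positions. On success, ShiftElts, |
| /// InsertAtByte and Swap are set for use by the lowering code. |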
| bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, |
| unsigned &InsertAtByte, bool &Swap, bool IsLE) { |
| if (!isNByteElemShuffleMask(N, 4, 1)) |
| return false; |
| |
| // Now we look at mask elements 0,4,8,12 |
| unsigned M0 = N->getMaskElt(0) / 4; |
| unsigned M1 = N->getMaskElt(4) / 4; |
| unsigned M2 = N->getMaskElt(8) / 4; |
| unsigned M3 = N->getMaskElt(12) / 4; |
| unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; |
| unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; |
| |
| // Below, let H and L be arbitrary elements of the shuffle mask |
| // where H is in the range [4,7] and L is in the range [0,3]. |
| // H, 1, 2, 3 or L, 5, 6, 7 |
| if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || |
| (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { |
| ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; |
| InsertAtByte = IsLE ? 12 : 0; |
| Swap = M0 < 4; |
| return true; |
| } |
| // 0, H, 2, 3 or 4, L, 6, 7 |
| if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || |
| (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { |
| ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; |
| InsertAtByte = IsLE ? 8 : 4; |
| Swap = M1 < 4; |
| return true; |
| } |
| // 0, 1, H, 3 or 4, 5, L, 7 |
| if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || |
| (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { |
| ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; |
| InsertAtByte = IsLE ? 4 : 8; |
| Swap = M2 < 4; |
| return true; |
| } |
| // 0, 1, 2, H or 4, 5, 6, L |
| if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || |
| (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { |
| ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; |
| InsertAtByte = IsLE ? 0 : 12; |
| Swap = M3 < 4; |
| return true; |
| } |
| |
| // If both vector operands for the shuffle are the same vector, the mask will |
| // contain only elements from the first one and the second one will be undef. |
| if (N->getOperand(1).isUndef()) { |
| ShiftElts = 0; |
| Swap = true; |
| unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; |
| if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { |
| InsertAtByte = IsLE ? 12 : 0; |
| return true; |
| } |
| if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { |
| InsertAtByte = IsLE ? 8 : 4; |
| return true; |
| } |
| if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { |
| InsertAtByte = IsLE ? 4 : 8; |
| return true; |
| } |
| if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { |
| InsertAtByte = IsLE ? 0 : 12; |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
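| /// Check whether the mask can be implemented by a single XXSLDWI, setting |
| /// ShiftElts and Swap on success. For example, a unary shuffle whose |
| /// word-level mask is <1,2,3,0> gives Swap = false and ShiftElts = 1 on |
| /// big-endian targets, or (4 - 1) % 4 = 3 on little-endian targets. |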
| bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, |
| bool &Swap, bool IsLE) { |
| assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
| // Ensure each byte index of the word is consecutive. |
| if (!isNByteElemShuffleMask(N, 4, 1)) |
| return false; |
| |
| // Now we look at mask elements 0,4,8,12, which are the beginning of words. |
| unsigned M0 = N->getMaskElt(0) / 4; |
| unsigned M1 = N->getMaskElt(4) / 4; |
| unsigned M2 = N->getMaskElt(8) / 4; |
| unsigned M3 = N->getMaskElt(12) / 4; |
| |
| // If both vector operands for the shuffle are the same vector, the mask will |
| // contain only elements from the first one and the second one will be undef. |
| if (N->getOperand(1).isUndef()) { |
| assert(M0 < 4 && "Indexing into an undef vector?"); |
| if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) |
| return false; |
| |
| ShiftElts = IsLE ? (4 - M0) % 4 : M0; |
| Swap = false; |
| return true; |
| } |
| |
| // Ensure each word index of the ShuffleVector Mask is consecutive. |
| if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) |
| return false; |
| |
| if (IsLE) { |
| if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { |
| // Input vectors don't need to be swapped if the leading element |
| // of the result is one of the 3 left elements of the second vector |
| // (or if there is no shift to be done at all). |
| Swap = false; |
| ShiftElts = (8 - M0) % 8; |
| } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { |
| // Input vectors need to be swapped if the leading element |
| // of the result is one of the 3 left elements of the first vector |
| // (or if we're shifting by 4 - thereby simply swapping the vectors). |
| Swap = true; |
| ShiftElts = (4 - M0) % 4; |
| } |
| |
| return true; |
| } else { // BE |
| if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { |
| // Input vectors don't need to be swapped if the leading element |
| // of the result is one of the 4 elements of the first vector. |
| Swap = false; |
| ShiftElts = M0; |
| } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { |
| // Input vectors need to be swapped if the leading element |
| // of the result is one of the 4 elements of the right vector. |
| Swap = true; |
| ShiftElts = M0 - 4; |
| } |
| |
| return true; |
| } |
| } |
| |
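| /// Helper for the XXBR[HWDQ] mask checks below: the mask must reverse the |
| /// bytes within each Width-byte element. For example, with Width == 4 the |
| /// only accepted mask is <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>. |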
| bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { |
| assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
| |
| if (!isNByteElemShuffleMask(N, Width, -1)) |
| return false; |
| |
| for (int i = 0; i < 16; i += Width) |
| if (N->getMaskElt(i) != i + Width - 1) |
| return false; |
| |
| return true; |
| } |
| |
| bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { |
| return isXXBRShuffleMaskHelper(N, 2); |
| } |
| |
| bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { |
| return isXXBRShuffleMaskHelper(N, 4); |
| } |
| |
| bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { |
| return isXXBRShuffleMaskHelper(N, 8); |
| } |
| |
| bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { |
| return isXXBRShuffleMaskHelper(N, 16); |
| } |
| |
| /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap |
| /// if the inputs to the instruction should be swapped and set \p DM to the |
| /// value for the immediate. |
| /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI |
| /// AND element 0 of the result comes from the first input (LE) or second input |
| /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. |
| /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle |
| /// mask. |
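| /// For example, on a big-endian target a mask taking bytes 0-7 from the |
| /// first input and bytes 24-31 from the second (M0 = 0, M1 = 3 below) yields |
| /// Swap = false and DM = 1. |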
| bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, |
| bool &Swap, bool IsLE) { |
| assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
| |
| // Ensure each byte index of the double word is consecutive. |
| if (!isNByteElemShuffleMask(N, 8, 1)) |
| return false; |
| |
| unsigned M0 = N->getMaskElt(0) / 8; |
| unsigned M1 = N->getMaskElt(8) / 8; |
| assert(((M0 | M1) < 4) && "A mask element out of bounds?"); |
| |
| // If both vector operands for the shuffle are the same vector, the mask will |
| // contain only elements from the first one and the second one will be undef. |
| if (N->getOperand(1).isUndef()) { |
| if ((M0 | M1) < 2) { |
| DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); |
| Swap = false; |
| return true; |
| } else |
| return false; |
| } |
| |
| if (IsLE) { |
| if (M0 > 1 && M1 < 2) { |
| Swap = false; |
| } else if (M0 < 2 && M1 > 1) { |
| M0 = (M0 + 2) % 4; |
| M1 = (M1 + 2) % 4; |
| Swap = true; |
| } else |
| return false; |
| |
| // Note: if control flow comes here that means Swap is already set above |
| DM = (((~M1) & 1) << 1) + ((~M0) & 1); |
| return true; |
| } else { // BE |
| if (M0 < 2 && M1 > 1) { |
| Swap = false; |
| } else if (M0 > 1 && M1 < 2) { |
| M0 = (M0 + 2) % 4; |
| M1 = (M1 + 2) % 4; |
| Swap = true; |
| } else |
| return false; |
| |
| // Note: if control flow comes here that means Swap is already set above |
| DM = (M0 << 1) + (M1 & 1); |
| return true; |
| } |
| } |
| |
| |
| /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the |
| /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. |
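| /// For example, with EltSize == 4 and a mask splatting word element 2 (mask |
| /// bytes 8,9,10,11 repeated), this returns 2 on big-endian targets and |
| /// (16/4) - 1 - 2 = 1 on little-endian targets. |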
| unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, |
| SelectionDAG &DAG) { |
| ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
| assert(isSplatShuffleMask(SVOp, EltSize)); |
| if (DAG.getDataLayout().isLittleEndian()) |
| return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); |
| else |
| return SVOp->getMaskElt(0) / EltSize; |
| } |
| |
| /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed |
| /// by using a vspltis[bhw] instruction of the specified element size, return |
| /// the constant being splatted. The ByteSize field indicates the number of |
| /// bytes of each element [124] -> [bhw]. |
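| /// For example, a v8i16 build_vector of eight 5s with ByteSize == 2 yields |
| /// the target constant 5, i.e. the splat can be materialized with vspltish 5. |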
| SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { |
| SDValue OpVal(nullptr, 0); |
| |
| // If ByteSize of the splat is bigger than the element size of the |
| // build_vector, then we have a case where we are checking for a splat where |
| // multiple elements of the buildvector are folded together into a single |
| // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8). |
| unsigned EltSize = 16/N->getNumOperands(); |
| if (EltSize < ByteSize) { |
| unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. |
| SDValue UniquedVals[4]; |
| assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); |
| |
| // See if all of the elements in the buildvector agree across. |
| for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { |
| if (N->getOperand(i).isUndef()) continue; |
| // If the element isn't a constant, bail fully out. |
| if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); |
| |
| if (!UniquedVals[i&(Multiple-1)].getNode()) |
| UniquedVals[i&(Multiple-1)] = N->getOperand(i); |
| else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) |
| return SDValue(); // no match. |
| } |
| |
| // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains |
| // either constant or undef values that are identical for each chunk. See |
| // if these chunks can form into a larger vspltis*. |
| |
| // Check to see if all of the leading entries are either 0 or -1. If |
| // neither, then this won't fit into the immediate field. |
| bool LeadingZero = true; |
| bool LeadingOnes = true; |
| for (unsigned i = 0; i != Multiple-1; ++i) { |
| if (!UniquedVals[i].getNode()) continue; // Must have been undefs. |
| |
| LeadingZero &= isNullConstant(UniquedVals[i]); |
| LeadingOnes &= isAllOnesConstant(UniquedVals[i]); |
| } |
| // Finally, check the least significant entry. |
| if (LeadingZero) { |
| if (!UniquedVals[Multiple-1].getNode()) |
| return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef |
| int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); |
| if (Val < 16) // 0,0,0,4 -> vspltisw(4) |
| return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); |
| } |
| if (LeadingOnes) { |
| if (!UniquedVals[Multiple-1].getNode()) |
| return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef |
| int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); |
| if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) |
| return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); |
| } |
| |
| return SDValue(); |
| } |
| |
| // Check to see if this buildvec has a single non-undef value in its elements. |
| for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { |
| if (N->getOperand(i).isUndef()) continue; |
| if (!OpVal.getNode()) |
| OpVal = N->getOperand(i); |
| else if (OpVal != N->getOperand(i)) |
| return SDValue(); |
| } |
| |
| if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. |
| |
| unsigned ValSizeInBytes = EltSize; |
| uint64_t Value = 0; |
| if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { |
| Value = CN->getZExtValue(); |
| } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { |
| assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); |
| Value = FloatToBits(CN->getValueAPF().convertToFloat()); |
| } |
| |
| // If the splat value is larger than the element value, then we can never do |
| // this splat. The only replicated value that would fit in our immediate |
| // field is zero, and we prefer to use vxor for that. |
| if (ValSizeInBytes < ByteSize) return SDValue(); |
| |
| // If the element value is larger than the splat value, check if it consists |
| // of a repeated bit pattern of size ByteSize. |
| if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) |
| return SDValue(); |
| |
| // Properly sign extend the value. |
| int MaskVal = SignExtend32(Value, ByteSize * 8); |
| |
| // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. |
| if (MaskVal == 0) return SDValue(); |
| |
| // Finally, if this value fits in a 5 bit sext field, return it |
| if (SignExtend32<5>(MaskVal) == MaskVal) |
| return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); |
| return SDValue(); |
| } |
| |
| /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift |
| /// amount, otherwise return -1. |
| int PPC::isQVALIGNIShuffleMask(SDNode *N) { |
| EVT VT = N->getValueType(0); |
| if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) |
| return -1; |
| |
| ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
| |
| // Find the first non-undef value in the shuffle mask. |
| unsigned i; |
| for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) |
| /*search*/; |
| |
| if (i == 4) return -1; // all undef. |
| |
| // Otherwise, check to see if the rest of the elements are consecutively |
| // numbered from this value. |
| unsigned ShiftAmt = SVOp->getMaskElt(i); |
| if (ShiftAmt < i) return -1; |
| ShiftAmt -= i; |
| |
| // Check the rest of the elements to see if they are consecutive. |
| for (++i; i != 4; ++i) |
| if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) |
| return -1; |
| |
| return ShiftAmt; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Addressing Mode Selection |
| //===----------------------------------------------------------------------===// |
| |
| /// isIntS16Immediate - This method tests to see if the node is either a 32-bit |
| /// or 64-bit immediate, and if the value can be accurately represented as a |
| /// sign extension from a 16-bit value. If so, this returns true and sets Imm |
| /// to the immediate value. |
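| /// For example, constants in the range [-32768, 32767] match (and Imm is set |
| /// to the value); anything outside that range, such as 40000, does not. |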
| bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { |
| if (!isa<ConstantSDNode>(N)) |
| return false; |
| |
| Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue(); |
| if (N->getValueType(0) == MVT::i32) |
| return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); |
| else |
| return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); |
| } |
| bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { |
| return isIntS16Immediate(Op.getNode(), Imm); |
| } |
| |
| /// SelectAddressRegReg - Given the specified address, check to see if it |
| /// can be represented as an indexed [r+r] operation. Returns false if it |
| /// can be more efficiently represented with [r+imm]. |
| bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, |
| SDValue &Index, |
| SelectionDAG &DAG) const { |
| int16_t imm = 0; |
| if (N.getOpcode() == ISD::ADD) { |
| if (isIntS16Immediate(N.getOperand(1), imm)) |
| return false; // r+i |
| if (N.getOperand(1).getOpcode() == PPCISD::Lo) |
| return false; // r+i |
| |
| Base = N.getOperand(0); |
| Index = N.getOperand(1); |
| return true; |
| } else if (N.getOpcode() == ISD::OR) { |
| if (isIntS16Immediate(N.getOperand(1), imm)) |
| return false; // r+i -- let [r+imm] selection fold it. |
| |
| // If this is an or of disjoint bitfields, we can codegen this as an add |
| // (for better address arithmetic) if the LHS and RHS of the OR are provably |
| // disjoint. |
| KnownBits LHSKnown, RHSKnown; |
| DAG.computeKnownBits(N.getOperand(0), LHSKnown); |
| |
| if (LHSKnown.Zero.getBoolValue()) { |
| DAG.computeKnownBits(N.getOperand(1), RHSKnown); |
| // If all of the bits are known zero on the LHS or RHS, the add won't |
| // carry. |
| if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { |
| Base = N.getOperand(0); |
| Index = N.getOperand(1); |
| return true; |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
| // If we happen to be doing an i64 load or store into a stack slot that has |
| // less than a 4-byte alignment, then the frame-index elimination may need to |
| // use an indexed load or store instruction (because the offset may not be a |
| // multiple of 4). The extra register needed to hold the offset comes from the |
| // register scavenger, and it is possible that the scavenger will need to use |
| // an emergency spill slot. As a result, we need to make sure that a spill slot |
| // is allocated when doing an i64 load/store into a less-than-4-byte-aligned |
| // stack slot. |
| static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { |
| // FIXME: This does not handle the LWA case. |
| if (VT != MVT::i64) |
| return; |
| |
| // NOTE: We'll exclude negative FIs here, which come from argument |
| // lowering, because there are no known test cases triggering this problem |
| // using packed structures (or similar). We can remove this exclusion if |
| // we find such a test case. The reason why this is so test-case driven is |
| // because this entire 'fixup' is only to prevent crashes (from the |
| // register scavenger) on not-really-valid inputs. For example, if we have: |
| // %a = alloca i1 |
| // %b = bitcast i1* %a to i64* |
| // store i64 0, i64* %b |
| // then the store should really be marked as 'align 1', but is not. If it |
| // were marked as 'align 1' then the indexed form would have been |
| // instruction-selected initially, and the problem this 'fixup' is preventing |
| // won't happen regardless. |
| if (FrameIdx < 0) |
| return; |
| |
| MachineFunction &MF = DAG.getMachineFunction(); |
| MachineFrameInfo &MFI = MF.getFrameInfo(); |
| |
| unsigned Align = MFI.getObjectAlignment(FrameIdx); |
| if (Align >= 4) |
| return; |
| |
| PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
| FuncInfo->setHasNonRISpills(); |
| } |
| |
| /// Returns true if the address N can be represented by a base register plus |
| /// a signed 16-bit displacement [r+imm], and if it is not better |
| /// represented as reg+reg. If \p Alignment is non-zero, only accept |
| /// displacements that are multiples of that value. |
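| /// For example, an (add X, 100) address is selected as Base = X, Disp = 100. |
| /// With Alignment == 4 (as required by DS-form memory accesses) an offset |
| /// such as 6 cannot be used as the displacement, and the code below instead |
| /// falls back to Disp = 0 with the entire add as the base. |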
| bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, |
| SDValue &Base, |
| SelectionDAG &DAG, |
| unsigned Alignment) const { |
| // FIXME dl should come from parent load or store, not from address |
| SDLoc dl(N); |
| // If this can be more profitably realized as r+r, fail. |
| if (SelectAddressRegReg(N, Disp, Base, DAG)) |
| return false; |
| |
| if (N.getOpcode() == ISD::ADD) { |
| int16_t imm = 0; |
| if (isIntS16Immediate(N.getOperand(1), imm) && |
| (!Alignment || (imm % Alignment) == 0)) { |
| Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); |
| if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { |
| Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); |
| fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); |
| } else { |
| Base = N.getOperand(0); |
| } |
| return true; // [r+i] |
| } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { |
| // Match LOAD (ADD (X, Lo(G))). |
| assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() |
| && "Cannot handle constant offsets yet!"); |
| Disp = N.getOperand(1).getOperand(0); // The global address. |
| assert(Disp.getOpcode() == ISD::TargetGlobalAddress || |
| Disp.getOpcode() == ISD::TargetGlobalTLSAddress || |
| Disp.getOpcode() == ISD::TargetConstantPool || |
| Disp.getOpcode() == ISD::TargetJumpTable); |
| Base = N.getOperand(0); |
| return true; // [&g+r] |
| } |
| } else if (N.getOpcode() == ISD::OR) { |
| int16_t imm = 0; |
| if (isIntS16Immediate(N.getOperand(1), imm) && |
| (!Alignment || (imm % Alignment) == 0)) { |
| // If this is an or of disjoint bitfields, we can codegen this as an add |
| // (for better address arithmetic) if the LHS and RHS of the OR are |
| // provably disjoint. |
| KnownBits LHSKnown; |
| DAG.computeKnownBits(N.getOperand(0), LHSKnown); |
| |
| if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { |
| // If all of the bits are known zero on the LHS or RHS, the add won't |
| // carry. |
| if (FrameIndexSDNode *FI = |
| dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { |
| Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); |
| fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); |
| } else { |
| Base = N.getOperand(0); |
| } |
| Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); |
| return true; |
| } |
| } |
| } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { |
| // Loading from a constant address. |
| |
| // If this address fits entirely in a 16-bit sext immediate field, codegen |
| // this as "d, 0" |
| int16_t Imm; |
| if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) { |
| Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); |
| Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, |
| CN->getValueType(0)); |
| return true; |
| } |
| |
| // Handle 32-bit sext immediates with LIS + addr mode. |
| if ((CN->getValueType(0) == MVT::i32 || |
| (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && |
| (!Alignment || (CN->getZExtValue() % Alignment) == 0)) { |
| int Addr = (int)CN->getZExtValue(); |
| |
| // Otherwise, break this down into an LIS + disp. |
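| // For example, 0x12348000 becomes Disp = 0x8000 (-32768) and |
| // Base = LIS 0x1235 (0x12350000); 0x12350000 + (-32768) == 0x12348000. |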
| Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); |
| |
| Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, |
| MVT::i32); |
| unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; |
| Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); |
| return true; |
| } |
| } |
| |
| Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); |
| if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { |
| Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); |
| fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); |
| } else |
| Base = N; |
| return true; // [r+0] |
| } |
| |
| /// SelectAddressRegRegOnly - Given the specified address, force it to be |
| /// represented as an indexed [r+r] operation. |
| bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, |
| SDValue &Index, |
| SelectionDAG &DAG) const { |
| // Check to see if we can easily represent this as an [r+r] address. This |
| // will fail if it thinks that the address is more profitably represented as |
| // reg+imm, e.g. where imm = 0. |
| if (SelectAddressRegReg(N, Base, Index, DAG)) |
| return true; |
| |
| // If the address is the result of an add, we will utilize the fact that the |
| // address calculation includes an implicit add. However, we can reduce |
| // register pressure if we do not materialize a constant just for use as the |
| // index register. We only get rid of the add if it is not an add of a |
| // value and a 16-bit signed constant and both have a single use. |
| int16_t imm = 0; |
| if (N.getOpcode() == ISD::ADD && |
| (!isIntS16Immediate(N.getOperand(1), imm) || |
| !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) { |
| Base = N.getOperand(0); |
| Index = N.getOperand(1); |
| return true; |
| } |
| |
| // Otherwise, do it the hard way, using R0 as the base register. |
| Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, |
| N.getValueType()); |
| Index = N; |
| return true; |
| } |
| |
| /// getPreIndexedAddressParts - returns true by value, base pointer and |
| /// offset pointer and addressing mode by reference if the node's address |
| /// can be legally represented as pre-indexed load / store address. |
| bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, |
| SDValue &Offset, |
| ISD::MemIndexedMode &AM, |
| SelectionDAG &DAG) const { |
| if (DisablePPCPreinc) return false; |
| |
| bool isLoad = true; |
| SDValue Ptr; |
| EVT VT; |
| unsigned Alignment; |
| if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { |
| Ptr = LD->getBasePtr(); |
| VT = LD->getMemoryVT(); |
| Alignment = LD->getAlignment(); |
| } else if (StoreSDNode *ST = dyn_cast |