blob: b5bdf47ce37ae00a01413c6ade62aae3c6c3efe4 [file] [log] [blame]
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "ppc-lowering"
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
if (!useSoftFloat()) {
if (hasSPE()) {
addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
} else {
addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
}
}
// Match BITREVERSE to customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc load and store's.
setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
if (!Subtarget.hasSPE()) {
setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
}
// PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
setOperationAction(ISD::ADDC, VT, Legal);
setOperationAction(ISD::ADDE, VT, Legal);
setOperationAction(ISD::SUBC, VT, Legal);
setOperationAction(ISD::SUBE, VT, Legal);
}
if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (isPPC64 || Subtarget.hasFPCVT()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
} else {
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
}
// PowerPC does not support direct load/store of condition registers.
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
// FIXME: Remove this once the ANDI glue bug is fixed:
if (ANDIGlueBug)
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
setTruncStoreAction(VT, MVT::i1, Expand);
}
addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
}
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
// PPC (the libcall is not available).
setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
// PowerPC has no SREM/UREM instructions unless we are on P9
// On P9 we may use a hardware instruction to compute the remainder.
// The instructions are not legalized directly because in the cases where the
// result of both the remainder and the division is required it is more
// efficient to compute the remainder from the result of the division rather
// than use the remainder instruction.
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::SREM, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Custom);
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
} else {
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
}
if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
}
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
if (Subtarget.hasSPE()) {
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
} else {
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FMA , MVT::f32, Legal);
}
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
if (Subtarget.hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
// to speed up scalar BSWAP64.
// CTPOP or CTTZ were introduced in P8/P9 respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
}
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
if (!Subtarget.useCRBits()) {
// PowerPC does not have Select
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Expand);
}
// PowerPC wants to turn select_cc of FP into fsel when possible.
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
if (!Subtarget.useCRBits())
setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
if (!Subtarget.useCRBits())
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
if (Subtarget.hasSPE()) {
// SPE has built-in conversions
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
} else {
// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
// PowerPC does not have [U|S]INT_TO_FP
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
}
if (Subtarget.hasDirectMove() && isPPC64) {
setOperationAction(ISD::BITCAST, MVT::f32, Legal);
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
}
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, and etc.. As a result, no
// other SjLj exception interfaces are implemented and please don't build
// your own exception handling based on them.
// LLVM/Clang supports zero-cost DWARF exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
// TRAP is legal.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// TRAMPOLINE is custom lowered.
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
if (Subtarget.isSVR4ABI()) {
if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i8, Promote);
AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i16, Promote);
AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i32, Promote);
AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
} else {
// VAARG is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::i64, Custom);
}
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
if (Subtarget.isSVR4ABI() && !isPPC64)
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
// Use the default implementation.
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Comparisons that require checking two conditions.
if (Subtarget.hasSPE()) {
setCondCodeAction(ISD::SETO, MVT::f32, Expand);
setCondCodeAction(ISD::SETO, MVT::f64, Expand);
setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
}
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
// This is just the low 32 bits of a (signed) fp->i64 conversion.
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
if (Subtarget.hasSPE())
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
else
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
// With the instructions enabled under FPCVT, we can do everything.
if (Subtarget.hasFPCVT()) {
if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
}
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
if (Subtarget.use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// 64-bit PowerPC wants to expand i128 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
} else {
// 32-bit PowerPC wants to expand i64 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
setOperationAction(ISD::CTPOP, VT, Legal);
setOperationAction(ISD::CTLZ, VT, Legal);
}
else {
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
}
// Vector instructions introduced in P9
if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
setOperationAction(ISD::CTTZ, VT, Legal);
else
setOperationAction(ISD::CTTZ, VT, Expand);
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
// We promote all non-typed operations to v4i32.
setOperationAction(ISD::AND , VT, Promote);
AddPromotedToType (ISD::AND , VT, MVT::v4i32);
setOperationAction(ISD::OR , VT, Promote);
AddPromotedToType (ISD::OR , VT, MVT::v4i32);
setOperationAction(ISD::XOR , VT, Promote);
AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
setOperationAction(ISD::LOAD , VT, Promote);
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
// No other operations are legal.
setOperationAction(ISD::MUL , VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
setOperationAction(ISD::AND , MVT::v4i32, Legal);
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
setOperationAction(ISD::SELECT, MVT::v4i32,
Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
if (Subtarget.hasP8Altivec())
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
if (Subtarget.hasP8Vector()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
}
if (Subtarget.hasDirectMove() && isPPC64) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::STORE, MVT::v2f64, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
if (Subtarget.hasP8Vector())
addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
if (Subtarget.hasP8Altivec()) {
setOperationAction(ISD::SHL, MVT::v2i64, Legal);
setOperationAction(ISD::SRA, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v2i64, Legal);
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
// doing
setOperationAction(ISD::SHL, MVT::v1i128, Expand);
setOperationAction(ISD::SRL, MVT::v1i128, Expand);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
}
else {
setOperationAction(ISD::SHL, MVT::v2i64, Expand);
setOperationAction(ISD::SRA, MVT::v2i64, Expand);
setOperationAction(ISD::SRL, MVT::v2i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
// VSX v2i64 only supports non-arithmetic operations.
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SUB, MVT::v2i64, Expand);
}
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
setOperationAction(ISD::STORE, MVT::v2i64, Promote);
AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
// Vector operation legalization checks the result type of
// SIGN_EXTEND_INREG, overall legalization checks the inner type.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
if (Subtarget.hasDirectMove())
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
if (Subtarget.hasP8Altivec()) {
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
}
if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O.
setOperationAction(ISD::SHL, MVT::v1i128, Legal);
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
if (EnableQuadPrecision) {
addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
setOperationAction(ISD::FADD, MVT::f128, Legal);
setOperationAction(ISD::FSUB, MVT::f128, Legal);
setOperationAction(ISD::FDIV, MVT::f128, Legal);
setOperationAction(ISD::FMUL, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
// No extending loads to f128 on PPC.
for (MVT FPT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
setOperationAction(ISD::FMA, MVT::f128, Legal);
setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
setOperationAction(ISD::FRINT, MVT::f128, Legal);
setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
setOperationAction(ISD::FCEIL, MVT::f128, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
setOperationAction(ISD::FROUND, MVT::f128, Legal);
setOperationAction(ISD::SELECT, MVT::f128, Expand);
setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i128, Custom);
// No implementation for these ops for PowerPC.
setOperationAction(ISD::FSIN , MVT::f128, Expand);
setOperationAction(ISD::FCOS , MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FPOWI, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
}
}
if (Subtarget.hasP9Altivec()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
}
}
if (Subtarget.hasQPX()) {
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::FREM, MVT::v4f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
setOperationAction(ISD::STORE , MVT::v4f64, Custom);
setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
if (!Subtarget.useCRBits())
setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
setOperationAction(ISD::FABS , MVT::v4f64, Legal);
setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::FREM, MVT::v4f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
setOperationAction(ISD::STORE , MVT::v4f32, Custom);
if (!Subtarget.useCRBits())
setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
setOperationAction(ISD::FABS , MVT::v4f32, Legal);
setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
setOperationAction(ISD::AND , MVT::v4i1, Legal);
setOperationAction(ISD::OR , MVT::v4i1, Legal);
setOperationAction(ISD::XOR , MVT::v4i1, Legal);
if (!Subtarget.useCRBits())
setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
setOperationAction(ISD::STORE , MVT::v4i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
// These need to set FE_INEXACT, and so cannot be vectorized here.
setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
if (TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
} else {
setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
}
}
if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
if (!isPPC64) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
setBooleanContents(ZeroOrOneBooleanContent);
if (Subtarget.hasAltivec()) {
// Altivec instructions set fields to all zeros or all ones.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
}
if (!isPPC64) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
if (Subtarget.useCRBits())
setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::SELECT_CC);
}
// Use reciprocal estimates.
if (TM.Options.UnsafeFPMath) {
setTargetDAGCombine(ISD::FDIV);
setTargetDAGCombine(ISD::FSQRT);
}
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
}
if (EnableQuadPrecision) {
setLibcallName(RTLIB::LOG_F128, "logf128");
setLibcallName(RTLIB::LOG2_F128, "log2f128");
setLibcallName(RTLIB::LOG10_F128, "log10f128");
setLibcallName(RTLIB::EXP_F128, "expf128");
setLibcallName(RTLIB::EXP2_F128, "exp2f128");
setLibcallName(RTLIB::SIN_F128, "sinf128");
setLibcallName(RTLIB::COS_F128, "cosf128");
setLibcallName(RTLIB::POW_F128, "powf128");
setLibcallName(RTLIB::FMIN_F128, "fminf128");
setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
setLibcallName(RTLIB::POWI_F128, "__powikf2");
setLibcallName(RTLIB::REM_F128, "fmodf128");
}
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
if (Subtarget.useCRBits()) {
setHasMultipleConditionRegisters();
setJumpIsExpensive();
}
setMinFunctionAlignment(2);
if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
switch (Subtarget.getDarwinDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_A2:
case PPC::DIR_E500:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
case PPC::DIR_PWR4:
case PPC::DIR_PWR5:
case PPC::DIR_PWR5X:
case PPC::DIR_PWR6:
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
setPrefFunctionAlignment(4);
setPrefLoopAlignment(4);
break;
}
if (Subtarget.enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties(STI.getRegisterInfo());
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
} else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
// The A2 also benefits from (very) aggressive inlining of memcpy and
// friends. The overhead of a the function call, even when warm, can be
// over one hundred cycles.
MaxStoresPerMemset = 128;
MaxStoresPerMemcpy = 128;
MaxStoresPerMemmove = 128;
MaxLoadsPerMemcmp = 128;
} else {
MaxLoadsPerMemcmp = 8;
MaxLoadsPerMemcmpOptSize = 4;
}
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
unsigned MaxMaxAlign) {
if (MaxAlign == MaxMaxAlign)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
MaxAlign = 32;
else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
MaxAlign = 16;
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
unsigned EltAlign = 0;
getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == MaxMaxAlign)
break;
}
}
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
// Darwin passes everything on 4 byte boundary.
if (Subtarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
unsigned Align = Subtarget.isPPC64() ? 8 : 4;
if (Subtarget.hasAltivec() || Subtarget.hasQPX())
getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
return Align;
}
unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv:: ID CC,
EVT VT) const {
if (Subtarget.hasSPE() && VT == MVT::f64)
return 2;
return PPCTargetLowering::getNumRegisters(Context, VT);
}
MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv:: ID CC,
EVT VT) const {
if (Subtarget.hasSPE() && VT == MVT::f64)
return MVT::i32;
return PPCTargetLowering::getRegisterType(Context, VT);
}
bool PPCTargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
bool PPCTargetLowering::hasSPE() const {
return Subtarget.hasSPE();
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
case PPCISD::FP_TO_UINT_IN_VSR:
return "PPCISD::FP_TO_UINT_IN_VSR,";
case PPCISD::FP_TO_SINT_IN_VSR:
return "PPCISD::FP_TO_SINT_IN_VSR";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::MFVSR: return "PPCISD::MFVSR";
case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
case PPCISD::STXSIX: return "PPCISD::STXSIX";
case PPCISD::VEXTS: return "PPCISD::VEXTS";
case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::ST_VSR_SCAL_INT:
return "PPCISD::ST_VSR_SCAL_INT";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
case PPCISD::MFFS: return "PPCISD::MFFS";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
}
return nullptr;
}
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
EVT VT) const {
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
if (Subtarget.hasQPX())
return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
return VT.changeVectorElementTypeToInteger();
}
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
return true;
}
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
return CFP->getValueAPF().isZero();
else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
// Maybe this has already been legalized into the constant pool?
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isZero();
}
return false;
}
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 1;
for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
return false;
}
return true;
}
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 2;
for (unsigned i = 0; i != 8; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
return false;
}
return true;
}
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
const PPCSubtarget& Subtarget =
static_cast<const PPCSubtarget&>(DAG.getSubtarget());
if (!Subtarget.hasP8Vector())
return false;
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 4;
for (unsigned i = 0; i != 8; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
!isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
return false;
}
return true;
}
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
if (N->getValueType(0) != MVT::v16i8)
return false;
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
LHSStart+j+i*UnitSize) ||
!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
RHSStart+j+i*UnitSize))
return false;
}
return true;
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 0, 16);
else
return false;
} else {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 8, 24);
else
return false;
}
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 8, 24);
else
return false;
} else {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 0, 16);
else
return false;
}
}
/**
* Common function used to match vmrgew and vmrgow shuffles
*
* The indexOffset determines whether to look for even or odd words in
* the shuffle mask. This is based on the of the endianness of the target
* machine.
* - Little Endian:
* - Use offset of 0 to check for odd elements
* - Use offset of 4 to check for even elements
* - Big Endian:
* - Use offset of 0 to check for even elements
* - Use offset of 4 to check for odd elements
* A detailed description of the vector element ordering for little endian and
* big endian can be found at
* http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
* Targeting your applications - what little endian and big endian IBM XL C/C++
* compiler differences mean to you
*
* The mask to the shuffle vector instruction specifies the indices of the
* elements from the two input vectors to place in the result. The elements are
* numbered in array-access order, starting with the first vector. These vectors
* are always of type v16i8, thus each vector will contain 16 elements of size
* 8. More info on the shuffle vector can be found in the
* http://llvm.org/docs/LangRef.html#shufflevector-instruction
* Language Reference.
*
* The RHSStartValue indicates whether the same input vectors are used (unary)
* or two different input vectors are used, based on the following:
* - If the instruction uses the same vector for both inputs, the range of the
* indices will be 0 to 15. In this case, the RHSStart value passed should
* be 0.
* - If the instruction has two different vectors then the range of the
* indices will be 0 to 31. In this case, the RHSStart value passed should
* be 16 (indices 0-15 specify elements in the first vector while indices 16
* to 31 specify elements in the second vector).
*
* \param[in] N The shuffle vector SD Node to analyze
* \param[in] IndexOffset Specifies whether to look for even or odd elements
* \param[in] RHSStartValue Specifies the starting index for the righthand input
* vector to the shuffle_vector instruction
* \return true iff this shuffle vector represents an even or odd word merge
*/
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
unsigned RHSStartValue) {
if (N->getValueType(0) != MVT::v16i8)
return false;
for (unsigned i = 0; i < 2; ++i)
for (unsigned j = 0; j < 4; ++j)
if (!isConstantOrUndef(N->getMaskElt(i*4+j),
i*RHSStartValue+j+IndexOffset) ||
!isConstantOrUndef(N->getMaskElt(i*4+j+8),
i*RHSStartValue+j+IndexOffset+8))
return false;
return true;
}
/**
* Determine if the specified shuffle mask is suitable for the vmrgew or
* vmrgow instructions.
*
* \param[in] N The shuffle vector SD Node to analyze
* \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
* \param[in] ShuffleKind Identify the type of merge:
* - 0 = big-endian merge with two different inputs;
* - 1 = either-endian merge with two identical inputs;
* - 2 = little-endian merge with two different inputs (inputs are swapped for
* little-endian merges).
* \param[in] DAG The current SelectionDAG
* \return true iff this shuffle mask
*/
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
unsigned indexOffset = CheckEven ? 4 : 0;
if (ShuffleKind == 1) // Unary
return isVMerge(N, indexOffset, 0);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, indexOffset, 16);
else
return false;
}
else {
unsigned indexOffset = CheckEven ? 0 : 4;
if (ShuffleKind == 1) // Unary
return isVMerge(N, indexOffset, 0);
else if (ShuffleKind == 0) // Normal
return isVMerge(N, indexOffset, 16);
else
return false;
}
return false;
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
if (N->getValueType(0) != MVT::v16i8)
return -1;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
// Find the first non-undef value in the shuffle mask.
unsigned i;
for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
/*search*/;
if (i == 16) return -1; // all undef.
// Otherwise, check to see if the rest of the elements are consecutively
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
bool isLE = DAG.getDataLayout().isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
} else if (ShuffleKind == 1) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
return -1;
} else
return -1;
if (isLE)
ShiftAmt = 16 - ShiftAmt;
return ShiftAmt;
}
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 &&
(EltSize == 1 || EltSize == 2 || EltSize == 4));
// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.
if (N->getMaskElt(0) % EltSize != 0)
return false;
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
unsigned ElementBase = N->getMaskElt(0);
// FIXME: Handle UNDEF elements too!
if (ElementBase >= 16)
return false;
// Check that the indices are consecutive, in the case of a multi-byte element
// splatted with a v16i8 mask.
for (unsigned i = 1; i != EltSize; ++i)
if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
return false;
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
if (N->getMaskElt(i) < 0) continue;
for (unsigned j = 0; j != EltSize; ++j)
if (N->getMaskElt(i+j) != N->getMaskElt(j))
return false;
}
return true;
}
/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
int StepLen) {
assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.");
assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
unsigned NumOfElem = 16 / Width;
unsigned MaskVal[16]; // Width is never greater than 16
for (unsigned i = 0; i < NumOfElem; ++i) {
MaskVal[0] = N->getMaskElt(i * Width);
if ((StepLen == 1) && (MaskVal[0] % Width)) {
return false;
} else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
return false;
}
for (unsigned int j = 1; j < Width; ++j) {
MaskVal[j] = N->getMaskElt(i * Width + j);
if (MaskVal[j] != MaskVal[j-1] + StepLen) {
return false;
}
}
}
return true;
}
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
if (!isNByteElemShuffleMask(N, 4, 1))
return false;
// Now we look at mask elements 0,4,8,12
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
unsigned M2 = N->getMaskElt(8) / 4;
unsigned M3 = N->getMaskElt(12) / 4;
unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
// Below, let H and L be arbitrary elements of the shuffle mask
// where H is in the range [4,7] and L is in the range [0,3].
// H, 1, 2, 3 or L, 5, 6, 7
if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
(M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
InsertAtByte = IsLE ? 12 : 0;
Swap = M0 < 4;
return true;
}
// 0, H, 2, 3 or 4, L, 6, 7
if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
(M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
InsertAtByte = IsLE ? 8 : 4;
Swap = M1 < 4;
return true;
}
// 0, 1, H, 3 or 4, 5, L, 7
if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
(M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
InsertAtByte = IsLE ? 4 : 8;
Swap = M2 < 4;
return true;
}
// 0, 1, 2, H or 4, 5, 6, L
if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
(M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
InsertAtByte = IsLE ? 0 : 12;
Swap = M3 < 4;
return true;
}
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
ShiftElts = 0;
Swap = true;
unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
InsertAtByte = IsLE ? 12 : 0;
return true;
}
if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
InsertAtByte = IsLE ? 8 : 4;
return true;
}
if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
InsertAtByte = IsLE ? 4 : 8;
return true;
}
if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
InsertAtByte = IsLE ? 0 : 12;
return true;
}
}
return false;
}
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the word is consecutive.
if (!isNByteElemShuffleMask(N, 4, 1))
return false;
// Now we look at mask elements 0,4,8,12, which are the beginning of words.
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
unsigned M2 = N->getMaskElt(8) / 4;
unsigned M3 = N->getMaskElt(12) / 4;
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
assert(M0 < 4 && "Indexing into an undef vector?");
if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
return false;
ShiftElts = IsLE ? (4 - M0) % 4 : M0;
Swap = false;
return true;
}
// Ensure each word index of the ShuffleVector Mask is consecutive.
if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
return false;
if (IsLE) {
if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
// Input vectors don't need to be swapped if the leading element
// of the result is one of the 3 left elements of the second vector
// (or if there is no shift to be done at all).
Swap = false;
ShiftElts = (8 - M0) % 8;
} else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
// Input vectors need to be swapped if the leading element
// of the result is one of the 3 left elements of the first vector
// (or if we're shifting by 4 - thereby simply swapping the vectors).
Swap = true;
ShiftElts = (4 - M0) % 4;
}
return true;
} else { // BE
if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
// Input vectors don't need to be swapped if the leading element
// of the result is one of the 4 elements of the first vector.
Swap = false;
ShiftElts = M0;
} else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
// Input vectors need to be swapped if the leading element
// of the result is one of the 4 elements of the right vector.
Swap = true;
ShiftElts = M0 - 4;
}
return true;
}
}
bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
if (!isNByteElemShuffleMask(N, Width, -1))
return false;
for (int i = 0; i < 16; i += Width)
if (N->getMaskElt(i) != i + Width - 1)
return false;
return true;
}
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 2);
}
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 4);
}
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 8);
}
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 16);
}
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the double word is consecutive.
if (!isNByteElemShuffleMask(N, 8, 1))
return false;
unsigned M0 = N->getMaskElt(0) / 8;
unsigned M1 = N->getMaskElt(8) / 8;
assert(((M0 | M1) < 4) && "A mask element out of bounds?");
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
if ((M0 | M1) < 2) {
DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
Swap = false;
return true;
} else
return false;
}
if (IsLE) {
if (M0 > 1 && M1 < 2) {
Swap = false;
} else if (M0 < 2 && M1 > 1) {
M0 = (M0 + 2) % 4;
M1 = (M1 + 2) % 4;
Swap = true;
} else
return false;
// Note: if control flow comes here that means Swap is already set above
DM = (((~M1) & 1) << 1) + ((~M0) & 1);
return true;
} else { // BE
if (M0 < 2 && M1 > 1) {
Swap = false;
} else if (M0 > 1 && M1 < 2) {
M0 = (M0 + 2) % 4;
M1 = (M1 + 2) % 4;
Swap = true;
} else
return false;
// Note: if control flow comes here that means Swap is already set above
DM = (M0 << 1) + (M1 & 1);
return true;
}
}
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
if (DAG.getDataLayout().isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
}
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
SDValue OpVal(nullptr, 0);
// If ByteSize of the splat is bigger than the element size of the
// build_vector, then we have a case where we are checking for a splat where
// multiple elements of the buildvector are folded together into a single
// logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
unsigned EltSize = 16/N->getNumOperands();
if (EltSize < ByteSize) {
unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
SDValue UniquedVals[4];
assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
// See if all of the elements in the buildvector agree across.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).isUndef()) continue;
// If the element isn't a constant, bail fully out.
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
return SDValue(); // no match.
}
// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
// either constant or undef values that are identical for each chunk. See
// if these chunks can form into a larger vspltis*.
// Check to see if all of the leading entries are either 0 or -1. If
// neither, then this won't fit into the immediate field.
bool LeadingZero = true;
bool LeadingOnes = true;
for (unsigned i = 0; i != Multiple-1; ++i) {
if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
LeadingZero &= isNullConstant(UniquedVals[i]);
LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
}
// Finally, check the least significant entry.
if (LeadingZero) {
if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
if (Val < 16) // 0,0,0,4 -> vspltisw(4)
return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
}
if (LeadingOnes) {
if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
}
return SDValue();
}
// Check to see if this buildvec has a single non-undef value in its elements.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).isUndef()) continue;
if (!OpVal.getNode())
OpVal = N->getOperand(i);
else if (OpVal != N->getOperand(i))
return SDValue();
}
if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
unsigned ValSizeInBytes = EltSize;
uint64_t Value = 0;
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
Value = CN->getZExtValue();
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
Value = FloatToBits(CN->getValueAPF().convertToFloat());
}
// If the splat value is larger than the element value, then we can never do
// this splat. The only case that we could fit the replicated bits into our
// immediate field for would be zero, and we prefer to use vxor for it.
if (ValSizeInBytes < ByteSize) return SDValue();
// If the element value is larger than the splat value, check if it consists
// of a repeated bit pattern of size ByteSize.
if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
return SDValue();
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
if (MaskVal == 0) return SDValue();
// Finally, if this value fits in a 5 bit sext field, return it
if (SignExtend32<5>(MaskVal) == MaskVal)
return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
return SDValue();
}
/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
EVT VT = N->getValueType(0);
if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
return -1;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
// Find the first non-undef value in the shuffle mask.
unsigned i;
for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
/*search*/;
if (i == 4) return -1; // all undef.
// Otherwise, check to see if the rest of the elements are consecutively
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 4; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
return ShiftAmt;
}
//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
if (!isa<ConstantSDNode>(N))
return false;
Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
if (N->getValueType(0) == MVT::i32)
return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
else
return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
int16_t imm = 0;
if (N.getOpcode() == ISD::ADD) {
if (isIntS16Immediate(N.getOperand(1), imm))
return false; // r+i
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
return false; // r+i
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
} else if (N.getOpcode() == ISD::OR) {
if (isIntS16Immediate(N.getOperand(1), imm))
return false; // r+i can fold it if we can.
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
KnownBits LHSKnown, RHSKnown;
DAG.computeKnownBits(N.getOperand(0), LHSKnown);
if (LHSKnown.Zero.getBoolValue()) {
DAG.computeKnownBits(N.getOperand(1), RHSKnown);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
}
}
}
return false;
}
// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
// FIXME: This does not handle the LWA case.
if (VT != MVT::i64)
return;
// NOTE: We'll exclude negative FIs here, which come from argument
// lowering, because there are no known test cases triggering this problem
// using packed structures (or similar). We can remove this exclusion if
// we find such a test case. The reason why this is so test-case driven is
// because this entire 'fixup' is only to prevent crashes (from the
// register scavenger) on not-really-valid inputs. For example, if we have:
// %a = alloca i1
// %b = bitcast i1* %a to i64*
// store i64* a, i64 b
// then the store should really be marked as 'align 1', but is not. If it
// were marked as 'align 1' then the indexed form would have been
// instruction-selected initially, and the problem this 'fixup' is preventing
// won't happen regardless.
if (FrameIdx < 0)
return;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FrameIdx);
if (Align >= 4)
return;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasNonRISpills();
}
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If \p Alignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG,
unsigned Alignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
if (SelectAddressRegReg(N, Disp, Base, DAG))
return false;
if (N.getOpcode() == ISD::ADD) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Alignment || (imm % Alignment) == 0)) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
Disp.getOpcode() == ISD::TargetConstantPool ||
Disp.getOpcode() == ISD::TargetJumpTable);
Base = N.getOperand(0);
return true; // [&g+r]
}
} else if (N.getOpcode() == ISD::OR) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Alignment || (imm % Alignment) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
KnownBits LHSKnown;
DAG.computeKnownBits(N.getOperand(0), LHSKnown);
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
return true;
}
}
} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
// Loading from a constant address.
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
int16_t Imm;
if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
(!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
MVT::i32);
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
return true;
}
}
Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else
Base = N;
return true; // [r+0]
}
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
// Check to see if we can easily represent this as an [r+r] address. This
// will fail if it thinks that the address is more profitably represented as
// reg+imm, e.g. where imm = 0.
if (SelectAddressRegReg(N, Base, Index, DAG))
return true;
// If the address is the result of an add, we will utilize the fact that the
// address calculation includes an implicit add. However, we can reduce
// register pressure if we do not materialize a constant just for use as the
// index register. We only get rid of the add if it is not an add of a
// value and a 16-bit signed constant and both have a single use.
int16_t imm = 0;
if (N.getOpcode() == ISD::ADD &&
(!isIntS16Immediate(N.getOperand(1), imm) ||
!N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
}
// Otherwise, do it the hard way, using R0 as the base register.
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
bool isLoad = true;
SDValue Ptr;
EVT VT;
unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast