// src/third_party/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h - cobalt - Git at Google

 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

 #include "AMDGPU.h"
 #include "AMDKernelCodeT.h"
 #include "SIDefines.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/AMDHSAKernelDescriptor.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstdint>
 #include <string>
 #include <utility>

 namespace llvm {

 class Argument;
 class FeatureBitset;
 class Function;
 class GlobalValue;
 class MCContext;
 class MCRegisterClass;
 class MCRegisterInfo;
 class MCSection;
 class MCSubtargetInfo;
 class MachineMemOperand;
 class Triple;

 namespace AMDGPU {

 #define GET_MIMGBaseOpcode_DECL
 #define GET_MIMGDim_DECL
 #define GET_MIMGEncoding_DECL
 #define GET_MIMGLZMapping_DECL
 #include "AMDGPUGenSearchableTables.inc"

 namespace IsaInfo {

 enum {
   // NOTE(review): presumably the number of SGPRs set aside for the trap
   // handler -- confirm against the users of this constant.
   TRAP_NUM_SGPRS = 16,

   // The closed Vulkan driver sets 96, which limits the wave count to 8 but
   // doesn't spill SGPRs as much as when 80 is set.
   FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
 };

 /// Instruction set architecture version, split into its Major, Minor and
 /// Stepping components. Returned by getIsaVersion() and taken by the waitcnt
 /// bit-mask / encode / decode helpers declared later in this header.
 struct IsaVersion {
   unsigned Major;
   unsigned Minor;
   unsigned Stepping;
 };

 /// \returns Isa version for given subtarget \p Features.
 IsaVersion getIsaVersion(const FeatureBitset &Features);

 /// Streams isa version string for given subtarget \p STI into \p Stream.
 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

 /// \returns True if given subtarget \p STI supports code object version 3,
 /// false otherwise.
 bool hasCodeObjectV3(const MCSubtargetInfo *STI);

 /// \returns Wavefront size for given subtarget \p Features.
 unsigned getWavefrontSize(const FeatureBitset &Features);

 /// \returns Local memory size in bytes for given subtarget \p Features.
 unsigned getLocalMemorySize(const FeatureBitset &Features);

 /// \returns Number of execution units per compute unit for given subtarget \p
 /// Features.
 unsigned getEUsPerCU(const FeatureBitset &Features);

 /// \returns Maximum number of work groups per compute unit for given subtarget
 /// \p Features and limited by given \p FlatWorkGroupSize.
 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                                unsigned FlatWorkGroupSize);

 /// \returns Maximum number of waves per compute unit for given subtarget \p
 /// Features without any kind of limitation.
 unsigned getMaxWavesPerCU(const FeatureBitset &Features);

 /// \returns Maximum number of waves per compute unit for given subtarget \p
 /// Features and limited by given \p FlatWorkGroupSize.
 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                           unsigned FlatWorkGroupSize);

 /// \returns Minimum number of waves per execution unit for given subtarget \p
 /// Features.
 unsigned getMinWavesPerEU(const FeatureBitset &Features);

 /// \returns Maximum number of waves per execution unit without any kind of
 /// limitation. This overload takes no subtarget features.
 unsigned getMaxWavesPerEU();

 /// \returns Maximum number of waves per execution unit for given subtarget \p
 /// Features and limited by given \p FlatWorkGroupSize.
 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                           unsigned FlatWorkGroupSize);

 /// \returns Minimum flat work group size for given subtarget \p Features.
 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);

 /// \returns Maximum flat work group size for given subtarget \p Features.
 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);

 /// \returns Number of waves per work group for given subtarget \p Features and
 /// limited by given \p FlatWorkGroupSize.
 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize);

 /// \returns SGPR allocation granularity for given subtarget \p Features.
 unsigned getSGPRAllocGranule(const FeatureBitset &Features);

 /// \returns SGPR encoding granularity for given subtarget \p Features.
 unsigned getSGPREncodingGranule(const FeatureBitset &Features);

 /// \returns Total number of SGPRs for given subtarget \p Features.
 unsigned getTotalNumSGPRs(const FeatureBitset &Features);

 /// \returns Addressable number of SGPRs for given subtarget \p Features.
 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);

 /// \returns Minimum number of SGPRs that meets the given number of waves per
 /// execution unit requirement for given subtarget \p Features.
 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

 /// \returns Maximum number of SGPRs that meets the given number of waves per
 /// execution unit requirement for given subtarget \p Features.
 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                         bool Addressable);

 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
 /// Features when the given special registers are used.
 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
                           bool FlatScrUsed, bool XNACKUsed);

 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
 /// Features when the given special registers are used. XNACK is inferred from
 /// \p Features.
 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
                           bool FlatScrUsed);

 /// \returns Number of SGPR blocks needed for given subtarget \p Features when
 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
 /// register counts.
 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);

 /// \returns VGPR allocation granularity for given subtarget \p Features.
 unsigned getVGPRAllocGranule(const FeatureBitset &Features);

 /// \returns VGPR encoding granularity for given subtarget \p Features.
 unsigned getVGPREncodingGranule(const FeatureBitset &Features);

 /// \returns Total number of VGPRs for given subtarget \p Features.
 unsigned getTotalNumVGPRs(const FeatureBitset &Features);

 /// \returns Addressable number of VGPRs for given subtarget \p Features.
 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);

 /// \returns Minimum number of VGPRs that meets given number of waves per
 /// execution unit requirement for given subtarget \p Features.
 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

 /// \returns Maximum number of VGPRs that meets given number of waves per
 /// execution unit requirement for given subtarget \p Features.
 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

 /// \returns Number of VGPR blocks needed for given subtarget \p Features when
 /// \p NumVGPRs are used.
 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs);

 } // end namespace IsaInfo

 LLVM_READONLY
 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

 /// Properties of a single MIMG base opcode, as returned by
 /// getMIMGBaseOpcodeInfo().
 ///
 /// NOTE(review): presumably one row of the generated table included from
 /// AMDGPUGenSearchableTables.inc; if so, the field order here must stay in sync
 /// with the generated initializers -- confirm before reordering or adding
 /// fields.
 struct MIMGBaseOpcodeInfo {
   MIMGBaseOpcode BaseOpcode;
   bool Store;
   bool Atomic;
   bool AtomicX2;
   bool Sampler;

   uint8_t NumExtraArgs;
   bool Gradients;
   bool Coordinates;
   bool LodOrClampOrMip;
   bool HasD16;
 };

 LLVM_READONLY
 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

 /// Properties of a single MIMG dimension, as returned by getMIMGDimInfo():
 /// the dimension itself plus its coordinate and gradient counts.
 ///
 /// NOTE(review): presumably one row of the generated table included from
 /// AMDGPUGenSearchableTables.inc; if so, keep the field order in sync with the
 /// generated initializers -- confirm.
 struct MIMGDimInfo {
   MIMGDim Dim;
   uint8_t NumCoords;
   uint8_t NumGradients;
   bool DA;
 };

 LLVM_READONLY
 const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);

 /// Pairs the MIMG base opcode \p L with the MIMG base opcode \p LZ; entries
 /// are looked up by their L value through getMIMGLZMappingInfo().
 ///
 /// NOTE(review): presumably maps an explicit-LOD ("_L") opcode to its
 /// LOD-zero ("_LZ") counterpart -- confirm against the table definition.
 struct MIMGLZMappingInfo {
   MIMGBaseOpcode L;
   MIMGBaseOpcode LZ;
 };

 LLVM_READONLY
 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

 LLVM_READONLY
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                   unsigned VDataDwords, unsigned VAddrDwords);

 LLVM_READONLY
 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

 LLVM_READONLY
 int getMCOpcode(uint16_t Opcode, unsigned Gen);

 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const FeatureBitset &Features);

 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();

 bool isGroupSegment(const GlobalValue *GV);
 bool isGlobalSegment(const GlobalValue *GV);
 bool isReadOnlySegment(const GlobalValue *GV);

 /// \returns True if constants should be emitted to .text section for given
 /// target triple \p TT, false otherwise.
 bool shouldEmitConstantsToTextSection(const Triple &TT);

 /// \returns Integer value requested using \p F's \p Name attribute.
 ///
 /// \returns \p Default if attribute is not present.
 ///
 /// \returns \p Default and emits error if requested value cannot be converted
 /// to integer.
 int getIntegerAttribute(const Function &F, StringRef Name, int Default);

 /// \returns A pair of integer values requested using \p F's \p Name attribute
 /// in "first[,second]" format. "second" may be omitted only when
 /// \p OnlyFirstRequired is true.
 ///
 /// \returns \p Default if attribute is not present.
 ///
 /// \returns \p Default and emits error if one of the requested values cannot be
 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
 /// not present.
 std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                             StringRef Name,
                                             std::pair<int, int> Default,
                                             bool OnlyFirstRequired = false);

 /// \returns Vmcnt bit mask for given isa \p Version.
 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);

 /// \returns Expcnt bit mask for given isa \p Version.
 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);

 /// \returns Lgkmcnt bit mask for given isa \p Version.
 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);

 /// \returns Waitcnt bit mask for given isa \p Version.
 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);

 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
 /// \p Lgkmcnt respectively.
 ///
 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows (the
 /// inverse of the layout documented on encodeWaitcnt):
 ///     \p Vmcnt      = \p Waitcnt[3:0]    (pre-gfx9 only)
 ///     \p Vmcnt[3:0] = \p Waitcnt[3:0]    (gfx9+ only)
 ///     \p Vmcnt[5:4] = \p Waitcnt[15:14]  (gfx9+ only)
 ///     \p Expcnt     = \p Waitcnt[6:4]
 ///     \p Lgkmcnt    = \p Waitcnt[11:8]
 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Vmcnt);

 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Expcnt);

 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                        unsigned Lgkmcnt);

 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
 /// \p Version.
 ///
 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
 ///     Waitcnt[6:4]   = \p Expcnt
 ///     Waitcnt[11:8]  = \p Lgkmcnt
 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
 ///
 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
 /// isa \p Version.
 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

 unsigned getInitialPSInputAddr(const Function &F);

 LLVM_READNONE
 bool isShader(CallingConv::ID CC);

 LLVM_READNONE
 bool isCompute(CallingConv::ID CC);

 LLVM_READNONE
 bool isEntryFunctionCC(CallingConv::ID CC);

 // FIXME: Remove this when calling conventions cleaned up
 /// \returns true if \p CC is one of the kernel calling conventions
 /// (AMDGPU_KERNEL or SPIR_KERNEL), false for every other convention.
 LLVM_READNONE
 inline bool isKernel(CallingConv::ID CC) {
   return CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
 }

 bool hasXNACK(const MCSubtargetInfo &STI);
 bool hasMIMG_R128(const MCSubtargetInfo &STI);
 bool hasPackedD16(const MCSubtargetInfo &STI);

 bool isSI(const MCSubtargetInfo &STI);
 bool isCI(const MCSubtargetInfo &STI);
 bool isVI(const MCSubtargetInfo &STI);
 bool isGFX9(const MCSubtargetInfo &STI);

 /// \returns true if \p Reg is a scalar register (SGPR).
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

 /// Is there any intersection between registers
 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

 /// If \p Reg is a pseudo reg, return the correct hardware register given
 /// \p STI otherwise return \p Reg.
 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

 /// Convert hardware register \p Reg to a pseudo register
 LLVM_READNONE
 unsigned mc2PseudoReg(unsigned Reg);

 /// Can this operand also contain immediate values?
 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

 /// Is this a floating-point operand?
 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

 /// Does this operand support only inlinable literals?
 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

 /// Get the size in bits of a register from the register class with ID
 /// \p RCID.
 unsigned getRegBitWidth(unsigned RCID);

 /// Get the size in bits of a register from the register class \p RC.
 unsigned getRegBitWidth(const MCRegisterClass &RC);

 /// Get size of register operand
 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                            unsigned OpNo);

 /// \returns Size in bytes of the operand described by \p OpInfo, derived
 /// solely from its OperandType. Only the immediate / inline-constant operand
 /// types listed below are handled; any other type reaches llvm_unreachable.
 LLVM_READNONE
 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
   switch (OpInfo.OperandType) {
   // 16-bit operands. The packed V2INT16 / V2FP16 types also report 2.
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_IMM_INT16:
     return 2;

   // 32-bit operands.
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_IMM_INT32:
     return 4;

   // 64-bit operands.
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_IMM_INT64:
     return 8;

   default:
     llvm_unreachable("unhandled operand type");
   }
 }

 /// \returns Size of operand number \p OpNo of \p Desc, as computed by
 /// getOperandSize(const MCOperandInfo &). \p OpNo is used to index
 /// Desc.OpInfo directly and is not range-checked here.
 LLVM_READNONE
 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
   const MCOperandInfo &Info = Desc.OpInfo[OpNo];
   return getOperandSize(Info);
 }

 /// Is this literal inlinable
 LLVM_READNONE
 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

 LLVM_READNONE
 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

 LLVM_READNONE
 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

 LLVM_READNONE
 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

 bool isArgPassedInSGPR(const Argument *Arg);

 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
 /// offset field.
 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

 /// \returns true if this offset is small enough to fit in the SMRD
 /// offset field.  \p ByteOffset should be the offset in bytes and
 /// not the encoded offset.
 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

 /// \returns true if the intrinsic is divergent
 bool isIntrinsicSourceOfDivergence(unsigned IntrID);

 } // end namespace AMDGPU
 } // end namespace llvm

 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
	//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
	#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

	#include "AMDGPU.h"
	#include "AMDKernelCodeT.h"
	#include "SIDefines.h"
	#include "llvm/ADT/StringRef.h"
	#include "llvm/IR/CallingConv.h"
	#include "llvm/MC/MCInstrDesc.h"
	#include "llvm/Support/AMDHSAKernelDescriptor.h"
	#include "llvm/Support/Compiler.h"
	#include "llvm/Support/ErrorHandling.h"
	#include <cstdint>
	#include <string>
	#include <utility>

	namespace llvm {

	class Argument;
	class FeatureBitset;
	class Function;
	class GlobalValue;
	class MCContext;
	class MCRegisterClass;
	class MCRegisterInfo;
	class MCSection;
	class MCSubtargetInfo;
	class MachineMemOperand;
	class Triple;

	namespace AMDGPU {

	#define GET_MIMGBaseOpcode_DECL
	#define GET_MIMGDim_DECL
	#define GET_MIMGEncoding_DECL
	#define GET_MIMGLZMapping_DECL
	#include "AMDGPUGenSearchableTables.inc"

	namespace IsaInfo {

	enum {
	  // NOTE(review): presumably the number of SGPRs set aside for the trap
	  // handler -- confirm against the users of this constant.
	  TRAP_NUM_SGPRS = 16,

	  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
	  // doesn't spill SGPRs as much as when 80 is set.
	  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
	};

	/// Instruction set architecture version, split into its Major, Minor and
	/// Stepping components. Returned by getIsaVersion() and taken by the waitcnt
	/// bit-mask / encode / decode helpers declared later in this header.
	struct IsaVersion {
	unsigned Major;
	unsigned Minor;
	unsigned Stepping;
	};

	/// \returns Isa version for given subtarget \p Features.
	IsaVersion getIsaVersion(const FeatureBitset &Features);

	/// Streams isa version string for given subtarget \p STI into \p Stream.
	void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

	/// \returns True if given subtarget \p STI supports code object version 3,
	/// false otherwise.
	bool hasCodeObjectV3(const MCSubtargetInfo *STI);

	/// \returns Wavefront size for given subtarget \p Features.
	unsigned getWavefrontSize(const FeatureBitset &Features);

	/// \returns Local memory size in bytes for given subtarget \p Features.
	unsigned getLocalMemorySize(const FeatureBitset &Features);

	/// \returns Number of execution units per compute unit for given subtarget \p
	/// Features.
	unsigned getEUsPerCU(const FeatureBitset &Features);

	/// \returns Maximum number of work groups per compute unit for given subtarget
	/// \p Features and limited by given \p FlatWorkGroupSize.
	unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
	unsigned FlatWorkGroupSize);

	/// \returns Maximum number of waves per compute unit for given subtarget \p
	/// Features without any kind of limitation.
	unsigned getMaxWavesPerCU(const FeatureBitset &Features);

	/// \returns Maximum number of waves per compute unit for given subtarget \p
	/// Features and limited by given \p FlatWorkGroupSize.
	unsigned getMaxWavesPerCU(const FeatureBitset &Features,
	unsigned FlatWorkGroupSize);

	/// \returns Minimum number of waves per execution unit for given subtarget \p
	/// Features.
	unsigned getMinWavesPerEU(const FeatureBitset &Features);

	/// \returns Maximum number of waves per execution unit without any kind of
	/// limitation. This overload takes no subtarget features.
	unsigned getMaxWavesPerEU();

	/// \returns Maximum number of waves per execution unit for given subtarget \p
	/// Features and limited by given \p FlatWorkGroupSize.
	unsigned getMaxWavesPerEU(const FeatureBitset &Features,
	unsigned FlatWorkGroupSize);

	/// \returns Minimum flat work group size for given subtarget \p Features.
	unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);

	/// \returns Maximum flat work group size for given subtarget \p Features.
	unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);

	/// \returns Number of waves per work group for given subtarget \p Features and
	/// limited by given \p FlatWorkGroupSize.
	unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
	unsigned FlatWorkGroupSize);

	/// \returns SGPR allocation granularity for given subtarget \p Features.
	unsigned getSGPRAllocGranule(const FeatureBitset &Features);

	/// \returns SGPR encoding granularity for given subtarget \p Features.
	unsigned getSGPREncodingGranule(const FeatureBitset &Features);

	/// \returns Total number of SGPRs for given subtarget \p Features.
	unsigned getTotalNumSGPRs(const FeatureBitset &Features);

	/// \returns Addressable number of SGPRs for given subtarget \p Features.
	unsigned getAddressableNumSGPRs(const FeatureBitset &Features);

	/// \returns Minimum number of SGPRs that meets the given number of waves per
	/// execution unit requirement for given subtarget \p Features.
	unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

	/// \returns Maximum number of SGPRs that meets the given number of waves per
	/// execution unit requirement for given subtarget \p Features.
	unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
	bool Addressable);

	/// \returns Number of extra SGPRs implicitly required by given subtarget \p
	/// Features when the given special registers are used.
	unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
	bool FlatScrUsed, bool XNACKUsed);

	/// \returns Number of extra SGPRs implicitly required by given subtarget \p
	/// Features when the given special registers are used. XNACK is inferred from
	/// \p Features.
	unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
	bool FlatScrUsed);

	/// \returns Number of SGPR blocks needed for given subtarget \p Features when
	/// \p NumSGPRs are used. \p NumSGPRs should already include any special
	/// register counts.
	unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);

	/// \returns VGPR allocation granularity for given subtarget \p Features.
	unsigned getVGPRAllocGranule(const FeatureBitset &Features);

	/// \returns VGPR encoding granularity for given subtarget \p Features.
	unsigned getVGPREncodingGranule(const FeatureBitset &Features);

	/// \returns Total number of VGPRs for given subtarget \p Features.
	unsigned getTotalNumVGPRs(const FeatureBitset &Features);

	/// \returns Addressable number of VGPRs for given subtarget \p Features.
	unsigned getAddressableNumVGPRs(const FeatureBitset &Features);

	/// \returns Minimum number of VGPRs that meets given number of waves per
	/// execution unit requirement for given subtarget \p Features.
	unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

	/// \returns Maximum number of VGPRs that meets given number of waves per
	/// execution unit requirement for given subtarget \p Features.
	unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);

	/// \returns Number of VGPR blocks needed for given subtarget \p Features when
	/// \p NumVGPRs are used.
	unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs);

	} // end namespace IsaInfo

	LLVM_READONLY
	int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

	/// Properties of a single MIMG base opcode, as returned by
	/// getMIMGBaseOpcodeInfo().
	///
	/// NOTE(review): presumably one row of the generated table included from
	/// AMDGPUGenSearchableTables.inc; if so, the field order here must stay in sync
	/// with the generated initializers -- confirm before reordering or adding
	/// fields.
	struct MIMGBaseOpcodeInfo {
	MIMGBaseOpcode BaseOpcode;
	bool Store;
	bool Atomic;
	bool AtomicX2;
	bool Sampler;

	uint8_t NumExtraArgs;
	bool Gradients;
	bool Coordinates;
	bool LodOrClampOrMip;
	bool HasD16;
	};

	LLVM_READONLY
	const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

	/// Properties of a single MIMG dimension, as returned by getMIMGDimInfo():
	/// the dimension itself plus its coordinate and gradient counts.
	///
	/// NOTE(review): presumably one row of the generated table included from
	/// AMDGPUGenSearchableTables.inc; if so, keep the field order in sync with the
	/// generated initializers -- confirm.
	struct MIMGDimInfo {
	MIMGDim Dim;
	uint8_t NumCoords;
	uint8_t NumGradients;
	bool DA;
	};

	LLVM_READONLY
	const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);

	/// Pairs the MIMG base opcode \p L with the MIMG base opcode \p LZ; entries
	/// are looked up by their L value through getMIMGLZMappingInfo().
	///
	/// NOTE(review): presumably maps an explicit-LOD ("_L") opcode to its
	/// LOD-zero ("_LZ") counterpart -- confirm against the table definition.
	struct MIMGLZMappingInfo {
	MIMGBaseOpcode L;
	MIMGBaseOpcode LZ;
	};

	LLVM_READONLY
	const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

	LLVM_READONLY
	int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
	unsigned VDataDwords, unsigned VAddrDwords);

	LLVM_READONLY
	int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

	LLVM_READONLY
	int getMCOpcode(uint16_t Opcode, unsigned Gen);

	void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
	const FeatureBitset &Features);

	amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();

	bool isGroupSegment(const GlobalValue *GV);
	bool isGlobalSegment(const GlobalValue *GV);
	bool isReadOnlySegment(const GlobalValue *GV);

	/// \returns True if constants should be emitted to .text section for given
	/// target triple \p TT, false otherwise.
	bool shouldEmitConstantsToTextSection(const Triple &TT);

	/// \returns Integer value requested using \p F's \p Name attribute.
	///
	/// \returns \p Default if attribute is not present.
	///
	/// \returns \p Default and emits error if requested value cannot be converted
	/// to integer.
	int getIntegerAttribute(const Function &F, StringRef Name, int Default);

	/// \returns A pair of integer values requested using \p F's \p Name attribute
	/// in "first[,second]" format. "second" may be omitted only when
	/// \p OnlyFirstRequired is true.
	///
	/// \returns \p Default if attribute is not present.
	///
	/// \returns \p Default and emits error if one of the requested values cannot be
	/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
	/// not present.
	std::pair<int, int> getIntegerPairAttribute(const Function &F,
	StringRef Name,
	std::pair<int, int> Default,
	bool OnlyFirstRequired = false);

	/// \returns Vmcnt bit mask for given isa \p Version.
	unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);

	/// \returns Expcnt bit mask for given isa \p Version.
	unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);

	/// \returns Lgkmcnt bit mask for given isa \p Version.
	unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);

	/// \returns Waitcnt bit mask for given isa \p Version.
	unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);

	/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
	unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

	/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
	unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

	/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
	unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);

	/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
	/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
	/// \p Lgkmcnt respectively.
	///
	/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows (the
	/// inverse of the layout documented on encodeWaitcnt):
	///     \p Vmcnt      = \p Waitcnt[3:0]    (pre-gfx9 only)
	///     \p Vmcnt[3:0] = \p Waitcnt[3:0]    (gfx9+ only)
	///     \p Vmcnt[5:4] = \p Waitcnt[15:14]  (gfx9+ only)
	///     \p Expcnt     = \p Waitcnt[6:4]
	///     \p Lgkmcnt    = \p Waitcnt[11:8]
	void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
	unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

	/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
	unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
	unsigned Vmcnt);

	/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
	unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
	unsigned Expcnt);

	/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
	unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
	unsigned Lgkmcnt);

	/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
	/// \p Version.
	///
	/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
	/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
	/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
	/// Waitcnt[6:4] = \p Expcnt
	/// Waitcnt[11:8] = \p Lgkmcnt
	/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
	///
	/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
	/// isa \p Version.
	unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
	unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

	unsigned getInitialPSInputAddr(const Function &F);

	LLVM_READNONE
	bool isShader(CallingConv::ID CC);

	LLVM_READNONE
	bool isCompute(CallingConv::ID CC);

	LLVM_READNONE
	bool isEntryFunctionCC(CallingConv::ID CC);

	// FIXME: Remove this when calling conventions cleaned up
	/// \returns true if \p CC is one of the kernel calling conventions
	/// (AMDGPU_KERNEL or SPIR_KERNEL), false for every other convention.
	LLVM_READNONE
	inline bool isKernel(CallingConv::ID CC) {
	  return CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
	}

	bool hasXNACK(const MCSubtargetInfo &STI);
	bool hasMIMG_R128(const MCSubtargetInfo &STI);
	bool hasPackedD16(const MCSubtargetInfo &STI);

	bool isSI(const MCSubtargetInfo &STI);
	bool isCI(const MCSubtargetInfo &STI);
	bool isVI(const MCSubtargetInfo &STI);
	bool isGFX9(const MCSubtargetInfo &STI);

	/// \returns true if \p Reg is a scalar register (SGPR).
	bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

	/// Is there any intersection between registers
	bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

	/// If \p Reg is a pseudo reg, return the correct hardware register given
	/// \p STI otherwise return \p Reg.
	unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

	/// Convert hardware register \p Reg to a pseudo register
	LLVM_READNONE
	unsigned mc2PseudoReg(unsigned Reg);

	/// Can this operand also contain immediate values?
	bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

	/// Is this a floating-point operand?
	bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

	/// Does this operand support only inlinable literals?
	bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

	/// Get the size in bits of a register from the register class with ID
	/// \p RCID.
	unsigned getRegBitWidth(unsigned RCID);

	/// Get the size in bits of a register from the register class \p RC.
	unsigned getRegBitWidth(const MCRegisterClass &RC);

	/// Get size of register operand
	unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
	unsigned OpNo);

	/// \returns Size in bytes of the operand described by \p OpInfo, derived
	/// solely from its OperandType. Only the immediate / inline-constant operand
	/// types listed below are handled; any other type reaches llvm_unreachable.
	LLVM_READNONE
	inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
	  switch (OpInfo.OperandType) {
	  // 16-bit operands. The packed V2INT16 / V2FP16 types also report 2.
	  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
	  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
	  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
	  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
	  case AMDGPU::OPERAND_REG_IMM_FP16:
	  case AMDGPU::OPERAND_REG_IMM_INT16:
	    return 2;

	  // 32-bit operands.
	  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
	  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
	  case AMDGPU::OPERAND_REG_IMM_FP32:
	  case AMDGPU::OPERAND_REG_IMM_INT32:
	    return 4;

	  // 64-bit operands.
	  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
	  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
	  case AMDGPU::OPERAND_REG_IMM_FP64:
	  case AMDGPU::OPERAND_REG_IMM_INT64:
	    return 8;

	  default:
	    llvm_unreachable("unhandled operand type");
	  }
	}

	/// \returns Size of operand number \p OpNo of \p Desc, as computed by
	/// getOperandSize(const MCOperandInfo &). \p OpNo is used to index
	/// Desc.OpInfo directly and is not range-checked here.
	LLVM_READNONE
	inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
	  const MCOperandInfo &Info = Desc.OpInfo[OpNo];
	  return getOperandSize(Info);
	}

	/// Is this literal inlinable
	LLVM_READNONE
	bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

	LLVM_READNONE
	bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

	LLVM_READNONE
	bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

	LLVM_READNONE
	bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

	bool isArgPassedInSGPR(const Argument *Arg);

	/// \returns The encoding that will be used for \p ByteOffset in the SMRD
	/// offset field.
	int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

	/// \returns true if this offset is small enough to fit in the SMRD
	/// offset field. \p ByteOffset should be the offset in bytes and
	/// not the encoded offset.
	bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

	/// \returns true if the intrinsic is divergent
	bool isIntrinsicSourceOfDivergence(unsigned IntrID);

	} // end namespace AMDGPU
	} // end namespace llvm

	#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H