// third_party/llvm-project/clang/test/CodeGenCUDA/link-device-bitcode.cu - cobalt - Git at Google

 // Test for linking with CUDA's libdevice as outlined in
 // http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
 //
 // REQUIRES: nvptx-registered-target
 //
 // Prepare bitcode file to link with
 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
 // RUN:    -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
 // RUN:    -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
 //
 // Make sure function in device-code gets linked in and internalized.
 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
 // RUN:    -mlink-cuda-bitcode %t.bc  -emit-llvm \
 // RUN:    -disable-llvm-passes -o - %s \
 // RUN:    | FileCheck %s -check-prefix CHECK-IR
 //
 // Make sure we can link two bitcode files.
 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
 // RUN:    -mlink-cuda-bitcode %t.bc -mlink-cuda-bitcode %t-2.bc \
 // RUN:    -emit-llvm -disable-llvm-passes -o - %s \
 // RUN:    | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
 //
 // Make sure function in device-code gets linked but is not internalized
 // when it is linked with -mlink-bitcode-file instead of -mlink-cuda-bitcode.
 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
 // RUN:    -mlink-bitcode-file %t.bc -emit-llvm \
 // RUN:    -disable-llvm-passes -o - %s \
 // RUN:    | FileCheck %s -check-prefix CHECK-IR-NLD
 //
 // Make sure NVVMReflect pass is enabled in NVPTX back-end.
 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
 // RUN:    -mlink-cuda-bitcode %t.bc -S -o /dev/null %s \
 // RUN:    -mllvm -debug-pass=Structure 2>&1 \
 // RUN:    | FileCheck %s -check-prefix CHECK-REFLECT

 #include "Inputs/cuda.h"

 __device__ float device_mul_or_add(float a, float b);
 extern "C" __device__ double __nv_sin(double x);
 extern "C" __device__ double __nv_exp(double x);

 // CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
 // CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
 // Device function with an intentionally empty body: `data` is neither read
 // nor written. The FileCheck expectations directly above look for its
 // definition without the `internal` keyword, i.e. it must stay externally
 // visible after the bitcode is linked in.
 __device__ void should_not_be_internalized(float *data) {
 }

 // Make sure kernel call has not been internalized.
 // CHECK-IR-LABEL: define void @_Z6kernelPfS_
 // CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
 // Kernel entry point whose body references every symbol this test expects to
 // be pulled in from the linked bitcode files. Nothing in this file launches
 // it; __attribute__((used)) is presumably there so the definition is still
 // emitted -- TODO confirm.
 //
 // out: pointer to the float that receives the result.
 // in:  pointer to at least two floats; in[0] and in[1] are read.
 __global__ __attribute__((used)) void kernel(float *out, float *in) {
   // device_mul_or_add() is only declared in this file; the test expects its
   // definition to come from the bitcode built from Inputs/device-code.ll.
   *out = device_mul_or_add(in[0], in[1]);
   // __nv_sin() and __nv_exp() are extern "C" declarations taking and
   // returning double; the test expects their definitions to come from the
   // second bitcode file. The double result is accumulated into the float
   // that `out` points to.
   *out += __nv_exp(__nv_sin(*out));
   // Call the function defined in this file.
   should_not_be_internalized(out);
 }

 // Make sure device_mul_or_add() is present in IR, is internal and
 // calls __nvvm_reflect().
 // CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
 // CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
 // CHECK-IR: call i32 @__nvvm_reflect
 // CHECK-IR: ret float

 // Make sure we've linked in and internalized only needed functions
 // from the second bitcode file.
 // CHECK-IR-2-LABEL: define internal double @__nv_sin
 // CHECK-IR-2-LABEL: define internal double @__nv_exp
 // CHECK-IR-2-NOT: double @__unused

 // Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
 // CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1
	// Test for linking with CUDA's libdevice as outlined in
	// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
	//
	// REQUIRES: nvptx-registered-target
	//
	// Prepare bitcode file to link with
	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
	// RUN: -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
	// RUN: -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
	//
	// Make sure function in device-code gets linked in and internalized.
	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
	// RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \
	// RUN: -disable-llvm-passes -o - %s \
	// RUN: | FileCheck %s -check-prefix CHECK-IR
	//
	// Make sure we can link two bitcode files.
	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
	// RUN: -mlink-cuda-bitcode %t.bc -mlink-cuda-bitcode %t-2.bc \
	// RUN: -emit-llvm -disable-llvm-passes -o - %s \
	// RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
	//
	// Make sure function in device-code gets linked but is not internalized
	// when it is linked with -mlink-bitcode-file instead of -mlink-cuda-bitcode.
	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
	// RUN: -mlink-bitcode-file %t.bc -emit-llvm \
	// RUN: -disable-llvm-passes -o - %s \
	// RUN: | FileCheck %s -check-prefix CHECK-IR-NLD
	//
	// Make sure NVVMReflect pass is enabled in NVPTX back-end.
	// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
	// RUN: -mlink-cuda-bitcode %t.bc -S -o /dev/null %s \
	// RUN: -mllvm -debug-pass=Structure 2>&1 \
	// RUN: | FileCheck %s -check-prefix CHECK-REFLECT

	#include "Inputs/cuda.h"

	__device__ float device_mul_or_add(float a, float b);
	extern "C" __device__ double __nv_sin(double x);
	extern "C" __device__ double __nv_exp(double x);

	// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
	// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
	// Empty-bodied device function; its `data` argument is unused. The
	// FileCheck expectations directly above require its definition to appear
	// without the `internal` keyword, so it has to remain externally visible
	// once the bitcode has been linked in.
	__device__ void should_not_be_internalized(float *data) {
	}

	// Make sure kernel call has not been internalized.
	// CHECK-IR-LABEL: define void @_Z6kernelPfS_
	// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
	// Kernel entry point whose body references every symbol this test expects
	// to be pulled in from the linked bitcode files. Nothing in this file
	// launches it; __attribute__((used)) is presumably there so the definition
	// is still emitted -- TODO confirm.
	//
	// Both parameters must be pointers to float: the expected mangled name
	// _Z6kernelPfS_ in the check lines above encodes (float *, float *), and the
	// body dereferences `out` and indexes `in`.
	//
	// out: pointer to the float that receives the result.
	// in:  pointer to at least two floats; in[0] and in[1] are read.
	__global__ __attribute__((used)) void kernel(float *out, float *in) {
	  // device_mul_or_add() is only declared in this file; the test expects its
	  // definition to come from the bitcode built from Inputs/device-code.ll.
	  *out = device_mul_or_add(in[0], in[1]);
	  // __nv_sin() and __nv_exp() are extern "C" declarations taking and
	  // returning double; the double result is accumulated into the float that
	  // `out` points to.
	  *out += __nv_exp(__nv_sin(*out));
	  // Call the function defined in this file.
	  should_not_be_internalized(out);
	}

	// Make sure device_mul_or_add() is present in IR, is internal and
	// calls __nvvm_reflect().
	// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
	// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
	// CHECK-IR: call i32 @__nvvm_reflect
	// CHECK-IR: ret float

	// Make sure we've linked in and internalized only needed functions
	// from the second bitcode file.
	// CHECK-IR-2-LABEL: define internal double @__nv_sin
	// CHECK-IR-2-LABEL: define internal double @__nv_exp
	// CHECK-IR-2-NOT: double @__unused

	// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
	// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1