Import Cobalt 19.master.0.203780

Includes the following patches:
  https://cobalt-review.googlesource.com/c/cobalt/+/5210
    by errong.leng@samsung.com
  https://cobalt-review.googlesource.com/c/cobalt/+/5270
    by linus.wang@samsung.com
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/2011-04-15-vec-init-from-vec.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/2011-04-15-vec-init-from-vec.cl
new file mode 100644
index 0000000..fbe3d89
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/2011-04-15-vec-init-from-vec.cl
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 %s -emit-llvm -o %t
+
+typedef __attribute__((ext_vector_type(4)))  unsigned char uchar4;
+typedef __attribute__((ext_vector_type(8))) unsigned char uchar8;
+
+// OpenCL allows vectors to be initialized by vectors. Handle bug in
+// VisitInitListExpr for this case below.
+void foo( uchar8 x )
+{
+  uchar4 val[4] = {{(uchar4){x.lo}}};
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
new file mode 100644
index 0000000..f8d7073
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -0,0 +1,168 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
+
+typedef struct {
+  int cells[9];
+} Mat3X3;
+
+typedef struct {
+  int cells[16];
+} Mat4X4;
+
+typedef struct {
+  int cells[1024];
+} Mat32X32;
+
+typedef struct {
+  int cells[4096];
+} Mat64X64;
+
+struct StructOneMember {
+  int2 x;
+};
+
+struct StructTwoMember {
+  int2 x;
+  int2 y;
+};
+
+struct LargeStructOneMember {
+  int2 x[100];
+};
+
+struct LargeStructTwoMember {
+  int2 x[40];
+  int2 y[20];
+};
+
+#if __OPENCL_C_VERSION__ >= 200
+struct LargeStructOneMember g_s;
+#endif
+
+// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in)
+// AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
+Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
+  Mat4X4 out;
+  return out;
+}
+
+// COM-LABEL: define {{.*}} void @ker
+// Expect two mem copies: one for the argument "in", and one for
+// the return value.
+// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
+// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+
+// AMDGCN: load [9 x i32], [9 x i32] addrspace(1)*
+// AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
+// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
+kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
+  out[0] = foo(in[1]);
+}
+
+// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval align 4 %in)
+// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
+Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
+  Mat64X64 out;
+  return out;
+}
+
+// COM-LABEL: define {{.*}} void @ker_large
+// Expect two mem copies: one for the argument "in", and one for
+// the return value.
+// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
+// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
+// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
+kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
+  out[0] = foo_large(in[1]);
+}
+
+// AMDGCN-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce)
+void FuncOneMember(struct StructOneMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-NOT: addrspacecast
+// AMDGCN:   store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
+void FuncOneLargeMember(struct LargeStructOneMember u) {
+  u.x[0] = (int2)(0, 0);
+}
+
+// AMDGCN20-LABEL: define void @test_indirect_arg_globl()
+// AMDGCN20:  %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN20:  %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
+// AMDGCN20:  call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
+// AMDGCN20:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+#if __OPENCL_C_VERSION__ >= 200
+void test_indirect_arg_globl(void) {
+  FuncOneLargeMember(g_s);
+}
+#endif
+
+// AMDGCN-LABEL: define amdgpu_kernel void @test_indirect_arg_local()
+// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
+// AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+kernel void test_indirect_arg_local(void) {
+  local struct LargeStructOneMember l_s;
+  FuncOneLargeMember(l_s);
+}
+
+// AMDGCN-LABEL: define void @test_indirect_arg_private()
+// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN-NOT: @llvm.memcpy
+// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[p_s]])
+void test_indirect_arg_private(void) {
+  struct LargeStructOneMember p_s;
+  FuncOneLargeMember(p_s);
+}
+
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelOneMember
+// AMDGCN-SAME:  (<2 x i32> %[[u_coerce:.*]])
+// AMDGCN:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
+// AMDGCN:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
+// AMDGCN:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
+// AMDGCN:  call void @FuncOneMember(<2 x i32>
+kernel void KernelOneMember(struct StructOneMember u) {
+  FuncOneMember(u);
+}
+
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
+// AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
+// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
+kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
+  FuncOneLargeMember(u);
+}
+
+// AMDGCN-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
+void FuncTwoMember(struct StructTwoMember u) {
+  u.y = (int2)(0, 0);
+}
+
+// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
+void FuncLargeTwoMember(struct LargeStructTwoMember u) {
+  u.y[0] = (int2)(0, 0);
+}
+
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelTwoMember
+// AMDGCN-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
+// AMDGCN:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
+// AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
+// AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
+// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
+kernel void KernelTwoMember(struct StructTwoMember u) {
+  FuncTwoMember(u);
+}
+
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeTwoMember
+// AMDGCN-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
+// AMDGCN:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
+// AMDGCN:  store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
+// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
+kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
+  FuncLargeTwoMember(u);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-space-constant-initializers.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-space-constant-initializers.cl
new file mode 100644
index 0000000..da971cf
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-space-constant-initializers.cl
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 %s -ffake-address-space-map -emit-llvm -o - | FileCheck -check-prefix=FAKE %s
+// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck -check-prefix=AMDGCN %s
+
+typedef struct {
+    int i;
+    float f; // At non-zero offset.
+} ArrayStruct;
+
+__constant ArrayStruct constant_array_struct = { 0, 0.0f };
+
+typedef struct {
+    __constant float* constant_float_ptr;
+} ConstantArrayPointerStruct;
+
+// FAKE: %struct.ConstantArrayPointerStruct = type { float addrspace(2)* }
+// FAKE: addrspace(2) constant %struct.ConstantArrayPointerStruct { float addrspace(2)* bitcast (i8 addrspace(2)* getelementptr (i8, i8 addrspace(2)* bitcast (%struct.ArrayStruct addrspace(2)* @constant_array_struct to i8 addrspace(2)*), i64 4) to float addrspace(2)*) }
+// AMDGCN: %struct.ConstantArrayPointerStruct = type { float addrspace(4)* }
+// AMDGCN: addrspace(4) constant %struct.ConstantArrayPointerStruct { float addrspace(4)* bitcast (i8 addrspace(4)* getelementptr (i8, i8 addrspace(4)* bitcast (%struct.ArrayStruct addrspace(4)* @constant_array_struct to i8 addrspace(4)*), i64 4) to float addrspace(4)*) }
+// Bug  18567
+__constant ConstantArrayPointerStruct constant_array_pointer_struct = {
+    &constant_array_struct.f
+};
+
+__kernel void initializer_cast_is_valid_crash()
+{
+  unsigned char v512[64] = {
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
+  };
+
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
new file mode 100644
index 0000000..c947db4
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s
+// When -ffake-address-space-map is not used, all address spaces are mapped to 0 for x86_64.
+
+// Test that we generate address space casts everywhere we need conversions of
+// pointers to different address spaces.
+
+// CHECK: define void @test
+void test(global int *arg_glob, generic int *arg_gen) {
+  int var_priv;
+  arg_gen = arg_glob; // implicit cast global -> generic
+  // CHECK: %{{[0-9]+}} = addrspacecast i32 addrspace(1)* %{{[0-9]+}} to i32 addrspace(4)*
+  // CHECK-NOFAKE-NOT: addrspacecast
+
+  arg_gen = &var_priv; // implicit cast with obtaining adr, private -> generic
+  // CHECK: %{{[0-9]+}} = addrspacecast i32* %var_priv to i32 addrspace(4)*
+  // CHECK-NOFAKE-NOT: addrspacecast
+
+  arg_glob = (global int *)arg_gen; // explicit cast
+  // CHECK: %{{[0-9]+}} = addrspacecast i32 addrspace(4)* %{{[0-9]+}} to i32 addrspace(1)*
+  // CHECK-NOFAKE-NOT: addrspacecast
+
+  global int *var_glob =
+      (global int *)arg_glob; // explicit cast in the same address space
+  // CHECK-NOT: %{{[0-9]+}} = addrspacecast i32 addrspace(1)* %{{[0-9]+}} to i32 addrspace(1)*
+  // CHECK-NOFAKE-NOT: addrspacecast
+
+  var_priv = arg_gen - arg_glob; // arithmetic operation
+  // CHECK: %{{.*}} = ptrtoint i32 addrspace(4)* %{{.*}} to i64
+  // CHECK: %{{.*}} = ptrtoint i32 addrspace(1)* %{{.*}} to i64
+  // CHECK-NOFAKE: %{{.*}} = ptrtoint i32* %{{.*}} to i64
+  // CHECK-NOFAKE: %{{.*}} = ptrtoint i32* %{{.*}} to i64
+
+  var_priv = arg_gen > arg_glob; // comparison
+  // CHECK: %{{[0-9]+}} = addrspacecast i32 addrspace(1)* %{{[0-9]+}} to i32 addrspace(4)*
+
+  generic void *var_gen_v = arg_glob;
+  // CHECK: addrspacecast
+  // CHECK-NOT: bitcast
+  // CHECK-NOFAKE: bitcast
+  // CHECK-NOFAKE-NOT: addrspacecast
+}
+
+// Test ternary operator.
+// CHECK: define void @test_ternary
+void test_ternary(void) {
+  global int *var_glob;
+  generic int *var_gen;
+  generic int *var_gen2;
+  generic float *var_gen_f;
+  generic void *var_gen_v;
+
+  var_gen = var_gen ? var_gen : var_gen2; // operands of the same addr spaces and the same type
+  // CHECK: icmp
+  // CHECK-NOT: addrspacecast
+  // CHECK-NOT: bitcast
+  // CHECK: phi
+  // CHECK: store i32 addrspace(4)* %{{.+}}, i32 addrspace(4)** %{{.+}}
+
+  var_gen = var_gen ? var_gen : var_glob; // operands of overlapping addr spaces and the same type
+  // CHECK: icmp
+  // CHECK-NOT: bitcast
+  // CHECK: %{{.+}} = addrspacecast i32 addrspace(1)* %{{.+}} to i32 addrspace(4)*
+  // CHECK: phi
+  // CHECK: store
+
+  typedef int int_t;
+  global int_t *var_glob_typedef;
+  var_gen = var_gen ? var_gen : var_glob_typedef; // operands of overlapping addr spaces and equivalent types
+  // CHECK: icmp
+  // CHECK-NOT: bitcast
+  // CHECK: %{{.+}} = addrspacecast i32 addrspace(1)* %{{.+}} to i32 addrspace(4)*
+  // CHECK: phi
+  // CHECK: store
+ 
+  var_gen_v = var_gen ? var_gen : var_gen_f; // operands of the same addr space and different types
+  // CHECK: icmp
+  // CHECK: %{{.+}} = bitcast i32 addrspace(4)* %{{.+}} to i8 addrspace(4)*
+  // CHECK: %{{.+}} = bitcast float addrspace(4)* %{{.+}} to i8 addrspace(4)*
+  // CHECK: phi
+  // CHECK: store
+
+  var_gen_v = var_gen ? var_glob : var_gen_f; // operands of overlapping addr spaces and different types
+  // CHECK: icmp
+  // CHECK: %{{.+}} = addrspacecast i32 addrspace(1)* %{{.+}} to i8 addrspace(4)*
+  // CHECK: %{{.+}} = bitcast float addrspace(4)* %{{.+}} to i8 addrspace(4)*
+  // CHECK: phi
+  // CHECK: store
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
new file mode 100644
index 0000000..b6e6b87
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN10 %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN20 %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN10 %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN20 %s
+
+// We check that the address spaces are mangled the same in both versions of OpenCL
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s
+
+// We can't name this f because private is equivalent to the default
+// (no specifier given) address space, so we would get multiple definition
+// warnings, but we do want it for comparison purposes.
+__attribute__((overloadable))
+void ff(int *arg) { }
+// ASMANG10: @_Z2ffPi
+// ASMANG20: @_Z2ffPU3AS4i
+// NOASMANG10: @_Z2ffPi
+// NOASMANG20: @_Z2ffPU9CLgenerici
+// OCL-20-DAG: @_Z2ffPU3AS4i
+// OCL-12-DAG: @_Z2ffPi
+
+__attribute__((overloadable))
+void f(private int *arg) { }
+// ASMANG: @_Z1fPi
+// NOASMANG: @_Z1fPU9CLprivatei
+// OCL-20-DAG: @_Z1fPi
+// OCL-12-DAG: @_Z1fPi
+
+__attribute__((overloadable))
+void f(global int *arg) { }
+// ASMANG: @_Z1fPU3AS1i
+// NOASMANG: @_Z1fPU8CLglobali
+// OCL-20-DAG: @_Z1fPU3AS1i
+// OCL-12-DAG: @_Z1fPU3AS1i
+
+__attribute__((overloadable))
+void f(local int *arg) { }
+// ASMANG: @_Z1fPU3AS3i
+// NOASMANG: @_Z1fPU7CLlocali
+// OCL-20-DAG: @_Z1fPU3AS3i
+// OCL-12-DAG: @_Z1fPU3AS3i
+
+__attribute__((overloadable))
+void f(constant int *arg) { }
+// ASMANG: @_Z1fPU3AS2i
+// NOASMANG: @_Z1fPU10CLconstanti
+// OCL-20-DAG: @_Z1fPU3AS2i
+// OCL-12-DAG: @_Z1fPU3AS2i
+
+#if __OPENCL_C_VERSION__ >= 200
+__attribute__((overloadable))
+void f(generic int *arg) { }
+// ASMANG20: @_Z1fPU3AS4i
+// NOASMANG20: @_Z1fPU9CLgenerici
+// OCL-20-DAG: @_Z1fPU3AS4i
+#endif
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces.cl
new file mode 100644
index 0000000..60f5e30
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/address-spaces.cl
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
+// RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
+// RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
+
+// SPIR: %struct.S = type { i32, i32, i32* }
+// CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* }
+struct S {
+  int x;
+  int y;
+  int *z;
+};
+
+// CL20-DAG: @g_extern_var = external {{(dso_local )?}}addrspace(1) global float
+// CL20-DAG: @l_extern_var = external {{(dso_local )?}}addrspace(1) global float
+// CL20-DAG: @test_static.l_static_var = internal addrspace(1) global float 0.000000e+00
+// CL20-DAG: @g_static_var = internal addrspace(1) global float 0.000000e+00
+
+#ifdef CL20
+// CL20-DAG: @g_s = common {{(dso_local )?}}addrspace(1) global %struct.S zeroinitializer
+struct S g_s;
+#endif
+
+// SPIR: i32* %arg
+// AMDGCN: i32 addrspace(5)* %arg
+void f__p(__private int *arg) {}
+
+// CHECK: i32 addrspace(1)* %arg
+void f__g(__global int *arg) {}
+
+// CHECK: i32 addrspace(3)* %arg
+void f__l(__local int *arg) {}
+
+// SPIR: i32 addrspace(2)* %arg
+// AMDGCN: i32 addrspace(4)* %arg
+void f__c(__constant int *arg) {}
+
+// SPIR: i32* %arg
+// AMDGCN: i32 addrspace(5)* %arg
+void fp(private int *arg) {}
+
+// CHECK: i32 addrspace(1)* %arg
+void fg(global int *arg) {}
+
+// CHECK: i32 addrspace(3)* %arg
+void fl(local int *arg) {}
+
+// SPIR: i32 addrspace(2)* %arg
+// AMDGCN: i32 addrspace(4)* %arg
+void fc(constant int *arg) {}
+
+#ifdef CL20
+int i;
+// CL20-DAG: @i = common {{(dso_local )?}}addrspace(1) global i32 0
+int *ptr;
+// CL20SPIR-DAG: @ptr = common {{(dso_local )?}}addrspace(1) global i32 addrspace(4)* null
+// CL20AMDGCN-DAG: @ptr = common {{(dso_local )?}}addrspace(1) global i32* null
+#endif
+
+// SPIR: i32* %arg
+// AMDGCN: i32 addrspace(5)* %arg
+// CL20SPIR-DAG: i32 addrspace(4)* %arg
+// CL20AMDGCN-DAG: i32* %arg
+void f(int *arg) {
+
+  int i;
+// SPIR: %i = alloca i32,
+// AMDGCN: %i = alloca i32{{.*}}addrspace(5)
+// CL20SPIR-DAG: %i = alloca i32,
+// CL20AMDGCN-DAG: %i = alloca i32{{.*}}addrspace(5)
+
+#ifdef CL20
+  static int ii;
+// CL20-DAG: @f.ii = internal addrspace(1) global i32 0
+#endif
+}
+
+typedef int int_td;
+typedef int *intp_td;
+// SPIR: define {{(dso_local )?}}void @test_typedef(i32 addrspace(1)* %x, i32 addrspace(2)* %y, i32* %z)
+void test_typedef(global int_td *x, constant int_td *y, intp_td z) {
+  *x = *y;
+  *z = 0;
+}
+
+// SPIR: define {{(dso_local )?}}void @test_struct()
+void test_struct() {
+  // SPIR: %ps = alloca %struct.S*
+  // CL20SPIR: %ps = alloca %struct.S addrspace(4)*
+  struct S *ps;
+  // SPIR: store i32 0, i32* %x
+  // CL20SPIR: store i32 0, i32 addrspace(4)* %x
+  ps->x = 0;
+#ifdef CL20
+  // CL20SPIR: store i32 0, i32 addrspace(1)* getelementptr inbounds (%struct.S, %struct.S addrspace(1)* @g_s, i32 0, i32 0)
+  g_s.x = 0;
+#endif
+}
+
+// SPIR-LABEL: define {{(dso_local )?}}void @test_void_par()
+void test_void_par(void) {}
+
+// On ppc64 returns signext i32.
+// SPIR-LABEL: define{{.*}} i32 @test_func_return_type()
+int test_func_return_type(void) {
+  return 0;
+}
+
+#ifdef CL20
+extern float g_extern_var;
+
+// CL20-LABEL: define {{.*}}void @test_extern(
+kernel void test_extern(global float *buf) {
+  extern float l_extern_var;
+  buf[0] += g_extern_var + l_extern_var;
+}
+
+static float g_static_var;
+
+// CL20-LABEL: define {{.*}}void @test_static(
+kernel void test_static(global float *buf) {
+  static float l_static_var;
+  buf[0] += g_static_var + l_static_var;
+}
+
+#endif
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
new file mode 100644
index 0000000..59f38f8
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s
+
+// CL12-LABEL: define void @func1(i32 addrspace(5)* %x)
+// CL20-LABEL: define void @func1(i32* %x)
+void func1(int *x) {
+  // CL12: %[[x_addr:.*]] = alloca i32 addrspace(5)*{{.*}}addrspace(5)
+  // CL12: store i32 addrspace(5)* %x, i32 addrspace(5)* addrspace(5)* %[[x_addr]]
+  // CL12: %[[r0:.*]] = load i32 addrspace(5)*, i32 addrspace(5)* addrspace(5)* %[[x_addr]]
+  // CL12: store i32 1, i32 addrspace(5)* %[[r0]]
+  // CL20: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5)
+  // CL20: store i32* %x, i32* addrspace(5)* %[[x_addr]]
+  // CL20: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[x_addr]]
+  // CL20: store i32 1, i32* %[[r0]]
+  *x = 1;
+}
+
+// CHECK-LABEL: define void @func2()
+void func2(void) {
+  // CHECK: %lv1 = alloca i32, align 4, addrspace(5)
+  // CHECK: %lv2 = alloca i32, align 4, addrspace(5)
+  // CHECK: %la = alloca [100 x i32], align 4, addrspace(5)
+  // CL12: %lp1 = alloca i32 addrspace(5)*, align 4, addrspace(5)
+  // CL12: %lp2 = alloca i32 addrspace(5)*, align 4, addrspace(5)
+  // CL20: %lp1 = alloca i32*, align 8, addrspace(5)
+  // CL20: %lp2 = alloca i32*, align 8, addrspace(5)
+  // CHECK: %lvc = alloca i32, align 4, addrspace(5)
+
+  // CHECK: store i32 1, i32 addrspace(5)* %lv1
+  int lv1;
+  lv1 = 1;
+  // CHECK: store i32 2, i32 addrspace(5)* %lv2
+  int lv2 = 2;
+
+  // CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i64 0, i64 0
+  // CHECK: store i32 3, i32 addrspace(5)* %[[arrayidx]], align 4
+  int la[100];
+  la[0] = 3;
+
+  // CL12: store i32 addrspace(5)* %lv1, i32 addrspace(5)* addrspace(5)* %lp1, align 4
+  // CL20: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %lv1 to i32*
+  // CL20: store i32* %[[r0]], i32* addrspace(5)* %lp1, align 8
+  int *lp1 = &lv1;
+
+  // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i32 0, i32 0
+  // CL12: store i32 addrspace(5)* %[[arraydecay]], i32 addrspace(5)* addrspace(5)* %lp2, align 4
+  // CL20: %[[r1:.*]] = addrspacecast i32 addrspace(5)* %[[arraydecay]] to i32*
+  // CL20: store i32* %[[r1]], i32* addrspace(5)* %lp2, align 8
+  int *lp2 = la;
+
+  // CL12: call void @func1(i32 addrspace(5)* %lv1)
+  // CL20: %[[r2:.*]] = addrspacecast i32 addrspace(5)* %lv1 to i32*
+  // CL20: call void @func1(i32* %[[r2]])
+  func1(&lv1);
+
+  // CHECK: store i32 4, i32 addrspace(5)* %lvc
+  // CHECK: store i32 4, i32 addrspace(5)* %lv1
+  const int lvc = 4;
+  lv1 = lvc;
+}
+
+// CHECK-LABEL: define void @func3()
+// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5)
+// CHECK: %[[CAST:.+]] = bitcast [16 x [1 x float]] addrspace(5)* %a to i8 addrspace(5)*
+// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 4 %[[CAST]], i8 0, i64 64, i1 false)
+void func3(void) {
+  float a[16][1] = {{0.}};
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-flat-scratch-name.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-flat-scratch-name.cl
new file mode 100644
index 0000000..3a98e90
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-flat-scratch-name.cl
@@ -0,0 +1,15 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: @use_flat_scratch_name
+kernel void use_flat_scratch_name()
+{
+// CHECK: tail call void asm sideeffect "s_mov_b64 flat_scratch, 0", "~{flat_scratch}"()
+  __asm__ volatile("s_mov_b64 flat_scratch, 0" : : : "flat_scratch");
+
+// CHECK: tail call void asm sideeffect "s_mov_b32 flat_scratch_lo, 0", "~{flat_scratch_lo}"()
+  __asm__ volatile("s_mov_b32 flat_scratch_lo, 0" : : : "flat_scratch_lo");
+
+// CHECK: tail call void asm sideeffect "s_mov_b32 flat_scratch_hi, 0", "~{flat_scratch_hi}"()
+  __asm__ volatile("s_mov_b32 flat_scratch_hi, 0" : : : "flat_scratch_hi");
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-large-globals.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-large-globals.cl
new file mode 100644
index 0000000..ea9ea61
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgcn-large-globals.cl
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: @One = common local_unnamed_addr addrspace(1) global [6442450944 x i8] zeroinitializer, align 1
+unsigned char One[6442450944];
+// CHECK: @Two = common local_unnamed_addr addrspace(1) global [6442450944 x i32] zeroinitializer, align 4
+global unsigned int Two[6442450944];
+ 
+kernel void large_globals(unsigned int id) {
+  One[id] = id;
+  Two[id + 1] = id + 1;
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
new file mode 100644
index 0000000..aec00e7
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -0,0 +1,523 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(2) )) char char2;
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+
+typedef __attribute__(( ext_vector_type(2) )) short short2;
+typedef __attribute__(( ext_vector_type(3) )) short short3;
+typedef __attribute__(( ext_vector_type(4) )) short short4;
+
+typedef __attribute__(( ext_vector_type(2) )) int int2;
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+typedef __attribute__(( ext_vector_type(4) )) int int4;
+typedef __attribute__(( ext_vector_type(16) )) int int16;
+typedef __attribute__(( ext_vector_type(32) )) int int32;
+
+// CHECK: %struct.empty_struct = type {}
+typedef struct empty_struct
+{
+} empty_struct;
+
+// CHECK-NOT: %struct.single_element_struct_arg
+typedef struct single_element_struct_arg
+{
+    int i;
+} single_element_struct_arg_t;
+
+// CHECK-NOT: %struct.nested_single_element_struct_arg
+typedef struct nested_single_element_struct_arg
+{
+  single_element_struct_arg_t i;
+} nested_single_element_struct_arg_t;
+
+// CHECK: %struct.struct_arg = type { i32, float, i32 }
+typedef struct struct_arg
+{
+    int i1;
+    float f;
+    int i2;
+} struct_arg_t;
+
+// CHECK: %struct.struct_padding_arg = type { i8, i64 }
+typedef struct struct_padding_arg
+{
+  char i1;
+  long f;
+} struct_padding_arg;
+
+// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], [3 x float], i32 }
+typedef struct struct_of_arrays_arg
+{
+    int i1[2];
+    float f1;
+    int i2[4];
+    float f2[3];
+    int i3;
+} struct_of_arrays_arg_t;
+
+// CHECK: %struct.struct_of_structs_arg = type { i32, float, %struct.struct_arg, i32 }
+typedef struct struct_of_structs_arg
+{
+    int i1;
+    float f1;
+    struct_arg_t s1;
+    int i2;
+} struct_of_structs_arg_t;
+
+// CHECK: %union.transparent_u = type { i32 }
+typedef union
+{
+  int b1;
+  float b2;
+} transparent_u __attribute__((__transparent_union__));
+
+// CHECK: %struct.single_array_element_struct_arg = type { [4 x i32] }
+typedef struct single_array_element_struct_arg
+{
+    int i[4];
+} single_array_element_struct_arg_t;
+
+// CHECK: %struct.single_struct_element_struct_arg = type { %struct.inner }
+// CHECK: %struct.inner = type { i32, i64 }
+typedef struct single_struct_element_struct_arg
+{
+  struct inner {
+    int a;
+    long b;
+  } s;
+} single_struct_element_struct_arg_t;
+
+// CHECK: %struct.different_size_type_pair
+typedef struct different_size_type_pair {
+  long l;
+  int i;
+} different_size_type_pair;
+
+// CHECK: %struct.flexible_array = type { i32, [0 x i32] }
+typedef struct flexible_array
+{
+  int i;
+  int flexible[];
+} flexible_array;
+
+// CHECK: %struct.struct_arr16 = type { [16 x i32] }
+typedef struct struct_arr16
+{
+    int arr[16];
+} struct_arr16;
+
+// CHECK: %struct.struct_arr32 = type { [32 x i32] }
+typedef struct struct_arr32
+{
+    int arr[32];
+} struct_arr32;
+
+// CHECK: %struct.struct_arr33 = type { [33 x i32] }
+typedef struct struct_arr33
+{
+    int arr[33];
+} struct_arr33;
+
+// CHECK: %struct.struct_char_arr32 = type { [32 x i8] }
+typedef struct struct_char_arr32
+{
+  char arr[32];
+} struct_char_arr32;
+
+// CHECK-NOT: %struct.struct_char_x8
+typedef struct struct_char_x8 {
+  char x, y, z, w;
+  char a, b, c, d;
+} struct_char_x8;
+
+// CHECK-NOT: %struct.struct_char_x4
+typedef struct struct_char_x4 {
+  char x, y, z, w;
+} struct_char_x4;
+
+// CHECK-NOT: %struct.struct_char_x3
+typedef struct struct_char_x3 {
+  char x, y, z;
+} struct_char_x3;
+
+// CHECK-NOT: %struct.struct_char_x2
+typedef struct struct_char_x2 {
+  char x, y;
+} struct_char_x2;
+
+// CHECK-NOT: %struct.struct_char_x1
+typedef struct struct_char_x1 {
+  char x;
+} struct_char_x1;
+
+// 4 registers from fields, 5 if padding included.
+// CHECK: %struct.nested = type { i8, i64 }
+// CHECK: %struct.num_regs_nested_struct = type { i32, %struct.nested }
+typedef struct num_regs_nested_struct {
+  int x;
+  struct nested {
+    char z;
+    long y;
+  } inner;
+} num_regs_nested_struct;
+
+// CHECK: %struct.double_nested = type { %struct.inner_inner }
+// CHECK: %struct.inner_inner = type { i8, i32, i8 }
+// CHECK: %struct.double_nested_struct = type { i32, %struct.double_nested, i16 }
+typedef struct double_nested_struct {  // two levels of struct nesting sit between x and w
+  int x;
+  struct double_nested {  // wrapper whose only member is inner_inner
+    struct inner_inner {  // innermost level: char, int, char
+      char y;
+      int q;
+      char z;
+    } inner_inner;
+  } inner;
+
+  short w;
+} double_nested_struct;
+
+// This is a large struct, but uses fewer registers than the limit.
+// CHECK: %struct.large_struct_padding = type { i8, i32, i8, i32, i8, i8, i16, i16, [3 x i8], i64, i32, i8, i32, i16, i8 }
+typedef struct large_struct_padding {  // 15 members (e0..e14) mixing char, short, int and long
+  char e0;
+  int e1;
+  char e2;
+  int e3;
+  char e4;
+  char e5;
+  short e6;
+  short e7;
+  char e8[3];  // the only array member
+  long e9;  // the only long member
+  int e10;
+  char e11;
+  int e12;
+  short e13;
+  char e14;
+} large_struct_padding;
+
+// CHECK: %struct.int3_pair = type { <3 x i32>, <3 x i32> }
+// The number of registers computed should be 6, not 8.
+typedef struct int3_pair {
+	int3 dx;
+	int3 dy;
+} int3_pair;
+
+// CHECK: %struct.struct_4regs = type { i32, i32, i32, i32 }
+typedef struct struct_4regs
+{
+  int x;
+  int y;
+  int z;
+  int w;
+} struct_4regs;
+
+// CHECK: void @kernel_empty_struct_arg(%struct.empty_struct %s.coerce)
+__kernel void kernel_empty_struct_arg(empty_struct s) { }
+
+// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce)
+__kernel void kernel_single_element_struct_arg(single_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce)
+__kernel void kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_arg(%struct.struct_arg %arg1.coerce)
+__kernel void kernel_struct_arg(struct_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_padding_arg(%struct.struct_padding_arg %arg1.coerce)
+__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { }
+
+// CHECK: void @kernel_test_struct_of_arrays_arg(%struct.struct_of_arrays_arg %arg1.coerce)
+__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce)
+__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
+
+// CHECK: void @test_kernel_transparent_union_arg(%union.transparent_u %u.coerce)
+__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
+
+// CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce)
+__kernel void kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_single_struct_element_struct_arg(%struct.single_struct_element_struct_arg %arg1.coerce)
+__kernel void kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_different_size_type_pair_arg(%struct.different_size_type_pair %arg1.coerce)
+__kernel void kernel_different_size_type_pair_arg(different_size_type_pair arg1) { }
+
+// CHECK: define void @func_f32_arg(float %arg)
+void func_f32_arg(float arg) { }
+
+// CHECK: define void @func_v2i16_arg(<2 x i16> %arg)
+void func_v2i16_arg(short2 arg) { }
+
+// CHECK: define void @func_v3i32_arg(<3 x i32> %arg)
+void func_v3i32_arg(int3 arg) { }
+
+// CHECK: define void @func_v4i32_arg(<4 x i32> %arg)
+void func_v4i32_arg(int4 arg) { }
+
+// CHECK: define void @func_v16i32_arg(<16 x i32> %arg)
+void func_v16i32_arg(int16 arg) { }
+
+// CHECK: define void @func_v32i32_arg(<32 x i32> %arg)
+void func_v32i32_arg(int32 arg) { }
+
+// CHECK: define void @func_empty_struct_arg()
+void func_empty_struct_arg(empty_struct empty) { }
+
+// CHECK: void @func_single_element_struct_arg(i32 %arg1.coerce)
+void func_single_element_struct_arg(single_element_struct_arg_t arg1) { }
+
+// CHECK: void @func_nested_single_element_struct_arg(i32 %arg1.coerce)
+void func_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
+
+// CHECK: void @func_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
+void func_struct_arg(struct_arg_t arg1) { }
+
+// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
+void func_struct_padding_arg(struct_padding_arg arg1) { }
+
+// CHECK: define void @func_struct_char_x8([2 x i32] %arg.coerce)
+void func_struct_char_x8(struct_char_x8 arg) { }
+
+// CHECK: define void @func_struct_char_x4(i32 %arg.coerce)
+void func_struct_char_x4(struct_char_x4 arg) { }
+
+// CHECK: define void @func_struct_char_x3(i32 %arg.coerce)
+void func_struct_char_x3(struct_char_x3 arg) { }
+
+// CHECK: define void @func_struct_char_x2(i16 %arg.coerce)
+void func_struct_char_x2(struct_char_x2 arg) { }
+
+// CHECK: define void @func_struct_char_x1(i8 %arg.coerce)
+void func_struct_char_x1(struct_char_x1 arg) { }
+
+// CHECK: void @func_transparent_union_arg(i32 %u.coerce)
+void func_transparent_union_arg(transparent_u u) { }
+
+// CHECK: void @func_single_array_element_struct_arg([4 x i32] %arg1.coerce)
+void func_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
+
+// CHECK: void @func_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
+void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
+
+// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
+void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
+
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
+void func_flexible_array_arg(flexible_array arg) { }
+
+// CHECK: define float @func_f32_ret()
+float func_f32_ret()
+{
+  return 0.0f;
+}
+
+// CHECK: define void @func_empty_struct_ret()
+empty_struct func_empty_struct_ret()  // takes no arguments and returns an empty_struct by value
+{
+  empty_struct s = {};  // empty initializer list
+  return s;
+}
+
+// CHECK: define i32 @single_element_struct_ret()
+// CHECK: ret i32 0
+single_element_struct_arg_t single_element_struct_ret()
+{
+  single_element_struct_arg_t s = { 0 };
+  return s;
+}
+
+// CHECK: define i32 @nested_single_element_struct_ret()
+// CHECK: ret i32 0
+nested_single_element_struct_arg_t nested_single_element_struct_ret()
+{
+  nested_single_element_struct_arg_t s = { 0 };
+  return s;
+}
+
+// CHECK: define %struct.struct_arg @func_struct_ret()
+// CHECK: ret %struct.struct_arg zeroinitializer
+struct_arg_t func_struct_ret()
+{
+  struct_arg_t s = { 0 };
+  return s;
+}
+
+// CHECK: define %struct.struct_padding_arg @func_struct_padding_ret()
+// CHECK: ret %struct.struct_padding_arg zeroinitializer
+struct_padding_arg func_struct_padding_ret()
+{
+  struct_padding_arg s = { 0 };
+  return s;
+}
+
+// CHECK: define [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
+struct_char_x8 func_struct_char_x8_ret()
+{
+  struct_char_x8 s = { 0 };
+  return s;
+}
+
+// CHECK: define i32 @func_struct_char_x4_ret()
+// CHECK: ret i32 0
+struct_char_x4 func_struct_char_x4_ret()
+{
+  struct_char_x4 s = { 0 };
+  return s;
+}
+
+// CHECK: define i32 @func_struct_char_x3_ret()
+// CHECK: ret i32 0
+struct_char_x3 func_struct_char_x3_ret()
+{
+  struct_char_x3 s = { 0 };
+  return s;
+}
+
+// CHECK: define i16 @func_struct_char_x2_ret()
+struct_char_x2 func_struct_char_x2_ret()
+{
+  struct_char_x2 s = { 0 };
+  return s;
+}
+
+// CHECK: define i8 @func_struct_char_x1_ret()
+// CHECK: ret i8 0
+struct_char_x1 func_struct_char_x1_ret()
+{
+  struct_char_x1 s = { 0 };
+  return s;
+}
+
+// CHECK: define %struct.struct_arr16 @func_ret_struct_arr16()
+// CHECK: ret %struct.struct_arr16 zeroinitializer
+struct_arr16 func_ret_struct_arr16()
+{
+  struct_arr16 s = { 0 };
+  return s;
+}
+
+// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret %agg.result)
+struct_arr32 func_ret_struct_arr32()
+{
+  struct_arr32 s = { 0 };
+  return s;
+}
+
+// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret %agg.result)
+struct_arr33 func_ret_struct_arr33()
+{
+  struct_arr33 s = { 0 };
+  return s;
+}
+
+// CHECK: define %struct.struct_char_arr32 @func_ret_struct_char_arr32()
+struct_char_arr32 func_ret_struct_char_arr32()
+{
+  struct_char_arr32 s = { 0 };
+  return s;
+}
+
+// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 {
+// CHECK: ret i32 0
+transparent_u func_transparent_union_ret()
+{
+  transparent_u u = { 0 };
+  return u;
+}
+
+// CHECK: define %struct.different_size_type_pair @func_different_size_type_pair_ret()
+different_size_type_pair func_different_size_type_pair_ret()
+{
+  different_size_type_pair s = { 0 };
+  return s;
+}
+
+// CHECK: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret %agg.result)
+flexible_array func_flexible_array_ret()
+{
+  flexible_array s = { 0 };
+  return s;
+}
+
+// CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
+void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
+
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
+void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
+
+// XXX - Why don't the inner structs flatten?
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
+void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
+
+// CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
+void func_double_nested_struct_arg(int4 arg0, int arg1, double_nested_struct arg2) { }
+
+// CHECK: define %struct.double_nested_struct @func_double_nested_struct_ret(<4 x i32> %arg0, i32 %arg1)
+double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {  // arg0 and arg1 are never read in the body
+  double_nested_struct s = { 0 };  // first member set to 0; the remaining members are implicitly zero-initialized
+  return s;  // returned by value
+}
+
+// CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
+void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
+
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
+void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {  // unlike the _direct variant, the argument is actually used
+  *out = arg;  // copy the whole by-value struct argument through the global pointer
+}
+
+// CHECK: define void @v3i32_reg_count(<3 x i32> %arg1, <3 x i32> %arg2, <3 x i32> %arg3, <3 x i32> %arg4, i32 %arg5.coerce0, float %arg5.coerce1, i32 %arg5.coerce2)
+void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t arg5) { }
+
+// Function signature from Blender; nothing should be passed byval. The v3i32
+// should not count as 4 passed registers.
+// CHECK: define void @v3i32_pair_reg_count(%struct.int3_pair addrspace(5)* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
+void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { }
+
+// Each short4 should pack into 2 registers.
+// CHECK: define void @v4i16_reg_count(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
+                     short4 arg4, short4 arg5, struct_4regs arg6) { }
+
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
+                               short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
+
+// CHECK: define void @v3i16_reg_count(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
+                     short3 arg4, short3 arg5, struct_4regs arg6) { }
+
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
+                          short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
+
+// CHECK: define void @v2i16_reg_count(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, i32 %arg13.coerce0, i32 %arg13.coerce1, i32 %arg13.coerce2, i32 %arg13.coerce3)
+void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
+                     short2 arg4, short2 arg5, short2 arg6, short2 arg7,
+                     short2 arg8, short2 arg9, short2 arg10, short2 arg11,
+                     struct_4regs arg13) { }  // struct parameter is named arg13 (no arg12 here); the expected IR above uses the same name
+
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
+void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
+                          short2 arg4, short2 arg5, short2 arg6, short2 arg7,
+                          short2 arg8, short2 arg9, short2 arg10, short2 arg11,
+                          short2 arg12, struct_4regs arg13) { }
+
+// CHECK: define void @v2i8_reg_count(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
+                    char2 arg4, char2 arg5, struct_4regs arg6) { }
+
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
+                         char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
+
+// CHECK: define void @num_regs_left_64bit_aggregate(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, <3 x i32> %arg3, [2 x i32] %arg4.coerce, i32 %arg5)
+void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
new file mode 100644
index 0000000..b5dc47a
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
@@ -0,0 +1,523 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-opencl -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef char __attribute__((ext_vector_type(2))) char2;
+typedef char __attribute__((ext_vector_type(3))) char3;
+typedef char __attribute__((ext_vector_type(4))) char4;
+typedef char __attribute__((ext_vector_type(8))) char8;
+typedef char __attribute__((ext_vector_type(16))) char16;
+
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef short __attribute__((ext_vector_type(3))) short3;
+typedef short __attribute__((ext_vector_type(4))) short4;
+typedef short __attribute__((ext_vector_type(8))) short8;
+typedef short __attribute__((ext_vector_type(16))) short16;
+
+typedef int __attribute__((ext_vector_type(2))) int2;
+typedef int __attribute__((ext_vector_type(3))) int3;
+typedef int __attribute__((ext_vector_type(4))) int4;
+typedef int __attribute__((ext_vector_type(8))) int8;
+typedef int __attribute__((ext_vector_type(16))) int16;
+
+typedef long __attribute__((ext_vector_type(2))) long2;
+typedef long __attribute__((ext_vector_type(3))) long3;
+typedef long __attribute__((ext_vector_type(4))) long4;
+typedef long __attribute__((ext_vector_type(8))) long8;
+typedef long __attribute__((ext_vector_type(16))) long16;
+
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef half __attribute__((ext_vector_type(3))) half3;
+typedef half __attribute__((ext_vector_type(4))) half4;
+typedef half __attribute__((ext_vector_type(8))) half8;
+typedef half __attribute__((ext_vector_type(16))) half16;
+
+typedef float __attribute__((ext_vector_type(2))) float2;
+typedef float __attribute__((ext_vector_type(3))) float3;
+typedef float __attribute__((ext_vector_type(4))) float4;
+typedef float __attribute__((ext_vector_type(8))) float8;
+typedef float __attribute__((ext_vector_type(16))) float16;
+
+typedef double __attribute__((ext_vector_type(2))) double2;
+typedef double __attribute__((ext_vector_type(3))) double3;
+typedef double __attribute__((ext_vector_type(4))) double4;
+typedef double __attribute__((ext_vector_type(8))) double8;
+typedef double __attribute__((ext_vector_type(16))) double16;
+
+// CHECK: @local_memory_alignment_global.lds_i8 = internal addrspace(3) global [4 x i8] undef, align 1
+// CHECK: @local_memory_alignment_global.lds_v2i8 = internal addrspace(3) global [4 x <2 x i8>] undef, align 2
+// CHECK: @local_memory_alignment_global.lds_v3i8 = internal addrspace(3) global [4 x <3 x i8>] undef, align 4
+// CHECK: @local_memory_alignment_global.lds_v4i8 = internal addrspace(3) global [4 x <4 x i8>] undef, align 4
+// CHECK: @local_memory_alignment_global.lds_v8i8 = internal addrspace(3) global [4 x <8 x i8>] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v16i8 = internal addrspace(3) global [4 x <16 x i8>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_i16 = internal addrspace(3) global [4 x i16] undef, align 2
+// CHECK: @local_memory_alignment_global.lds_v2i16 = internal addrspace(3) global [4 x <2 x i16>] undef, align 4
+// CHECK: @local_memory_alignment_global.lds_v3i16 = internal addrspace(3) global [4 x <3 x i16>] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v4i16 = internal addrspace(3) global [4 x <4 x i16>] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v8i16 = internal addrspace(3) global [4 x <8 x i16>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_v16i16 = internal addrspace(3) global [4 x <16 x i16>] undef, align 32
+// CHECK: @local_memory_alignment_global.lds_i32 = internal addrspace(3) global [4 x i32] undef, align 4
+// CHECK: @local_memory_alignment_global.lds_v2i32 = internal addrspace(3) global [4 x <2 x i32>] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v3i32 = internal addrspace(3) global [4 x <3 x i32>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_v4i32 = internal addrspace(3) global [4 x <4 x i32>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_v8i32 = internal addrspace(3) global [4 x <8 x i32>] undef, align 32
+// CHECK: @local_memory_alignment_global.lds_v16i32 = internal addrspace(3) global [4 x <16 x i32>] undef, align 64
+// CHECK: @local_memory_alignment_global.lds_i64 = internal addrspace(3) global [4 x i64] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v2i64 = internal addrspace(3) global [4 x <2 x i64>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_v3i64 = internal addrspace(3) global [4 x <3 x i64>] undef, align 32
+// CHECK: @local_memory_alignment_global.lds_v4i64 = internal addrspace(3) global [4 x <4 x i64>] undef, align 32
+// CHECK: @local_memory_alignment_global.lds_v8i64 = internal addrspace(3) global [4 x <8 x i64>] undef, align 64
+// CHECK: @local_memory_alignment_global.lds_v16i64 = internal addrspace(3) global [4 x <16 x i64>] undef, align 128
+// CHECK: @local_memory_alignment_global.lds_f16 = internal addrspace(3) global [4 x half] undef, align 2
+// CHECK: @local_memory_alignment_global.lds_v2f16 = internal addrspace(3) global [4 x <2 x half>] undef, align 4
+// CHECK: @local_memory_alignment_global.lds_v3f16 = internal addrspace(3) global [4 x <3 x half>] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v4f16 = internal addrspace(3) global [4 x <4 x half>] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v8f16 = internal addrspace(3) global [4 x <8 x half>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_v16f16 = internal addrspace(3) global [4 x <16 x half>] undef, align 32
+// CHECK: @local_memory_alignment_global.lds_f32 = internal addrspace(3) global [4 x float] undef, align 4
+// CHECK: @local_memory_alignment_global.lds_v2f32 = internal addrspace(3) global [4 x <2 x float>] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v3f32 = internal addrspace(3) global [4 x <3 x float>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_v4f32 = internal addrspace(3) global [4 x <4 x float>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_v8f32 = internal addrspace(3) global [4 x <8 x float>] undef, align 32
+// CHECK: @local_memory_alignment_global.lds_v16f32 = internal addrspace(3) global [4 x <16 x float>] undef, align 64
+// CHECK: @local_memory_alignment_global.lds_f64 = internal addrspace(3) global [4 x double] undef, align 8
+// CHECK: @local_memory_alignment_global.lds_v2f64 = internal addrspace(3) global [4 x <2 x double>] undef, align 16
+// CHECK: @local_memory_alignment_global.lds_v3f64 = internal addrspace(3) global [4 x <3 x double>] undef, align 32
+// CHECK: @local_memory_alignment_global.lds_v4f64 = internal addrspace(3) global [4 x <4 x double>] undef, align 32
+// CHECK: @local_memory_alignment_global.lds_v8f64 = internal addrspace(3) global [4 x <8 x double>] undef, align 64
+// CHECK: @local_memory_alignment_global.lds_v16f64 = internal addrspace(3) global [4 x <16 x double>] undef, align 128
+
+
+// CHECK-LABEL: @local_memory_alignment_global(
+// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i32 0, i32 0), align 1
+// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i32 0, i32 0), align 2
+// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4
+// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i32 0, i32 0), align 4
+// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i32 0, i32 0), align 8
+// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i32 0, i32 0), align 16
+// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i32 0, i32 0), align 2
+// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i32 0, i32 0), align 4
+// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8
+// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i32 0, i32 0), align 8
+// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i32 0, i32 0), align 16
+// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i32 0, i32 0), align 32
+// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i32 0, i32 0), align 4
+// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i32 0, i32 0), align 8
+// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16
+// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i32 0, i32 0), align 16
+// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i32 0, i32 0), align 32
+// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i32 0, i32 0), align 64
+// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i32 0, i32 0), align 8
+// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i32 0, i32 0), align 16
+// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32
+// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i32 0, i32 0), align 32
+// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i32 0, i32 0), align 64
+// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i32 0, i32 0), align 128
+// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i32 0, i32 0), align 2
+// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i32 0, i32 0), align 4
+// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8
+// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i32 0, i32 0), align 8
+// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i32 0, i32 0), align 16
+// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i32 0, i32 0), align 32
+// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i32 0, i32 0), align 4
+// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i32 0, i32 0), align 8
+// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16
+// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i32 0, i32 0), align 16
+// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i32 0, i32 0), align 32
+// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i32 0, i32 0), align 64
+// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i32 0, i32 0), align 8
+// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i32 0, i32 0), align 16
+// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32
+// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i32 0, i32 0), align 32
+// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i32 0, i32 0), align 64
+// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i32 0, i32 0), align 128
+kernel void local_memory_alignment_global()  // Stores 0 to the first element of a function-scope volatile local array of every scalar/vector type below.
+{
+  volatile local char lds_i8[4];  // char: scalar, then 2-, 3-, 4-, 8- and 16-element vectors
+  volatile local char2 lds_v2i8[4];
+  volatile local char3 lds_v3i8[4];
+  volatile local char4 lds_v4i8[4];
+  volatile local char8 lds_v8i8[4];
+  volatile local char16 lds_v16i8[4];
+
+  volatile local short lds_i16[4];  // short: same scalar/vector widths
+  volatile local short2 lds_v2i16[4];
+  volatile local short3 lds_v3i16[4];
+  volatile local short4 lds_v4i16[4];
+  volatile local short8 lds_v8i16[4];
+  volatile local short16 lds_v16i16[4];
+
+  volatile local int lds_i32[4];  // int: same scalar/vector widths
+  volatile local int2 lds_v2i32[4];
+  volatile local int3 lds_v3i32[4];
+  volatile local int4 lds_v4i32[4];
+  volatile local int8 lds_v8i32[4];
+  volatile local int16 lds_v16i32[4];
+
+  volatile local long lds_i64[4];  // long: same scalar/vector widths
+  volatile local long2 lds_v2i64[4];
+  volatile local long3 lds_v3i64[4];
+  volatile local long4 lds_v4i64[4];
+  volatile local long8 lds_v8i64[4];
+  volatile local long16 lds_v16i64[4];
+
+  volatile local half lds_f16[4];  // half: same scalar/vector widths
+  volatile local half2 lds_v2f16[4];
+  volatile local half3 lds_v3f16[4];
+  volatile local half4 lds_v4f16[4];
+  volatile local half8 lds_v8f16[4];
+  volatile local half16 lds_v16f16[4];
+
+  volatile local float lds_f32[4];  // float: same scalar/vector widths
+  volatile local float2 lds_v2f32[4];
+  volatile local float3 lds_v3f32[4];
+  volatile local float4 lds_v4f32[4];
+  volatile local float8 lds_v8f32[4];
+  volatile local float16 lds_v16f32[4];
+
+  volatile local double lds_f64[4];  // double: same scalar/vector widths
+  volatile local double2 lds_v2f64[4];
+  volatile local double3 lds_v3f64[4];
+  volatile local double4 lds_v4f64[4];
+  volatile local double8 lds_v8f64[4];
+  volatile local double16 lds_v16f64[4];
+
+  *lds_i8 = 0;  // one store per array, in declaration order; the FileCheck lines before this kernel follow the same order
+  *lds_v2i8 = 0;
+  *lds_v3i8 = 0;
+  *lds_v4i8 = 0;
+  *lds_v8i8 = 0;
+  *lds_v16i8 = 0;
+
+  *lds_i16 = 0;
+  *lds_v2i16 = 0;
+  *lds_v3i16 = 0;
+  *lds_v4i16 = 0;
+  *lds_v8i16 = 0;
+  *lds_v16i16 = 0;
+
+  *lds_i32 = 0;
+  *lds_v2i32 = 0;
+  *lds_v3i32 = 0;
+  *lds_v4i32 = 0;
+  *lds_v8i32 = 0;
+  *lds_v16i32 = 0;
+
+  *lds_i64 = 0;
+  *lds_v2i64 = 0;
+  *lds_v3i64 = 0;
+  *lds_v4i64 = 0;
+  *lds_v8i64 = 0;
+  *lds_v16i64 = 0;
+
+  *lds_f16 = 0;
+  *lds_v2f16 = 0;
+  *lds_v3f16 = 0;
+  *lds_v4f16 = 0;
+  *lds_v8f16 = 0;
+  *lds_v16f16 = 0;
+
+  *lds_f32 = 0;
+  *lds_v2f32 = 0;
+  *lds_v3f32 = 0;
+  *lds_v4f32 = 0;
+  *lds_v8f32 = 0;
+  *lds_v16f32 = 0;
+
+  *lds_f64 = 0;
+  *lds_v2f64 = 0;
+  *lds_v3f64 = 0;  // expected IR stores a <4 x double> with an undef last lane through a bitcast of the array
+  *lds_v4f64 = 0;
+  *lds_v8f64 = 0;
+  *lds_v16f64 = 0;
+}
+
+kernel void local_memory_alignment_arg(  // Takes one volatile local pointer per scalar/vector type and stores 0 through each of them.
+  volatile local char* lds_i8,  // char: scalar, then 2-, 3-, 4-, 8- and 16-element vectors
+  volatile local char2* lds_v2i8,
+  volatile local char3* lds_v3i8,
+  volatile local char4* lds_v4i8,
+  volatile local char8* lds_v8i8,
+  volatile local char16* lds_v16i8,
+
+  volatile local short* lds_i16,  // short: same scalar/vector widths
+  volatile local short2* lds_v2i16,
+  volatile local short3* lds_v3i16,
+  volatile local short4* lds_v4i16,
+  volatile local short8* lds_v8i16,
+  volatile local short16* lds_v16i16,
+
+  volatile local int* lds_i32,  // int: same scalar/vector widths
+  volatile local int2* lds_v2i32,
+  volatile local int3* lds_v3i32,
+  volatile local int4* lds_v4i32,
+  volatile local int8* lds_v8i32,
+  volatile local int16* lds_v16i32,
+
+  volatile local long* lds_i64,  // long: same scalar/vector widths
+  volatile local long2* lds_v2i64,
+  volatile local long3* lds_v3i64,
+  volatile local long4* lds_v4i64,
+  volatile local long8* lds_v8i64,
+  volatile local long16* lds_v16i64,
+
+  volatile local half* lds_f16,  // half: same scalar/vector widths
+  volatile local half2* lds_v2f16,
+  volatile local half3* lds_v3f16,
+  volatile local half4* lds_v4f16,
+  volatile local half8* lds_v8f16,
+  volatile local half16* lds_v16f16,
+
+  volatile local float* lds_f32,  // float: same scalar/vector widths
+  volatile local float2* lds_v2f32,
+  volatile local float3* lds_v3f32,
+  volatile local float4* lds_v4f32,
+  volatile local float8* lds_v8f32,
+  volatile local float16* lds_v16f32,
+
+  volatile local double* lds_f64,  // double: same scalar/vector widths
+  volatile local double2* lds_v2f64,
+  volatile local double3* lds_v3f64,
+  volatile local double4* lds_v4f64,
+  volatile local double8* lds_v8f64,
+  volatile local double16* lds_v16f64)
+{
+  *lds_i8 = 0;  // one store per pointer argument, in parameter order
+  *lds_v2i8 = 0;
+  *lds_v3i8 = 0;
+  *lds_v4i8 = 0;
+  *lds_v8i8 = 0;
+  *lds_v16i8 = 0;
+
+  *lds_i16 = 0;
+  *lds_v2i16 = 0;
+  *lds_v3i16 = 0;
+  *lds_v4i16 = 0;
+  *lds_v8i16 = 0;
+  *lds_v16i16 = 0;
+
+  *lds_i32 = 0;
+  *lds_v2i32 = 0;
+  *lds_v3i32 = 0;
+  *lds_v4i32 = 0;
+  *lds_v8i32 = 0;
+  *lds_v16i32 = 0;
+
+  *lds_i64 = 0;
+  *lds_v2i64 = 0;
+  *lds_v3i64 = 0;
+  *lds_v4i64 = 0;
+  *lds_v8i64 = 0;
+  *lds_v16i64 = 0;
+
+  *lds_f16 = 0;
+  *lds_v2f16 = 0;
+  *lds_v3f16 = 0;
+  *lds_v4f16 = 0;
+  *lds_v8f16 = 0;
+  *lds_v16f16 = 0;
+
+  *lds_f32 = 0;
+  *lds_v2f32 = 0;
+  *lds_v3f32 = 0;
+  *lds_v4f32 = 0;
+  *lds_v8f32 = 0;
+  *lds_v16f32 = 0;
+
+  *lds_f64 = 0;
+  *lds_v2f64 = 0;
+  *lds_v3f64 = 0;
+  *lds_v4f64 = 0;
+  *lds_v8f64 = 0;
+  *lds_v16f64 = 0;
+}
+
+// CHECK-LABEL: @private_memory_alignment_alloca(
+// CHECK: %private_i8 = alloca [4 x i8], align 1, addrspace(5)
+// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2, addrspace(5)
+// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4, addrspace(5)
+// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4, addrspace(5)
+// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8, addrspace(5)
+// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16, addrspace(5)
+// CHECK: %private_i16 = alloca [4 x i16], align 2, addrspace(5)
+// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4, addrspace(5)
+// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8, addrspace(5)
+// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8, addrspace(5)
+// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16, addrspace(5)
+// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32, addrspace(5)
+// CHECK: %private_i32 = alloca [4 x i32], align 4, addrspace(5)
+// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8, addrspace(5)
+// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16, addrspace(5)
+// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16, addrspace(5)
+// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32, addrspace(5)
+// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64, addrspace(5)
+// CHECK: %private_i64 = alloca [4 x i64], align 8, addrspace(5)
+// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16, addrspace(5)
+// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32, addrspace(5)
+// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32, addrspace(5)
+// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64, addrspace(5)
+// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128, addrspace(5)
+// CHECK: %private_f16 = alloca [4 x half], align 2, addrspace(5)
+// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4, addrspace(5)
+// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8, addrspace(5)
+// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8, addrspace(5)
+// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16, addrspace(5)
+// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32, addrspace(5)
+// CHECK: %private_f32 = alloca [4 x float], align 4, addrspace(5)
+// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8, addrspace(5)
+// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16, addrspace(5)
+// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16, addrspace(5)
+// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32, addrspace(5)
+// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64, addrspace(5)
+// CHECK: %private_f64 = alloca [4 x double], align 8, addrspace(5)
+// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16, addrspace(5)
+// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32, addrspace(5)
+// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32, addrspace(5)
+// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64, addrspace(5)
+// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128, addrspace(5)
+
+// CHECK: store volatile i8 0, i8 addrspace(5)* %arraydecay, align 1
+// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(5)* %storetmp, align 4
+// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile i16 0, i16 addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(5)* %storetmp{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile i32 0, i32 addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(5)* %storetmp{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile i64 0, i64 addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(5)* %storetmp{{[0-9]+}}, align 32
+// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
+// CHECK: store volatile half 0xH0000, half addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(5)* %storetmp{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile float 0.000000e+00, float addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(5)* %storetmp{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile double 0.000000e+00, double addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(5)* %storetmp{{[0-9]+}}, align 32
+// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
+kernel void private_memory_alignment_alloca()  // Stores 0 to the first element of a volatile private array of every scalar/vector type below.
+{
+  volatile private char private_i8[4];  // char: scalar, then 2-, 3-, 4-, 8- and 16-element vectors; each array is expected to be an addrspace(5) alloca
+  volatile private char2 private_v2i8[4];
+  volatile private char3 private_v3i8[4];
+  volatile private char4 private_v4i8[4];
+  volatile private char8 private_v8i8[4];
+  volatile private char16 private_v16i8[4];
+
+  volatile private short private_i16[4];  // short: same scalar/vector widths
+  volatile private short2 private_v2i16[4];
+  volatile private short3 private_v3i16[4];
+  volatile private short4 private_v4i16[4];
+  volatile private short8 private_v8i16[4];
+  volatile private short16 private_v16i16[4];
+
+  volatile private int private_i32[4];  // int: same scalar/vector widths
+  volatile private int2 private_v2i32[4];
+  volatile private int3 private_v3i32[4];
+  volatile private int4 private_v4i32[4];
+  volatile private int8 private_v8i32[4];
+  volatile private int16 private_v16i32[4];
+
+  volatile private long private_i64[4];  // long: same scalar/vector widths
+  volatile private long2 private_v2i64[4];
+  volatile private long3 private_v3i64[4];
+  volatile private long4 private_v4i64[4];
+  volatile private long8 private_v8i64[4];
+  volatile private long16 private_v16i64[4];
+
+  volatile private half private_f16[4];  // half: same scalar/vector widths
+  volatile private half2 private_v2f16[4];
+  volatile private half3 private_v3f16[4];
+  volatile private half4 private_v4f16[4];
+  volatile private half8 private_v8f16[4];
+  volatile private half16 private_v16f16[4];
+
+  volatile private float private_f32[4];  // float: same scalar/vector widths
+  volatile private float2 private_v2f32[4];
+  volatile private float3 private_v3f32[4];
+  volatile private float4 private_v4f32[4];
+  volatile private float8 private_v8f32[4];
+  volatile private float16 private_v16f32[4];
+
+  volatile private double private_f64[4];  // double: same scalar/vector widths
+  volatile private double2 private_v2f64[4];
+  volatile private double3 private_v3f64[4];
+  volatile private double4 private_v4f64[4];
+  volatile private double8 private_v8f64[4];
+  volatile private double16 private_v16f64[4];
+
+  *private_i8 = 0;  // one store per array, in declaration order; the store expectations before this kernel follow the same order
+  *private_v2i8 = 0;
+  *private_v3i8 = 0;  // 3-element vectors are expected to be stored as 4-element vectors with an undef last lane (through %storetmp)
+  *private_v4i8 = 0;
+  *private_v8i8 = 0;
+  *private_v16i8 = 0;
+
+  *private_i16 = 0;
+  *private_v2i16 = 0;
+  *private_v3i16 = 0;
+  *private_v4i16 = 0;
+  *private_v8i16 = 0;
+  *private_v16i16 = 0;
+
+  *private_i32 = 0;
+  *private_v2i32 = 0;
+  *private_v3i32 = 0;
+  *private_v4i32 = 0;
+  *private_v8i32 = 0;
+  *private_v16i32 = 0;
+
+  *private_i64 = 0;
+  *private_v2i64 = 0;
+  *private_v3i64 = 0;
+  *private_v4i64 = 0;
+  *private_v8i64 = 0;
+  *private_v16i64 = 0;
+
+  *private_f16 = 0;
+  *private_v2f16 = 0;
+  *private_v3f16 = 0;
+  *private_v4f16 = 0;
+  *private_v8f16 = 0;
+  *private_v16f16 = 0;
+
+  *private_f32 = 0;
+  *private_v2f32 = 0;
+  *private_v3f32 = 0;
+  *private_v4f32 = 0;
+  *private_v8f32 = 0;
+  *private_v16f32 = 0;
+
+  *private_f64 = 0;
+  *private_v2f64 = 0;
+  *private_v3f64 = 0;
+  *private_v4f64 = 0;
+  *private_v8f64 = 0;
+  *private_v16f64 = 0;
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
new file mode 100644
index 0000000..ad13a2c
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -0,0 +1,187 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s -check-prefix=NONAMDHSA
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
+
+__attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0() {}  // One attribute with all-zero operands per kernel in this run; the negative checks near the end of the file require that no zero-valued attribute string is emitted.
+__attribute__((amdgpu_waves_per_eu(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0() {}
+__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void num_sgpr0() {}
+__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void num_vgpr0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0() {}  // This run combines two zero-valued attributes on each kernel.
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_num_vgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_num_sgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_num_vgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0_num_vgpr_0() {}
+__attribute__((amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void num_sgpr_0_num_vgpr_0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0() {}  // This run combines three zero-valued attributes on each kernel.
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_vgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_vgpr_0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0_num_vgpr_0() {}  // All four attribute kinds at once, every operand zero (one-operand waves_per_eu form).
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0_num_vgpr_0() {}  // Same, with the two-operand waves_per_eu form.
+
+__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64() {  // One non-zero attribute per kernel in this run; the define line captures the attribute-group id that the attributes lines at the end of the file match against.
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
+kernel void waves_per_eu_2() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2() [[WAVES_PER_EU_2:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4() [[WAVES_PER_EU_2_4:#[0-9]+]]
+}
+__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @num_sgpr_32() [[NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @num_vgpr_64() [[NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2() {  // This run combines two non-zero attributes on each kernel; each define line captures its own attribute-group id.
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void waves_per_eu_2_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_sgpr_32() [[WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void waves_per_eu_2_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_vgpr_64() [[WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_sgpr_32() [[WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_vgpr_64() [[WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void num_sgpr_32_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @num_sgpr_32_num_vgpr_64() [[NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {  // This run combines three non-zero attributes on each kernel; each define line captures its own attribute-group id.
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {  // All four attribute kinds at once with non-zero operands (one-operand waves_per_eu form).
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {  // Same, with the two-operand waves_per_eu form.
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((reqd_work_group_size(32, 2, 1))) // expected-no-diagnostics
+kernel void reqd_work_group_size_32_2_1() {  // Only reqd_work_group_size is given; 32*2*1 = 64, and the captured group is expected to carry a flat work-group size of 64,64.
+// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1() [[FLAT_WORK_GROUP_SIZE_64_64:#[0-9]+]]
+}
+__attribute__((reqd_work_group_size(32, 2, 1), amdgpu_flat_work_group_size(16, 128))) // expected-no-diagnostics
+kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {  // Both attributes are given; the captured group is expected to carry the explicit 16,128 range.
+// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
+}
+
+void a_function() {  // Plain (non-kernel) function: the expected define line has no amdgpu_kernel calling convention, and its attribute group is matched at the end of the file.
+// CHECK: define void @a_function() [[A_FUNCTION:#[0-9]+]]
+}
+
+
+// Make sure this is silently accepted on other targets.
+// X86-NOT: "amdgpu-flat-work-group-size"
+// X86-NOT: "amdgpu-waves-per-eu"
+// X86-NOT: "amdgpu-num-vgpr"
+// X86-NOT: "amdgpu-num-sgpr"
+// X86-NOT: "amdgpu-implicitarg-num-bytes"
+// NONAMDHSA-NOT: "amdgpu-implicitarg-num-bytes"
+
+// CHECK-NOT: "amdgpu-flat-work-group-size"="0,0"
+// CHECK-NOT: "amdgpu-waves-per-eu"="0"
+// CHECK-NOT: "amdgpu-waves-per-eu"="0,0"
+// CHECK-NOT: "amdgpu-num-sgpr"="0"
+// CHECK-NOT: "amdgpu-num-vgpr"="0"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="48" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="48" 
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" 
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" 
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" 
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" 
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+
+// CHECK-DAG: attributes [[A_FUNCTION]] = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false"
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
new file mode 100755
index 0000000..0057939
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
@@ -0,0 +1,14 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// CHECK: define amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture %out)
+// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
+
+kernel void test_kernel(global int *out)  // Callee kernel: writes the constant 4 to the first element of out.
+{
+  out[0] = 4;
+}
+
+__kernel void test_call_kernel(__global int *out)  // Calls another kernel like an ordinary function; the expectations at the top of the file look for a store of 4 through %out after this kernel's define line.
+{
+  test_kernel(out);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-calling-conv.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-calling-conv.cl
new file mode 100644
index 0000000..7da9d7f
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-calling-conv.cl
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: define amdgpu_kernel void @calling_conv_amdgpu_kernel()
+kernel void calling_conv_amdgpu_kernel()  // Empty OpenCL kernel; the expectation above requires the amdgpu_kernel calling convention on its definition.
+{
+}
+
+// CHECK: define void @calling_conv_none()
+void calling_conv_none()  // Empty non-kernel function; the expectation above requires a plain define with no calling-convention keyword.
+{
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
new file mode 100644
index 0000000..952b25d
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
@@ -0,0 +1,126 @@
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
+
+// CHECK-DAG: ![[DWARF_ADDRESS_SPACE_NONE:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}})
+// CHECK-DAG: ![[DWARF_ADDRESS_SPACE_LOCAL:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}}, dwarfAddressSpace: 2)
+// CHECK-DAG: ![[DWARF_ADDRESS_SPACE_PRIVATE:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}}, dwarfAddressSpace: 1)
+
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+global int *FileVar0;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+constant int *FileVar1;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]], isLocal: false, isDefinition: true)
+local int *FileVar2;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]], isLocal: false, isDefinition: true)
+private int *FileVar3;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+int *FileVar4;
+
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+global int *global FileVar5;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar6", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+constant int *global FileVar6;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]], isLocal: false, isDefinition: true)
+local int *global FileVar7;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar8", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]], isLocal: false, isDefinition: true)
+private int *global FileVar8;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar9", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+int *global FileVar9;
+
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar10", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+global int *constant FileVar10 = 0;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar11", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+constant int *constant FileVar11 = 0;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar12", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]], isLocal: false, isDefinition: true)
+local int *constant FileVar12 = 0;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar13", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]], isLocal: false, isDefinition: true)
+private int *constant FileVar13 = 0;
+// CHECK-DAG: distinct !DIGlobalVariable(name: "FileVar14", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: false, isDefinition: true)
+int *constant FileVar14 = 0;
+
+kernel void kernel1(
+    // CHECK-DAG: !DILocalVariable(name: "KernelArg0", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
+    global int *KernelArg0,
+    // CHECK-DAG: !DILocalVariable(name: "KernelArg1", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
+    constant int *KernelArg1,
+    // CHECK-DAG: !DILocalVariable(name: "KernelArg2", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]])
+    local int *KernelArg2) {
+  private int *Tmp0;
+  int *Tmp1;
+
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
+  global int *FuncVar0 = KernelArg0;
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
+  constant int *FuncVar1 = KernelArg1;
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]])
+  local int *FuncVar2 = KernelArg2;
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]])
+  private int *FuncVar3 = Tmp0;
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
+  int *FuncVar4 = Tmp1;
+
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  global int *constant FuncVar5 = 0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar6", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  constant int *constant FuncVar6 = 0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]], isLocal: true, isDefinition: true)
+  local int *constant FuncVar7 = 0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar8", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]], isLocal: true, isDefinition: true)
+  private int *constant FuncVar8 = 0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar9", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  int *constant FuncVar9 = 0;
+
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar10", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  global int *local FuncVar10; FuncVar10 = KernelArg0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar11", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  constant int *local FuncVar11; FuncVar11 = KernelArg1;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar12", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]], isLocal: true, isDefinition: true)
+  local int *local FuncVar12; FuncVar12 = KernelArg2;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar13", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]], isLocal: true, isDefinition: true)
+  private int *local FuncVar13; FuncVar13 = Tmp0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar14", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  int *local FuncVar14; FuncVar14 = Tmp1;
+
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar15", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
+  global int *private FuncVar15 = KernelArg0;
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar16", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
+  constant int *private FuncVar16 = KernelArg1;
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar17", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]])
+  local int *private FuncVar17 = KernelArg2;
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar18", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]])
+  private int *private FuncVar18 = Tmp0;
+  // CHECK-DAG: !DILocalVariable(name: "FuncVar19", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
+  int *private FuncVar19 = Tmp1;
+}
+
+struct FileStruct0 {
+  // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "StructMem0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_NONE]], size: {{[0-9]+}})
+  global int *StructMem0;
+  // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "StructMem1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_NONE]], size: {{[0-9]+}}, offset: {{[0-9]+}})
+  constant int *StructMem1;
+  // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "StructMem2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_LOCAL]], size: {{[0-9]+}}, offset: {{[0-9]+}})
+  local int *StructMem2;
+  // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "StructMem3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_PRIVATE]], size: {{[0-9]+}}, offset: {{[0-9]+}})
+  private int *StructMem3;
+  // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "StructMem4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_NONE]], size: {{[0-9]+}}, offset: {{[0-9]+}})
+  int *StructMem4;
+};
+
+struct FileStruct1 {
+  union {
+    // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "UnionMem0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_NONE]], size: {{[0-9]+}})
+    global int *UnionMem0;
+    // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "UnionMem1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_NONE]], size: {{[0-9]+}})
+    constant int *UnionMem1;
+    // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "UnionMem2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_LOCAL]], size: {{[0-9]+}})
+    local int *UnionMem2;
+    // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "UnionMem3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_PRIVATE]], size: {{[0-9]+}})
+    private int *UnionMem3;
+    // CHECK-DAG: !DIDerivedType(tag: DW_TAG_member, name: "UnionMem4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, baseType: ![[DWARF_ADDRESS_SPACE_NONE]], size: {{[0-9]+}})
+    int *UnionMem4;
+  };
+  long StructMem0;
+};
+
+kernel void kernel2(global struct FileStruct0 *Kernel2Arg0,
+                    global struct FileStruct1 *Kernel2Arg1) {}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
new file mode 100644
index 0000000..894611e
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
@@ -0,0 +1,128 @@
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
+
+// CHECK-DAG: ![[FILEVAR0:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR0]], expr: !DIExpression())
+global int *FileVar0;
+// CHECK-DAG: ![[FILEVAR1:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR1]], expr: !DIExpression())
+constant int *FileVar1;
+// CHECK-DAG: ![[FILEVAR2:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR2]], expr: !DIExpression())
+local int *FileVar2;
+// CHECK-DAG: ![[FILEVAR3:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR3]], expr: !DIExpression())
+private int *FileVar3;
+// CHECK-DAG: ![[FILEVAR4:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR4]], expr: !DIExpression())
+int *FileVar4;
+
+// CHECK-DAG: ![[FILEVAR5:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR5]], expr: !DIExpression())
+global int *global FileVar5;
+// CHECK-DAG: ![[FILEVAR6:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar6", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR6]], expr: !DIExpression())
+constant int *global FileVar6;
+// CHECK-DAG: ![[FILEVAR7:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR7]], expr: !DIExpression())
+local int *global FileVar7;
+// CHECK-DAG: ![[FILEVAR8:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar8", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR8]], expr: !DIExpression())
+private int *global FileVar8;
+// CHECK-DAG: ![[FILEVAR9:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar9", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR9]], expr: !DIExpression())
+int *global FileVar9;
+
+// CHECK-DAG: ![[FILEVAR10:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar10", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR10]], expr: !DIExpression())
+global int *constant FileVar10 = 0;
+// CHECK-DAG: ![[FILEVAR11:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar11", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR11]], expr: !DIExpression())
+constant int *constant FileVar11 = 0;
+// CHECK-DAG: ![[FILEVAR12:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar12", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR12]], expr: !DIExpression())
+local int *constant FileVar12 = 0;
+// CHECK-DAG: ![[FILEVAR13:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar13", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR13]], expr: !DIExpression())
+private int *constant FileVar13 = 0;
+// CHECK-DAG: ![[FILEVAR14:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar14", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
+// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR14]], expr: !DIExpression())
+int *constant FileVar14 = 0;
+
+kernel void kernel1(
+    // CHECK-DAG: ![[KERNELARG0:[0-9]+]] = !DILocalVariable(name: "KernelArg0", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[KERNELARG0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+    global int *KernelArg0,
+    // CHECK-DAG: ![[KERNELARG1:[0-9]+]] = !DILocalVariable(name: "KernelArg1", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[KERNELARG1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+    constant int *KernelArg1,
+    // CHECK-DAG: ![[KERNELARG2:[0-9]+]] = !DILocalVariable(name: "KernelArg2", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+    // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[KERNELARG2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+    local int *KernelArg2) {
+  private int *Tmp0;
+  int *Tmp1;
+
+  // CHECK-DAG: ![[FUNCVAR0:[0-9]+]] = !DILocalVariable(name: "FuncVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  global int *FuncVar0 = KernelArg0;
+  // CHECK-DAG: ![[FUNCVAR1:[0-9]+]] = !DILocalVariable(name: "FuncVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  constant int *FuncVar1 = KernelArg1;
+  // CHECK-DAG: ![[FUNCVAR2:[0-9]+]] = !DILocalVariable(name: "FuncVar2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  local int *FuncVar2 = KernelArg2;
+  // CHECK-DAG: ![[FUNCVAR3:[0-9]+]] = !DILocalVariable(name: "FuncVar3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(5)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR3]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  private int *FuncVar3 = Tmp0;
+  // CHECK-DAG: ![[FUNCVAR4:[0-9]+]] = !DILocalVariable(name: "FuncVar4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* addrspace(5)* {{.*}}, metadata ![[FUNCVAR4]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  int *FuncVar4 = Tmp1;
+
+  // CHECK-DAG: ![[FUNCVAR5:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR5]], expr: !DIExpression())
+  global int *constant FuncVar5 = 0;
+  // CHECK-DAG: ![[FUNCVAR6:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar6", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR6]], expr: !DIExpression())
+  constant int *constant FuncVar6 = 0;
+  // CHECK-DAG: ![[FUNCVAR7:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR7]], expr: !DIExpression())
+  local int *constant FuncVar7 = 0;
+  // CHECK-DAG: ![[FUNCVAR8:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar8", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR8]], expr: !DIExpression())
+  private int *constant FuncVar8 = 0;
+  // CHECK-DAG: ![[FUNCVAR9:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar9", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR9]], expr: !DIExpression())
+  int *constant FuncVar9 = 0;
+
+  // CHECK-DAG: ![[FUNCVAR10:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar10", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR10]], expr: !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef))
+  global int *local FuncVar10; FuncVar10 = KernelArg0;
+  // CHECK-DAG: ![[FUNCVAR11:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar11", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR11]], expr: !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef))
+  constant int *local FuncVar11; FuncVar11 = KernelArg1;
+  // CHECK-DAG: ![[FUNCVAR12:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar12", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR12]], expr: !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef))
+  local int *local FuncVar12; FuncVar12 = KernelArg2;
+  // CHECK-DAG: ![[FUNCVAR13:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar13", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR13]], expr: !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef))
+  private int *local FuncVar13; FuncVar13 = Tmp0;
+  // CHECK-DAG: ![[FUNCVAR14:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar14", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR14]], expr: !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef))
+  int *local FuncVar14; FuncVar14 = Tmp1;
+
+  // CHECK-DAG: ![[FUNCVAR15:[0-9]+]] = !DILocalVariable(name: "FuncVar15", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR15]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  global int *private FuncVar15 = KernelArg0;
+  // CHECK-DAG: ![[FUNCVAR16:[0-9]+]] = !DILocalVariable(name: "FuncVar16", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR16]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  constant int *private FuncVar16 = KernelArg1;
+  // CHECK-DAG: ![[FUNCVAR17:[0-9]+]] = !DILocalVariable(name: "FuncVar17", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR17]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  local int *private FuncVar17 = KernelArg2;
+  // CHECK-DAG: ![[FUNCVAR18:[0-9]+]] = !DILocalVariable(name: "FuncVar18", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(5)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR18]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  private int *private FuncVar18 = Tmp0;
+  // CHECK-DAG: ![[FUNCVAR19:[0-9]+]] = !DILocalVariable(name: "FuncVar19", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
+  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* addrspace(5)* {{.*}}, metadata ![[FUNCVAR19]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+  int *private FuncVar19 = Tmp1;
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
new file mode 100644
index 0000000..75fcecc
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s
+
+typedef struct {int a;} ndrange_t;
+
+void callee(long id, global long *out) {
+  out[id] = id;
+}
+
+// CHECK-LABEL: define amdgpu_kernel void @test
+kernel void test(global char *a, char b, global long *c, long d) {
+  queue_t default_queue;
+  unsigned flags = 0;
+  ndrange_t ndrange;
+
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(void) {
+                 a[0] = b;
+                 });
+
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(void) {
+                 a[0] = b;
+                 c[0] = d;
+                 });
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *lp) {
+                 a[0] = b;
+                 c[0] = d;
+                 ((local int*)lp)[0] = 1;
+                 }, 100);
+
+  void (^block)(void) = ^{
+    callee(d, c);
+  };
+
+  enqueue_kernel(default_queue, flags, ndrange, block);
+}
+
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }>)
+// CHECK-SAME: #[[ATTR:[0-9]+]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+// CHECK: entry:
+// CHECK:  %1 = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
+// CHECK:  store <{ i32, i32, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8
+// CHECK:  %2 = addrspacecast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8*
+// CHECK:  call void @__test_block_invoke(i8* %2)
+// CHECK:  ret void
+// CHECK:}
+
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i64, i64 addrspace(1)* }>)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+
+// CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
new file mode 100644
index 0000000..bcb00be
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
+
+// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+void foo(void) {}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl
new file mode 100644
index 0000000..a1815ce
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+
+// Check that appropriate features are defined for every supported AMDGPU
+// "-target" and "-mcpu" option.
+
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s
+
+// GFX904: "target-features"="+16-bit-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
+// GFX906: "target-features"="+16-bit-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
+
+kernel void test() {}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
new file mode 100644
index 0000000..688d3a5
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
@@ -0,0 +1,625 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -include opencl-c.h -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -cl-std=CL2.0 -include opencl-c.h -triple amdgcn -emit-llvm -o - | FileCheck --check-prefix=NOOPT %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -include opencl-c.h -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
+
+typedef struct {
+  private char *p1;
+  local char *p2;
+  constant char *p3;
+  global char *p4;
+  generic char *p5;
+} StructTy1;
+
+typedef struct {
+  constant char *p3;
+  global char *p4;
+  generic char *p5;
+} StructTy2;
+
+// LLVM requests global variable with common linkage to be initialized with zeroinitializer, therefore use -fno-common
+// to suppress common linkage for tentative definition.
+
+// Test 0 as initializer.
+
+// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
+private char *private_p = 0;
+
+// CHECK: @local_p = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+local char *local_p = 0;
+
+// CHECK: @global_p = local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
+global char *global_p = 0;
+
+// CHECK: @constant_p = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+constant char *constant_p = 0;
+
+// CHECK: @generic_p = local_unnamed_addr addrspace(1) global i8* null, align 8
+generic char *generic_p = 0;
+
+// Test NULL as initializer.
+
+// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
+private char *private_p_NULL = NULL;
+
+// CHECK: @local_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+local char *local_p_NULL = NULL;
+
+// CHECK: @global_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
+global char *global_p_NULL = NULL;
+
+// CHECK: @constant_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+constant char *constant_p_NULL = NULL;
+
+// CHECK: @generic_p_NULL = local_unnamed_addr addrspace(1) global i8* null, align 8
+generic char *generic_p_NULL = NULL;
+
+// Test constant folding of null pointer.
+// A null pointer should be folded to a null pointer in the target address space.
+
+// CHECK: @fold_generic = local_unnamed_addr addrspace(1) global i32* null, align 8
+generic int *fold_generic = (global int*)(generic float*)(private char*)0;
+
+// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16 addrspace(5)* null, align 4
+private short *fold_priv = (private short*)(generic int*)(global void*)0;
+
+// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8 addrspace(5)* inttoptr (i32 10 to i8 addrspace(5)*), align 4
+private char *fold_priv_arith = (private char*)0 + 10;
+
+// CHECK: @fold_int = local_unnamed_addr addrspace(1) global i32 14, align 4
+int fold_int = (int)(private void*)(generic char*)(global int*)0 + 14;
+
+// CHECK: @fold_int2 = local_unnamed_addr addrspace(1) global i32 13, align 4
+int fold_int2 = (int) ((private void*)0 + 13);
+
+// CHECK: @fold_int3 = local_unnamed_addr addrspace(1) global i32 0, align 4
+int fold_int3 = (int) ((private int*)0);
+
+// CHECK: @fold_int4 = local_unnamed_addr addrspace(1) global i32 8, align 4
+int fold_int4 = (int) &((private int*)0)[2];
+
+// CHECK: @fold_int5 = local_unnamed_addr addrspace(1) global i32 4, align 4
+int fold_int5 = (int) &((private StructTy1*)0)->p2;
+
+
+// CHECK: @fold_int_local = local_unnamed_addr addrspace(1) global i32 13, align 4
+int fold_int_local = (int)(local void*)(generic char*)(global int*)0 + 14;
+
+// CHECK: @fold_int2_local = local_unnamed_addr addrspace(1) global i32 12, align 4
+int fold_int2_local = (int) ((local void*)0 + 13);
+
+// CHECK: @fold_int3_local = local_unnamed_addr addrspace(1) global i32 -1, align 4
+int fold_int3_local = (int) ((local int*)0);
+
+// CHECK: @fold_int4_local = local_unnamed_addr addrspace(1) global i32 7, align 4
+int fold_int4_local = (int) &((local int*)0)[2];
+
+// CHECK: @fold_int5_local = local_unnamed_addr addrspace(1) global i32 3, align 4
+int fold_int5_local = (int) &((local StructTy1*)0)->p2;
+
+
+// Test static variable initialization.
+
+// NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp3 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp4 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp5 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.SS1 = internal addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
+// NOOPT: @test_static_var_private.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
+
+void test_static_var_private(void) {
+  static private char *sp1 = 0;
+  static private char *sp2 = NULL;
+  static private char *sp3;
+  static private char *sp4 = (private char*)((void)0, 0);
+  const int x = 0;
+  static private char *sp5 = (private char*)x;
+  static StructTy1 SS1;
+  static StructTy2 SS2;
+}
+
+// NOOPT: @test_static_var_local.sp1 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp2 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp3 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp4 = internal addrspace(1) global i8 addrspace(3)* null, align 4
+// NOOPT: @test_static_var_local.sp5 = internal addrspace(1) global i8 addrspace(3)* null, align 4
+// NOOPT: @test_static_var_local.SS1 = internal addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
+// NOOPT: @test_static_var_local.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
+void test_static_var_local(void) {
+  static local char *sp1 = 0;
+  static local char *sp2 = NULL;
+  static local char *sp3;
+  static local char *sp4 = (local char*)((void)0, 0);
+  const int x = 0;
+  static local char *sp5 = (local char*)x;
+  static StructTy1 SS1;
+  static StructTy2 SS2;
+}
+
+// Test function-scope variable initialization.
+// NOOPT-LABEL: @test_func_scope_var_private(
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp1, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp2, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp3, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp4, align 4
+// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1 addrspace(5)* %SS1 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @test_func_scope_var_private.SS1 to i8 addrspace(4)*), i64 32, i1 false)
+// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2 addrspace(5)* %SS2 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 %[[SS2]], i8 0, i64 24, i1 false)
+void test_func_scope_var_private(void) {
+  private char *sp1 = 0;
+  private char *sp2 = NULL;
+  private char *sp3 = (private char*)((void)0, 0);
+  const int x = 0;
+  private char *sp4 = (private char*)x;
+  StructTy1 SS1 = {0, 0, 0, 0, 0};
+  StructTy2 SS2 = {0, 0, 0};
+}
+
+// Test function-scope variable initialization.
+// NOOPT-LABEL: @test_func_scope_var_local(
+// NOOPT: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(5)* %sp1, align 4
+// NOOPT: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(5)* %sp2, align 4
+// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(5)* %sp3, align 4
+// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(5)* %sp4, align 4
+// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1 addrspace(5)* %SS1 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @test_func_scope_var_local.SS1 to i8 addrspace(4)*), i64 32, i1 false)
+// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2 addrspace(5)* %SS2 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 %[[SS2]], i8 0, i64 24, i1 false)
+void test_func_scope_var_local(void) {
+  local char *sp1 = 0;
+  local char *sp2 = NULL;
+  local char *sp3 = (local char*)((void)0, 0);
+  const int x = 0;
+  local char *sp4 = (local char*)x;
+  StructTy1 SS1 = {0, 0, 0, 0, 0};
+  StructTy2 SS2 = {0, 0, 0};
+}
+
+
+// Test default initialization of pointers.
+
+// Tentative definition of global variables with non-zero initializer
+// cannot have common linkage since common linkage requires zero initialization
+// and does not have explicit section.
+
+// CHECK: @p1 = common local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
+private char *p1;
+
+// CHECK: @p2 = weak local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+local char *p2;
+
+// CHECK: @p3 = common local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+constant char *p3;
+
+// CHECK: @p4 = common local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
+global char *p4;
+
+// CHECK: @p5 = common local_unnamed_addr addrspace(1) global i8* null, align 8
+generic char *p5;
+
+// Test default initialization of structure.
+
+// CHECK: @S1 = weak local_unnamed_addr addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
+StructTy1 S1;
+
+// CHECK: @S2 = common local_unnamed_addr addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
+StructTy2 S2;
+
+// Test default initialization of array.
+// CHECK: @A1 = weak local_unnamed_addr addrspace(1) global [2 x %struct.StructTy1] [%struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }], align 8
+StructTy1 A1[2];
+
+// CHECK: @A2 = common local_unnamed_addr addrspace(1) global [2 x %struct.StructTy2] zeroinitializer, align 8
+StructTy2 A2[2];
+
+// Test comparison with 0.
+
+// CHECK-LABEL: cmp_private
+// CHECK: icmp eq i8 addrspace(5)* %p, null
+void cmp_private(private char* p) {
+  if (p != 0)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_local
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
+void cmp_local(local char* p) {
+  if (p != 0)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_global
+// CHECK: icmp eq i8 addrspace(1)* %p, null
+void cmp_global(global char* p) {
+  if (p != 0)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_constant
+// CHECK: icmp eq i8 addrspace(4)* %p, null
+char cmp_constant(constant char* p) {
+  if (p != 0)
+    return *p;
+  else
+    return 0;
+}
+
+// CHECK-LABEL: cmp_generic
+// CHECK: icmp eq i8* %p, null
+void cmp_generic(generic char* p) {
+  if (p != 0)
+    *p = 0;
+}
+
+// Test comparison with NULL.
+
+// CHECK-LABEL: cmp_NULL_private
+// CHECK: icmp eq i8 addrspace(5)* %p, null
+void cmp_NULL_private(private char* p) {
+  if (p != NULL)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_NULL_local
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
+void cmp_NULL_local(local char* p) {
+  if (p != NULL)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_NULL_global
+// CHECK: icmp eq i8 addrspace(1)* %p, null
+void cmp_NULL_global(global char* p) {
+  if (p != NULL)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_NULL_constant
+// CHECK: icmp eq i8 addrspace(4)* %p, null
+char cmp_NULL_constant(constant char* p) {
+  if (p != NULL)
+    return *p;
+  else
+    return 0;
+}
+
+// CHECK-LABEL: cmp_NULL_generic
+// CHECK: icmp eq i8* %p, null
+void cmp_NULL_generic(generic char* p) {
+  if (p != NULL)
+    *p = 0;
+}
+
+// Test storing 0 as a null pointer.
+// CHECK-LABEL: test_storage_null_pointer
+// CHECK: store i8 addrspace(5)* null, i8 addrspace(5)** %arg_private
+// CHECK: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)** %arg_local
+// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)** %arg_global
+// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)** %arg_constant
+// CHECK: store i8* null, i8** %arg_generic
+void test_storage_null_pointer(private char** arg_private,
+                               local char** arg_local,
+                               global char** arg_global,
+                               constant char** arg_constant,
+                               generic char** arg_generic) {
+   *arg_private = 0;
+   *arg_local = 0;
+   *arg_global = 0;
+   *arg_constant = 0;
+   *arg_generic = 0;
+}
+
+// Test storing NULL as a null pointer.
+// CHECK-LABEL: test_storage_null_pointer_NULL
+// CHECK: store i8 addrspace(5)* null, i8 addrspace(5)** %arg_private
+// CHECK: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)** %arg_local
+// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)** %arg_global
+// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)** %arg_constant
+// CHECK: store i8* null, i8** %arg_generic
+void test_storage_null_pointer_NULL(private char** arg_private,
+                                    local char** arg_local,
+                                    global char** arg_global,
+                                    constant char** arg_constant,
+                                    generic char** arg_generic) {
+   *arg_private = NULL;
+   *arg_local = NULL;
+   *arg_global = NULL;
+   *arg_constant = NULL;
+   *arg_generic = NULL;
+}
+
+// Test pass null pointer to function as argument.
+void test_pass_null_pointer_arg_calee(private char* arg_private,
+                                      local char* arg_local,
+                                      global char* arg_global,
+                                      constant char* arg_constant,
+                                      generic char* arg_generic);
+
+// CHECK-LABEL: test_pass_null_pointer_arg
+// CHECK: call void @test_pass_null_pointer_arg_calee(i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(4)* null, i8* null)
+// CHECK: call void @test_pass_null_pointer_arg_calee(i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(4)* null, i8* null)
+void test_pass_null_pointer_arg(void) {
+  test_pass_null_pointer_arg_calee(0, 0, 0, 0, 0);
+  test_pass_null_pointer_arg_calee(NULL, NULL, NULL, NULL, NULL);
+}
+
+// Test cast null pointer to size_t.
+void test_cast_null_pointer_to_sizet_calee(size_t arg_private,
+                                           size_t arg_local,
+                                           size_t arg_global,
+                                           size_t arg_constant,
+                                           size_t arg_generic);
+
+// CHECK-LABEL: test_cast_null_pointer_to_sizet
+// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
+// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
+void test_cast_null_pointer_to_sizet(void) {
+  test_cast_null_pointer_to_sizet_calee((size_t)((private char*)0),
+                                        (size_t)((local char*)0),
+                                        (size_t)((global char*)0),
+                                        (size_t)((constant char*)0),
+                                        (size_t)((generic char*)0));
+  test_cast_null_pointer_to_sizet_calee((size_t)((private char*)NULL),
+                                        (size_t)((local char*)NULL),
+                                        (size_t)((global char*)NULL),
+                                        (size_t)((constant char*)0), // NULL cannot be casted to constant pointer since it is defined as a generic pointer
+                                        (size_t)((generic char*)NULL));
+}
+
+// Test comparison between null pointers; in each macro name, 0/N says whether that operand is spelled 0 or NULL.
+#define TEST_EQ00(addr1, addr2) int test_eq00_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)0; }
+#define TEST_EQ0N(addr1, addr2) int test_eq0N_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)NULL; }
+#define TEST_EQN0(addr1, addr2) int test_eqN0_##addr1##_##addr2(void) { return (addr1 char*)NULL == (addr2 char*)0; }
+#define TEST_EQNN(addr1, addr2) int test_eqNN_##addr1##_##addr2(void) { return (addr1 char*)NULL == (addr2 char*)NULL; }
+#define TEST_NE00(addr1, addr2) int test_ne00_##addr1##_##addr2(void) { return (addr1 char*)0 != (addr2 char*)0; }
+#define TEST_NE0N(addr1, addr2) int test_ne0N_##addr1##_##addr2(void) { return (addr1 char*)0 != (addr2 char*)NULL; }
+#define TEST_NEN0(addr1, addr2) int test_neN0_##addr1##_##addr2(void) { return (addr1 char*)NULL != (addr2 char*)0; }
+#define TEST_NENN(addr1, addr2) int test_neNN_##addr1##_##addr2(void) { return (addr1 char*)NULL != (addr2 char*)NULL; }
+#define TEST(addr1, addr2) \
+        TEST_EQ00(addr1, addr2) \
+        TEST_EQ0N(addr1, addr2) \
+        TEST_EQN0(addr1, addr2) \
+        TEST_EQNN(addr1, addr2) \
+        TEST_NE00(addr1, addr2) \
+        TEST_NE0N(addr1, addr2) \
+        TEST_NEN0(addr1, addr2) \
+        TEST_NENN(addr1, addr2)
+
+// CHECK-LABEL: test_eq00_generic_private
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eq0N_generic_private
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqN0_generic_private
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqNN_generic_private
+// CHECK: ret i32 1
+// CHECK-LABEL: test_ne00_generic_private
+// CHECK: ret i32 0
+// CHECK-LABEL: test_ne0N_generic_private
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neN0_generic_private
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neNN_generic_private
+// CHECK: ret i32 0
+TEST(generic, private)
+
+// CHECK-LABEL: test_eq00_generic_local
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eq0N_generic_local
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqN0_generic_local
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqNN_generic_local
+// CHECK: ret i32 1
+// CHECK-LABEL: test_ne00_generic_local
+// CHECK: ret i32 0
+// CHECK-LABEL: test_ne0N_generic_local
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neN0_generic_local
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neNN_generic_local
+// CHECK: ret i32 0
+TEST(generic, local)
+
+// CHECK-LABEL: test_eq00_generic_global
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eq0N_generic_global
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqN0_generic_global
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqNN_generic_global
+// CHECK: ret i32 1
+// CHECK-LABEL: test_ne00_generic_global
+// CHECK: ret i32 0
+// CHECK-LABEL: test_ne0N_generic_global
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neN0_generic_global
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neNN_generic_global
+// CHECK: ret i32 0
+TEST(generic, global)
+
+// CHECK-LABEL: test_eq00_generic_generic
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eq0N_generic_generic
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqN0_generic_generic
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqNN_generic_generic
+// CHECK: ret i32 1
+// CHECK-LABEL: test_ne00_generic_generic
+// CHECK: ret i32 0
+// CHECK-LABEL: test_ne0N_generic_generic
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neN0_generic_generic
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neNN_generic_generic
+// CHECK: ret i32 0
+TEST(generic, generic)
+
+// CHECK-LABEL: test_eq00_constant_constant
+// CHECK: ret i32 1
+TEST_EQ00(constant, constant)
+
+// Test cast to bool.
+
+// CHECK-LABEL: cast_bool_private
+// CHECK: icmp eq i8 addrspace(5)* %p, null
+void cast_bool_private(private char* p) {
+  if (p)
+    *p = 0;
+}
+
+// CHECK-LABEL: cast_bool_local
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
+void cast_bool_local(local char* p) {
+  if (p)
+    *p = 0;
+}
+
+// CHECK-LABEL: cast_bool_global
+// CHECK: icmp eq i8 addrspace(1)* %p, null
+void cast_bool_global(global char* p) {
+  if (p)
+    *p = 0;
+}
+
+// CHECK-LABEL: cast_bool_constant
+// CHECK: icmp eq i8 addrspace(4)* %p, null
+char cast_bool_constant(constant char* p) {
+  if (p)
+    return *p;
+  else
+    return 0;
+}
+
+// CHECK-LABEL: cast_bool_generic
+// CHECK: icmp eq i8* %p, null
+void cast_bool_generic(generic char* p) {
+  if (p)
+    *p = 0;
+}
+
+// Test initialize a struct using memset.
+// For large structures that are mostly zero, clang generates llvm.memset for
+// the zero part and store for non-zero members.
+typedef struct {
+  long a, b, c, d;
+  private char *p;
+} StructTy3;
+
+// CHECK-LABEL: test_memset_private
+// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 {{.*}}, i8 0, i64 40, i1 false)
+void test_memset_private(private StructTy3 *ptr) {
+  StructTy3 S3 = {0, 0, 0, 0, 0};
+  *ptr = S3;
+}
+
+// Test casting literal 0 to pointer.
+// A literal 0 cast to a pointer should become a null pointer.
+
+// CHECK-LABEL: test_cast_0_to_local_ptr
+// CHECK: ret i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
+local int* test_cast_0_to_local_ptr(void) {
+  return (local int*)0;
+}
+
+// CHECK-LABEL: test_cast_0_to_private_ptr
+// CHECK: ret i32 addrspace(5)* null
+private int* test_cast_0_to_private_ptr(void) {
+  return (private int*)0;
+}
+
+// Test casting non-literal integer with 0 value to pointer.
+// A non-literal integer expression with 0 value is cast to a pointer with
+// zero value.
+
+// CHECK-LABEL: test_cast_int_to_ptr1_private
+// CHECK: ret i32 addrspace(5)* null
+private int* test_cast_int_to_ptr1_private(void) {
+  return (private int*)((void)0, 0);
+}
+
+// CHECK-LABEL: test_cast_int_to_ptr1_local
+// CHECK: ret i32 addrspace(3)* null
+local int* test_cast_int_to_ptr1_local(void) {
+  return (local int*)((void)0, 0);
+}
+
+// CHECK-LABEL: test_cast_int_to_ptr2
+// CHECK: ret i32 addrspace(5)* null
+private int* test_cast_int_to_ptr2(void) {
+  int x = 0;
+  return (private int*)x;
+}
+
+// Test logical operations.
+// CHECK-LABEL: test_not_nullptr
+// CHECK: ret i32 1
+int test_not_nullptr(void) {
+  return !(private char*)NULL;
+}
+
+// CHECK-LABEL: test_and_nullptr
+// CHECK: ret i32 0
+int test_and_nullptr(int a) {
+  return a && ((private char*)NULL);
+}
+
+// CHECK-LABEL: test_not_private_ptr
+// CHECK: %[[lnot:.*]] = icmp eq i8 addrspace(5)* %p, null
+// CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
+// CHECK: ret i32 %[[lnot_ext]]
+int test_not_private_ptr(private char* p) {
+  return !p;
+}
+
+// CHECK-LABEL: test_not_local_ptr
+// CHECK: %[[lnot:.*]] = icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
+// CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
+// CHECK: ret i32 %[[lnot_ext]]
+int test_not_local_ptr(local char* p) {
+  return !p;
+}
+
+
+// CHECK-LABEL: test_and_ptr
+// CHECK: %[[tobool:.*]] = icmp ne i8 addrspace(5)* %p1, null
+// CHECK: %[[tobool1:.*]] = icmp ne i8 addrspace(3)* %p2, addrspacecast (i8* null to i8 addrspace(3)*)
+// CHECK: %[[res:.*]] = and i1 %[[tobool]], %[[tobool1]]
+// CHECK: %[[land_ext:.*]] = zext i1 %[[res]] to i32
+// CHECK: ret i32 %[[land_ext]]
+int test_and_ptr(private char* p1, local char* p2) {
+  return p1 && p2;
+}
+
+// Test folding of null pointer in function scope.
+// NOOPT-LABEL: test_fold_private
+// NOOPT: call void @test_fold_callee
+// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)* addrspace(5)* %glob, align 8
+// NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
+// NOOPT: call void @test_fold_callee
+// NOOPT: %{{.*}} = add nsw i64 %{{.*}}, 0
+// NOOPT: %{{.*}} = sub nsw i64 %{{.*}}, 1
+void test_fold_callee(void);
+void test_fold_private(void) {
+  global int* glob = (test_fold_callee(), (global int*)(generic char*)0);
+  long x = glob - (global int*)(generic char*)0;
+  x = x + (int)(test_fold_callee(), (private int*)(generic char*)(global short*)0);
+  x = x - (int)((private int*)0 == (private int*)(generic char*)0);
+}
+
+// NOOPT-LABEL: test_fold_local
+// NOOPT: call void @test_fold_callee
+// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)* addrspace(5)* %glob, align 8
+// NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
+// NOOPT: call void @test_fold_callee
+// NOOPT: %{{.*}} = add nsw i64 %{{.*}}, sext (i32 ptrtoint (i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*) to i32) to i64)
+// NOOPT: %{{.*}} = sub nsw i64 %{{.*}}, 1
+void test_fold_local(void) {
+  global int* glob = (test_fold_callee(), (global int*)(generic char*)0);
+  long x = glob - (global int*)(generic char*)0;
+  x = x + (int)(test_fold_callee(), (local int*)(generic char*)(global short*)0);
+  x = x - (int)((local int*)0 == (local int*)(generic char*)0);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl
new file mode 100644
index 0000000..a5d4389
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -triple r600 -cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s
+
+#ifdef __AMDGCN__
+#define PTSIZE 8
+#else
+#define PTSIZE 4
+#endif
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef __INTPTR_TYPE__ intptr_t;
+typedef __UINTPTR_TYPE__ uintptr_t;
+typedef global void *global_ptr_t;
+typedef constant void *constant_ptr_t;
+typedef local void *local_ptr_t;
+typedef private void *private_ptr_t;
+
+void check(bool);
+
+void test() {
+  // CHECK-NOT: call void @check(i1 zeroext false)
+  check(sizeof(size_t) == PTSIZE);
+  check(__alignof__(size_t) == PTSIZE);
+  check(sizeof(intptr_t) == PTSIZE);
+  check(__alignof__(intptr_t) == PTSIZE);
+  check(sizeof(uintptr_t) == PTSIZE);
+  check(__alignof__(uintptr_t) == PTSIZE);
+  check(sizeof(ptrdiff_t) == PTSIZE);
+  check(__alignof__(ptrdiff_t) == PTSIZE);
+
+  check(sizeof(char) == 1);
+  check(__alignof__(char) == 1);
+  check(sizeof(short) == 2);
+  check(__alignof__(short) == 2);
+  check(sizeof(int) == 4);
+  check(__alignof__(int) == 4);
+  check(sizeof(long) == 8);
+  check(__alignof__(long) == 8);
+#ifdef cl_khr_fp16
+  check(sizeof(half) == 2);
+  check(__alignof__(half) == 2);
+#endif
+  check(sizeof(float) == 4);
+  check(__alignof__(float) == 4);
+#ifdef cl_khr_fp64
+  check(sizeof(double) == 8);
+  check(__alignof__(double) == 8);
+#endif
+
+  check(sizeof(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4));
+  check(__alignof__(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4));
+  check(sizeof(global_ptr_t) == PTSIZE);
+  check(__alignof__(global_ptr_t) == PTSIZE);
+  check(sizeof(constant_ptr_t) == PTSIZE);
+  check(__alignof__(constant_ptr_t) == PTSIZE);
+  check(sizeof(local_ptr_t) == 4);
+  check(__alignof__(private_ptr_t) == 4);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/as_type.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/as_type.cl
new file mode 100644
index 0000000..51e82e3
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/as_type.cl
@@ -0,0 +1,107 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+typedef __attribute__(( ext_vector_type(16) )) char char16;
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+
+//CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f1(char4 x) {
+  return  __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f2(<3 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f2(char3 x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func <3 x i8> @f3(i32 %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast]], <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+//CHECK: ret <3 x i8> %[[astype]]
+char3 f3(int x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <4 x i8> @f4(i32 %[[x:.*]])
+//CHECK: %[[astype:.*]] = bitcast i32 %[[x]] to <4 x i8>
+//CHECK-NOT: shufflevector
+//CHECK: ret <4 x i8> %[[astype]]
+char4 f4(int x) {
+  return __builtin_astype(x, char4);
+}
+
+//CHECK: define spir_func i32 @f5(<3 x i8> %[[x:.*]])
+//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+//CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[shuffle]] to i32
+//CHECK: ret i32 %[[astype]]
+int f5(char3 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func i32 @f6(<4 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[x]] to i32
+//CHECK-NOT: shufflevector
+//CHECK: ret i32 %[[astype]]
+int f6(char4 x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func <3 x i8> @f7(<3 x i8> returned %[[x:.*]])
+//CHECK-NOT: bitcast
+//CHECK-NOT: shufflevector
+//CHECK: ret <3 x i8> %[[x]]
+char3 f7(char3 x) {
+  return __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func <3 x i32> @f8(<16 x i8> %[[x:.*]])
+//CHECK: %[[cast:.*]] = bitcast <16 x i8> %[[x]] to <4 x i32>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i32> %[[cast]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+//CHECK: ret <3 x i32> %[[astype]]
+int3 f8(char16 x) {
+  return __builtin_astype(x, int3);
+}
+
+//CHECK: define spir_func i32 addrspace(1)* @addr_cast(i32* readnone %[[x:.*]])
+//CHECK: %[[cast:.*]] = addrspacecast i32* %[[x]] to i32 addrspace(1)*
+//CHECK: ret i32 addrspace(1)* %[[cast]]
+global int* addr_cast(int *x) {
+  return __builtin_astype(x, global int*);
+}
+
+//CHECK: define spir_func i32 addrspace(1)* @int_to_ptr(i32 %[[x:.*]])
+//CHECK: %[[cast:.*]] = inttoptr i32 %[[x]] to i32 addrspace(1)*
+//CHECK: ret i32 addrspace(1)* %[[cast]]
+global int* int_to_ptr(int x) {
+  return __builtin_astype(x, global int*);
+}
+
+//CHECK: define spir_func i32 @ptr_to_int(i32* %[[x:.*]])
+//CHECK: %[[cast:.*]] = ptrtoint i32* %[[x]] to i32
+//CHECK: ret i32 %[[cast]]
+int ptr_to_int(int *x) {
+  return __builtin_astype(x, int);
+}
+
+//CHECK: define spir_func <3 x i8> @ptr_to_char3(i32* %[[x:.*]])
+//CHECK: %[[cast1:.*]] = ptrtoint i32* %[[x]] to i32
+//CHECK: %[[cast2:.*]] = bitcast i32 %[[cast1]] to <4 x i8>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast2]], <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+//CHECK: ret <3 x i8> %[[astype]]
+char3 ptr_to_char3(int *x) {
+  return  __builtin_astype(x, char3);
+}
+
+//CHECK: define spir_func i32* @char3_to_ptr(<3 x i8> %[[x:.*]])
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+//CHECK: %[[cast1:.*]] = bitcast <4 x i8> %[[astype]] to i32
+//CHECK: %[[cast2:.*]] = inttoptr i32 %[[cast1]] to i32*
+//CHECK: ret i32* %[[cast2]]
+int* char3_to_ptr(char3 x) {
+  return __builtin_astype(x, int*);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl
new file mode 100644
index 0000000..a6f7e14
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -triple spir64 -emit-llvm | FileCheck -check-prefix=SPIR %s
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck -check-prefix=ARM %s
+typedef enum memory_order {  // local definition; values come from the compiler-predefined __ATOMIC_* macros
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+typedef enum memory_scope {  // local definition; values come from the predefined __OPENCL_MEMORY_SCOPE_* macros
+  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP  // only declared when a subgroup extension macro is defined
+#endif
+} memory_scope;
+
+void f(atomic_int *i, global atomic_int *gi, local atomic_int *li, private atomic_int *pi, atomic_uint *ui, int cmp, int order, int scope) {  // one statement per builtin form; the patterns ahead of each statement name the libcall it should become
+  int x;
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  // SPIR: %[[GP:[0-9]+]] = addrspacecast i8 addrspace(1)* {{%[0-9]+}} to i8 addrspace(4)*
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  __opencl_atomic_store(gi, 1, memory_order_seq_cst, memory_scope_work_group);  // global-qualified pointer; first pattern above wants an addrspacecast before the call
+
+  // SPIR: %[[GP:[0-9]+]] = addrspacecast i8 addrspace(3)* {{%[0-9]+}} to i8 addrspace(4)*
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  __opencl_atomic_store(li, 1, memory_order_seq_cst, memory_scope_work_group);  // local-qualified pointer, same shape as the global case
+
+  // SPIR: %[[GP:[0-9]+]] = addrspacecast i8* {{%[0-9]+}} to i8 addrspace(4)*
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  __opencl_atomic_store(pi, 1, memory_order_seq_cst, memory_scope_work_group);  // private-qualified pointer, same shape again
+
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_work_group);
+
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_min(i, 3, memory_order_seq_cst, memory_scope_work_group);
+
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_min(ui, 3, memory_order_seq_cst, memory_scope_work_group);  // same builtin on atomic_uint: patterns above look for the umin libcall
+
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  x = __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);  // weak form is matched against the same libcall name as the strong form
+
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_device);  // device scope: last libcall argument is matched as 2
+
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 3)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 3)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_all_svm_devices);  // all-SVM-devices scope: last libcall argument is matched as 3
+
+#ifdef cl_khr_subgroups
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 4)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group);  // compiled only when cl_khr_subgroups is defined
+#endif
+
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, order, order, scope);  // order and scope are run-time values, so they are matched as SSA values rather than constants
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl
new file mode 100644
index 0000000..160f7fb
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -0,0 +1,291 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s
+
+// Also test serialization of atomic operations here, to avoid duplicating the test.
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s
+
+#ifndef ALREADY_INCLUDED
+#define ALREADY_INCLUDED
+
+typedef __INTPTR_TYPE__ intptr_t;  // pointer-sized integer taken from the compiler's predefined macro
+typedef int int8 __attribute__((ext_vector_type(8)));  // 8-lane int vector
+
+typedef enum memory_order {  // local definition; values come from the compiler-predefined __ATOMIC_* macros
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+typedef enum memory_scope {  // local definition; values come from the predefined __OPENCL_MEMORY_SCOPE_* macros
+  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP  // only declared when a subgroup extension macro is defined; fi1 uses it unconditionally
+#endif
+} memory_scope;
+
+atomic_int j;  // program-scope atomic; initialised by atomic_init_foo
+
+void fi1(atomic_int *i) {  // seq_cst atomic load repeated once per memory scope
+  // CHECK-LABEL: @fi1
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);  // device scope is matched as syncscope "agent"
+
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);  // matched with no syncscope qualifier at all
+
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
+}
+
+void fi2(atomic_int *i) {  // seq_cst atomic store at work-group scope
+  // CHECK-LABEL: @fi2
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+}
+
+void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) {  // same store through global, private and local pointers
+  // CHECK-LABEL: @test_addr
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);  // global pointer is matched in addrspace(1)
+
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);  // private pointer is matched in addrspace(5)
+
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group);  // local pointer is matched in addrspace(3)
+}
+
+void fi3(atomic_int *i, atomic_uint *ui) {  // read-modify-write builtins on a signed and an unsigned atomic
+  // CHECK-LABEL: @fi3
+  // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);  // unsigned operand: matched as umin rather than min
+
+  // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);  // unsigned operand: matched as umax rather than max
+}
+
+bool fi4(atomic_int *i) {  // strong compare-exchange with acquire for both success and failure
+  // CHECK-LABEL: @fi4(
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
+  // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+  // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+  // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+  // CHECK: store i32 [[OLD]]
+  int cmp = 0;  // comparand, passed by address to the builtin
+  return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
+}
+
+void fi5(atomic_int *i, int scope) {  // run-time scope: patterns below want a switch with one atomic load per scope
+  // CHECK-LABEL: @fi5
+  // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [
+  // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]]
+  // CHECK-NEXT: i32 2, label %[[opencl_device:.*]]
+  // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: [[opencl_workgroup]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
+  // CHECK: br label %[[continue:.*]]
+  // CHECK: [[opencl_device]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
+  // CHECK: br label %[[continue]]
+  // CHECK: [[opencl_allsvmdevices]]:
+  // CHECK: load atomic i32, i32* %{{.*}} seq_cst
+  // CHECK: br label %[[continue]]
+  // CHECK: [[opencl_subgroup]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
+  // CHECK: br label %[[continue]]
+  // CHECK: [[continue]]:
+  int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);  // all-SVM-devices is the switch default in the patterns above
+}
+
+void fi6(atomic_int *i, int order, int scope) {  // run-time order AND scope: an outer switch on order, then one scope switch per ordering
+  // CHECK-LABEL: @fi6
+  // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [
+  // CHECK-NEXT: i32 1, label %[[acquire:.*]]
+  // CHECK-NEXT: i32 2, label %[[acquire]]
+  // CHECK-NEXT: i32 5, label %[[seqcst:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: [[monotonic]]:
+  // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: [[acquire]]:
+  // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: [[seqcst]]:
+  // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: [[MON_WG]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic
+  // CHECK: [[MON_DEV]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic
+  // CHECK: [[MON_ALL]]:
+  // CHECK: load atomic i32, i32* %{{.*}} monotonic
+  // CHECK: [[MON_SUB]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic
+  // CHECK: [[ACQ_WG]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire
+  // CHECK: [[ACQ_DEV]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire
+  // CHECK: [[ACQ_ALL]]:
+  // CHECK: load atomic i32, i32* %{{.*}} acquire
+  // CHECK: [[ACQ_SUB]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire
+  // CHECK: [[SEQ_WG]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
+  // CHECK: [[SEQ_DEV]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
+  // CHECK: [[SEQ_ALL]]:
+  // CHECK: load atomic i32, i32* %{{.*}} seq_cst
+  // CHECK: [[SEQ_SUB]]:
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
+  int x = __opencl_atomic_load(i, order, scope);  // order values 1 and 2 must share one acquire block; monotonic is the default
+}
+
+float ff1(global atomic_float *d) {  // relaxed load of an atomic float through a global pointer
+  // CHECK-LABEL: @ff1
+  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
+  return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);  // matched as an i32 load with monotonic ordering
+}
+
+void ff2(atomic_float *d) {  // release store to an atomic float
+  // CHECK-LABEL: @ff2
+  // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
+  __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);  // matched as an i32 store
+}
+
+float ff3(atomic_float *d) {  // seq_cst exchange on an atomic float
+  // CHECK-LABEL: @ff3
+  // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst
+  return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);  // matched as an i32 atomicrmw xchg
+}
+
+// CHECK-LABEL: @atomic_init_foo
+void atomic_init_foo()  // initialising the program-scope atomic j must produce a plain store
+{
+  // CHECK-NOT: atomic
+  // CHECK: store
+  __opencl_atomic_init(&j, 42);  // patterns around this line forbid any atomic instruction in the function body
+
+  // CHECK-NOT: atomic
+  // CHECK: }
+}
+
+// CHECK-LABEL: @failureOrder
+void failureOrder(atomic_int *ptr, int *ptr2) {  // compare-exchange with constant, differing success and failure orderings
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
+  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);  // relaxed failure order is matched as monotonic
+
+  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
+  __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);  // weak form is matched as "cmpxchg weak"
+}
+
+// CHECK-LABEL: @generalFailureOrder
+void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {  // run-time success AND failure orderings: outer switch on success, inner switch on fail
+  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);  // every pattern below refers to the IR produced by this one call
+  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
+
+  // CHECK: [[MONOTONIC]]
+  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQUIRE]]
+  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[RELEASE]]
+  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQREL]]
+  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[SEQCST]]
+  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[MONOTONIC_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} monotonic monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acquire monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acquire acquire
+  // CHECK: br
+
+  // CHECK: [[ACQREL_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acq_rel monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQREL_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acq_rel acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} seq_cst monotonic
+  // CHECK: br
+
+  // CHECK: [[SEQCST_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} seq_cst acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_SEQCST]]
+  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+  // CHECK: br
+}
+
+int test_volatile(volatile atomic_int *i) {  // volatile qualifier on the pointee must carry through to the atomic load
+  // CHECK-LABEL: @test_volatile
+  // CHECK:      %[[i_addr:.*]] = alloca i32
+  // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
+  // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]]
+  // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]]
+  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst
+  // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]]
+  // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]]
+  // CHECK-NEXT: ret i32 %[[retval]]
+  return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);  // matched as "load atomic volatile"
+}
+
+#endif
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/blocks.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/blocks.cl
new file mode 100644
index 0000000..80ac572
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/blocks.cl
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple spir-unknown-unknown | FileCheck -check-prefixes=COMMON,SPIR %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,AMDGCN %s
+
+// COMMON: @__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }
+// COMMON-NOT: .str
+
+// SPIR-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a)
+// AMDGCN-LABEL: define internal {{.*}}void @block_A_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %a)
+void (^block_A)(local void *) = ^(local void *a) {  // program-scope block taking a local-address-space pointer; body does nothing
+  return;
+};
+
+// COMMON-LABEL: define {{.*}}void @foo()
+void foo(){  // builds a block that captures the local i, then invokes it
+  int i;
+  // COMMON-NOT: %block.isa
+  // COMMON-NOT: %block.flags
+  // COMMON-NOT: %block.reserved
+  // COMMON-NOT: %block.descriptor
+  // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block:.*]], i32 0, i32 0
+  // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 0
+  // SPIR: store i32 12, i32* %[[block_size]]
+  // AMDGCN: store i32 12, i32 addrspace(5)* %[[block_size]]
+  // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 1
+  // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 1
+  // SPIR: store i32 4, i32* %[[block_align]]
+  // AMDGCN: store i32 4, i32 addrspace(5)* %[[block_align]]
+  // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2
+  // SPIR: %[[i_value:.*]] = load i32, i32* %i
+  // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
+  // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }>* %[[block]] to i32 ()*
+  // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
+  // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
+  // SPIR: %[[block_literal:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
+  // SPIR: %[[blk_gen_ptr:.*]] = bitcast i32 () addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
+  // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
+  // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 2
+  // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
+  // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
+  // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
+  // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
+  // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
+  // AMDGCN: %[[block_literal:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
+  // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast i32 ()* %[[block_literal]] to i8*
+  // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
+
+  int (^ block_B)(void) = ^{  // patterns above describe a 3-field literal: size 12, alignment 4, then the captured i
+    return i;
+  };
+  block_B();  // matched as a direct call to the block's invoke function
+}
+
+// SPIR-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor)
+// SPIR:  %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 }> addrspace(4)*
+// SPIR:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(4)* %[[block]], i32 0, i32 2
+// SPIR:  %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]]
+// AMDGCN-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8* %.block_descriptor)
+// AMDGCN:  %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i32 }>*
+// AMDGCN:  %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2
+// AMDGCN:  %[[block_capture:.*]] = load i32, i32* %[[block_capture_addr]]
+
+// COMMON-NOT: define{{.*}}@__foo_block_invoke_kernel
+
+// COMMON: !DIDerivedType(tag: DW_TAG_member, name: "__size"
+// COMMON: !DIDerivedType(tag: DW_TAG_member, name: "__align"
+
+// COMMON-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__isa"
+// COMMON-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__flags"
+// COMMON-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__reserved"
+// COMMON-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__FuncPtr"
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/bool_cast.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/bool_cast.cl
new file mode 100644
index 0000000..72926eb
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/bool_cast.cl
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -o - -O0 | FileCheck %s
+
+typedef unsigned char uchar4 __attribute((ext_vector_type(4)));  // 4-lane unsigned char vector
+typedef unsigned int int4 __attribute((ext_vector_type(4)));  // NOTE(review): named int4 but the element type is unsigned int
+typedef float float4 __attribute((ext_vector_type(4)));  // 4-lane float vector
+
+// CHECK-LABEL: define spir_kernel void @ker()
+void kernel ker() {  // contrasts bool -> vector casts with bool -> scalar casts
+  bool t = true;
+  int4 vec4 = (int4)t;  // vector cast: patterns below want sign-extension of the i1, then a splat
+// CHECK: {{%.*}} = load i8, i8* %t, align 1
+// CHECK: {{%.*}} = trunc i8 {{%.*}} to i1
+// CHECK: {{%.*}} = sext i1 {{%.*}} to i32
+// CHECK: {{%.*}} = insertelement <4 x i32> undef, i32 {{%.*}}, i32 0
+// CHECK: {{%.*}} = shufflevector <4 x i32> {{%.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* %vec4, align 16
+  int i = (int)t;  // scalar cast: patterns below want zero-extension instead
+// CHECK: {{%.*}} = load i8, i8* %t, align 1
+// CHECK: {{%.*}} = trunc i8 {{%.*}} to i1
+// CHECK: {{%.*}} = zext i1 {{%.*}} to i32
+// CHECK: store i32 {{%.*}}, i32* %i, align 4
+
+  uchar4 vc;
+  vc = (uchar4)true;  // constant true to vector: every lane is matched as -1
+// CHECK: store <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>* %vc, align 4
+  unsigned char c;
+  c = (unsigned char)true;  // constant true to scalar: matched as 1
+// CHECK: store i8 1, i8* %c, align 1
+
+  float4 vf;
+  vf = (float4)true;  // constant true to float vector: every lane is matched as -1.0
+// CHECK: store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
new file mode 100644
index 0000000..f8356fe
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
@@ -0,0 +1,25 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -verify -S -emit-llvm -o - %s
+
+typedef unsigned int uint;  // shorthand used in the kernel signature
+typedef half __attribute__((ext_vector_type(2))) half2;  // 2-lane half vector (fdot2 operands)
+typedef short __attribute__((ext_vector_type(2))) short2;  // 2-lane short vector (sdot2 operands)
+typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;  // 2-lane unsigned short vector (udot2 operands)
+
+kernel void builtins_amdgcn_dl_insts_err(  // negative test: each dot builtin is called with a non-constant clamp argument
+    global float *fOut, global int *siOut, global uint *uiOut,
+    half2 v2hA, half2 v2hB, float fC,
+    short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
+    ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC, uint isClamp) {  // isClamp is a run-time value, so the clamp expression built from it is not a constant
+  fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, isClamp == 0 ? false : true);     // expected-error {{'__builtin_amdgcn_fdot2' must be a constant integer}}
+
+  siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_sdot2' must be a constant integer}}
+  uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_udot2' must be a constant integer}}
+
+  siOut[1] = __builtin_amdgcn_sdot4(siA, siB, siC, isClamp == 0 ? false : true);     // expected-error {{'__builtin_amdgcn_sdot4' must be a constant integer}}
+  uiOut[1] = __builtin_amdgcn_udot4(uiA, uiB, uiC, isClamp == 0 ? false : true);     // expected-error {{'__builtin_amdgcn_udot4' must be a constant integer}}
+
+  siOut[2] = __builtin_amdgcn_sdot8(siA, siB, siC, isClamp == 0 ? false : true);     // expected-error {{'__builtin_amdgcn_sdot8' must be a constant integer}}
+  uiOut[2] = __builtin_amdgcn_udot8(uiA, uiB, uiC, isClamp == 0 ? false : true);     // expected-error {{'__builtin_amdgcn_udot8' must be a constant integer}}
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
new file mode 100644
index 0000000..ca3f400
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
@@ -0,0 +1,35 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -verify -S -emit-llvm -o - %s
+
+typedef unsigned int uint;  // shorthand used in the kernel signature
+typedef half __attribute__((ext_vector_type(2))) half2;  // 2-lane half vector (fdot2 operands)
+typedef short __attribute__((ext_vector_type(2))) short2;  // 2-lane short vector (sdot2 operands)
+typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;  // 2-lane unsigned short vector (udot2 operands)
+
+kernel void builtins_amdgcn_dl_insts_err(  // negative test: the RUN line targets gfx900, and every call is diagnosed as needing dl-insts
+    global float *fOut, global int *siOut, global uint *uiOut,
+    half2 v2hA, half2 v2hB, float fC,
+    short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
+    ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {  // each builtin is called twice: clamp false, then clamp true
+  fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false);     // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dl-insts}}
+  fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true);      // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dl-insts}}
+
+  siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dl-insts}}
+  siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true);  // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dl-insts}}
+
+  uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dl-insts}}
+  uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true);  // expected-error {{'__builtin_amdgcn_udot2' needs target feature dl-insts}}
+
+  siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false);     // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dl-insts}}
+  siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true);      // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dl-insts}}
+
+  uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false);     // expected-error {{'__builtin_amdgcn_udot4' needs target feature dl-insts}}
+  uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true);      // expected-error {{'__builtin_amdgcn_udot4' needs target feature dl-insts}}
+
+  siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false);     // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dl-insts}}
+  siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true);      // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dl-insts}}
+
+  uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false);     // expected-error {{'__builtin_amdgcn_udot8' needs target feature dl-insts}}
+  uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true);      // expected-error {{'__builtin_amdgcn_udot8' needs target feature dl-insts}}
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
new file mode 100644
index 0000000..e5633fb
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
@@ -0,0 +1,56 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
+
+// CHECK-LABEL: @builtins_amdgcn_dl_insts
+// CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 false)
+// CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 true)
+
+// CHECK: call i32 @llvm.amdgcn.sdot2(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i32 %siC, i1 false)
+// CHECK: call i32 @llvm.amdgcn.sdot2(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i32 %siC, i1 true)
+
+// CHECK: call i32 @llvm.amdgcn.udot2(<2 x i16> %v2usA, <2 x i16> %v2usB, i32 %uiC, i1 false)
+// CHECK: call i32 @llvm.amdgcn.udot2(<2 x i16> %v2usA, <2 x i16> %v2usB, i32 %uiC, i1 true)
+
+// CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 false)
+// CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 true)
+
+// CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
+// CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
+
+// CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 false)
+// CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 true)
+
+// CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
+// CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
+kernel void builtins_amdgcn_dl_insts(
+    global float *fOut, global int *siOut, global uint *uiOut,
+    half2 v2hA, half2 v2hB, float fC,
+    short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
+    ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
+  fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false);
+  fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true);
+
+  siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false);
+  siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true);
+
+  uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false);
+  uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true);
+
+  siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false);
+  siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true);
+
+  uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false);
+  uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true);
+
+  siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false);
+  siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true);
+
+  uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false);
+  uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
new file mode 100644
index 0000000..333b610
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
@@ -0,0 +1,11 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CHECK-LABEL: @test_fmed3_f16
+// CHECK: call half @llvm.amdgcn.fmed3.f16(half %a, half %b, half %c)
+void test_fmed3_f16(global half* out, half a, half b, half c)
+{
+  *out = __builtin_amdgcn_fmed3h(a, b, c);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
new file mode 100644
index 0000000..afa312c
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -0,0 +1,108 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef unsigned long ulong;
+
+// CHECK-LABEL: @test_div_fixup_f16
+// CHECK: call half @llvm.amdgcn.div.fixup.f16
+void test_div_fixup_f16(global half* out, half a, half b, half c)
+{
+  *out = __builtin_amdgcn_div_fixuph(a, b, c);
+}
+
+// CHECK-LABEL: @test_rcp_f16
+// CHECK: call half @llvm.amdgcn.rcp.f16
+void test_rcp_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rcph(a);
+}
+
+// CHECK-LABEL: @test_rsq_f16
+// CHECK: call half @llvm.amdgcn.rsq.f16
+void test_rsq_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rsqh(a);
+}
+
+// CHECK-LABEL: @test_sin_f16
+// CHECK: call half @llvm.amdgcn.sin.f16
+void test_sin_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_sinh(a);
+}
+
+// CHECK-LABEL: @test_cos_f16
+// CHECK: call half @llvm.amdgcn.cos.f16
+void test_cos_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_cosh(a);
+}
+
+// CHECK-LABEL: @test_ldexp_f16
+// CHECK: call half @llvm.amdgcn.ldexp.f16
+void test_ldexp_f16(global half* out, half a, int b)
+{
+  *out = __builtin_amdgcn_ldexph(a, b);
+}
+
+// CHECK-LABEL: @test_frexp_mant_f16
+// CHECK: call half @llvm.amdgcn.frexp.mant.f16
+void test_frexp_mant_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_frexp_manth(a);
+}
+
+// CHECK-LABEL: @test_frexp_exp_f16
+// CHECK: call i16 @llvm.amdgcn.frexp.exp.i16.f16
+void test_frexp_exp_f16(global short* out, half a)
+{
+  *out = __builtin_amdgcn_frexp_exph(a);
+}
+
+// CHECK-LABEL: @test_fract_f16
+// CHECK: call half @llvm.amdgcn.fract.f16
+void test_fract_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_fracth(a);
+}
+
+// CHECK-LABEL: @test_class_f16
+// CHECK: call i1 @llvm.amdgcn.class.f16
+void test_class_f16(global half* out, half a, int b)
+{
+  *out = __builtin_amdgcn_classh(a, b);
+}
+
+// CHECK-LABEL: @test_s_memrealtime
+// CHECK: call i64 @llvm.amdgcn.s.memrealtime()
+void test_s_memrealtime(global ulong* out)
+{
+  *out = __builtin_amdgcn_s_memrealtime();
+}
+
+// CHECK-LABEL: @test_mov_dpp
+// CHECK: call i32 @llvm.amdgcn.mov.dpp.i32(i32 %src, i32 0, i32 0, i32 0, i1 false)
+void test_mov_dpp(global int* out, int src)
+{
+  *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_faddf
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_faddf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fminf
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fminf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmaxf
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmaxf(local float *out, float src) {
+  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
new file mode 100644
index 0000000..2015f36
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -0,0 +1,515 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-opencl -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+typedef unsigned long ulong;
+typedef unsigned int uint;
+
+// CHECK-LABEL: @test_div_scale_f64
+// CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
+// CHECK: store i32 [[FLAGEXT]]
+void test_div_scale_f64(global double* out, global int* flagout, double a, double b)
+{
+  bool flag;
+  *out = __builtin_amdgcn_div_scale(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CHECK-LABEL: @test_div_scale_f32
+// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
+// CHECK: store i32 [[FLAGEXT]]
+void test_div_scale_f32(global float* out, global int* flagout, float a, float b)
+{
+  bool flag;
+  *out = __builtin_amdgcn_div_scalef(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CHECK-LABEL: @test_div_fmas_f32
+// CHECK: call float @llvm.amdgcn.div.fmas.f32
+void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
+{
+  *out = __builtin_amdgcn_div_fmasf(a, b, c, d);
+}
+
+// CHECK-LABEL: @test_div_fmas_f64
+// CHECK: call double @llvm.amdgcn.div.fmas.f64
+void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
+{
+  *out = __builtin_amdgcn_div_fmas(a, b, c, d);
+}
+
+// CHECK-LABEL: @test_div_fixup_f32
+// CHECK: call float @llvm.amdgcn.div.fixup.f32
+void test_div_fixup_f32(global float* out, float a, float b, float c)
+{
+  *out = __builtin_amdgcn_div_fixupf(a, b, c);
+}
+
+// CHECK-LABEL: @test_div_fixup_f64
+// CHECK: call double @llvm.amdgcn.div.fixup.f64
+void test_div_fixup_f64(global double* out, double a, double b, double c)
+{
+  *out = __builtin_amdgcn_div_fixup(a, b, c);
+}
+
+// CHECK-LABEL: @test_trig_preop_f32
+// CHECK: call float @llvm.amdgcn.trig.preop.f32
+void test_trig_preop_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_trig_preopf(a, b);
+}
+
+// CHECK-LABEL: @test_trig_preop_f64
+// CHECK: call double @llvm.amdgcn.trig.preop.f64
+void test_trig_preop_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_trig_preop(a, b);
+}
+
+// CHECK-LABEL: @test_rcp_f32
+// CHECK: call float @llvm.amdgcn.rcp.f32
+void test_rcp_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rcpf(a);
+}
+
+// CHECK-LABEL: @test_rcp_f64
+// CHECK: call double @llvm.amdgcn.rcp.f64
+void test_rcp_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rcp(a);
+}
+
+// CHECK-LABEL: @test_rsq_f32
+// CHECK: call float @llvm.amdgcn.rsq.f32
+void test_rsq_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rsqf(a);
+}
+
+// CHECK-LABEL: @test_rsq_f64
+// CHECK: call double @llvm.amdgcn.rsq.f64
+void test_rsq_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rsq(a);
+}
+
+// CHECK-LABEL: @test_rsq_clamp_f32
+// CHECK: call float @llvm.amdgcn.rsq.clamp.f32
+void test_rsq_clamp_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rsq_clampf(a);
+}
+
+// CHECK-LABEL: @test_rsq_clamp_f64
+// CHECK: call double @llvm.amdgcn.rsq.clamp.f64
+void test_rsq_clamp_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rsq_clamp(a);
+}
+
+// CHECK-LABEL: @test_sin_f32
+// CHECK: call float @llvm.amdgcn.sin.f32
+void test_sin_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_sinf(a);
+}
+
+// CHECK-LABEL: @test_cos_f32
+// CHECK: call float @llvm.amdgcn.cos.f32
+void test_cos_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_cosf(a);
+}
+
+// CHECK-LABEL: @test_log_clamp_f32
+// CHECK: call float @llvm.amdgcn.log.clamp.f32
+void test_log_clamp_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_log_clampf(a);
+}
+
+// CHECK-LABEL: @test_ldexp_f32
+// CHECK: call float @llvm.amdgcn.ldexp.f32
+void test_ldexp_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_ldexpf(a, b);
+}
+
+// CHECK-LABEL: @test_ldexp_f64
+// CHECK: call double @llvm.amdgcn.ldexp.f64
+void test_ldexp_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_ldexp(a, b);
+}
+
+// CHECK-LABEL: @test_frexp_mant_f32
+// CHECK: call float @llvm.amdgcn.frexp.mant.f32
+void test_frexp_mant_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_frexp_mantf(a);
+}
+
+// CHECK-LABEL: @test_frexp_mant_f64
+// CHECK: call double @llvm.amdgcn.frexp.mant.f64
+void test_frexp_mant_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_frexp_mant(a);
+}
+
+// CHECK-LABEL: @test_frexp_exp_f32
+// CHECK: call i32 @llvm.amdgcn.frexp.exp.i32.f32
+void test_frexp_exp_f32(global int* out, float a)
+{
+  *out = __builtin_amdgcn_frexp_expf(a);
+}
+
+// CHECK-LABEL: @test_frexp_exp_f64
+// CHECK: call i32 @llvm.amdgcn.frexp.exp.i32.f64
+void test_frexp_exp_f64(global int* out, double a)
+{
+  *out = __builtin_amdgcn_frexp_exp(a);
+}
+
+// CHECK-LABEL: @test_fract_f32
+// CHECK: call float @llvm.amdgcn.fract.f32
+void test_fract_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_fractf(a);
+}
+
+// CHECK-LABEL: @test_fract_f64
+// CHECK: call double @llvm.amdgcn.fract.f64
+void test_fract_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_fract(a);
+}
+
+// CHECK-LABEL: @test_lerp
+// CHECK: call i32 @llvm.amdgcn.lerp
+void test_lerp(global int* out, int a, int b, int c)
+{
+  *out = __builtin_amdgcn_lerp(a, b, c);
+}
+
+// CHECK-LABEL: @test_sicmp_i32
+// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+void test_sicmp_i32(global ulong* out, int a, int b)
+{
+  *out = __builtin_amdgcn_sicmp(a, b, 32);
+}
+
+// CHECK-LABEL: @test_uicmp_i32
+// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+void test_uicmp_i32(global ulong* out, uint a, uint b)
+{
+  *out = __builtin_amdgcn_uicmp(a, b, 32);
+}
+
+// CHECK-LABEL: @test_sicmp_i64
+// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 38)
+void test_sicmp_i64(global ulong* out, long a, long b)
+{
+  *out = __builtin_amdgcn_sicmpl(a, b, 39-1);
+}
+
+// CHECK-LABEL: @test_uicmp_i64
+// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 35)
+void test_uicmp_i64(global ulong* out, ulong a, ulong b)
+{
+  *out = __builtin_amdgcn_uicmpl(a, b, 30+5);
+}
+
+// CHECK-LABEL: @test_ds_swizzle
+// CHECK: call i32 @llvm.amdgcn.ds.swizzle(i32 %a, i32 32)
+void test_ds_swizzle(global int* out, int a)
+{
+  *out = __builtin_amdgcn_ds_swizzle(a, 32);
+}
+
+// CHECK-LABEL: @test_ds_permute
+// CHECK: call i32 @llvm.amdgcn.ds.permute(i32 %a, i32 %b)
+void test_ds_permute(global int* out, int a, int b)
+{
+  out[0] = __builtin_amdgcn_ds_permute(a, b);
+}
+
+// CHECK-LABEL: @test_ds_bpermute
+// CHECK: call i32 @llvm.amdgcn.ds.bpermute(i32 %a, i32 %b)
+void test_ds_bpermute(global int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ds_bpermute(a, b);
+}
+
+// CHECK-LABEL: @test_readfirstlane
+// CHECK: call i32 @llvm.amdgcn.readfirstlane(i32 %a)
+void test_readfirstlane(global int* out, int a)
+{
+  *out = __builtin_amdgcn_readfirstlane(a);
+}
+
+// CHECK-LABEL: @test_readlane
+// CHECK: call i32 @llvm.amdgcn.readlane(i32 %a, i32 %b)
+void test_readlane(global int* out, int a, int b)
+{
+  *out = __builtin_amdgcn_readlane(a, b);
+}
+
+// CHECK-LABEL: @test_fcmp_f32
+// CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 5)
+void test_fcmp_f32(global ulong* out, float a, float b)
+{
+  *out = __builtin_amdgcn_fcmpf(a, b, 5);
+}
+
+// CHECK-LABEL: @test_fcmp_f64
+// CHECK: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 6)
+void test_fcmp_f64(global ulong* out, double a, double b)
+{
+  *out = __builtin_amdgcn_fcmp(a, b, 3+3);
+}
+
+// CHECK-LABEL: @test_class_f32
+// CHECK: call i1 @llvm.amdgcn.class.f32
+void test_class_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_classf(a, b);
+}
+
+// CHECK-LABEL: @test_class_f64
+// CHECK: call i1 @llvm.amdgcn.class.f64
+void test_class_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_class(a, b);
+}
+
+// CHECK-LABEL: @test_buffer_wbinvl1
+// CHECK: call void @llvm.amdgcn.buffer.wbinvl1(
+void test_buffer_wbinvl1()
+{
+  __builtin_amdgcn_buffer_wbinvl1();
+}
+
+// CHECK-LABEL: @test_s_dcache_inv
+// CHECK: call void @llvm.amdgcn.s.dcache.inv(
+void test_s_dcache_inv()
+{
+  __builtin_amdgcn_s_dcache_inv();
+}
+
+// CHECK-LABEL: @test_s_waitcnt
+// CHECK: call void @llvm.amdgcn.s.waitcnt(
+void test_s_waitcnt()
+{
+  __builtin_amdgcn_s_waitcnt(0);
+}
+
+// CHECK-LABEL: @test_s_sendmsg
+// CHECK: call void @llvm.amdgcn.s.sendmsg(
+void test_s_sendmsg()
+{
+  __builtin_amdgcn_s_sendmsg(1, 0);
+}
+
+// CHECK-LABEL: @test_s_sendmsg_var
+// CHECK: call void @llvm.amdgcn.s.sendmsg(
+void test_s_sendmsg_var(int in)
+{
+  __builtin_amdgcn_s_sendmsg(1, in);
+}
+
+// CHECK-LABEL: @test_s_sendmsghalt
+// CHECK: call void @llvm.amdgcn.s.sendmsghalt(
+void test_s_sendmsghalt()
+{
+  __builtin_amdgcn_s_sendmsghalt(1, 0);
+}
+
+// CHECK-LABEL: @test_s_sendmsghalt_var
+// CHECK: call void @llvm.amdgcn.s.sendmsghalt(
+void test_s_sendmsghalt_var(int in)
+{
+  __builtin_amdgcn_s_sendmsghalt(1, in);
+}
+
+// CHECK-LABEL: @test_s_barrier
+// CHECK: call void @llvm.amdgcn.s.barrier(
+void test_s_barrier()
+{
+  __builtin_amdgcn_s_barrier();
+}
+
+// CHECK-LABEL: @test_wave_barrier
+// CHECK: call void @llvm.amdgcn.wave.barrier(
+void test_wave_barrier()
+{
+  __builtin_amdgcn_wave_barrier();
+}
+
+// CHECK-LABEL: @test_s_memtime
+// CHECK: call i64 @llvm.amdgcn.s.memtime()
+void test_s_memtime(global ulong* out)
+{
+  *out = __builtin_amdgcn_s_memtime();
+}
+
+// CHECK-LABEL: @test_s_sleep
+// CHECK: call void @llvm.amdgcn.s.sleep(i32 1)
+// CHECK: call void @llvm.amdgcn.s.sleep(i32 15)
+void test_s_sleep()
+{
+  __builtin_amdgcn_s_sleep(1);
+  __builtin_amdgcn_s_sleep(15);
+}
+
+// CHECK-LABEL: @test_s_incperflevel
+// CHECK: call void @llvm.amdgcn.s.incperflevel(i32 1)
+// CHECK: call void @llvm.amdgcn.s.incperflevel(i32 15)
+void test_s_incperflevel()
+{
+  __builtin_amdgcn_s_incperflevel(1);
+  __builtin_amdgcn_s_incperflevel(15);
+}
+
+// CHECK-LABEL: @test_s_decperflevel
+// CHECK: call void @llvm.amdgcn.s.decperflevel(i32 1)
+// CHECK: call void @llvm.amdgcn.s.decperflevel(i32 15)
+void test_s_decperflevel()
+{
+  __builtin_amdgcn_s_decperflevel(1);
+  __builtin_amdgcn_s_decperflevel(15);
+}
+
+// CHECK-LABEL: @test_cubeid(
+// CHECK: call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
+void test_cubeid(global float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_cubeid(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubesc(
+// CHECK: call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
+void test_cubesc(global float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_cubesc(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubetc(
+// CHECK: call float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
+void test_cubetc(global float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_cubetc(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubema(
+// CHECK: call float @llvm.amdgcn.cubema(float %a, float %b, float %c)
+void test_cubema(global float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_cubema(a, b, c);
+}
+
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.read_register.i64(metadata ![[EXEC:[0-9]+]]) #[[READ_EXEC_ATTRS:[0-9]+]]
+void test_read_exec(global ulong* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_LO:[0-9]+]]) #[[READ_EXEC_ATTRS]]
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_HI:[0-9]+]]) #[[READ_EXEC_ATTRS]]
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
+// CHECK-LABEL: @test_dispatch_ptr
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+void test_dispatch_ptr(__attribute__((address_space(4))) unsigned char ** out)
+{
+  *out = __builtin_amdgcn_dispatch_ptr();
+}
+
+// CHECK-LABEL: @test_kernarg_segment_ptr
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+void test_kernarg_segment_ptr(__attribute__((address_space(4))) unsigned char ** out)
+{
+  *out = __builtin_amdgcn_kernarg_segment_ptr();
+}
+
+// CHECK-LABEL: @test_implicitarg_ptr
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+void test_implicitarg_ptr(__attribute__((address_space(4))) unsigned char ** out)
+{
+  *out = __builtin_amdgcn_implicitarg_ptr();
+}
+
+// CHECK-LABEL: @test_get_group_id(
+// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.x()
+// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.y()
+// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.z()
+void test_get_group_id(int d, global int *out)
+{
+	switch (d) {
+	case 0: *out = __builtin_amdgcn_workgroup_id_x(); break;
+	case 1: *out = __builtin_amdgcn_workgroup_id_y(); break;
+	case 2: *out = __builtin_amdgcn_workgroup_id_z(); break;
+	default: *out = 0;
+	}
+}
+
+// CHECK-LABEL: @test_s_getreg(
+// CHECK: tail call i32 @llvm.amdgcn.s.getreg(i32 0)
+// CHECK: tail call i32 @llvm.amdgcn.s.getreg(i32 1)
+// CHECK: tail call i32 @llvm.amdgcn.s.getreg(i32 65535)
+void test_s_getreg(volatile global uint *out)
+{
+  *out = __builtin_amdgcn_s_getreg(0);
+  *out = __builtin_amdgcn_s_getreg(1);
+  *out = __builtin_amdgcn_s_getreg(65535);
+}
+
+// CHECK-LABEL: @test_get_local_id(
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[WI_RANGE:![0-9]*]]
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[WI_RANGE]]
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[WI_RANGE]]
+void test_get_local_id(int d, global int *out)
+{
+	switch (d) {
+	case 0: *out = __builtin_amdgcn_workitem_id_x(); break;
+	case 1: *out = __builtin_amdgcn_workitem_id_y(); break;
+	case 2: *out = __builtin_amdgcn_workitem_id_z(); break;
+	default: *out = 0;
+	}
+}
+
+// CHECK-LABEL: @test_fmed3_f32
+// CHECK: call float @llvm.amdgcn.fmed3.f32(
+void test_fmed3_f32(global float* out, float a, float b, float c)
+{
+  *out = __builtin_amdgcn_fmed3f(a, b, c);
+}
+
+// CHECK-LABEL: @test_s_getpc
+// CHECK: call i64 @llvm.amdgcn.s.getpc()
+void test_s_getpc(global ulong* out)
+{
+  *out = __builtin_amdgcn_s_getpc();
+}
+
+// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024}
+// CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
+// CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent }
+// CHECK-DAG: ![[EXEC]] = !{!"exec"}
+// CHECK-DAG: ![[EXEC_LO]] = !{!"exec_lo"}
+// CHECK-DAG: ![[EXEC_HI]] = !{!"exec_hi"}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl
new file mode 100644
index 0000000..5a4756b
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl
@@ -0,0 +1,16 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: @test_builtin_clz(
+// CHECK: tail call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+void test_builtin_clz(global int* out, int a)
+{
+  *out = __builtin_clz(a);
+}
+
+// CHECK-LABEL: @test_builtin_clzl(
+// CHECK: tail call i64 @llvm.ctlz.i64(i64 %a, i1 true)
+void test_builtin_clzl(global long* out, long a)
+{
+  *out = __builtin_clzl(a);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-r600.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-r600.cl
new file mode 100644
index 0000000..027a54a
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/builtins-r600.cl
@@ -0,0 +1,55 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple r600-unknown-unknown -target-cpu cypress -S -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: @test_recipsqrt_ieee_f32
+// CHECK: call float @llvm.r600.recipsqrt.ieee.f32
+void test_recipsqrt_ieee_f32(global float* out, float a)
+{
+  *out = __builtin_r600_recipsqrt_ieeef(a);
+}
+
+#if cl_khr_fp64
+// XCHECK-LABEL: @test_recipsqrt_ieee_f64
+// XCHECK: call double @llvm.r600.recipsqrt.ieee.f64
+void test_recipsqrt_ieee_f64(global double* out, double a)
+{
+  *out = __builtin_r600_recipsqrt_ieee(a);
+}
+#endif
+
+// CHECK-LABEL: @test_implicitarg_ptr
+// CHECK: call i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
+void test_implicitarg_ptr(__attribute__((address_space(7))) unsigned char ** out)
+{
+  *out = __builtin_r600_implicitarg_ptr();
+}
+
+// CHECK-LABEL: @test_get_group_id(
+// CHECK: tail call i32 @llvm.r600.read.tgid.x()
+// CHECK: tail call i32 @llvm.r600.read.tgid.y()
+// CHECK: tail call i32 @llvm.r600.read.tgid.z()
+void test_get_group_id(int d, global int *out)
+{
+	switch (d) {
+	case 0: *out = __builtin_r600_read_tgid_x(); break;
+	case 1: *out = __builtin_r600_read_tgid_y(); break;
+	case 2: *out = __builtin_r600_read_tgid_z(); break;
+	default: *out = 0;
+	}
+}
+
+// CHECK-LABEL: @test_get_local_id(
+// CHECK: tail call i32 @llvm.r600.read.tidig.x(), !range [[WI_RANGE:![0-9]*]]
+// CHECK: tail call i32 @llvm.r600.read.tidig.y(), !range [[WI_RANGE]]
+// CHECK: tail call i32 @llvm.r600.read.tidig.z(), !range [[WI_RANGE]]
+void test_get_local_id(int d, global int *out)
+{
+	switch (d) {
+	case 0: *out = __builtin_r600_read_tidig_x(); break;
+	case 1: *out = __builtin_r600_read_tidig_y(); break;
+	case 2: *out = __builtin_r600_read_tidig_z(); break;
+	default: *out = 0;
+	}
+}
+
+// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/byval.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/byval.cl
new file mode 100644
index 0000000..05c72c5
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/byval.cl
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s
+
+struct A {
+  int x[100];
+};
+
+int f(struct A a);
+
+int g() {
+  struct A a;
+  // CHECK: call i32 @f(%struct.A addrspace(5)* byval{{.*}}%a)
+  return f(a);
+}
+
+// CHECK: declare i32 @f(%struct.A addrspace(5)* byval{{.*}})
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cast_image.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cast_image.cl
new file mode 100644
index 0000000..d4e24b4
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cast_image.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa %s | FileCheck --check-prefix=AMDGCN %s
+// RUN: %clang_cc1 -emit-llvm -o - -triple spir-unknown-unknown %s | FileCheck --check-prefix=SPIR %s
+
+#ifdef __AMDGCN__
+
+constant int* convert(image2d_t img) {
+  // AMDGCN: bitcast %opencl.image2d_ro_t addrspace(4)* %img to i32 addrspace(4)*
+  return __builtin_astype(img, constant int*);
+}
+
+#else
+
+global int* convert(image2d_t img) {
+  // SPIR: bitcast %opencl.image2d_ro_t addrspace(1)* %img to i32 addrspace(1)*
+  return __builtin_astype(img, global int*);
+}
+
+#endif
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl-strict-aliasing.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl-strict-aliasing.cl
new file mode 100644
index 0000000..b403f7c
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl-strict-aliasing.cl
@@ -0,0 +1 @@
+// RUN: %clang_cc1 -x cl -emit-llvm -cl-strict-aliasing < %s
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
new file mode 100644
index 0000000..76ace5d
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+
+kernel void ker() {};
+// CHECK: define{{.*}}@ker() #0
+
+void foo() {};
+// CHECK: define{{.*}}@foo() #1
+
+// CHECK-LABEL: attributes #0
+// CHECK-UNIFORM: "uniform-work-group-size"="true"
+// CHECK-NONUNIFORM: "uniform-work-group-size"="false"
+
+// CHECK-LABEL: attributes #1
+// CHECK-NOT: uniform-work-group-size
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
new file mode 100644
index 0000000..d74a1df
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -0,0 +1,350 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B32
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64
+
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+
+typedef void (^bl_t)(local void *);
+typedef struct {int a;} ndrange_t;
+
+// For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
+// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: @block_G =  addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
+
+// For anonymous blocks without captures, emit block literals as global variable.
+// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+
+// Emits block literal [[BL_GLOBAL]], invoke function [[INV_G]] and global block variable @block_G
+// COMMON: define internal spir_func void [[INV_G:.*]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
+const bl_t block_G = (bl_t) ^ (local void *a) {};
+
+void callee(int id, __global int *out) {
+  out[id] = id;
+}
+
+// COMMON-LABEL: define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
+kernel void device_side_enqueue(global int *a, global int *b, int i) {
+  // COMMON: %default_queue = alloca %opencl.queue_t*
+  queue_t default_queue;
+  // COMMON: %flags = alloca i32
+  unsigned flags = 0;
+  // COMMON: %ndrange = alloca %struct.ndrange_t
+  ndrange_t ndrange;
+  // COMMON: %clk_event = alloca %opencl.clk_event_t*
+  clk_event_t clk_event;
+  // COMMON: %event_wait_list = alloca %opencl.clk_event_t*
+  clk_event_t event_wait_list;
+  // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*]
+  clk_event_t event_wait_list2[] = {clk_event};
+
+  // Emits block literal on stack and block kernel [[INVLK1]].
+  // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
+  // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(void) {
+                   a[i] = b[i];
+                 });
+
+  // Emits block literal on stack and block kernel [[INVLK2]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
+  // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
+  // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+  // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
+  // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
+
+  enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
+                 ^(void) {
+                   a[i] = b[i];
+                 });
+
+  // Emits global block literal [[BLG1]] and block kernel [[INVGK1]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // B32: %[[TMP:.*]] = alloca [1 x i32]
+  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
+  // B32: store i32 256, i32* %[[TMP1]], align 4
+  // B64: %[[TMP:.*]] = alloca [1 x i64]
+  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
+  // B64: store i64 256, i64* %[[TMP1]], align 8
+  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // B32-SAME: i32* %[[TMP1]])
+  // B64-SAME: i64* %[[TMP1]])
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *p) {
+                   return;
+                 },
+                 256);
+  char c;
+  // Emits global block literal [[BLG2]] and block kernel [[INVGK2]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // B32: %[[TMP:.*]] = alloca [1 x i32]
+  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
+  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
+  // B64: %[[TMP:.*]] = alloca [1 x i64]
+  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
+  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // B32-SAME: i32* %[[TMP1]])
+  // B64-SAME: i64* %[[TMP1]])
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *p) {
+                   return;
+                 },
+                 c);
+
+  // Emits global block literal [[BLG3]] and block kernel [[INVGK3]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+  // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
+  // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
+  // B32: %[[TMP:.*]] = alloca [1 x i32]
+  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
+  // B32: store i32 256, i32* %[[TMP1]], align 4
+  // B64: %[[TMP:.*]] = alloca [1 x i64]
+  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
+  // B64: store i64 256, i64* %[[TMP1]], align 8
+  // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
+  // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]],
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // B32-SAME: i32* %[[TMP1]])
+  // B64-SAME: i64* %[[TMP1]])
+  enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
+                 ^(local void *p) {
+                   return;
+                 },
+                 256);
+
+  // Emits global block literal [[BLG4]] and block kernel [[INVGK4]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+  // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
+  // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
+  // B32: %[[TMP:.*]] = alloca [1 x i32]
+  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
+  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
+  // B64: %[[TMP:.*]] = alloca [1 x i64]
+  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
+  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+  // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
+  // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // B32-SAME: i32* %[[TMP1]])
+  // B64-SAME: i64* %[[TMP1]])
+  enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
+                 ^(local void *p) {
+                   return;
+                 },
+                 c);
+
+  long l;
+  // Emits global block literal [[BLG5]] and block kernel [[INVGK5]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // B32: %[[TMP:.*]] = alloca [1 x i32]
+  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
+  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
+  // B64: %[[TMP:.*]] = alloca [1 x i64]
+  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
+  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // B32-SAME: i32* %[[TMP1]])
+  // B64-SAME: i64* %[[TMP1]])
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *p) {
+                   return;
+                 },
+                 l);
+
+  // Emits global block literal [[BLG6]] and block kernel [[INVGK6]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // B32: %[[TMP:.*]] = alloca [3 x i32]
+  // B32: %[[TMP1:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 0
+  // B32: store i32 1, i32* %[[TMP1]], align 4
+  // B32: %[[TMP2:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 1
+  // B32: store i32 2, i32* %[[TMP2]], align 4
+  // B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 2
+  // B32: store i32 4, i32* %[[TMP3]], align 4
+  // B64: %[[TMP:.*]] = alloca [3 x i64]
+  // B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 0
+  // B64: store i64 1, i64* %[[TMP1]], align 8
+  // B64: %[[TMP2:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 1
+  // B64: store i64 2, i64* %[[TMP2]], align 8
+  // B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 2
+  // B64: store i64 4, i64* %[[TMP3]], align 8
+  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3,
+  // B32-SAME: i32* %[[TMP1]])
+  // B64-SAME: i64* %[[TMP1]])
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *p1, local void *p2, local void *p3) {
+                   return;
+                 },
+                 1, 2, 4);
+
+  // Emits global block literal [[BLG7]] and block kernel [[INVGK7]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // B32: %[[TMP:.*]] = alloca [1 x i32]
+  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
+  // B32: store i32 0, i32* %[[TMP1]], align 4
+  // B64: %[[TMP:.*]] = alloca [1 x i64]
+  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
+  // B64: store i64 4294967296, i64* %[[TMP1]], align 8
+  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
+  // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+  // B32-SAME: i32* %[[TMP1]])
+  // B64-SAME: i64* %[[TMP1]])
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(local void *p) {
+                   return;
+                 },
+                 4294967296L);
+
+  // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
+  // The full type of these expressions is long (and repeated elsewhere), so we
+  // capture it as part of the regex for convenience and clarity.
+  // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
+  void (^const block_A)(void) = ^{
+    return;
+  };
+
+  // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
+  // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
+  void (^const block_B)(local void *) = ^(local void *a) {
+    return;
+  };
+
+  // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
+  // COMMON: call spir_func void [[INVG8:.*]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  block_A();
+
+  // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  enqueue_kernel(default_queue, flags, ndrange, block_A);
+
+  // Uses block kernel [[INVGK8]] and global block literal [[BLG8]].
+  // COMMON: call i32 @__get_kernel_work_group_size_impl(
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  unsigned size = get_kernel_work_group_size(block_A);
+
+  // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
+  // COMMON: call spir_func void [[INVG8]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  block_A();
+
+  void (^block_C)(void) = ^{
+    callee(i, a);
+  };
+
+  // Emits block literal on stack and block kernel [[INVLK3]].
+  // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+  // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+  // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
+  enqueue_kernel(default_queue, flags, ndrange, block_C);
+
+  // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INVG9]].
+  // COMMON: call i32 @__get_kernel_work_group_size_impl(
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_work_group_size(block_B);
+
+  // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal and invoke functions are emitted.
+  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_preferred_work_group_size_multiple(block_A);
+
+  // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
+  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_preferred_work_group_size_multiple(block_G);
+
+  // Emits global block literal [[BLG10]] and block kernel [[INVGK10]].
+  // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK10:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
+
+  // Emits global block literal [[BLG11]] and block kernel [[INVGK11]].
+  // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK11:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
+}
+
+// COMMON: define internal spir_kernel void [[INVLK1]](i8 addrspace(4)*) #{{[0-9]+}} {
+// COMMON: entry:
+// COMMON:  call void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %0)
+// COMMON:  ret void
+// COMMON: }
+// COMMON: define internal spir_kernel void [[INVLK2]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK2]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK3]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK4]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK5]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK6]](i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*) #{{[0-9]+}} {
+// COMMON: entry:
+// COMMON:  call void @__device_side_enqueue_block_invoke_8(i8 addrspace(4)* %0, i8 addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3)
+// COMMON:  ret void
+// COMMON: }
+// COMMON: define internal spir_kernel void [[INVGK7]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_func void [[INVG9:.*]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/const-str-array-decay.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/const-str-array-decay.cl
new file mode 100644
index 0000000..353aa3a
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/const-str-array-decay.cl
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -ffake-address-space-map | FileCheck %s
+
+int test_func(constant char* foo);
+
+kernel void str_array_decy() {
+  test_func("Test string literal");
+}
+
+// CHECK: i8 addrspace(2)* getelementptr inbounds ([20 x i8], [20 x i8] addrspace(2)*
+// CHECK-NOT: addrspacecast
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl
new file mode 100644
index 0000000..7bb9705
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -cl-opt-disable -ffake-address-space-map -emit-llvm -o - | FileCheck %s
+
+// CHECK: @array = addrspace({{[0-9]+}}) constant
+__constant float array[2] = {0.0f, 1.0f};
+
+kernel void test(global float *out) {
+  *out = array[0];
+}
+
+// Test that we don't directly use initializers for const aggregates
+// but create a copy in the original address space (unless a variable itself is
+// in the constant address space).
+
+void foo(constant int* p, constant const int *p1, const int *p2, const int *p3);
+// CHECK: @k.arr1 = internal addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3]
+// CHECK: @k.arr2 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 4, i32 5, i32 6]
+// CHECK: @k.arr3 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 7, i32 8, i32 9]
+// CHECK: @k.var1 = internal addrspace(2) constant i32 1
+kernel void k(void) {
+  // CHECK-NOT: %arr1 = alloca [3 x i32]
+  constant const int arr1[] = {1, 2, 3};
+  // CHECK: %arr2 = alloca [3 x i32]
+  const int arr2[] = {4, 5, 6};
+  // CHECK: %arr3 = alloca [3 x i32]
+  int arr3[] = {7, 8, 9};
+
+  constant int var1 = 1;
+  
+  // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i32 0, i32 0)
+  foo(&var1, arr1, arr2, arr3);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/convergent.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/convergent.cl
new file mode 100644
index 0000000..a011920
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/convergent.cl
@@ -0,0 +1,142 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s
+
+// This is initially assumed convergent, but can be deduced to not require it.
+
+// CHECK-LABEL: define spir_func void @non_convfun() local_unnamed_addr #0
+// CHECK: ret void
+__attribute__((noinline))
+void non_convfun(void) {
+  volatile int* p;
+  *p = 0;
+}
+
+void convfun(void) __attribute__((convergent));
+void nodupfun(void) __attribute__((noduplicate));
+
+// External functions should be assumed convergent.
+void f(void);
+void g(void);
+
+// Test two if's are merged and non_convfun duplicated.
+// The LLVM IR is equivalent to:
+//    if (a) {
+//      f();
+//      non_convfun();
+//      g();
+//    } else {
+//      non_convfun();
+//    }
+//
+// CHECK-LABEL: define spir_func void @test_merge_if(i32 %a) local_unnamed_addr #1 {
+// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
+// CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
+
+// CHECK: [[if_then]]:
+// CHECK: tail call spir_func void @f()
+// CHECK: tail call spir_func void @non_convfun()
+// CHECK: tail call spir_func void @g()
+
+// CHECK: br label %[[if_end3:.+]]
+
+// CHECK: [[if_end3_critedge]]:
+// CHECK: tail call spir_func void @non_convfun()
+// CHECK: br label %[[if_end3]]
+
+// CHECK: [[if_end3]]:
+// CHECK: ret void
+
+void test_merge_if(int a) {
+  if (a) {
+    f();
+  }
+  non_convfun();
+  if (a) {
+    g();
+  }
+}
+
+// CHECK-DAG: declare spir_func void @f() local_unnamed_addr #2
+// CHECK-DAG: declare spir_func void @g() local_unnamed_addr #2
+
+
+// Test two if's are not merged.
+// CHECK-LABEL: define spir_func void @test_no_merge_if(i32 %a) local_unnamed_addr #1
+// CHECK:  %[[tobool:.+]] = icmp eq i32 %a, 0
+// CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
+// CHECK: [[if_then]]:
+// CHECK: tail call spir_func void @f()
+// CHECK-NOT: call spir_func void @convfun()
+// CHECK-NOT: call spir_func void @g()
+// CHECK: br label %[[if_end]]
+// CHECK: [[if_end]]:
+// CHECK:  %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ]
+// CHECK:  tail call spir_func void @convfun() #[[attr4:.+]]
+// CHECK:  br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]]
+// CHECK: [[if_then2]]:
+// CHECK: tail call spir_func void @g()
+// CHECK:  br label %[[if_end3:.+]]
+// CHECK: [[if_end3]]:
+// CHECK-LABEL:  ret void
+
+void test_no_merge_if(int a) {
+  if (a) {
+    f();
+  }
+  convfun();
+  if(a) {
+    g();
+  }
+}
+
+// CHECK: declare spir_func void @convfun(){{[^#]*}} #2
+
+// Test loop is unrolled for convergent function.
+// CHECK-LABEL: define spir_func void @test_unroll() local_unnamed_addr #1
+// CHECK:  tail call spir_func void @convfun() #[[attr4:[0-9]+]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK-LABEL:  ret void
+
+void test_unroll() {
+  for (int i = 0; i < 10; i++)
+    convfun();
+}
+
+// Test loop is not unrolled for noduplicate function.
+// CHECK-LABEL: define spir_func void @test_not_unroll()
+// CHECK:  br label %[[for_body:.+]]
+// CHECK: [[for_cond_cleanup:.+]]:
+// CHECK:  ret void
+// CHECK: [[for_body]]:
+// CHECK:  tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
+// CHECK-NOT: call spir_func void @nodupfun()
+// CHECK:  br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
+
+void test_not_unroll() {
+  for (int i = 0; i < 10; i++)
+    nodupfun();
+}
+
+// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
+
+// CHECK-LABEL: @assume_convergent_asm
+// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
+kernel void assume_convergent_asm()
+{
+  __asm__ volatile("s_barrier");
+}
+
+// CHECK: attributes #0 = { noinline norecurse nounwind "
+// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/denorms-are-zero.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/denorms-are-zero.cl
new file mode 100644
index 0000000..ab8bf76
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/denorms-are-zero.cl
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - %s | FileCheck %s --check-prefix=DENORM-ZERO
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN-DENORM
+// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck --check-prefix=AMDGCN-FEATURE %s
+
+// For all targets, the 'denorms-are-zero' attribute is set to 'true'
+// if '-cl-denorms-are-zero' was specified and to 'false' otherwise.
+
+// CHECK-LABEL: define {{(dso_local )?}}void @f()
+// CHECK: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false"
+//
+// DENORM-ZERO-LABEL: define {{(dso_local )?}}void @f()
+// DENORM-ZERO: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true"
+
+// For amdgcn target cpu fiji, fp32 denormals should be flushed since fiji does not support them, unless +fp32-denormals is
+// explicitly set. The amdgcn target never flushes fp64 denormals. The control for fp64 and fp16 denormals is the same.
+
+// AMDGCN-LABEL: define void @f()
+// AMDGCN: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
+// AMDGCN-DENORM-LABEL: define void @f()
+// AMDGCN-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
+// AMDGCN-FEATURE-LABEL: define void @f()
+// AMDGCN-FEATURE: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}-fp64-fp16-denormals{{[^"]*}}"
+void f() {} // Empty body; the directives above match only its define line and attribute group.
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/event_t.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/event_t.cl
new file mode 100644
index 0000000..aad441f
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/event_t.cl
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 | FileCheck %s
+
+void foo(event_t evt);
+
+void kernel ker() {
+  event_t e;
+// CHECK: alloca %opencl.event_t*,
+  foo(e);
+// CHECK: call {{.*}}void @foo(%opencl.event_t* %
+  foo(0); // a literal zero is expected to be passed as a null event_t pointer
+// CHECK: call {{.*}}void @foo(%opencl.event_t* null)
+  foo((event_t)0); // an explicit cast of zero is expected to give the same null argument
+// CHECK: call {{.*}}void @foo(%opencl.event_t* null)
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ext-vector-shuffle.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ext-vector-shuffle.cl
new file mode 100644
index 0000000..ee88ba3
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ext-vector-shuffle.cl
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -cl-opt-disable -emit-llvm -o - | not grep 'extractelement'
+// RUN: %clang_cc1 %s -cl-opt-disable -emit-llvm -o - | not grep 'insertelement'
+// RUN: %clang_cc1 %s -cl-opt-disable -emit-llvm -o - | grep 'shufflevector'
+
+typedef __attribute__(( ext_vector_type(2) )) float float2;
+typedef __attribute__(( ext_vector_type(4) )) float float4;
+
+float2 test1(float4 V) {
+  return V.xy + V.wz;
+}
+
+float4 test2(float4 V) {
+  float2 W = V.ww;
+  return W.xyxy + W.yxyx;
+}
+
+float4 test3(float4 V1, float4 V2) { return (float4)(V1.zw, V2.xy); }
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/extension-begin.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/extension-begin.cl
new file mode 100644
index 0000000..1d5f789
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/extension-begin.cl
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -emit-llvm -o - | FileCheck %s
+
+__attribute__((overloadable)) void f(int x);
+
+#pragma OPENCL EXTENSION my_ext : begin
+
+__attribute__((overloadable)) void f(long x);
+
+#pragma OPENCL EXTENSION my_ext : end
+
+#pragma OPENCL EXTENSION my_ext : enable
+
+//CHECK: define spir_func void @test_f1(i64 %x)
+//CHECK: call spir_func void @_Z1fl(i64 %{{.*}})
+void test_f1(long x) {
+  f(x); // my_ext is enabled here, so the f(long) overload declared inside the extension block is expected
+}
+
+#pragma OPENCL EXTENSION my_ext : disable
+
+//CHECK: define spir_func void @test_f2(i64 %x)
+//CHECK: call spir_func void @_Z1fi(i32 %{{.*}})
+void test_f2(long x) {
+  f(x); // my_ext is disabled here, so the call is expected to fall back to f(int)
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/fpmath.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/fpmath.cl
new file mode 100644
index 0000000..8908861
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/fpmath.cl
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=NODIVOPT %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s
+
+typedef __attribute__(( ext_vector_type(4) )) float float4;
+
+float spscalardiv(float a, float b) {
+  // CHECK: @spscalardiv
+  // CHECK: #[[ATTR:[0-9]+]]
+  // CHECK: fdiv{{.*}},
+  // NODIVOPT: !fpmath ![[MD:[0-9]+]]
+  // DIVOPT-NOT: !fpmath ![[MD:[0-9]+]]
+  return a / b;
+}
+
+float4 spvectordiv(float4 a, float4 b) {
+  // CHECK: @spvectordiv
+  // CHECK: #[[ATTR]]
+  // CHECK: fdiv{{.*}},
+  // NODIVOPT: !fpmath ![[MD]]
+  // DIVOPT-NOT: !fpmath ![[MD]]
+  return a / b;
+}
+
+#if __OPENCL_C_VERSION__ >=120
+void printf(constant char* fmt, ...);
+
+void testdbllit(long *val) {
+  // CHECK-FLT: float 2.000000e+01
+  // CHECK-DBL: double 2.000000e+01
+  printf("%f", 20.0);
+}
+
+#endif
+
+#ifndef NOFP64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+double dpscalardiv(double a, double b) {
+  // CHECK: @dpscalardiv
+  // CHECK: #[[ATTR]]
+  // CHECK-NOT: !fpmath
+  return a / b;
+}
+#endif
+
+// CHECK: attributes #[[ATTR]] = {
+// NODIVOPT: "correctly-rounded-divide-sqrt-fp-math"="false"
+// DIVOPT: "correctly-rounded-divide-sqrt-fp-math"="true"
+// CHECK: }
+// NODIVOPT: ![[MD]] = !{float 2.500000e+00}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/func-call-dbg-loc.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/func-call-dbg-loc.cl
new file mode 100644
index 0000000..4ed082f
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/func-call-dbg-loc.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -debug-info-kind=limited -O0 -emit-llvm -o - %s | FileCheck %s
+
+typedef struct
+{
+    int a;
+} Struct;
+
+Struct func1();
+
+void func2(Struct S);
+
+void func3()
+{
+    // CHECK: call i32 @func1() #{{[0-9]+}}, !dbg ![[LOC:[0-9]+]]
+    // CHECK: call void @func2(i32 %{{[0-9]+}}) #{{[0-9]+}}, !dbg ![[LOC]]
+    func2(func1());
+}
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/gfx9-fp32-denorms.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/gfx9-fp32-denorms.cl
new file mode 100644
index 0000000..ccb4c6d
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/gfx9-fp32-denorms.cl
@@ -0,0 +1,13 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck --check-prefix=DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature +fp32-denormals %s | FileCheck --check-prefix=FEATURE_FP32_DENORMALS_ON %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature -fp32-denormals %s | FileCheck --check-prefix=FEATURE_FP32_DENORMALS_OFF %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -cl-denorms-are-zero %s | FileCheck --check-prefix=OPT_DENORMS_ARE_ZERO %s
+
+// DEFAULT: +fp32-denormals
+// FEATURE_FP32_DENORMALS_ON: +fp32-denormals
+// FEATURE_FP32_DENORMALS_OFF: -fp32-denormals
+// OPT_DENORMS_ARE_ZERO: -fp32-denormals
+
+kernel void gfx9_fp32_denorms() {} // Empty kernel; the directives above only look for the fp32-denormals feature string.
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/half.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/half.cl
new file mode 100644
index 0000000..eae8cdc
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/half.cl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-pc-win32 | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+
+half test()
+{
+   half x = 0.1f;
+   x+=2.0f;
+   x-=2.0f;
+   half y = x + x;
+   half z = y * 1.0f;
+   return z;
+// CHECK: half 0xH3260
+}
+
+// CHECK-LABEL: @test_inc(half %x)
+// CHECK: [[INC:%.*]] = fadd half %x, 0xH3C00
+// CHECK: ret half [[INC]]
+half test_inc(half x)
+{
+  return ++x;
+}
+
+__attribute__((overloadable)) int min(int, int);
+__attribute__((overloadable)) half min(half, half);
+__attribute__((overloadable)) float min(float, float);
+
+__kernel void foo( __global half* buf, __global float* buf2 )
+{
+    buf[0] = min( buf[0], 1.5h );
+// CHECK: half 0xH3E00
+    buf[0] = min( buf2[0], 1.5f );
+// CHECK: float 1.500000e+00
+
+    const half one = 1.6666;
+    buf[1] = min( buf[1], one );
+// CHECK: half 0xH3EAB
+}
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/images.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/images.cl
new file mode 100644
index 0000000..eb054ec
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/images.cl
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - | FileCheck %s
+
+__attribute__((overloadable)) void read_image(read_only image1d_t img_ro);
+__attribute__((overloadable)) void read_image(write_only image1d_t img_wo);
+
+kernel void test_read_image(read_only image1d_t img_ro, write_only image1d_t img_wo) {
+  // CHECK: call void @_Z10read_image14ocl_image1d_ro(%opencl.image1d_ro_t* %{{[0-9]+}})
+  read_image(img_ro);
+  // CHECK: call void @_Z10read_image14ocl_image1d_wo(%opencl.image1d_wo_t* %{{[0-9]+}})
+  read_image(img_wo);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl
new file mode 100644
index 0000000..ccd9821
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s
+
+kernel void test_long(int arg0) {
+  long v15_16;
+  // CHECK: tail call i64 asm sideeffect "v_lshlrev_b64 v[15:16], 0, $0", "={v[15:16]},v"(i32 %arg0)
+  __asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(arg0));
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-arg-info-single-as.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-arg-info-single-as.cl
new file mode 100644
index 0000000..595c974
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-arg-info-single-as.cl
@@ -0,0 +1,9 @@
+// Test that the kernel argument info always refers to SPIR address spaces,
+// even if the target has only one address space like x86_64 does.
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -triple x86_64-unknown-unknown -cl-kernel-arg-info | FileCheck %s
+
+kernel void foo(__global int * G, __constant int *C, __local int *L) {
+  *G = *C + *L;
+}
+// CHECK: !kernel_arg_addr_space ![[MD123:[0-9]+]]
+// CHECK: ![[MD123]] = !{i32 1, i32 2, i32 3}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-arg-info.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-arg-info.cl
new file mode 100644
index 0000000..fa48ad2
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-arg-info.cl
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -triple spir-unknown-unknown -cl-kernel-arg-info | FileCheck %s -check-prefix ARGINFO
+
+kernel void foo(global int * globalintp, global int * restrict globalintrestrictp,
+                global const int * globalconstintp,
+                global const int * restrict globalconstintrestrictp,
+                constant int * constantintp, constant int * restrict constantintrestrictp,
+                global const volatile int * globalconstvolatileintp,
+                global const volatile int * restrict globalconstvolatileintrestrictp,
+                global volatile int * globalvolatileintp,
+                global volatile int * restrict globalvolatileintrestrictp,
+                local int * localintp, local int * restrict localintrestrictp,
+                local const int * localconstintp,
+                local const int * restrict localconstintrestrictp,
+                local const volatile int * localconstvolatileintp,
+                local const volatile int * restrict localconstvolatileintrestrictp,
+                local volatile int * localvolatileintp,
+                local volatile int * restrict localvolatileintrestrictp,
+                int X, const int constint, const volatile int constvolatileint,
+                volatile int volatileint) {
+  *globalintrestrictp = constint + volatileint;
+}
+// CHECK: define spir_kernel void @foo{{[^!]+}}
+// CHECK: !kernel_arg_addr_space ![[MD11:[0-9]+]]
+// CHECK: !kernel_arg_access_qual ![[MD12:[0-9]+]]
+// CHECK: !kernel_arg_type ![[MD13:[0-9]+]]
+// CHECK: !kernel_arg_base_type ![[MD13]]
+// CHECK: !kernel_arg_type_qual ![[MD14:[0-9]+]]
+// CHECK-NOT: !kernel_arg_name
+// ARGINFO: !kernel_arg_name ![[MD15:[0-9]+]]
+
+kernel void foo2(read_only image1d_t img1, image2d_t img2, write_only image2d_array_t img3, read_write image1d_t img4) {
+}
+// CHECK: define spir_kernel void @foo2{{[^!]+}}
+// CHECK: !kernel_arg_addr_space ![[MD21:[0-9]+]]
+// CHECK: !kernel_arg_access_qual ![[MD22:[0-9]+]]
+// CHECK: !kernel_arg_type ![[MD23:[0-9]+]]
+// CHECK: !kernel_arg_base_type ![[MD23]]
+// CHECK: !kernel_arg_type_qual ![[MD24:[0-9]+]]
+// CHECK-NOT: !kernel_arg_name
+// ARGINFO: !kernel_arg_name ![[MD25:[0-9]+]]
+
+kernel void foo3(__global half * X) {
+}
+// CHECK: define spir_kernel void @foo3{{[^!]+}}
+// CHECK: !kernel_arg_addr_space ![[MD31:[0-9]+]]
+// CHECK: !kernel_arg_access_qual ![[MD32:[0-9]+]]
+// CHECK: !kernel_arg_type ![[MD33:[0-9]+]]
+// CHECK: !kernel_arg_base_type ![[MD33]]
+// CHECK: !kernel_arg_type_qual ![[MD34:[0-9]+]]
+// CHECK-NOT: !kernel_arg_name
+// ARGINFO: !kernel_arg_name ![[MD35:[0-9]+]]
+
+typedef unsigned int myunsignedint;
+kernel void foo4(__global unsigned int * X, __global myunsignedint * Y) {
+}
+// CHECK: define spir_kernel void @foo4{{[^!]+}}
+// CHECK: !kernel_arg_addr_space ![[MD41:[0-9]+]]
+// CHECK: !kernel_arg_access_qual ![[MD42:[0-9]+]]
+// CHECK: !kernel_arg_type ![[MD43:[0-9]+]]
+// CHECK: !kernel_arg_base_type ![[MD44:[0-9]+]]
+// CHECK: !kernel_arg_type_qual ![[MD45:[0-9]+]]
+// CHECK-NOT: !kernel_arg_name
+// ARGINFO: !kernel_arg_name ![[MD46:[0-9]+]]
+
+typedef image1d_t myImage;
+kernel void foo5(myImage img1, write_only image1d_t img2) {
+}
+// CHECK: define spir_kernel void @foo5{{[^!]+}}
+// CHECK: !kernel_arg_addr_space ![[MD41:[0-9]+]]
+// CHECK: !kernel_arg_access_qual ![[MD51:[0-9]+]]
+// CHECK: !kernel_arg_type ![[MD52:[0-9]+]]
+// CHECK: !kernel_arg_base_type ![[MD53:[0-9]+]]
+// CHECK: !kernel_arg_type_qual ![[MD45]]
+// CHECK-NOT: !kernel_arg_name
+// ARGINFO: !kernel_arg_name ![[MD54:[0-9]+]]
+
+typedef char char16 __attribute__((ext_vector_type(16)));
+__kernel void foo6(__global char16 arg[]) {}
+// CHECK: !kernel_arg_type ![[MD61:[0-9]+]]
+// ARGINFO: !kernel_arg_name ![[MD62:[0-9]+]]
+
+typedef read_only  image1d_t ROImage;
+typedef write_only image1d_t WOImage;
+typedef read_write image1d_t RWImage;
+kernel void foo7(ROImage ro, WOImage wo, RWImage rw) {
+}
+// CHECK: define spir_kernel void @foo7{{[^!]+}}
+// CHECK: !kernel_arg_addr_space ![[MD71:[0-9]+]]
+// CHECK: !kernel_arg_access_qual ![[MD72:[0-9]+]]
+// CHECK: !kernel_arg_type ![[MD73:[0-9]+]]
+// CHECK: !kernel_arg_base_type ![[MD74:[0-9]+]]
+// CHECK: !kernel_arg_type_qual ![[MD75:[0-9]+]]
+// CHECK-NOT: !kernel_arg_name
+// ARGINFO: !kernel_arg_name ![[MD76:[0-9]+]]
+
+// CHECK: ![[MD11]] = !{i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 1, i32 1, i32 1, i32 1, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0}
+// CHECK: ![[MD12]] = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
+// CHECK: ![[MD13]] = !{!"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int", !"int", !"int", !"int"}
+// CHECK: ![[MD14]] = !{!"", !"restrict", !"const", !"restrict const", !"const", !"restrict const", !"const volatile", !"restrict const volatile", !"volatile", !"restrict volatile", !"", !"restrict", !"const", !"restrict const", !"const volatile", !"restrict const volatile", !"volatile", !"restrict volatile", !"", !"", !"", !""}
+// ARGINFO: ![[MD15]] = !{!"globalintp", !"globalintrestrictp", !"globalconstintp", !"globalconstintrestrictp", !"constantintp", !"constantintrestrictp", !"globalconstvolatileintp", !"globalconstvolatileintrestrictp", !"globalvolatileintp", !"globalvolatileintrestrictp", !"localintp", !"localintrestrictp", !"localconstintp", !"localconstintrestrictp", !"localconstvolatileintp", !"localconstvolatileintrestrictp", !"localvolatileintp", !"localvolatileintrestrictp", !"X", !"constint", !"constvolatileint", !"volatileint"}
+// CHECK: ![[MD21]] = !{i32 1, i32 1, i32 1, i32 1}
+// CHECK: ![[MD22]] = !{!"read_only", !"read_only", !"write_only", !"read_write"}
+// CHECK: ![[MD23]] = !{!"image1d_t", !"image2d_t", !"image2d_array_t", !"image1d_t"}
+// CHECK: ![[MD24]] = !{!"", !"", !"", !""}
+// ARGINFO: ![[MD25]] = !{!"img1", !"img2", !"img3", !"img4"}
+// CHECK: ![[MD31]] = !{i32 1}
+// CHECK: ![[MD32]] = !{!"none"}
+// CHECK: ![[MD33]] = !{!"half*"}
+// CHECK: ![[MD34]] = !{!""}
+// ARGINFO: ![[MD35]] = !{!"X"}
+// CHECK: ![[MD41]] = !{i32 1, i32 1}
+// CHECK: ![[MD42]] = !{!"none", !"none"}
+// CHECK: ![[MD43]] = !{!"uint*", !"myunsignedint*"}
+// CHECK: ![[MD44]] = !{!"uint*", !"uint*"}
+// CHECK: ![[MD45]] = !{!"", !""}
+// ARGINFO: ![[MD46]] = !{!"X", !"Y"}
+// CHECK: ![[MD51]] = !{!"read_only", !"write_only"}
+// CHECK: ![[MD52]] = !{!"myImage", !"image1d_t"}
+// CHECK: ![[MD53]] = !{!"image1d_t", !"image1d_t"}
+// ARGINFO: ![[MD54]] = !{!"img1", !"img2"}
+// CHECK: ![[MD61]] = !{!"char16*"}
+// ARGINFO: ![[MD62]] = !{!"arg"}
+// CHECK: ![[MD71]] = !{i32 1, i32 1, i32 1}
+// CHECK: ![[MD72]] = !{!"read_only", !"write_only", !"read_write"}
+// CHECK: ![[MD73]] = !{!"ROImage", !"WOImage", !"RWImage"}
+// CHECK: ![[MD74]] = !{!"image1d_t", !"image1d_t", !"image1d_t"}
+// CHECK: ![[MD75]] = !{!"", !"", !""}
+// ARGINFO: ![[MD76]] = !{!"ro", !"wo", !"rw"}
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-attributes.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-attributes.cl
new file mode 100644
index 0000000..c9ecb14
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-attributes.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
+
+kernel  __attribute__((vec_type_hint(int))) __attribute__((reqd_work_group_size(1,2,4))) void kernel1(int a) {}
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel1(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD1:[0-9]+]] !reqd_work_group_size ![[MD2:[0-9]+]]
+
+kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
+
+kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
+
+// CHECK: [[MD1]] = !{i32 undef, i32 1}
+// CHECK: [[MD2]] = !{i32 1, i32 2, i32 4}
+// CHECK: [[MD3]] = !{<4 x i32> undef, i32 0}
+// CHECK: [[MD4]] = !{i32 8, i32 16, i32 32}
+// CHECK: [[MD5]] = !{i32 8}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-metadata.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-metadata.cl
new file mode 100644
index 0000000..cdec97b7
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernel-metadata.cl
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
+
+void normal_function() {
+}
+
+__kernel void kernel_function() {
+}
+
+// CHECK: define {{.*}}spir_kernel void @kernel_function() {{[^{]+}} !kernel_arg_addr_space ![[MD:[0-9]+]] !kernel_arg_access_qual ![[MD]] !kernel_arg_type ![[MD]] !kernel_arg_base_type ![[MD]] !kernel_arg_type_qual ![[MD]] {
+// CHECK: ![[MD]] = !{}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
new file mode 100644
index 0000000..5bb52e9
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s
+// Test that kernels always use the SPIR calling convention so that
+// arguments map unambiguously, which makes it feasible to implement
+// clSetKernelArg().
+
+typedef struct int_single {
+    int a;
+} int_single;
+
+typedef struct int_pair {
+    long a;
+    long b;
+} int_pair;
+
+typedef struct test_struct {
+    int elementA;
+    int elementB;
+    long elementC;
+    char elementD;
+    long elementE;
+    float elementF;
+    short elementG;
+    double elementH;
+} test_struct;
+
+kernel void test_single(int_single input, global int* output) {
+// CHECK: spir_kernel
+// AMDGCN: define amdgpu_kernel void @test_single
+// CHECK: struct.int_single* byval nocapture
+// CHECK: i32* nocapture %output
+ output[0] = input.a;
+}
+
+kernel void test_pair(int_pair input, global int* output) {
+// CHECK: spir_kernel
+// AMDGCN: define amdgpu_kernel void @test_pair
+// CHECK: struct.int_pair* byval nocapture
+// CHECK: i32* nocapture %output
+ output[0] = (int)input.a;
+ output[1] = (int)input.b;
+}
+
+kernel void test_kernel(test_struct input, global int* output) { // reads all eight members of the by-value struct
+// CHECK: spir_kernel
+// AMDGCN: define amdgpu_kernel void @test_kernel
+// CHECK: struct.test_struct* byval nocapture
+// CHECK: i32* nocapture %output
+ output[0] = input.elementA;
+ output[1] = input.elementB;
+ output[2] = (int)input.elementC;
+ output[3] = (int)input.elementD;
+ output[4] = (int)input.elementE;
+ output[5] = (int)input.elementF;
+ output[6] = (int)input.elementG;
+ output[7] = (int)input.elementH;
+}
+
+void test_function(int_pair input, global int* output) { // plain function, not a kernel; the negative case for the kernel checks
+// CHECK-NOT: spir_kernel
+// AMDGCN-NOT: define amdgpu_kernel void @test_function
+// CHECK: i64 %input.coerce0, i64 %input.coerce1, i32* nocapture %output
+ output[0] = (int)input.a;
+ output[1] = (int)input.b;
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/lifetime.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/lifetime.cl
new file mode 100644
index 0000000..ed9f32f
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/lifetime.cl
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s -check-prefix=AMDGCN
+
+void use(char *a);
+
+__attribute__((always_inline)) void helper_no_markers() {
+  char a;
+  use(&a);
+}
+
+void lifetime_test() {
+// CHECK: @llvm.lifetime.start.p0i
+// AMDGCN: @llvm.lifetime.start.p5i
+  helper_no_markers();
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/local-initializer-undef.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/local-initializer-undef.cl
new file mode 100644
index 0000000..f209b1f
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/local-initializer-undef.cl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s
+
+typedef struct Foo {
+    int x;
+    float y;
+    float z;
+} Foo;
+
+// CHECK-DAG: @test.lds_int = internal addrspace(3) global i32 undef
+// CHECK-DAG: @test.lds_int_arr = internal addrspace(3) global [128 x i32] undef
+// CHECK-DAG: @test.lds_struct = internal addrspace(3) global %struct.Foo undef
+// CHECK-DAG: @test.lds_struct_arr = internal addrspace(3) global [64 x %struct.Foo] undef
+__kernel void test()
+{
+    __local int lds_int;
+    __local int lds_int_arr[128];
+    __local Foo lds_struct;
+    __local Foo lds_struct_arr[64];
+
+    lds_int = 1;
+    lds_int_arr[0] = 1;
+    lds_struct.x = 1;
+    lds_struct_arr[0].x = 1;
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/local.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/local.cl
new file mode 100644
index 0000000..6f44b68
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/local.cl
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %s
+
+void func(local int*);
+
+__kernel void foo(void) {
+  // CHECK: @foo.i = internal addrspace(3) global i32 undef
+  __local int i;
+  func(&i);
+}
+
+// CHECK-LABEL: define {{.*}}void @_Z3barPU7CLlocali
+__kernel void __attribute__((__overloadable__)) bar(local int *x) {
+  *x = 5;
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl
new file mode 100644
index 0000000..ac1c1b5
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+typedef int int4 __attribute((ext_vector_type(4)));
+typedef long long4 __attribute((ext_vector_type(4)));
+typedef float float4 __attribute((ext_vector_type(4)));
+typedef double double4 __attribute((ext_vector_type(4)));
+
+// CHECK: floatops
+kernel void floatops(global int4 *out, global float4 *fout) {
+  // CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  out[0] = (float4)(1, 1, 1, 1) && 1.0f;
+  // CHECK: store <4 x i32> zeroinitializer
+  out[1] = (float4)(0, 0, 0, 0) && (float4)(0, 0, 0, 0);
+
+  // CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  out[2] = (float4)(0, 0, 0, 0) || (float4)(1, 1, 1, 1);
+  // CHECK: store <4 x i32> zeroinitializer
+  out[3] = (float4)(0, 0, 0, 0) || 0.0f;
+
+  // CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  out[4] = !(float4)(0, 0, 0, 0);
+  // CHECK: store <4 x i32> zeroinitializer
+  out[5] = !(float4)(1, 2, 3, 4);
+  // CHECK: store <4 x i32> <i32 -1, i32 0, i32 -1, i32 0>
+  out[6] = !(float4)(0, 1, 0, 1);
+  // CHECK: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  fout[0] = (float4)(!0.0f);
+  // CHECK: store <4 x float> zeroinitializer
+  fout[1] = (float4)(!1.0f);
+}
+
+// CHECK: doubleops
+kernel void doubleops(global long4 *out, global double4 *dout) {
+  // CHECK: store <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+  out[0] = (double4)(1, 1, 1, 1) && 1.0;
+  // CHECK: store <4 x i64> zeroinitializer
+  out[1] = (double4)(0, 0, 0, 0) && (double4)(0, 0, 0, 0);
+
+  // CHECK: store <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+  out[2] = (double4)(0, 0, 0, 0) || (double4)(1, 1, 1, 1);
+  // CHECK: store <4 x i64> zeroinitializer
+  out[3] = (double4)(0, 0, 0, 0) || 0.0f;
+
+  // CHECK: store <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+  out[4] = !(double4)(0, 0, 0, 0);
+  // CHECK: store <4 x i64> zeroinitializer
+  out[5] = !(double4)(1, 2, 3, 4);
+  // CHECK: store <4 x i64> <i64 -1, i64 0, i64 -1, i64 0>
+  out[6] = !(double4)(0, 1, 0, 1);
+  // CHECK: store <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+  dout[0] = (double4)(!0.0f);
+  // CHECK: store <4 x double> zeroinitializer
+  dout[1] = (double4)(!1.0f);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/memcpy.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/memcpy.cl
new file mode 100644
index 0000000..5911b5c
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/memcpy.cl
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -ffake-address-space-map -emit-llvm -o - | FileCheck %s
+
+// CHECK-LABEL: @test
+// CHECK-NOT: addrspacecast
+// CHECK: call void @llvm.memcpy.p1i8.p2i8
+kernel void test(global float *g, constant float *c) {
+  __builtin_memcpy(g, c, 32);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/no-half.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/no-half.cl
new file mode 100644
index 0000000..aee8f67
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/no-half.cl
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 %s -cl-std=cl2.0 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=cl1.2 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=cl1.1 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+// CHECK-LABEL: @test_store_float(float %foo, half addrspace({{.}}){{.*}} %bar)
+__kernel void test_store_float(float foo, __global half* bar)
+{
+	__builtin_store_halff(foo, bar);
+// CHECK: [[HALF_VAL:%.*]] = fptrunc float %foo to half
+// CHECK: store half [[HALF_VAL]], half addrspace({{.}})* %bar, align 2
+}
+
+// CHECK-LABEL: @test_store_double(double %foo, half addrspace({{.}}){{.*}} %bar)
+__kernel void test_store_double(double foo, __global half* bar)
+{
+	__builtin_store_half(foo, bar);
+// CHECK: [[HALF_VAL:%.*]] = fptrunc double %foo to half
+// CHECK: store half [[HALF_VAL]], half addrspace({{.}})* %bar, align 2
+}
+
+// CHECK-LABEL: @test_load_float(float addrspace({{.}}){{.*}} %foo, half addrspace({{.}}){{.*}} %bar)
+__kernel void test_load_float(__global float* foo, __global half* bar)
+{
+	foo[0] = __builtin_load_halff(bar);
+// CHECK: [[HALF_VAL:%.*]] = load half, half addrspace({{.}})* %bar
+// CHECK: [[FULL_VAL:%.*]] = fpext half [[HALF_VAL]] to float
+// CHECK: store float [[FULL_VAL]], float addrspace({{.}})* %foo
+}
+
+// CHECK-LABEL: @test_load_double(double addrspace({{.}}){{.*}} %foo, half addrspace({{.}}){{.*}} %bar)
+__kernel void test_load_double(__global double* foo, __global half* bar)
+{
+	foo[0] = __builtin_load_half(bar);
+// CHECK: [[HALF_VAL:%.*]] = load half, half addrspace({{.}})* %bar
+// CHECK: [[FULL_VAL:%.*]] = fpext half [[HALF_VAL]] to double
+// CHECK: store double [[FULL_VAL]], double addrspace({{.}})* %foo
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl
new file mode 100644
index 0000000..14f6411
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=NORMAL

+// RUN: %clang_cc1 %s -emit-llvm -cl-no-signed-zeros -o - | FileCheck %s -check-prefix=NO-SIGNED-ZEROS

+

+float signedzeros(float a) {

+  return a;

+}

+

+// CHECK: attributes

+// NORMAL: "no-signed-zeros-fp-math"="false"

+// NO-SIGNED-ZEROS: "no-signed-zeros-fp-math"="true"

diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/null_queue.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/null_queue.cl
new file mode 100644
index 0000000..cdcd7ee
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/null_queue.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0  -emit-llvm %s -o - | FileCheck %s
+extern queue_t get_default_queue();
+
+bool compare() {
+  return 0 == get_default_queue() &&
+         get_default_queue() == 0;
+  // CHECK: icmp eq %opencl.queue_t* null, %{{.*}}
+  // CHECK: icmp eq %opencl.queue_t* %{{.*}}, null
+}
+
+void func(queue_t q);
+
+void init() {
+  queue_t q = 0;
+  func(0);
+  // CHECK: store %opencl.queue_t* null, %opencl.queue_t** %q
+  // CHECK: call void @func(%opencl.queue_t* null)
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/opencl_types.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/opencl_types.cl
new file mode 100644
index 0000000..1f1294b
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/opencl_types.cl
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "spir-unknown-unknown" -emit-llvm -o - -O0 | FileCheck %s --check-prefixes=CHECK-COM,CHECK-SPIR
+// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -O0 | FileCheck %s --check-prefixes=CHECK-COM,CHECK-AMDGCN
+
+#define CLK_ADDRESS_CLAMP_TO_EDGE       2
+#define CLK_NORMALIZED_COORDS_TRUE      1
+#define CLK_FILTER_NEAREST              0x10
+#define CLK_FILTER_LINEAR               0x20
+
+constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRUE|CLK_FILTER_NEAREST;
+// CHECK-COM-NOT: constant i32
+
+void fnc1(image1d_t img) {}
+// CHECK-SPIR: @fnc1(%opencl.image1d_ro_t addrspace(1)*
+// CHECK-AMDGCN: @fnc1(%opencl.image1d_ro_t addrspace(4)*
+
+void fnc1arr(image1d_array_t img) {}
+// CHECK-SPIR: @fnc1arr(%opencl.image1d_array_ro_t addrspace(1)*
+// CHECK-AMDGCN: @fnc1arr(%opencl.image1d_array_ro_t addrspace(4)*
+
+void fnc1buff(image1d_buffer_t img) {}
+// CHECK-SPIR: @fnc1buff(%opencl.image1d_buffer_ro_t addrspace(1)*
+// CHECK-AMDGCN: @fnc1buff(%opencl.image1d_buffer_ro_t addrspace(4)*
+
+void fnc2(image2d_t img) {}
+// CHECK-SPIR: @fnc2(%opencl.image2d_ro_t addrspace(1)*
+// CHECK-AMDGCN: @fnc2(%opencl.image2d_ro_t addrspace(4)*
+
+void fnc2arr(image2d_array_t img) {}
+// CHECK-SPIR: @fnc2arr(%opencl.image2d_array_ro_t addrspace(1)*
+// CHECK-AMDGCN: @fnc2arr(%opencl.image2d_array_ro_t addrspace(4)*
+
+void fnc3(image3d_t img) {}
+// CHECK-SPIR: @fnc3(%opencl.image3d_ro_t addrspace(1)*
+// CHECK-AMDGCN: @fnc3(%opencl.image3d_ro_t addrspace(4)*
+
+void fnc4smp(sampler_t s) {}
+// CHECK-SPIR-LABEL: define {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+// CHECK-AMDGCN-LABEL: define {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
+
+kernel void foo(image1d_t img) {
+  sampler_t smp = CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRUE|CLK_FILTER_LINEAR;
+  // CHECK-SPIR: alloca %opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: alloca %opencl.sampler_t addrspace(4)*
+  event_t evt;
+  // CHECK-SPIR: alloca %opencl.event_t*
+  // CHECK-AMDGCN: alloca %opencl.event_t addrspace(5)*
+  clk_event_t clk_evt;
+  // CHECK-SPIR: alloca %opencl.clk_event_t*
+  // CHECK-AMDGCN: alloca %opencl.clk_event_t addrspace(1)*
+  queue_t queue;
+  // CHECK-SPIR: alloca %opencl.queue_t*
+  // CHECK-AMDGCN: alloca %opencl.queue_t addrspace(1)*
+  reserve_id_t rid;
+  // CHECK-SPIR: alloca %opencl.reserve_id_t*
+  // CHECK-AMDGCN: alloca %opencl.reserve_id_t addrspace(1)*
+  // CHECK-SPIR: store %opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: store %opencl.sampler_t addrspace(4)*
+  fnc4smp(smp);
+  // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
+  fnc4smp(glb_smp);
+  // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
+}
+
+kernel void foo_ro_pipe(read_only pipe int p) {}
+// CHECK-SPIR: @foo_ro_pipe(%opencl.pipe_ro_t addrspace(1)* %p)
+// CHECK-AMDGCN: @foo_ro_pipe(%opencl.pipe_ro_t addrspace(1)* %p)
+
+kernel void foo_wo_pipe(write_only pipe int p) {}
+// CHECK-SPIR: @foo_wo_pipe(%opencl.pipe_wo_t addrspace(1)* %p)
+// CHECK-AMDGCN: @foo_wo_pipe(%opencl.pipe_wo_t addrspace(1)* %p)
+
+void __attribute__((overloadable)) bad1(image1d_t b, image2d_t c, image2d_t d) {}
+// CHECK-SPIR-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}
+// CHECK-AMDGCN-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}(%opencl.image1d_ro_t addrspace(4)*{{.*}}%opencl.image2d_ro_t addrspace(4)*{{.*}}%opencl.image2d_ro_t addrspace(4)*{{.*}})
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/overload.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/overload.cl
new file mode 100644
index 0000000..f182cb5
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/overload.cl
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -emit-llvm -o - -triple spir-unknown-unknown %s | FileCheck %s
+
+typedef short short4 __attribute__((ext_vector_type(4)));
+
+// CHECK-DAG: declare spir_func <4 x i16> @_Z5clampDv4_sS_S_(<4 x i16>, <4 x i16>, <4 x i16>)
+short4 __attribute__ ((overloadable)) clamp(short4 x, short4 minval, short4 maxval);
+// CHECK-DAG: declare spir_func <4 x i16> @_Z5clampDv4_sss(<4 x i16>, i16 signext, i16 signext)
+short4 __attribute__ ((overloadable)) clamp(short4 x, short minval, short maxval);
+void __attribute__((overloadable)) foo(global int *a, global int *b);
+void __attribute__((overloadable)) foo(generic int *a, generic int *b);
+void __attribute__((overloadable)) bar(generic int *global *a, generic int *global *b);
+void __attribute__((overloadable)) bar(generic int *generic *a, generic int *generic *b);
+
+// Checking address space resolution
+void kernel test1() {
+  global int *a;
+  global int *b;
+  generic int *c;
+  local int *d;
+  generic int *generic *gengen;
+  generic int *local *genloc;
+  generic int *global *genglob;
+  // CHECK-DAG: call spir_func void @_Z3fooPU3AS1iS0_(i32 addrspace(1)* undef, i32 addrspace(1)* undef)
+  foo(a, b);
+  // CHECK-DAG: call spir_func void @_Z3fooPU3AS4iS0_(i32 addrspace(4)* undef, i32 addrspace(4)* undef)
+  foo(b, c);
+  // CHECK-DAG: call spir_func void @_Z3fooPU3AS4iS0_(i32 addrspace(4)* undef, i32 addrspace(4)* undef)
+  foo(a, d);
+
+  // CHECK-DAG: call spir_func void @_Z3barPU3AS4PU3AS4iS2_(i32 addrspace(4)* addrspace(4)* undef, i32 addrspace(4)* addrspace(4)* undef)
+  bar(gengen, genloc);
+  // CHECK-DAG: call spir_func void @_Z3barPU3AS4PU3AS4iS2_(i32 addrspace(4)* addrspace(4)* undef, i32 addrspace(4)* addrspace(4)* undef)
+  bar(gengen, genglob);
+  // CHECK-DAG: call spir_func void @_Z3barPU3AS1PU3AS4iS2_(i32 addrspace(4)* addrspace(1)* undef, i32 addrspace(4)* addrspace(1)* undef)
+  bar(genglob, genglob);
+}
+
+// Checking vector vs scalar resolution
+void kernel test2() {
+  short4 e0=0;
+
+  // CHECK-DAG: call spir_func <4 x i16> @_Z5clampDv4_sss(<4 x i16> zeroinitializer, i16 signext 0, i16 signext 255)
+  clamp(e0, 0, 255);
+  // CHECK-DAG: call spir_func <4 x i16> @_Z5clampDv4_sS_S_(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, <4 x i16> zeroinitializer)
+  clamp(e0, e0, e0);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/partial_initializer.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/partial_initializer.cl
new file mode 100644
index 0000000..ee6be91
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/partial_initializer.cl
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm %s -O0 -o - | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(2) ))  int int2;
+typedef __attribute__(( ext_vector_type(4) ))  int int4;
+
+// CHECK: %struct.StrucTy = type { i32, i32, i32 }
+
+// CHECK: @GA = addrspace(1) global [6 x [6 x float]] {{[[][[]}}6 x float] [float 1.000000e+00, float 2.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00],
+// CHECK:        [6 x float] zeroinitializer, [6 x float] zeroinitializer, [6 x float] zeroinitializer, [6 x float] zeroinitializer, [6 x float] zeroinitializer], align 4 
+float GA[6][6]  = {1.0f, 2.0f};
+
+typedef struct {
+  int x;
+  int y;
+  int z;
+} StrucTy;
+
+// CHECK: @GS = addrspace(1) global %struct.StrucTy { i32 1, i32 2, i32 0 }, align 4
+StrucTy GS = {1, 2};
+
+// CHECK: @GV1 = addrspace(1) global <4 x i32> <i32 1, i32 2, i32 3, i32 4>, align 16
+int4 GV1 = (int4)((int2)(1,2),3,4);
+
+// CHECK: @GV2 = addrspace(1) global <4 x i32> <i32 1, i32 1, i32 1, i32 1>, align 16
+int4 GV2 = (int4)(1);
+
+// CHECK: @f.S = private unnamed_addr addrspace(2) constant %struct.StrucTy { i32 1, i32 2, i32 0 }, align 4
+
+// CHECK-LABEL: define spir_func void @f()
+void f(void) {
+  // CHECK: %[[A:.*]] = alloca [6 x [6 x float]], align 4
+  // CHECK: %[[S:.*]] = alloca %struct.StrucTy, align 4
+  // CHECK: %[[V1:.*]] = alloca <4 x i32>, align 16
+  // CHECK: %[[compoundliteral:.*]] = alloca <4 x i32>, align 16
+  // CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8
+  // CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16
+
+  // CHECK: %[[v0:.*]] = bitcast [6 x [6 x float]]* %A to i8*
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %[[v0]], i8 0, i32 144, i1 false)
+  // CHECK: %[[v1:.*]] = bitcast i8* %[[v0]] to [6 x [6 x float]]*
+  // CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], [6 x [6 x float]]* %[[v1]], i32 0, i32 0
+  // CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], [6 x float]* %[[v2]], i32 0, i32 0
+  // CHECK: store float 1.000000e+00, float* %[[v3]], align 4
+  // CHECK: %[[v4:.*]] = getelementptr inbounds [6 x float], [6 x float]* %[[v2]], i32 0, i32 1
+  // CHECK: store float 2.000000e+00, float* %[[v4]], align 4
+  float A[6][6]  = {1.0f, 2.0f};
+
+  // CHECK: %[[v5:.*]] = bitcast %struct.StrucTy* %S to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[v5]], i8 addrspace(2)* align 4 bitcast (%struct.StrucTy addrspace(2)* @f.S to i8 addrspace(2)*), i32 12, i1 false)
+  StrucTy S = {1, 2};
+
+  // CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>* %[[compoundliteral1]], align 8
+  // CHECK: %[[v6:.*]] = load <2 x i32>, <2 x i32>* %[[compoundliteral1]], align 8
+  // CHECK: %[[vext:.*]] = shufflevector <2 x i32> %[[v6]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  // CHECK: %[[vecinit:.*]] = shufflevector <4 x i32> %[[vext]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  // CHECK: %[[vecinit2:.*]] = insertelement <4 x i32> %[[vecinit]], i32 3, i32 2
+  // CHECK: %[[vecinit3:.*]] = insertelement <4 x i32> %[[vecinit2]], i32 4, i32 3
+  // CHECK: store <4 x i32> %[[vecinit3]], <4 x i32>* %[[compoundliteral]], align 16
+  // CHECK: %[[v7:.*]] = load <4 x i32>, <4 x i32>* %[[compoundliteral]], align 16
+  // CHECK: store <4 x i32> %[[v7]], <4 x i32>* %[[V1]], align 16
+  int4 V1 = (int4)((int2)(1,2),3,4);
+
+  // CHECK: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>* %[[V2]], align 16
+  int4 V2 = (int4)(1);
+}
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl
new file mode 100644
index 0000000..d912fce
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+
+// CHECK-DAG: %opencl.pipe_ro_t = type opaque
+// CHECK-DAG: %opencl.pipe_wo_t = type opaque
+// CHECK-DAG: %opencl.reserve_id_t = type opaque
+
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+
+void test1(read_only pipe int p, global int *ptr) {
+  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+  read_pipe(p, ptr);
+  // CHECK: call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  reserve_id_t rid = reserve_read_pipe(p, 2);
+  // CHECK: call i32 @__read_pipe_4(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+  read_pipe(p, rid, 2, ptr);
+  // CHECK: call void @__commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  commit_read_pipe(p, rid);
+}
+
+void test2(write_only pipe int p, global int *ptr) {
+  // CHECK: call i32 @__write_pipe_2(%opencl.pipe_wo_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+  write_pipe(p, ptr);
+  // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  reserve_id_t rid = reserve_write_pipe(p, 2);
+  // CHECK: call i32 @__write_pipe_4(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+  write_pipe(p, rid, 2, ptr);
+  // CHECK: call void @__commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  commit_write_pipe(p, rid);
+}
+
+void test3(read_only pipe int p, global int *ptr) {
+  // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  reserve_id_t rid = work_group_reserve_read_pipe(p, 2);
+  // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  work_group_commit_read_pipe(p, rid);
+}
+
+void test4(write_only pipe int p, global int *ptr) {
+  // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  reserve_id_t rid = work_group_reserve_write_pipe(p, 2);
+  // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  work_group_commit_write_pipe(p, rid);
+}
+
+void test5(read_only pipe int p, global int *ptr) {
+  // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  reserve_id_t rid = sub_group_reserve_read_pipe(p, 2);
+  // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  sub_group_commit_read_pipe(p, rid);
+}
+
+void test6(write_only pipe int p, global int *ptr) {
+  // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  reserve_id_t rid = sub_group_reserve_write_pipe(p, 2);
+  // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  sub_group_commit_write_pipe(p, rid);
+}
+
+void test7(read_only pipe int p, global int *ptr) {
+  // CHECK: call i32 @__get_pipe_num_packets_ro(%opencl.pipe_ro_t* %{{.*}}, i32 4, i32 4)
+  *ptr = get_pipe_num_packets(p);
+  // CHECK: call i32 @__get_pipe_max_packets_ro(%opencl.pipe_ro_t* %{{.*}}, i32 4, i32 4)
+  *ptr = get_pipe_max_packets(p);
+}
+
+void test8(write_only pipe int p, global int *ptr) {
+  // CHECK: call i32 @__get_pipe_num_packets_wo(%opencl.pipe_wo_t* %{{.*}}, i32 4, i32 4)
+  *ptr = get_pipe_num_packets(p);
+  // CHECK: call i32 @__get_pipe_max_packets_wo(%opencl.pipe_wo_t* %{{.*}}, i32 4, i32 4)
+  *ptr = get_pipe_max_packets(p);
+}
+
+void test9(read_only pipe int r, write_only pipe int w, global int *ptr) {
+  // Verify that the return value is correctly cast to an i1 value.
+  // CHECK: %[[R:[0-9]+]] = call i32 @__read_pipe_2
+  // CHECK: icmp ne i32 %[[R]], 0
+  if (read_pipe(r, ptr)) *ptr = -1;
+  // CHECK: %[[W:[0-9]+]] = call i32 @__write_pipe_2
+  // CHECK: icmp ne i32 %[[W]], 0
+  if (write_pipe(w, ptr)) *ptr = -1;
+  // CHECK: %[[NR:[0-9]+]] = call i32 @__get_pipe_num_packets_ro
+  // CHECK: icmp ne i32 %[[NR]], 0
+  if (get_pipe_num_packets(r)) *ptr = -1;
+  // CHECK: %[[NW:[0-9]+]] = call i32 @__get_pipe_num_packets_wo
+  // CHECK: icmp ne i32 %[[NW]], 0
+  if (get_pipe_num_packets(w)) *ptr = -1;
+  // CHECK: %[[MR:[0-9]+]] = call i32 @__get_pipe_max_packets_ro
+  // CHECK: icmp ne i32 %[[MR]], 0
+  if (get_pipe_max_packets(r)) *ptr = -1;
+  // CHECK: %[[MW:[0-9]+]] = call i32 @__get_pipe_max_packets_wo
+  // CHECK: icmp ne i32 %[[MW]], 0
+  if (get_pipe_max_packets(w)) *ptr = -1;
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/pipe_types.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/pipe_types.cl
new file mode 100644
index 0000000..ba064c6
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/pipe_types.cl
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+
+// CHECK: %opencl.pipe_ro_t = type opaque
+// CHECK: %opencl.pipe_wo_t = type opaque
+typedef unsigned char __attribute__((ext_vector_type(3))) uchar3;
+typedef int __attribute__((ext_vector_type(4))) int4;
+
+void test1(read_only pipe int p) {
+// CHECK: define void @test1(%opencl.pipe_ro_t* %p)
+  reserve_id_t rid;
+// CHECK: %rid = alloca %opencl.reserve_id_t
+}
+
+void test2(write_only pipe float p) {
+// CHECK: define void @test2(%opencl.pipe_wo_t* %p)
+}
+
+void test3(read_only pipe const int p) {
+// CHECK: define void @test3(%opencl.pipe_ro_t* %p)
+}
+
+void test4(read_only pipe uchar3 p) {
+// CHECK: define void @test4(%opencl.pipe_ro_t* %p)
+}
+
+void test5(read_only pipe int4 p) {
+// CHECK: define void @test5(%opencl.pipe_ro_t* %p)
+}
+
+typedef read_only pipe int MyPipe;
+kernel void test6(MyPipe p) {
+// CHECK: define spir_kernel void @test6(%opencl.pipe_ro_t* %p)
+}
+
+struct Person {
+  const char *Name;
+  bool isFemale;
+  int ID;
+};
+
+void test_reserved_read_pipe(global struct Person *SDst,
+                             read_only pipe struct Person SPipe) {
+// CHECK: define void @test_reserved_read_pipe
+  read_pipe (SPipe, SDst);
+  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
+  read_pipe (SPipe, SDst);
+  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/preserve_vec3.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/preserve_vec3.cl
new file mode 100644
index 0000000..6efbbb3
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type  | FileCheck %s
+
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+
+void kernel foo(global float3 *a, global float3 *b) {
+  // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a
+  // CHECK: store <3 x float> %[[LOAD_A]], <3 x float> addrspace(1)* %b
+  *b = *a;
+}
+
+void kernel float4_to_float3(global float3 *a, global float4 *b) {
+  // CHECK: %[[LOAD_A:.*]] = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
+  // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x float> %[[LOAD_A]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  // CHECK: store <3 x float> %[[ASTYPE]], <3 x float> addrspace(1)* %a, align 16
+  *a = __builtin_astype(*b, float3);
+}
+
+void kernel float3_to_float4(global float3 *a, global float4 *b) {
+  // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a, align 16
+  // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  // CHECK: store <4 x float> %[[ASTYPE]], <4 x float> addrspace(1)* %b, align 16
+  *b = __builtin_astype(*a, float4);
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/private-array-initialization.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/private-array-initialization.cl
new file mode 100644
index 0000000..9aa058d
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/private-array-initialization.cl
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | FileCheck -check-prefix=PRIVATE0 %s
+// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-unknown -O0 -emit-llvm -o - | FileCheck -check-prefix=PRIVATE5 %s
+
+// PRIVATE0: @test.arr = private unnamed_addr addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4
+
+void test() {
+  __private int arr[] = {1, 2, 3};
+// PRIVATE0:  %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8*
+// PRIVATE0:  call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i1 false)
+
+// PRIVATE5: %arr = alloca [3 x i32], align 4, addrspace(5)
+// PRIVATE5: %0 = bitcast [3 x i32] addrspace(5)* %arr to i8 addrspace(5)*
+// PRIVATE5: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %0, i8 addrspace(4)* align 4 bitcast ([3 x i32] addrspace(4)* @test.arr to i8 addrspace(4)*), i64 12, i1 false)
+}
+
+__kernel void initializer_cast_is_valid_crash() {
+// PRIVATE0: %v512 = alloca [64 x i8], align 1
+// PRIVATE0: %0 = bitcast [64 x i8]* %v512 to i8*
+// PRIVATE0: call void @llvm.memset.p0i8.i32(i8* align 1 %0, i8 0, i32 64, i1 false)
+// PRIVATE0: %1 = bitcast i8* %0 to [64 x i8]*
+
+
+// PRIVATE5: %v512 = alloca [64 x i8], align 1, addrspace(5)
+// PRIVATE5: %0 = bitcast [64 x i8] addrspace(5)* %v512 to i8 addrspace(5)*
+// PRIVATE5: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 1 %0, i8 0, i64 64, i1 false)
+// PRIVATE5: %1 = bitcast i8 addrspace(5)* %0 to [64 x i8] addrspace(5)*
+  unsigned char v512[64] = {
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
+  };
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ptx-calls.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ptx-calls.cl
new file mode 100644
index 0000000..2a34003
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ptx-calls.cl
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
+
+void device_function() {
+}
+// CHECK-LABEL: define void @device_function()
+
+__kernel void kernel_function() {
+  device_function();
+}
+// CHECK-LABEL: define spir_kernel void @kernel_function()
+// CHECK: call void @device_function()
+// CHECK: !{{[0-9]+}} = !{void ()* @kernel_function, !"kernel", i32 1}
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ptx-kernels.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ptx-kernels.cl
new file mode 100644
index 0000000..b9e1c22
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/ptx-kernels.cl
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -o - | FileCheck %s
+
+void device_function() {
+}
+// CHECK-LABEL: define void @device_function()
+
+__kernel void kernel_function() {
+}
+// CHECK-LABEL: define spir_kernel void @kernel_function()
+
+// CHECK: !{{[0-9]+}} = !{void ()* @kernel_function, !"kernel", i32 1}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/relaxed-fpmath.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/relaxed-fpmath.cl
new file mode 100644
index 0000000..7676ee1
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/relaxed-fpmath.cl
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=NORMAL
+// RUN: %clang_cc1 %s -emit-llvm -cl-fast-relaxed-math -o - | FileCheck %s -check-prefix=FAST
+// RUN: %clang_cc1 %s -emit-llvm -cl-finite-math-only -o - | FileCheck %s -check-prefix=FINITE
+// RUN: %clang_cc1 %s -emit-llvm -cl-unsafe-math-optimizations -o - | FileCheck %s -check-prefix=UNSAFE
+// RUN: %clang_cc1 %s -emit-llvm -cl-mad-enable -o - | FileCheck %s -check-prefix=MAD
+// RUN: %clang_cc1 %s -emit-llvm -cl-no-signed-zeros -o - | FileCheck %s -check-prefix=NOSIGNED
+
+float spscalardiv(float a, float b) {
+  // CHECK: @spscalardiv(
+
+  // NORMAL: fdiv float
+  // FAST: fdiv fast float
+  // FINITE: fdiv nnan ninf float
+  // UNSAFE: fdiv nnan nsz float
+  // MAD: fdiv float
+  // NOSIGNED: fdiv nsz float
+  return a / b;
+}
+// CHECK: attributes
+
+// NORMAL: "less-precise-fpmad"="false"
+// NORMAL: "no-infs-fp-math"="false"
+// NORMAL: "no-nans-fp-math"="false"
+// NORMAL: "no-signed-zeros-fp-math"="false"
+// NORMAL: "unsafe-fp-math"="false"
+
+// FAST: "less-precise-fpmad"="true"
+// FAST: "no-infs-fp-math"="true"
+// FAST: "no-nans-fp-math"="true"
+// FAST: "no-signed-zeros-fp-math"="true"
+// FAST: "unsafe-fp-math"="true"
+
+// FINITE: "less-precise-fpmad"="false"
+// FINITE: "no-infs-fp-math"="true"
+// FINITE: "no-nans-fp-math"="true"
+// FINITE: "no-signed-zeros-fp-math"="false"
+// FINITE: "unsafe-fp-math"="false"
+
+// UNSAFE: "less-precise-fpmad"="true"
+// UNSAFE: "no-infs-fp-math"="false"
+// UNSAFE: "no-nans-fp-math"="true"
+// UNSAFE: "no-signed-zeros-fp-math"="true"
+// UNSAFE: "unsafe-fp-math"="true"
+
+// MAD: "less-precise-fpmad"="true"
+// MAD: "no-infs-fp-math"="false"
+// MAD: "no-nans-fp-math"="false"
+// MAD: "no-signed-zeros-fp-math"="false"
+// MAD: "unsafe-fp-math"="false"
+
+// NOSIGNED: "less-precise-fpmad"="false"
+// NOSIGNED: "no-infs-fp-math"="false"
+// NOSIGNED: "no-nans-fp-math"="false"
+// NOSIGNED: "no-signed-zeros-fp-math"="true"
+// NOSIGNED: "unsafe-fp-math"="false"
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/sampler.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/sampler.cl
new file mode 100644
index 0000000..22976c5
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/sampler.cl
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
+//
+// This test covers 5 cases of sampler initialization:
+//   1. function argument passing
+//      1a. argument is a file-scope variable
+//      1b. argument is a function-scope variable
+//      1c. argument is one of caller function's parameters
+//   2. variable initialization
+//      2a. initializing a file-scope variable
+//      2b. initializing a function-scope variable
+
+#define CLK_ADDRESS_CLAMP_TO_EDGE       2
+#define CLK_NORMALIZED_COORDS_TRUE      1
+#define CLK_FILTER_NEAREST              0x10
+#define CLK_FILTER_LINEAR               0x20
+
+// CHECK: %opencl.sampler_t = type opaque
+
+// Case 2a
+constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+// CHECK-NOT: glb_smp
+
+int get_sampler_initializer(void);
+
+void fnc4smp(sampler_t s) {}
+// CHECK: define spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* %
+
+kernel void foo(sampler_t smp_par) {
+  // CHECK-LABEL: define spir_kernel void @foo(%opencl.sampler_t addrspace(2)* %smp_par)
+  // CHECK: [[smp_par_ptr:%[A-Za-z0-9_\.]+]] = alloca %opencl.sampler_t addrspace(2)*
+
+  // Case 2b
+  sampler_t smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_NEAREST;
+  // CHECK: [[smp_ptr:%[A-Za-z0-9_\.]+]] = alloca %opencl.sampler_t addrspace(2)*
+  // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
+  // CHECK: store %opencl.sampler_t addrspace(2)* [[SAMP]], %opencl.sampler_t addrspace(2)** [[smp_ptr]]
+
+  // Case 1b
+  fnc4smp(smp);
+  // CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
+  // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  // Case 1b
+  fnc4smp(smp);
+  // CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
+  // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  // Case 1a
+  fnc4smp(glb_smp);
+  // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  // Case 1c
+  fnc4smp(smp_par);
+  // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_par_ptr]]
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  fnc4smp(5);
+  // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5)
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  const sampler_t const_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+  fnc4smp(const_smp);
+   // CHECK: [[CONST_SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
+  // CHECK: store %opencl.sampler_t addrspace(2)* [[CONST_SAMP]], %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR:%[a-zA-Z0-9]+]]
+  fnc4smp(const_smp);
+  // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR]]
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  constant sampler_t constant_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+  fnc4smp(constant_smp);
+  // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  // TODO: enable sampler initialization with non-constant integer.
+  //const sampler_t const_smp_func_init = get_sampler_initializer();
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/shifts.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/shifts.cl
new file mode 100644
index 0000000..7011a42
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/shifts.cl
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -x cl -O1 -emit-llvm  %s -o - -triple x86_64-linux-gnu | FileCheck %s -check-prefix=OPT
+// RUN: %clang_cc1 -x cl -O0 -emit-llvm  %s -o - -triple x86_64-linux-gnu | FileCheck %s -check-prefix=NOOPT
+
+// OpenCL reduces every shift amount modulo the bit width of the shifted
+// operand (only its low log2(width) bits are used) before evaluating. Test
+// this both for variables and for constants evaluated in the front-end.
+
+// OPT: @gtest1 = local_unnamed_addr constant i64 2147483648
+__constant const unsigned long gtest1 = 1UL << 31; // 31 is below the 64-bit width, so the amount is used as-is (2^31)
+
+// NOOPT: @negativeShift32
+int negativeShift32(int a,int b) {
+  // NOOPT: %array0 = alloca [256 x i8]
+  char array0[((int)1)<<40]; // 40 & 31 == 8, so the bound is 1 << 8 == 256
+  // NOOPT: %array1 = alloca [256 x i8]
+  char array1[((int)1)<<(-24)]; // -24 & 31 == 8, so the bound is again 256
+
+  // NOOPT: ret i32 65536
+  return ((int)1)<<(-16); // -16 & 31 == 16, so the result is 1 << 16 == 65536
+}
+
+//OPT: @positiveShift32
+int positiveShift32(int a,int b) {
+  //OPT: [[M32:%.+]] = and i32 %b, 31
+  //OPT-NEXT: [[C32:%.+]] = shl i32 %a, [[M32]]
+  int c = a<<b; // variable amount: expected to be masked with 31 at run time
+  int d = ((int)1)<<33; // constant amount: 33 & 31 == 1, so d == 2
+  //OPT-NEXT: [[E32:%.+]] = add nsw i32 [[C32]], 2
+  int e = c + d;
+  //OPT-NEXT: ret i32 [[E32]]
+  return e;
+}
+
+//OPT: @positiveShift64
+long positiveShift64(long a,long b) {
+  //OPT: [[M64:%.+]] = and i64 %b, 63
+  //OPT-NEXT: [[C64:%.+]] = ashr i64 %a, [[M64]]
+  long c = a>>b; // variable amount: expected to be masked with 63 at run time
+  long d = ((long)8)>>65; // constant amount: 65 & 63 == 1, so d == 8 >> 1 == 4
+  //OPT-NEXT: [[E64:%.+]] = add nsw i64 [[C64]], 4
+  long e = c + d;
+  //OPT-NEXT: ret i64 [[E64]]
+  return e;
+}
+
+typedef __attribute__((ext_vector_type(4))) int int4; // four-lane int vector used by the vector shift tests below
+
+//OPT: @vectorVectorTest
+int4 vectorVectorTest(int4 a,int4 b) {
+  //OPT: [[VM:%.+]] = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+  //OPT-NEXT: [[VC:%.+]] = shl <4 x i32> %a, [[VM]]
+  int4 c = a << b; // each lane of b is expected to be masked with 31
+  //OPT-NEXT: [[VF:%.+]] = add <4 x i32> [[VC]], <i32 2, i32 4, i32 16, i32 8>
+  int4 d = {1, 1, 1, 1};
+  int4 e = {33, 34, -28, -29}; // masked per lane to 1, 2, 4, 3
+  int4 f = c + (d << e); // d << e therefore folds to <2, 4, 16, 8>
+  //OPT-NEXT: ret <4 x i32> [[VF]]
+  return f;
+}
+
+//NOOPT-LABEL: @vectorScalarTest
+int4 vectorScalarTest(int4 a,int b) {
+  //NOOPT: [[SP0:%.+]] = insertelement <4 x i32> undef
+  //NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> undef, <4 x i32> zeroinitializer
+  //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], <i32 31, i32 31, i32 31, i32 31>
+  //NOOPT: [[VSC:%.+]] = shl <4 x i32> [[VSS:%.+]], [[VSM]]
+  int4 c = a << b; // scalar b is splatted to all four lanes, then masked with 31
+  //NOOPT: [[VSF:%.+]] = shl <4 x i32> [[VSC1:%.+]], <i32 2, i32 2, i32 2, i32 2>
+  //NOOPT: [[VSA:%.+]] = add <4 x i32> [[VSC2:%.+]], [[VSF]]
+  int4 d = {1, 1, 1, 1};
+  int4 f = c + (d << 34); // 34 & 31 == 2 in every lane
+  //NOOPT: ret <4 x i32>
+  return f;
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/single-precision-constant.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/single-precision-constant.cl
new file mode 100644
index 0000000..6ff7bd1
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/single-precision-constant.cl
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 %s -cl-single-precision-constant -emit-llvm -o - | FileCheck %s
+
+float fn(float f) {
+  // CHECK: tail call float @llvm.fmuladd.f32(float %f, float 2.000000e+00, float 1.000000e+00)
+  return f*2. + 1.; // unsuffixed literals; the check line above expects them as float constants under -cl-single-precision-constant
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/size_t.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/size_t.cl
new file mode 100644
index 0000000..63a0622
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/size_t.cl
@@ -0,0 +1,124 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple spir-unknown-unknown -o - | FileCheck --check-prefix=SZ32 %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple spir64-unknown-unknown -o - | FileCheck --check-prefix=SZ64 --check-prefix=SZ64ONLY %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDGCN %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn---opencl -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDGCN %s
+
+//SZ32: define{{.*}} i32 @test_ptrtoint_private(i8* %x)
+//SZ32: ptrtoint i8* %{{.*}} to i32
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_private(i8* %x)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_private(i8 addrspace(5)* %x)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
+size_t test_ptrtoint_private(private char* x) {
+  return (size_t)x; // private pointer -> size_t
+}
+
+//SZ32: define{{.*}} i32 @test_ptrtoint_global(i8 addrspace(1)* %x)
+//SZ32: ptrtoint i8 addrspace(1)* %{{.*}} to i32
+//SZ64: define{{.*}} i64 @test_ptrtoint_global(i8 addrspace(1)* %x)
+//SZ64: ptrtoint i8 addrspace(1)* %{{.*}} to i64
+intptr_t test_ptrtoint_global(global char* x) {
+  return (intptr_t)x; // global pointer -> intptr_t
+}
+
+//SZ32: define{{.*}} i32 @test_ptrtoint_constant(i8 addrspace(2)* %x)
+//SZ32: ptrtoint i8 addrspace(2)* %{{.*}} to i32
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_constant(i8 addrspace(2)* %x)
+//SZ64ONLY: ptrtoint i8 addrspace(2)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_constant(i8 addrspace(4)* %x)
+//AMDGCN: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+uintptr_t test_ptrtoint_constant(constant char* x) {
+  return (uintptr_t)x; // constant pointer -> uintptr_t
+}
+
+//SZ32: define{{.*}} i32 @test_ptrtoint_local(i8 addrspace(3)* %x)
+//SZ32: ptrtoint i8 addrspace(3)* %{{.*}} to i32
+//SZ64: define{{.*}} i64 @test_ptrtoint_local(i8 addrspace(3)* %x)
+//SZ64: ptrtoint i8 addrspace(3)* %{{.*}} to i64
+size_t test_ptrtoint_local(local char* x) {
+  return (size_t)x; // local pointer -> size_t
+}
+
+//SZ32: define{{.*}} i32 @test_ptrtoint_generic(i8 addrspace(4)* %x)
+//SZ32: ptrtoint i8 addrspace(4)* %{{.*}} to i32
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_generic(i8 addrspace(4)* %x)
+//SZ64ONLY: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_generic(i8* %x)
+//AMDGCN: ptrtoint i8* %{{.*}} to i64
+size_t test_ptrtoint_generic(generic char* x) {
+  return (size_t)x; // generic pointer -> size_t
+}
+
+//SZ32: define{{.*}} i8* @test_inttoptr_private(i32 %x)
+//SZ32: inttoptr i32 %{{.*}} to i8*
+//SZ64ONLY: define{{.*}} i8* @test_inttoptr_private(i64 %x)
+//SZ64ONLY: inttoptr i64 %{{.*}} to i8*
+//AMDGCN: define{{.*}} i8 addrspace(5)* @test_inttoptr_private(i64 %x)
+//AMDGCN: trunc i64 %{{.*}} to i32
+//AMDGCN: inttoptr i32 %{{.*}} to i8 addrspace(5)*
+private char* test_inttoptr_private(size_t x) {
+  return (private char*)x; // size_t -> private pointer; the amdgcn checks above expect a trunc to i32 first
+}
+
+//SZ32: define{{.*}} i8 addrspace(1)* @test_inttoptr_global(i32 %x)
+//SZ32: inttoptr i32 %{{.*}} to i8 addrspace(1)*
+//SZ64: define{{.*}} i8 addrspace(1)* @test_inttoptr_global(i64 %x)
+//SZ64: inttoptr i64 %{{.*}} to i8 addrspace(1)*
+global char* test_inttoptr_global(size_t x) {
+  return (global char*)x; // size_t -> global pointer
+}
+
+//SZ32: define{{.*}} i8 addrspace(3)* @test_add_local(i8 addrspace(3)* %x, i32 %y)
+//SZ32: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i32
+//SZ64: define{{.*}} i8 addrspace(3)* @test_add_local(i8 addrspace(3)* %x, i64 %y)
+//AMDGCN: trunc i64 %{{.*}} to i32
+//AMDGCN: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i32
+//SZ64ONLY: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i64
+local char* test_add_local(local char* x, ptrdiff_t y) {
+  return x + y; // local pointer + ptrdiff_t offset; the amdgcn checks above expect the offset truncated to i32
+}
+
+//SZ32: define{{.*}} i8 addrspace(1)* @test_add_global(i8 addrspace(1)* %x, i32 %y)
+//SZ32: getelementptr inbounds i8, i8 addrspace(1)* %{{.*}}, i32
+//SZ64: define{{.*}} i8 addrspace(1)* @test_add_global(i8 addrspace(1)* %x, i64 %y)
+//SZ64: getelementptr inbounds i8, i8 addrspace(1)* %{{.*}}, i64
+global char* test_add_global(global char* x, ptrdiff_t y) {
+  return x + y; // global pointer + ptrdiff_t offset
+}
+
+//SZ32: define{{.*}} i32 @test_sub_local(i8 addrspace(3)* %x, i8 addrspace(3)* %y)
+//SZ32: ptrtoint i8 addrspace(3)* %{{.*}} to i32
+//SZ32: ptrtoint i8 addrspace(3)* %{{.*}} to i32
+//SZ64: define{{.*}} i64 @test_sub_local(i8 addrspace(3)* %x, i8 addrspace(3)* %y)
+//SZ64: ptrtoint i8 addrspace(3)* %{{.*}} to i64
+//SZ64: ptrtoint i8 addrspace(3)* %{{.*}} to i64
+ptrdiff_t test_sub_local(local char* x, local char *y) {
+  return x - y; // difference of two local pointers
+}
+
+//SZ32: define{{.*}} i32 @test_sub_private(i8* %x, i8* %y)
+//SZ32: ptrtoint i8* %{{.*}} to i32
+//SZ32: ptrtoint i8* %{{.*}} to i32
+//SZ64ONLY: define{{.*}} i64 @test_sub_private(i8* %x, i8* %y)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_sub_private(i8 addrspace(5)* %x, i8 addrspace(5)* %y)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
+ptrdiff_t test_sub_private(private char* x, private char *y) {
+  return x - y; // difference of two private pointers
+}
+
+//SZ32: define{{.*}} i32 @test_sub_mix(i8* %x, i8 addrspace(4)* %y)
+//SZ32: ptrtoint i8* %{{.*}} to i32
+//SZ32: ptrtoint i8 addrspace(4)* %{{.*}} to i32
+//SZ64ONLY: define{{.*}} i64 @test_sub_mix(i8* %x, i8 addrspace(4)* %y)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_sub_mix(i8 addrspace(5)* %x, i8* %y)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
+//AMDGCN: ptrtoint i8* %{{.*}} to i64
+ptrdiff_t test_sub_mix(private char* x, generic char *y) {
+  return x - y; // difference of a private and a generic pointer
+}
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir-calling-conv.cl
new file mode 100644
index 0000000..8fa39fd
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 %s -triple "spir-unknown-unknown" -emit-llvm -o - | FileCheck %s
+
+int get_dummy_id(int D); // declaration only; called from foo as an ordinary function
+
+kernel void bar(global int *A); // declaration only; a kernel that foo calls
+
+kernel void foo(global int *A)
+// CHECK: define spir_kernel void @foo(i32 addrspace(1)* %A)
+{
+  int id = get_dummy_id(0); // ordinary call: the check below expects the spir_func convention
+  // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 0)
+  A[id] = id;
+  bar(A); // kernel-to-kernel call: the check below expects the spir_kernel convention
+  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* %A)
+}
+
+// CHECK: declare spir_func i32 @get_dummy_id(i32)
+// CHECK: declare spir_kernel void @bar(i32 addrspace(1)*)
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir32_target.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir32_target.cl
new file mode 100644
index 0000000..8f395b3
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir32_target.cl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 %s -triple "spir-unknown-unknown" -emit-llvm -o - | FileCheck %s
+
+// CHECK: target triple = "spir-unknown-unknown"
+
+typedef struct {
+  char c;
+  void *v; // expected at offset 4 (see the store checks in foo)
+  void *v2; // expected at offset 8
+} my_st;
+
+kernel void foo(global long *arg) {
+  int res1[sizeof(my_st)  == 12 ? 1 : -1]; // array bound becomes -1 (invalid) unless the size is as expected
+  int res2[sizeof(void *) ==  4 ? 1 : -1];
+  int res3[sizeof(arg)    ==  4 ? 1 : -1];
+
+  my_st *tmp = 0;
+
+  arg[0] = (long)(&tmp->v); // address of v relative to a null base, i.e. its offset
+//CHECK: store i64 4, i64 addrspace(1)*
+  arg[1] = (long)(&tmp->v2); // offset of v2
+//CHECK: store i64 8, i64 addrspace(1)*
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir64_target.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir64_target.cl
new file mode 100644
index 0000000..245cd80
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir64_target.cl
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - | FileCheck %s
+
+// CHECK: target triple = "spir64-unknown-unknown"
+
+typedef struct {
+  char c;
+  void *v; // expected at offset 8 (see the store checks in foo)
+  void *v2; // expected at offset 16
+} my_st;
+
+kernel void foo(global long *arg) {
+  int res1[sizeof(my_st)  == 24 ? 1 : -1]; // array bound becomes -1 (invalid) unless the size is as expected
+  int res2[sizeof(void *) ==  8 ? 1 : -1];
+  int res3[sizeof(arg)    ==  8 ? 1 : -1];
+
+  my_st *tmp = 0;
+  arg[3] = (long)(&tmp->v); // address of v relative to a null base, i.e. its offset
+//CHECK: store i64 8, i64 addrspace(1)*
+  arg[4] = (long)(&tmp->v2); // offset of v2
+//CHECK: store i64 16, i64 addrspace(1)*
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl
new file mode 100644
index 0000000..ac5b8e8
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 %s -triple "spir-unknown-unknown" -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-SPIR-CL10
+// RUN: %clang_cc1 %s -triple "spir-unknown-unknown" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-SPIR-CL12
+// RUN: %clang_cc1 %s -triple "spir-unknown-unknown" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-SPIR-CL20
+// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-SPIR-CL10
+// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-SPIR-CL12
+// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-SPIR-CL20
+
+// RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AMDGCN-CL10
+// RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL12
+// RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL20
+
+kernel void foo() {} // empty kernels: the checks below only inspect module-level version metadata
+kernel void bar() {}
+
+// CHECK-SPIR-CL10-DAG: !opencl.spir.version = !{[[SPIR:![0-9]+]]}
+// CHECK-SPIR-CL10-DAG: !opencl.ocl.version = !{[[OCL:![0-9]+]]}
+// CHECK-SPIR-CL10-DAG: [[SPIR]] = !{i32 1, i32 2}
+// CHECK-SPIR-CL10-DAG: [[OCL]] = !{i32 1, i32 0}
+// CHECK-SPIR-CL12-DAG: !opencl.spir.version = !{[[VER:![0-9]+]]}
+// CHECK-SPIR-CL12-DAG: !opencl.ocl.version = !{[[VER]]}
+// CHECK-SPIR-CL12: [[VER]] = !{i32 1, i32 2}
+
+// CHECK-SPIR-CL20-DAG: !opencl.spir.version = !{[[VER:![0-9]+]]}
+// CHECK-SPIR-CL20-DAG: !opencl.ocl.version = !{[[VER]]}
+// CHECK-SPIR-CL20: [[VER]] = !{i32 2, i32 0}
+
+// CHECK-AMDGCN-CL10-NOT: !opencl.spir.version
+// CHECK-AMDGCN-CL10: !opencl.ocl.version = !{[[OCL:![0-9]+]]}
+// CHECK-AMDGCN-CL10: [[OCL]] = !{i32 1, i32 0}
+// CHECK-AMDGCN-CL12-NOT: !opencl.spir.version
+// CHECK-AMDGCN-CL12: !opencl.ocl.version = !{[[OCL:![0-9]+]]}
+// CHECK-AMDGCN-CL12: [[OCL]] = !{i32 1, i32 2}
+// CHECK-AMDGCN-CL20-NOT: !opencl.spir.version
+// CHECK-AMDGCN-CL20: !opencl.ocl.version = !{[[OCL:![0-9]+]]}
+// CHECK-AMDGCN-CL20: [[OCL]] = !{i32 2, i32 0}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/str_literals.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/str_literals.cl
new file mode 100644
index 0000000..514044c
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/str_literals.cl
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -cl-opt-disable -emit-llvm -o - -ffake-address-space-map | FileCheck %s
+
+__constant char *__constant x = "hello world";
+__constant char *__constant y = "hello world"; // same literal as x
+
+// CHECK: unnamed_addr addrspace(2) constant{{.*}}"hello world\00"
+// CHECK-NOT: addrspace(2) unnamed_addr constant
+// CHECK: @x = {{(dso_local )?}}addrspace(2) constant i8 addrspace(2)*
+// CHECK: @y = {{(dso_local )?}}addrspace(2) constant i8 addrspace(2)*
+// CHECK: unnamed_addr addrspace(2) constant{{.*}}"f\00"
+
+void f() {
+  //CHECK: store i8 addrspace(2)* {{.*}}, i8 addrspace(2)**
+  constant const char *f3 = __func__; // __func__ is the string for this function's name; the checks expect it in addrspace(2)
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl
new file mode 100644
index 0000000..72c09da
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+
+// CHECK: %[[A:.*]] = type { float, float, float }
+typedef struct {
+  float x,y,z;
+} A;
+typedef private A *PA; // pointer to A in the private address space
+typedef global A *GA; // pointer to A in the global address space
+
+void test(void) {
+  global int *glob; // one pointer per address space; each is fed to every to_* builtin below
+  local int *loc;
+  private int *priv;
+  generic int *gen;
+
+  //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
+  glob = to_global(glob);
+  
+  //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
+  glob = to_global(loc);
+ 
+  //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
+  glob = to_global(priv);
+ 
+  //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to i32 addrspace(1)*
+  glob = to_global(gen); // argument is already generic, so a bitcast (not an addrspacecast) is expected
+  
+  //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
+  loc = to_local(glob);
+
+  //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
+  loc = to_local(loc);
+
+  //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
+  loc = to_local(priv);
+
+  //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(3)* %[[RET]] to i32 addrspace(3)*
+  loc = to_local(gen);
+
+  //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(1)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
+  priv = to_private(glob);
+
+  //CHECK: %[[ARG:.*]] = addrspacecast i32 addrspace(3)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
+  priv = to_private(loc);
+
+  //CHECK: %[[ARG:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
+  priv = to_private(priv);
+
+  //CHECK: %[[ARG:.*]] = bitcast i32 addrspace(4)* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8* @__to_private(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8* %[[RET]] to i32*
+  priv = to_private(gen);
+
+  //CHECK: %[[ARG:.*]] = addrspacecast %[[A]]* %{{.*}} to i8 addrspace(4)*
+  //CHECK: %[[RET:.*]] = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[ARG]])
+  //CHECK: %{{.*}} = bitcast i8 addrspace(1)* %[[RET]] to %[[A]] addrspace(1)*
+  PA pA;
+  GA gA = to_global(pA); // same conversion through typedef'd struct pointers
+
+  //CHECK-NOT: addrspacecast
+  //CHECK-NOT: bitcast
+  //CHECK: call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %{{.*}})
+  //CHECK-NOT: addrspacecast
+  //CHECK-NOT: bitcast
+  generic void *gen_v;
+  global void *glob_v = to_global(gen_v); // void pointers: no casts are expected around the call
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl
new file mode 100644
index 0000000..6a9ba87
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s
+
+/*** for ***/
+void for_count()
+{
+// CHECK-LABEL: for_count
+    __attribute__((opencl_unroll_hint(8))) // explicit unroll count of 8 on a for loop
+    for( int i = 0; i < 1000; ++i);
+// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_COUNT:.*]]
+}
+
+void for_disable()
+{
+// CHECK-LABEL: for_disable
+    __attribute__((opencl_unroll_hint(1))) // hint of 1: expected to map to llvm.loop.unroll.disable
+    for( int i = 0; i < 1000; ++i);
+// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_DISABLE:.*]]
+}
+
+void for_full()
+{
+// CHECK-LABEL: for_full
+    __attribute__((opencl_unroll_hint)) // no argument: expected to map to llvm.loop.unroll.full
+    for( int i = 0; i < 1000; ++i);
+// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_FULL:.*]]
+}
+
+/*** while ***/
+void while_count()
+{
+// CHECK-LABEL: while_count
+    int i = 1000;
+    __attribute__((opencl_unroll_hint(8))) // explicit unroll count of 8 on a while loop
+    while(i-->0);
+// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_COUNT:.*]]
+}
+
+void while_disable()
+{
+// CHECK-LABEL: while_disable
+    int i = 1000;
+    __attribute__((opencl_unroll_hint(1))) // hint of 1: expected to map to llvm.loop.unroll.disable
+    while(i-->0);
+// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_DISABLE:.*]]
+}
+
+void while_full()
+{
+// CHECK-LABEL: while_full
+    int i = 1000;
+    __attribute__((opencl_unroll_hint)) // no argument: expected to map to llvm.loop.unroll.full
+    while(i-->0);
+// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_FULL:.*]]
+}
+
+/*** do ***/
+void do_count()
+{
+// CHECK-LABEL: do_count
+    int i = 1000;
+    __attribute__((opencl_unroll_hint(8))) // explicit unroll count of 8 on a do-while loop
+    do {} while(i--> 0);
+// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_COUNT:.*]]
+}
+
+void do_disable()
+{
+// CHECK-LABEL: do_disable
+    int i = 1000;
+    __attribute__((opencl_unroll_hint(1))) // hint of 1: expected to map to llvm.loop.unroll.disable
+    do {} while(i--> 0);
+// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_DISABLE:.*]]
+}
+
+void do_full()
+{
+// CHECK-LABEL: do_full
+    int i = 1000;
+    __attribute__((opencl_unroll_hint)) // no argument: expected to map to llvm.loop.unroll.full
+    do {} while(i--> 0);
+// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_FULL:.*]]
+}
+
+
+// CHECK: ![[FOR_COUNT]]     =  distinct !{![[FOR_COUNT]],  ![[COUNT:.*]]}
+// CHECK: ![[COUNT]]         =  !{!"llvm.loop.unroll.count", i32 8}
+// CHECK: ![[FOR_DISABLE]]   =  distinct !{![[FOR_DISABLE]],  ![[DISABLE:.*]]}
+// CHECK: ![[DISABLE]]       =  !{!"llvm.loop.unroll.disable"}
+// CHECK: ![[FOR_FULL]]      =  distinct !{![[FOR_FULL]],  ![[FULL:.*]]}
+// CHECK: ![[FULL]]          =  !{!"llvm.loop.unroll.full"}
+// CHECK: ![[WHILE_COUNT]]   =  distinct !{![[WHILE_COUNT]],    ![[COUNT]]}
+// CHECK: ![[WHILE_DISABLE]] =  distinct !{![[WHILE_DISABLE]],  ![[DISABLE]]}
+// CHECK: ![[WHILE_FULL]]    =  distinct !{![[WHILE_FULL]],     ![[FULL]]}
+// CHECK: ![[DO_COUNT]]      =  distinct !{![[DO_COUNT]],       ![[COUNT]]}
+// CHECK: ![[DO_DISABLE]]    =  distinct !{![[DO_DISABLE]],     ![[DISABLE]]}
+// CHECK: ![[DO_FULL]]       =  distinct !{![[DO_FULL]],        ![[FULL]]}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vectorLoadStore.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vectorLoadStore.cl
new file mode 100644
index 0000000..cb35e6f
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vectorLoadStore.cl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple "spir-unknown-unknown" %s -emit-llvm -O0 -o - | FileCheck %s
+
+typedef char char2 __attribute((ext_vector_type(2))); // element type of data_generic in alignment()
+typedef char char3 __attribute((ext_vector_type(3))); // copied through pointers in foo()
+typedef char char8 __attribute((ext_vector_type(8))); // element type of data_private in alignment()
+typedef float float4 __attribute((ext_vector_type(4))); // access type used in alignment()
+
+// Check for optimized vec3 load/store which treats vec3 as vec4.
+void foo(char3 *P, char3 *Q) {
+  *P = *Q; // char3 copy; the check below expects a <4 x i8> value narrowed to three lanes
+  // CHECK: %{{.*}} = shufflevector <4 x i8> %{{.*}}, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+}
+
+// CHECK: define spir_func void @alignment()
+void alignment() {
+  __private char2 data_generic[100]; // char2 elements: the load check below expects align 2
+  __private char8 data_private[100]; // char8 elements: the store check below expects align 8
+
+  // CHECK: %{{.*}} = load <4 x float>, <4 x float> addrspace(4)* %{{.*}}, align 2
+  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 8
+  ((private float4 *)data_private)[1] = ((float4 *)data_generic)[2]; // float4 accesses are expected to keep the alignment of the underlying arrays
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl
new file mode 100644
index 0000000..b9013d0
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 %s -emit-llvm -O3 -o - | FileCheck %s
+
+typedef int int2 __attribute((ext_vector_type(2))); // nested operand type in the int literals below
+typedef int int4 __attribute((ext_vector_type(4))); // result type of the int literals below
+
+__constant const int4 itest1 = (int4)(1, 2, ((int2)(3, 4))); // nested int2 supplies lanes 2 and 3
+// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+__constant const int4 itest2 = (int4)(1, 2, ((int2)(3))); // (int2)(3) replicates 3 into both of its lanes
+// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 3>
+
+typedef float float2 __attribute((ext_vector_type(2))); // nested operand type in the float literals below
+typedef float float4 __attribute((ext_vector_type(4))); // result type of the float literals below
+
+void ftest1(float4 *p) {
+  *p = (float4)(1.1f, 1.2f, ((float2)(1.3f, 1.4f))); // nested float2 supplies lanes 2 and 3
+// CHECK: store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000>
+}
+
+float4 ftest2(float4 *p) {
+   return *p =  (float4)(1.1f, 1.2f, ((float2)(1.3f))); // (float2)(1.3f) fills lanes 2 and 3; return the stored vector so this non-void function does not fall off its end
+// CHECK: store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF4CCCCC0000000>
+}
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl
new file mode 100644
index 0000000..bba5b23
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -emit-llvm %s -o %t
+
+typedef __attribute__(( ext_vector_type(2) ))  int int2; // building blocks for the mixed-width literals below
+typedef __attribute__(( ext_vector_type(3) ))  int int3;
+typedef __attribute__(( ext_vector_type(4) ))  int int4;
+typedef __attribute__(( ext_vector_type(8) ))  int int8;
+typedef __attribute__(( ext_vector_type(4) ))  float float4;
+
+void vector_literals_valid() {
+  int4 a_1_1_1_1 = (int4)(1,2,3,4); // four scalars
+  int4 a_2_1_1 = (int4)((int2)(1,2),3,4); // int2 followed by two scalars
+  int4 a_1_2_1 = (int4)(1,(int2)(2,3),4); // scalar, int2, scalar
+  int4 a_1_1_2 = (int4)(1,2,(int2)(3,4)); // two scalars followed by int2
+  int4 a_2_2 = (int4)((int2)(1,2),(int2)(3,4)); // two int2 halves
+  int4 a_3_1 = (int4)((int3)(1,2,3),4); // int3 followed by a scalar
+  int4 a_1_3 = (int4)(1,(int3)(2,3,4)); // scalar followed by int3
+  int4 a = (int4)(1); // single scalar operand
+  int8 b = (int8)(1,2,a.xy,a); // 1 + 1 + 2 + 4 components
+  float4 V2 = (float4) (1); // integer scalar operand for a float vector
+}
+
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_logops.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_logops.cl
new file mode 100644
index 0000000..388f1d7
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_logops.cl
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -O3 %s -emit-llvm -o - | FileCheck %s
+
+typedef int int2 __attribute((ext_vector_type(2))); // operand type for the logical-operator tests below
+
+int test1()
+{
+  int2 a = (int2)(1,0);
+  int2 b = (int2)(1,1);
+  return (a&&b).x + (a||b).y; // true lanes are -1: (a&&b).x == -1 and (a||b).y == -1, hence -2
+  // CHECK: ret i32 -2
+}
+
+int test2()
+{
+  int2 a = (int2)(1,0);
+  return (!a).y; // a.y is 0, so (!a).y is true, i.e. -1
+  // CHECK: ret i32 -1
+}
+
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_odd.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_odd.cl
new file mode 100644
index 0000000..c44328b
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_odd.cl
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -O0 -emit-llvm -o - | FileCheck %s
+
+typedef unsigned char __attribute__((ext_vector_type(3))) uchar3; // odd-sized vector under test
+
+//CHECK: {{%.*}} = shufflevector <3 x i8> {{%.*}}, <3 x i8> <i8 1, i8 1, i8 undef>, <3 x i32> <i32 0, i32 3, i32 2>
+
+kernel void test_odd_vector1 (uchar3 lhs)
+{
+  lhs.odd = 1; // per the shuffle mask <0, 3, 2> checked above, only element 1 is replaced
+}
+
+//CHECK: {{%.*}} = shufflevector <3 x i8> {{%.*}}, <3 x i8> <i8 2, i8 2, i8 undef>, <3 x i32> <i32 0, i32 1, i32 3>
+
+kernel void test_odd_vector2 (uchar3 lhs)
+{
+  lhs.hi = 2; // per the shuffle mask <0, 1, 3> checked above, only element 2 is replaced
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_shufflevector_valid.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_shufflevector_valid.cl
new file mode 100644
index 0000000..0953c66
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vector_shufflevector_valid.cl
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -emit-llvm -O0 %s -o - | FileCheck %s
+
+// The shuffle vector mask must always be of i32 vector type.
+// See http://reviews.llvm.org/D10838 and https://llvm.org/bugs/show_bug.cgi?id=23800#c2
+// for more information about a bug where a 64-bit index operand caused the
+// generation of an invalid mask.
+
+typedef unsigned int uint2 __attribute((ext_vector_type(2))); // vector type whose literal is built in the test below
+
+void vector_shufflevector_valid(void) {
+    //CHECK: {{%.*}} = shufflevector <2 x i32> {{%.*}}, <2 x i32> undef, <2 x i32> <i32 0, i32 undef>
+    (uint2)(((uint2)(0)).s0, 0); // result is discarded; only the emitted shufflevector mask is checked
+}
diff --git a/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vla.cl b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vla.cl
new file mode 100644
index 0000000..f3d868a
--- /dev/null
+++ b/src/third_party/llvm-project/clang/test/CodeGenOpenCL/vla.cl
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -emit-llvm -triple "spir-unknown-unknown" -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,SPIR %s
+// RUN: %clang_cc1 -emit-llvm -triple amdgcn-amd-amdhsa -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
+
+constant int sz0 = 5; // array bound of vla0 in testvla
+// SPIR: @sz0 = addrspace(2) constant i32 5
+// AMDGCN: @sz0 = addrspace(4) constant i32 5
+const global int sz1 = 16; // array bound of vla1 in testvla
+// CHECK: @sz1 = addrspace(1) constant i32 16
+const constant int sz2 = 8; // array bound of vla2 in testvla
+// SPIR: @sz2 = addrspace(2) constant i32 8
+// AMDGCN: @sz2 = addrspace(4) constant i32 8
+// CHECK: @testvla.vla2 = internal addrspace(3) global [8 x i16] undef
+
+kernel void testvla()
+{
+  int vla0[sz0]; // bound comes from a constant-address-space variable; a fixed [5 x i32] alloca is expected
+// SPIR: %vla0 = alloca [5 x i32]
+// SPIR-NOT: %vla0 = alloca [5 x i32]{{.*}}addrspace
+// AMDGCN: %vla0 = alloca [5 x i32]{{.*}}addrspace(5)
+  char vla1[sz1]; // bound comes from a const global variable; a fixed [16 x i8] alloca is expected
+// SPIR: %vla1 = alloca [16 x i8]
+// SPIR-NOT: %vla1 = alloca [16 x i8]{{.*}}addrspace
+// AMDGCN: %vla1 = alloca [16 x i8]{{.*}}addrspace(5)
+  local short vla2[sz2]; // expected to become the addrspace(3) global @testvla.vla2 checked near the top of the file
+}