| # RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s |
| # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s |
| |
| # If the merged DS access needs a base offset, check that SILoadStoreOptimizer |
| # materializes it with V_ADD_{I|U}32_e64. The _e64 form puts the carry in a |
| # vreg, whereas the _e32 form uses $vcc, so choosing _e64 ensures that a live |
| # $vcc is not inadvertently clobbered. |
| |
| # GCN-LABEL: name: kernel |
| |
| # VI: V_ADD_I32_e64 %6, %0, |
| # VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8, |
| # VI: V_ADD_I32_e64 %10, %3, |
| # VI-NEXT: DS_READ2_B32 killed %11, 0, 8, |
| |
| # GFX9: V_ADD_U32_e64 %6, %0, |
| # GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8, |
| # GFX9: V_ADD_U32_e64 %9, %3, |
| # GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8, |
| |
| --- | |
| ; Embedded LLVM IR module. It only provides the IR values that the memory |
| ; operands of the MIR function below refer to (%ir.tmp .. %ir.tmp3). |
| ; |
| ; @0 is a 256-element float array in addrspace(3), the address space accessed |
| ; by the DS_* instructions in the MIR (local/LDS memory per the AMDGPU docs). |
| @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4 |
| |
| ; IR counterpart of the MIR function "kernel": three blocks whose only job is |
| ; to define the four pointers into @0 used as memory-operand names. |
| define amdgpu_kernel void @kernel() { |
| bb.0: |
| br label %bb2 |
| |
| bb1: |
| ret void |
| |
| bb2: |
| ; %tmp points at element 0 of @0; %tmp1, %tmp2 and %tmp3 point at float |
| ; elements 8, 16 and 24 relative to %tmp, i.e. consecutive pointers are |
| ; 8 floats apart. |
| %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0 |
| %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8 |
| %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16 |
| %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24 |
| br label %bb1 |
| } |
| --- |
| # Machine function fed to si-load-store-opt. $vcc is defined before the DS |
| # accesses and read after them, so any instruction the pass inserts in between |
| # must not write $vcc. |
| name: kernel |
| body: | |
| bb.0: |
| ; %0 is an undefined VGPR; it is reused below as a compare input, a DS |
| ; address and a DS store value. |
| %0:vgpr_32 = IMPLICIT_DEF |
| S_BRANCH %bb.2 |
| |
| bb.1: |
| ; Exit block: target of both branches at the end of bb.2. |
| S_ENDPGM |
| |
| bb.2: |
| ; Build a condition in $vcc: %1 is the mask of %0 != 0, %2 is chosen between |
| ; the constants 0 and 1 under that mask, and the _e32 compare of %2 against 1 |
| ; implicitly defines $vcc. |
| %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec |
| %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec |
| V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec |
| ; Store pair off address register %0 at offsets 1024 and 1056 (32 bytes |
| ; apart). The check lines at the top of the file expect these to become one |
| ; DS_WRITE2_B32 with offsets 0 and 8, addressed through a new V_ADD result. |
| ; NOTE(review): presumably the add is required because 1024/4 = 256 does not |
| ; fit the offset fields of the two-address DS forms; confirm against the ISA. |
| DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) |
| ; %3 is defined between the two stores; it is the value written by the second |
| ; store and the address register of both loads. |
| %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1) |
| ; Load pair off address register %3 at offsets 1088 and 1120 (32 bytes apart). |
| ; The check lines expect one DS_READ2_B32 with offsets 0 and 8, again |
| ; addressed through a new V_ADD result. |
| %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2) |
| %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3) |
| ; First read of $vcc after the DS accesses: this is why the adds created by |
| ; the pass must leave $vcc intact. |
| $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc |
| ; Conditional and fall-through branches both go to bb.1; only the use of $vcc |
| ; matters for this test. |
| S_CBRANCH_VCCNZ %bb.1, implicit $vcc |
| S_BRANCH %bb.1 |
| ... |