| # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s |
| # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s |
| |
| --- |
| # Trivial clause at beginning of program: a single S_LOAD_DWORD_IMM followed |
| # by S_ENDPGM. The GCN checks expect the sequence to come out unchanged, with |
| # no S_NOP, for both RUN lines. |
| name: trivial_smem_clause_load_smrd4_x1 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1 |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Trivial clause at beginning of program: two back-to-back loads whose |
| # destinations ($sgpr0, $sgpr1) do not overlap either pointer pair |
| # ($sgpr10_sgpr11, $sgpr12_sgpr13). The checks expect no S_NOP between them. |
| name: trivial_smem_clause_load_smrd4_x2 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2 |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Trivial clause at beginning of program: three consecutive loads into |
| # $sgpr0..$sgpr2 from the pointer pairs $sgpr12_sgpr13, $sgpr6_sgpr7 and |
| # $sgpr14_sgpr15. No destination overlaps a pointer; the checks expect the |
| # sequence unchanged (no S_NOP). |
| name: trivial_smem_clause_load_smrd4_x3 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3 |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0 |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0 |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Trivial clause at beginning of program: four consecutive loads into |
| # $sgpr0..$sgpr3 from the pointer pairs $sgpr12_sgpr13, $sgpr8_sgpr9, |
| # $sgpr14_sgpr15 and $sgpr16_sgpr17. No destination overlaps a pointer; the |
| # checks expect the sequence unchanged (no S_NOP). |
| name: trivial_smem_clause_load_smrd4_x4 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4 |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0 |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 |
| ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0 |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 |
| $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Reuse of same input pointer is OK: both loads read $sgpr10_sgpr11 and write |
| # $sgpr12 / $sgpr13, neither of which is part of that pointer pair. The checks |
| # expect no S_NOP between the loads. |
| name: trivial_smem_clause_load_smrd4_x2_sameptr |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr |
| ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # 32-bit load partially clobbers its own ptr reg: the load writes $sgpr10, |
| # the first register of its pointer pair $sgpr10_sgpr11. It is the only load |
| # in the function, and the checks expect no S_NOP. |
| name: smrd_load4_overwrite_ptr_lo |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo |
| ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # 32-bit load partially clobbers its own ptr reg: the load writes $sgpr11, |
| # the second register of its pointer pair $sgpr10_sgpr11. It is the only load |
| # in the function, and the checks expect no S_NOP. |
| name: smrd_load4_overwrite_ptr_hi |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi |
| ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # 64-bit load clobbers its own ptr reg: S_LOAD_DWORDX2_IMM writes the whole |
| # pair $sgpr10_sgpr11 that it also uses as its pointer. It is the only load in |
| # the function, and the checks expect no S_NOP. |
| name: smrd_load8_overwrite_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: smrd_load8_overwrite_ptr |
| ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt |
| # breaks the clause. |
| # |
| # The body issues 16 loads into $sgpr13..$sgpr28 from $sgpr10_sgpr11, then a |
| # 17th load into $sgpr0 from $sgpr30_sgpr31, then an S_MOV_B32 that reads |
| # $sgpr0 and implicitly uses all 16 earlier results. The checks expect the |
| # sequence to come out unchanged. |
| # NOTE(review): no S_WAITCNT appears in the input or in the checks, and scalar |
| # loads are presumably counted by lgkmcnt rather than vmcnt - confirm the |
| # wording of the first two comment lines. |
| |
| name: break_smem_clause_at_max_smem_clause_size_smrd_load4 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4 |
| ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0 |
| ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| |
| $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| |
| $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| |
| $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0 |
| $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28 |
| S_ENDPGM |
| ... |
| --- |
| # Two back-to-back loads, each writing the first register of its own pointer |
| # pair ($sgpr10 from $sgpr10_sgpr11, $sgpr12 from $sgpr12_sgpr13). With the |
| # XNACK prefix (carrizo RUN line) an S_NOP 0 is expected between them; the |
| # other RUN line expects the loads to stay adjacent. |
| |
| name: break_smem_clause_simple_load_smrd4_lo_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr |
| ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; XNACK-NEXT: S_NOP 0 |
| ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Two back-to-back loads into $sgpr0 and $sgpr3 from $sgpr10_sgpr11 and |
| # $sgpr12_sgpr13. The checks expect no S_NOP for either RUN line. |
| # NOTE(review): despite the "break_..._hi_ptr" name, neither destination is |
| # part of a pointer pair used here - confirm the intended registers. |
| |
| name: break_smem_clause_simple_load_smrd4_hi_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # The second 64-bit load writes $sgpr10_sgpr11, the pointer pair read by the |
| # first load. With the XNACK prefix (carrizo RUN line) an S_NOP 0 is expected |
| # between them; the other RUN line expects the loads to stay adjacent. |
| |
| name: break_smem_clause_simple_load_smrd8_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr |
| ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 |
| ; XNACK-NEXT: S_NOP 0 |
| ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # A 64-bit load from $sgpr10_sgpr11 followed by a 128-bit load from |
| # $sgpr6_sgpr7 into $sgpr12_sgpr13_sgpr14_sgpr15. The checks expect no S_NOP |
| # for either RUN line. |
| # NOTE(review): despite the "break_" name, the second load's destination does |
| # not include either pointer pair used here - confirm the intended registers. |
| |
| name: break_smem_clause_simple_load_smrd16_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr |
| ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Same register pattern as a load that overwrites the previous load's pointer |
| # ($sgpr10_sgpr11), but the two loads sit in different blocks: the first ends |
| # bb.0 and the second starts bb.1. With the XNACK prefix (carrizo RUN line) |
| # an S_NOP 0 is expected immediately after the bb.1 label, before the load. |
| |
| name: break_smem_clause_block_boundary_load_smrd8_ptr |
| |
| body: | |
| ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr |
| ; GCN: bb.0: |
| ; GCN: successors: %bb.1(0x80000000) |
| ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN: bb.1: |
| ; XNACK-NEXT: S_NOP 0 |
| ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| bb.0: |
| $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 |
| |
| bb.1: |
| $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # The load clobbers the pointer of the store, so it needs to break. |
| # |
| # NOTE(review): as written, the store uses $sgpr16 and $sgpr10_sgpr11 while |
| # the following load writes $sgpr12, which is neither of them, and the checks |
| # expect no S_NOP for either RUN line - confirm whether the registers and |
| # checks match the description above. |
| |
| name: break_smem_clause_store_load_into_ptr_smrd4 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4 |
| ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0 |
| $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # The load clobbers the data of the store, so it needs to break. |
| # FIXME: Would it be better to s_nop and wait later? |
| # |
| # The store's first operand is $sgpr8 and the following load writes $sgpr8. |
| # NOTE(review): the checks currently expect the store and load to stay |
| # adjacent (no S_NOP) for both RUN lines, so no break is verified here. |
| |
| name: break_smem_clause_store_load_into_data_smrd4 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4 |
| ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0 |
| $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Regular VALU instruction breaks clause, no nop needed: a V_MOV_B32_e32 sits |
| # between the two scalar loads, and the checks expect the three instructions |
| # to come out unchanged with no S_NOP. |
| name: valu_inst_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: valu_inst_breaks_smem_clause |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $vgpr8 = V_MOV_B32_e32 0, implicit $exec |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Regular SALU instruction breaks clause, no nop needed: an S_MOV_B32 sits |
| # between the two scalar loads, and the checks expect the three instructions |
| # to come out unchanged with no S_NOP. |
| name: salu_inst_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: salu_inst_breaks_smem_clause |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $sgpr8 = S_MOV_B32 0 |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # A DS_READ_B32 sits between the two scalar loads. The checks expect the |
| # three instructions to come out unchanged, with no S_NOP, for both RUN lines. |
| name: ds_inst_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: ds_inst_breaks_smem_clause |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # A FLAT_LOAD_DWORD sits between the two scalar loads. The checks expect the |
| # three instructions to come out unchanged, with no S_NOP, for both RUN lines. |
| |
| name: flat_inst_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: flat_inst_breaks_smem_clause |
| ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 |
| $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # FIXME: Should this be handled? |
| # The first load carries an implicit use of $sgpr12_sgpr13, and the second |
| # load writes $sgpr12_sgpr13. With the XNACK prefix (carrizo RUN line) an |
| # S_NOP 0 is expected between them; the other RUN line expects the loads to |
| # stay adjacent. |
| name: implicit_use_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: implicit_use_breaks_smem_clause |
| ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13 |
| ; XNACK-NEXT: S_NOP 0 |
| ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13 |
| $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0 |
| S_ENDPGM |
| ... |