# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s

# Each machine function below pairs a VALU instruction that reads an SGPR pair
# (or $vcc) with a following instruction that writes the same register, then
# runs the post-RA hazard recognizer. The assertions record that the gfx1100
# run gains an S_WAITCNT_DEPCTR after the write, while the gfx1200 run is
# expected to leave the instruction stream unchanged.

--- |
  ; Only the address of @mem is referenced (by mask_hazard_getpc2), so the
  ; element values do not affect any assertion.
  ; NOTE(review): the per-element initializers were missing from this file;
  ; zero vectors are used here - confirm against the upstream test.
  @mem = internal unnamed_addr addrspace(4) constant [4 x <4 x i32>] [<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer]

  ; One empty IR function per machine function name used by this test.
  define amdgpu_gs void @mask_hazard_getpc1() { ret void }
  define amdgpu_gs void @mask_hazard_getpc2() { ret void }
  define amdgpu_gs void @mask_hazard_vcc1() { ret void }
  define amdgpu_gs void @mask_hazard_vcc2() { ret void }
  define amdgpu_gs void @mask_hazard_cndmask_dpp1() { ret void }
  define amdgpu_gs void @mask_hazard_cndmask_dpp2() { ret void }
  define amdgpu_gs void @mask_hazard_cndmask_dpp3() { ret void }
  define amdgpu_gs void @mask_hazard_cndmask_dpp4() { ret void }
  define amdgpu_gs void @mask_hazard_addc1() { ret void }
  define amdgpu_gs void @mask_hazard_addc2() { ret void }
  define amdgpu_gs void @mask_hazard_addc3() { ret void }
  define amdgpu_gs void @mask_hazard_addc4() { ret void }
  define amdgpu_gs void @mask_hazard_subb1() { ret void }
  define amdgpu_gs void @mask_hazard_subb2() { ret void }
  define amdgpu_gs void @mask_hazard_subb3() { ret void }
  define amdgpu_gs void @mask_hazard_subb4() { ret void }
  define amdgpu_gs void @mask_hazard_subbrev1() { ret void }
  define amdgpu_gs void @mask_hazard_subbrev2() { ret void }
  define amdgpu_gs void @mask_hazard_subbrev3() { ret void }
  define amdgpu_gs void @mask_hazard_subbrev4() { ret void }
  define amdgpu_gs void @mask_hazard_div_fmas_f32() { ret void }
  define amdgpu_gs void @mask_hazard_div_fmas_f64() { ret void }
  define amdgpu_gs void @mask_hazard_subreg1() { ret void }
  define amdgpu_gs void @mask_hazard_subreg2() { ret void }
  define amdgpu_gs void @mask_hazard_subreg3() { ret void }
  define amdgpu_gs void @mask_hazard_subreg4() { ret void }
  define amdgpu_gs void @mask_hazard_subreg5() { ret void }
  define amdgpu_gs void @mask_hazard_waitcnt() { ret void }
  define amdgpu_gs void @mask_hazard_gap1() { ret void }
  define amdgpu_gs void @mask_hazard_gap2() { ret void }
  define amdgpu_gs void @mask_hazard_gap3() { ret void }
  define amdgpu_gs void @mask_hazard_no_hazard1() { ret void }
  define amdgpu_gs void @mask_hazard_no_hazard2() { ret void }
  define amdgpu_gs void @mask_hazard_no_hazard3() { ret void }
...

# V_CNDMASK_B32_e64 reads $sgpr0_sgpr1, then S_GETPC_B64 overwrites it.
# The gfx1100 output is expected to have S_WAITCNT_DEPCTR directly after the
# S_GETPC_B64; the gfx1200 output is expected to match the input.
---
name: mask_hazard_getpc1
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_getpc1
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX11-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_getpc1
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX12-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX12-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    $sgpr0_sgpr1 = S_GETPC_B64
    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    S_ENDPGM 0
...
# Same read/write pair as mask_hazard_getpc1, but the S_GETPC_B64 and the
# S_ADD_U32/S_ADDC_U32 that follow it are wrapped in a BUNDLE.
# The gfx1100 output is expected to have S_WAITCNT_DEPCTR inside the bundle,
# directly after S_GETPC_B64, and the @mem offsets on the two following
# instructions are each 4 larger than in the input (4 -> 8, 12 -> 16).
# The gfx1200 output is expected to match the input, offsets included.
---
name: mask_hazard_getpc2
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_getpc2
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX11-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 {
    ; GFX11-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 8, implicit-def $scc
    ; GFX11-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-lo) @mem + 16, implicit-def $scc, implicit $scc
    ; GFX11-NEXT: }
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_getpc2
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX12-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 {
    ; GFX12-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX12-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 4, implicit-def $scc
    ; GFX12-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-lo) @mem + 12, implicit-def $scc, implicit $scc
    ; GFX12-NEXT: }
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    BUNDLE implicit-def $sgpr0_sgpr1 {
      $sgpr0_sgpr1 = S_GETPC_B64
      $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 4, implicit-def $scc
      $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-lo) @mem + 12, implicit-def $scc, implicit $scc
    }
    S_ENDPGM 0
...
# V_CNDMASK_B32_e64 reads $sgpr2_sgpr3 as an explicit operand, then
# S_CSELECT_B64 overwrites the same pair. The gfx1100 output is expected to
# have S_WAITCNT_DEPCTR after the S_CSELECT_B64; gfx1200 is left unchanged.
---
name: mask_hazard_vcc1
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_vcc1
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_vcc1
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# e32 form: V_CNDMASK_B32_e32 reads $vcc implicitly, then S_CSELECT_B64
# overwrites $vcc. Same expectation as mask_hazard_vcc1.
---
name: mask_hazard_vcc2
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_vcc2
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_vcc2
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# DPP form: V_CNDMASK_B32_dpp reads $vcc implicitly, then S_CSELECT_B64
# overwrites $vcc. Same expectation as mask_hazard_vcc1.
---
name: mask_hazard_cndmask_dpp1
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_cndmask_dpp1
    ; GFX11: $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_cndmask_dpp1
    ; GFX12: $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# e64 DPP form: V_CNDMASK_B32_e64_dpp reads $sgpr2_sgpr3 as an explicit
# operand, then S_CSELECT_B64 overwrites the same pair. The gfx1100 output is
# expected to have S_WAITCNT_DEPCTR after the write; gfx1200 is left unchanged.
---
name: mask_hazard_cndmask_dpp2
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_cndmask_dpp2
    ; GFX11: $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_cndmask_dpp2
    ; GFX12: $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# 16-bit DPP form reading $vcc implicitly. The IR module declares
# @mask_hazard_cndmask_dpp3, but this file had no machine function for it.
# NOTE(review): the opcode, operand list and assertions below are reconstructed
# by analogy with mask_hazard_cndmask_dpp1 and mask_hazard_cndmask_dpp4;
# re-run utils/update_mir_test_checks.py to confirm them.
---
name: mask_hazard_cndmask_dpp3
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_cndmask_dpp3
    ; GFX11: $vgpr0 = V_CNDMASK_B16_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_cndmask_dpp3
    ; GFX12: $vgpr0 = V_CNDMASK_B16_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = V_CNDMASK_B16_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# 16-bit e64 DPP form: V_CNDMASK_B16_e64_dpp reads $sgpr2_sgpr3 as an explicit
# operand, then S_CSELECT_B64 overwrites the same pair. Same expectation as
# mask_hazard_cndmask_dpp2.
---
name: mask_hazard_cndmask_dpp4
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_cndmask_dpp4
    ; GFX11: $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_cndmask_dpp4
    ; GFX12: $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# V_ADDC_U32_e64 takes $sgpr2_sgpr3 as an explicit operand, then S_CSELECT_B64
# overwrites the same pair. The gfx1100 output is expected to have
# S_WAITCNT_DEPCTR after the write; gfx1200 is left unchanged.
---
name: mask_hazard_addc1
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_addc1
    ; GFX11: $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_addc1
    ; GFX12: $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# e32 form: V_ADDC_U32_e32 uses and defines $vcc implicitly, then
# S_CSELECT_B64 overwrites $vcc. Same expectation as mask_hazard_addc1.
---
name: mask_hazard_addc2
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_addc2
    ; GFX11: $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_addc2
    ; GFX12: $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# DPP form: V_ADDC_U32_dpp uses and defines $vcc implicitly, then
# S_CSELECT_B64 overwrites $vcc. The gfx1100 output is expected to have
# S_WAITCNT_DEPCTR after the write; gfx1200 is left unchanged.
---
name: mask_hazard_addc3
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_addc3
    ; GFX11: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_addc3
    ; GFX12: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# e64 DPP form: V_ADDC_U32_e64_dpp both defines and takes $sgpr2_sgpr3 as an
# explicit operand, then S_CSELECT_B64 overwrites the same pair.
# Same expectation as mask_hazard_addc3.
---
name: mask_hazard_addc4
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_addc4
    ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_addc4
    ; GFX12: $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# V_SUBB_U32_e64 takes $sgpr2_sgpr3 as an explicit operand, then S_CSELECT_B64
# overwrites the same pair. The gfx1100 output is expected to have
# S_WAITCNT_DEPCTR after the write; gfx1200 is left unchanged.
---
name: mask_hazard_subb1
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subb1
    ; GFX11: $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subb1
    ; GFX12: $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# e32 form: V_SUBB_U32_e32 uses and defines $vcc implicitly, then
# S_CSELECT_B64 overwrites $vcc. Same expectation as mask_hazard_subb1.
---
name: mask_hazard_subb2
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subb2
    ; GFX11: $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subb2
    ; GFX12: $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# DPP form: V_SUBB_U32_dpp uses and defines $vcc implicitly, then
# S_CSELECT_B64 overwrites $vcc. The gfx1100 output is expected to have
# S_WAITCNT_DEPCTR after the write; gfx1200 is left unchanged.
---
name: mask_hazard_subb3
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subb3
    ; GFX11: $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subb3
    ; GFX12: $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# e64 DPP form: V_SUBB_U32_e64_dpp both defines and takes $sgpr2_sgpr3 as an
# explicit operand, then S_CSELECT_B64 overwrites the same pair.
# Same expectation as mask_hazard_subb3.
---
name: mask_hazard_subb4
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subb4
    ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subb4
    ; GFX12: $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# V_SUBBREV_U32_e64 takes $sgpr2_sgpr3 as an explicit operand, then
# S_CSELECT_B64 overwrites the same pair. The gfx1100 output is expected to
# have S_WAITCNT_DEPCTR after the write; gfx1200 is left unchanged.
---
name: mask_hazard_subbrev1
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subbrev1
    ; GFX11: $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subbrev1
    ; GFX12: $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# e32 form: V_SUBBREV_U32_e32 uses and defines $vcc implicitly, then
# S_CSELECT_B64 overwrites $vcc. Same expectation as mask_hazard_subbrev1.
---
name: mask_hazard_subbrev2
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subbrev2
    ; GFX11: $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subbrev2
    ; GFX12: $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# DPP form: V_SUBBREV_U32_dpp uses and defines $vcc implicitly, then
# S_CSELECT_B64 overwrites $vcc. The gfx1100 output is expected to have
# S_WAITCNT_DEPCTR after the write; gfx1200 is left unchanged.
---
name: mask_hazard_subbrev3
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subbrev3
    ; GFX11: $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subbrev3
    ; GFX12: $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# e64 DPP form: V_SUBBREV_U32_e64_dpp both defines and takes $sgpr2_sgpr3 as
# an explicit operand, then S_CSELECT_B64 overwrites the same pair.
# Same expectation as mask_hazard_subbrev3.
---
name: mask_hazard_subbrev4
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subbrev4
    ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subbrev4
    ; GFX12: $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec
    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# V_DIV_FMAS_F32_e64 reads $vcc implicitly, then S_CSELECT_B64 overwrites
# $vcc. The gfx1100 output is expected to have S_WAITCNT_DEPCTR after the
# write; gfx1200 is left unchanged.
---
name: mask_hazard_div_fmas_f32
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_div_fmas_f32
    ; GFX11: $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_div_fmas_f32
    ; GFX12: $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...

# 64-bit variant: V_DIV_FMAS_F64_e64 reads $vcc implicitly, then
# S_CSELECT_B64 overwrites $vcc. Same expectation as mask_hazard_div_fmas_f32.
---
name: mask_hazard_div_fmas_f64
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_div_fmas_f64
    ; GFX11: $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_div_fmas_f64
    ; GFX12: $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
    S_ENDPGM 0
...
# Check low word overlap
# V_CNDMASK_B32_e64 reads $sgpr2_sgpr3; only $sgpr2 is written afterwards.
# The gfx1100 output is still expected to have S_WAITCNT_DEPCTR after the
# write; gfx1200 is left unchanged.
---
name: mask_hazard_subreg1
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subreg1
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subreg1
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GFX12-NEXT: $sgpr2 = S_MOV_B32 0
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    $sgpr2 = S_MOV_B32 0
    S_ENDPGM 0
...

# Check high word overlap
# Same as mask_hazard_subreg1, but only $sgpr3 is written afterwards.
---
name: mask_hazard_subreg2
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subreg2
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subreg2
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GFX12-NEXT: $sgpr3 = S_MOV_B32 0
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    $sgpr3 = S_MOV_B32 0
    S_ENDPGM 0
...

# Check multiple subreg overlap
# Both halves are written one after the other. The gfx1100 output is expected
# to have a single S_WAITCNT_DEPCTR, placed after the first write ($sgpr2) and
# before the second ($sgpr3).
---
name: mask_hazard_subreg3
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subreg3
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subreg3
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GFX12-NEXT: $sgpr2 = S_MOV_B32 0
    ; GFX12-NEXT: $sgpr3 = S_MOV_B32 0
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    $sgpr2 = S_MOV_B32 0
    $sgpr3 = S_MOV_B32 0
    S_ENDPGM 0
...
# Check vcc_lo overlap
# V_CNDMASK_B32_e32 reads $vcc implicitly; $vcc_lo is then written and read
# back by an S_MOV_B32. The gfx1100 output is expected to have
# S_WAITCNT_DEPCTR between the write and the read-back; gfx1200 is left
# unchanged.
---
name: mask_hazard_subreg4
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subreg4
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc_lo = S_MOV_B32 0
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr2 = S_MOV_B32 $vcc_lo
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subreg4
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc_lo = S_MOV_B32 0
    ; GFX12-NEXT: $sgpr2 = S_MOV_B32 $vcc_lo
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    $vcc_lo = S_MOV_B32 0
    $sgpr2 = S_MOV_B32 $vcc_lo
    S_ENDPGM 0
...

# Check vcc_hi overlap
# Same as mask_hazard_subreg4, but using $vcc_hi for the write and read-back.
---
name: mask_hazard_subreg5
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_subreg5
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    ; GFX11-NEXT: $vcc_hi = S_MOV_B32 0
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr2 = S_MOV_B32 $vcc_hi
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_subreg5
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    ; GFX12-NEXT: $vcc_hi = S_MOV_B32 0
    ; GFX12-NEXT: $sgpr2 = S_MOV_B32 $vcc_hi
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    $vcc_hi = S_MOV_B32 0
    $sgpr2 = S_MOV_B32 $vcc_hi
    S_ENDPGM 0
...
# S_WAITCNT does not mitigate hazard
# An S_WAITCNT 0 sits between the V_CNDMASK_B32_e64 read of $sgpr0_sgpr1 and
# the S_GETPC_B64 write. The gfx1100 output is still expected to have
# S_WAITCNT_DEPCTR after the S_GETPC_B64; gfx1200 is left unchanged.
---
name: mask_hazard_waitcnt
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_waitcnt
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX11-NEXT: S_WAITCNT 0
    ; GFX11-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_waitcnt
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX12-NEXT: S_WAITCNT 0
    ; GFX12-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX12-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    S_WAITCNT 0
    $sgpr0_sgpr1 = S_GETPC_B64
    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    S_ENDPGM 0
...

# Check implicit $exec
# Two V_MOV_B32_e32 instructions, each with an implicit $exec use, separate
# the mask read from the S_GETPC_B64 write. The gfx1100 output is still
# expected to have S_WAITCNT_DEPCTR after the S_GETPC_B64.
---
name: mask_hazard_gap1
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_gap1
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX11-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
    ; GFX11-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    ; GFX11-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_gap1
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX12-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
    ; GFX12-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    ; GFX12-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX12-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    $sgpr0_sgpr1 = S_GETPC_B64
    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    S_ENDPGM 0
...
# Check implicit $mode
# A V_MOV_B32_e32 with implicit $exec and $mode uses separates the mask read
# from the S_GETPC_B64 write. The gfx1100 output is still expected to have
# S_WAITCNT_DEPCTR after the S_GETPC_B64; gfx1200 is left unchanged.
---
name: mask_hazard_gap2
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_gap2
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX11-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode
    ; GFX11-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_gap2
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX12-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode
    ; GFX12-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX12-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode
    $sgpr0_sgpr1 = S_GETPC_B64
    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    S_ENDPGM 0
...

# Check explicit $exec
# A V_WRITELANE_B32 taking $exec_lo as an explicit operand separates the mask
# read from the S_GETPC_B64 write. The gfx1100 output is still expected to
# have S_WAITCNT_DEPCTR after the S_GETPC_B64; gfx1200 is left unchanged.
---
name: mask_hazard_gap3
body: |
  bb.0:
    ; GFX11-LABEL: name: mask_hazard_gap3
    ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2
    ; GFX11-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
    ; GFX11-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX11-NEXT: S_ENDPGM 0
    ; GFX12-LABEL: name: mask_hazard_gap3
    ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    ; GFX12-NEXT: $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2
    ; GFX12-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
    ; GFX12-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
    $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2
    $sgpr0_sgpr1 = S_GETPC_B64
    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
    S_ENDPGM 0
...
# Different SGPR write
# The mask read uses $sgpr2_sgpr3 but the following write targets $sgpr0.
# Both runs share one set of assertions, which expect the output to match the
# input with nothing inserted.
---
name: mask_hazard_no_hazard1
body: |
  bb.0:
    ; GCN-LABEL: name: mask_hazard_no_hazard1
    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
    $sgpr0 = S_MOV_B32 0
    S_ENDPGM 0
...

# Different SGPR write with mask read overlap
# The following S_MOV_B64 reads $vcc (the mask register) but writes
# $sgpr0_sgpr1. Nothing is expected to be inserted on either run.
---
name: mask_hazard_no_hazard2
body: |
  bb.0:
    ; GCN-LABEL: name: mask_hazard_no_hazard2
    ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $vcc
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    $sgpr0_sgpr1 = S_MOV_B64 $vcc
    S_ENDPGM 0
...

# Overlapping VGPR write
# The following instruction overwrites $vgpr2, a VGPR source of the
# V_CNDMASK_B32_e32, rather than the mask register. Nothing is expected to be
# inserted on either run.
---
name: mask_hazard_no_hazard3
body: |
  bb.0:
    ; GCN-LABEL: name: mask_hazard_no_hazard3
    ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
    S_ENDPGM 0
...