; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; Simplest case, if - then, that requires lane mask merging.
; The %phi lane mask will hold %val_A at %A. Lanes that are active in %B
; will overwrite their own lane bit in the lane mask with %val_B.
define amdgpu_ps void @divergent_i1_phi_if_then(ptr addrspace(1) %out, i32 %tid, i32 %cond) {
; GFX10-LABEL: divergent_i1_phi_if_then:
; GFX10:       ; %bb.0: ; %A
; GFX10-NEXT:    v_cmp_le_u32_e64 s0, 6, v2
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-NEXT:  ; %bb.1: ; %B
; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 1, v2
; GFX10-NEXT:  ; %bb.2: ; %exit
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s0
; GFX10-NEXT:    v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-NEXT:    s_endpgm
A:
  %val_A = icmp uge i32 %tid, 6
  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %B, label %exit

B:
  %val_B = icmp ult i32 %tid, 1
  br label %exit

exit:
  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
  %sel = select i1 %phi, i32 1, i32 2
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; if - else
define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %cond) {
; GFX10-LABEL: divergent_i1_phi_if_else:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_and_b32 s0, 1, s0
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v3
; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-NEXT:    s_xor_b32 s1, exec_lo, s1
; GFX10-NEXT:  ; %bb.1: ; %B
; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 2, v2
; GFX10-NEXT:    ; implicit-def: $vgpr2
; GFX10-NEXT:  ; %bb.2: ; %Flow
; GFX10-NEXT:    s_andn2_saveexec_b32 s1, s1
; GFX10-NEXT:  ; %bb.3: ; %A
; GFX10-NEXT:    v_cmp_le_u32_e64 s0, 1, v2
; GFX10-NEXT:  ; %bb.4: ; %exit
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s0
; GFX10-NEXT:    v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %A, label %B

A:
  %val_A = icmp uge i32 %tid, 1
  br label %exit

B:
  %val_B = icmp ult i32 %tid, 2
  br label %exit

exit:
  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
  %sel = select i1 %phi, i32 1, i32 2
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; if - break;
;
; counter = 0;
; do {
;   if (a[counter] == 0)
;     break;
;   if (b[counter] == 0)
;     break;
;   if (c[counter] == 0)
;     break;
;   x[counter++] += 1;
; } while (counter < 100);
;
; Tests with multiple break conditions. Divergent phis are used to track
; whether any of the break conditions was reached. We only need to do simple
; lane mask merging (for the current loop iteration only). There is an
; intrinsic, if_break, that merges lane masks across all iterations of the
; loop.
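;
; A rough sketch of the lane mask handling described above (illustrative
; pseudo-code, not the exact intrinsics or pass output): merging a divergent
; i1 phi for the current iteration keeps the old bit for inactive lanes and
; takes the new value for active lanes,
;   merged = (old_mask & ~exec) | (new_val & exec)
; while the if_break-style accumulation ORs the per-iteration break condition
; into the mask that eventually terminates the loop,
;   break_mask = (cond & exec) | break_mask
; which corresponds to the s_and_b32/s_or_b32 pairs in the Flow blocks checked
; below.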
define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a) {
; GFX10-LABEL: loop_with_1break:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    v_mov_b32_e32 v4, s0
; GFX10-NEXT:    s_branch .LBB2_2
; GFX10-NEXT:  .LBB2_1: ; %Flow
; GFX10-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_and_b32 s1, exec_lo, s2
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB2_4
; GFX10-NEXT:  .LBB2_2: ; %A
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
; GFX10-NEXT:    s_mov_b32 s2, -1
; GFX10-NEXT:    v_lshlrev_b64 v[5:6], 2, v[4:5]
; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v2, v5
; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v3, v6, vcc_lo
; GFX10-NEXT:    global_load_dword v7, v[7:8], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v7
; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB2_1
; GFX10-NEXT:  ; %bb.3: ; %loop.body
; GFX10-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GFX10-NEXT:    v_add_co_u32 v5, vcc_lo, v0, v5
; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, v1, v6, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v8, 1, v4
; GFX10-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, v4
; GFX10-NEXT:    global_load_dword v7, v[5:6], off
; GFX10-NEXT:    v_mov_b32_e32 v4, v8
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e32 v7, 1, v7
; GFX10-NEXT:    global_store_dword v[5:6], v7, off
; GFX10-NEXT:    s_branch .LBB2_1
; GFX10-NEXT:  .LBB2_4: ; %exit
; GFX10-NEXT:    s_endpgm
entry:
  br label %A

A:
  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
  %a.val = load i32, ptr addrspace(1) %a.plus.counter
  %a.cond = icmp eq i32 %a.val, 0
  br i1 %a.cond, label %exit, label %loop.body

loop.body:
  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
  %x.val = load i32, ptr addrspace(1) %x.plus.counter
  %x.val.plus.1 = add i32 %x.val, 1
  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
  %counter.plus.1 = add i32 %counter, 1
  %x.cond = icmp ult i32 %counter, 100
  br i1 %x.cond, label %exit, label %A

exit:
  ret void
}

define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) {
; GFX10-LABEL: loop_with_2breaks:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    v_mov_b32_e32 v6, s0
; GFX10-NEXT:    s_branch .LBB3_3
; GFX10-NEXT:  .LBB3_1: ; %Flow3
; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s3
; GFX10-NEXT:  .LBB3_2: ; %Flow
; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_and_b32 s1, exec_lo, s2
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB3_6
; GFX10-NEXT:  .LBB3_3: ; %A
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
; GFX10-NEXT:    s_mov_b32 s2, -1
; GFX10-NEXT:    v_lshlrev_b64 v[7:8], 2, v[6:7]
; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v2, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
; GFX10-NEXT:    global_load_dword v9, v[9:10], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB3_2
; GFX10-NEXT:  ; %bb.4: ; %B
; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v4, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo
; GFX10-NEXT:    global_load_dword v9, v[9:10], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
; GFX10-NEXT:    s_and_saveexec_b32 s3, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB3_1
; GFX10-NEXT:  ; %bb.5: ; %loop.body
; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v6
; GFX10-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, v6
; GFX10-NEXT:    global_load_dword v9, v[7:8], off
; GFX10-NEXT:    v_mov_b32_e32 v6, v10
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v9
; GFX10-NEXT:    global_store_dword v[7:8], v9, off
; GFX10-NEXT:    s_branch .LBB3_1
; GFX10-NEXT:  .LBB3_6: ; %exit
; GFX10-NEXT:    s_endpgm
entry:
  br label %A

A:
  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
  %a.val = load i32, ptr addrspace(1) %a.plus.counter
  %a.cond = icmp eq i32 %a.val, 0
  br i1 %a.cond, label %exit, label %B

B:
  %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter
  %b.val = load i32, ptr addrspace(1) %b.plus.counter
  %b.cond = icmp eq i32 %b.val, 0
  br i1 %b.cond, label %exit, label %loop.body

loop.body:
  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
  %x.val = load i32, ptr addrspace(1) %x.plus.counter
  %x.val.plus.1 = add i32 %x.val, 1
  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
  %counter.plus.1 = add i32 %counter, 1
  %x.cond = icmp ult i32 %counter, 100
  br i1 %x.cond, label %exit, label %A

exit:
  ret void
}

define amdgpu_cs void @loop_with_3breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c) {
; GFX10-LABEL: loop_with_3breaks:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    v_mov_b32_e32 v8, s0
; GFX10-NEXT:    s_branch .LBB4_4
; GFX10-NEXT:  .LBB4_1: ; %Flow5
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:  .LBB4_2: ; %Flow4
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s3
; GFX10-NEXT:  .LBB4_3: ; %Flow
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_and_b32 s1, exec_lo, s2
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB4_8
; GFX10-NEXT:  .LBB4_4: ; %A
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
; GFX10-NEXT:    s_mov_b32 s2, -1
; GFX10-NEXT:    v_lshlrev_b64 v[9:10], 2, v[8:9]
; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v2, v9
; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v3, v10, vcc_lo
; GFX10-NEXT:    global_load_dword v11, v[11:12], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB4_3
; GFX10-NEXT:  ; %bb.5: ; %B
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v4, v9
; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v5, v10, vcc_lo
; GFX10-NEXT:    global_load_dword v11, v[11:12], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
; GFX10-NEXT:    s_and_saveexec_b32 s3, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB4_2
; GFX10-NEXT:  ; %bb.6: ; %C
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v6, v9
; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v7, v10, vcc_lo
; GFX10-NEXT:    global_load_dword v11, v[11:12], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB4_1
; GFX10-NEXT:  ; %bb.7: ; %loop.body
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v0, v9
; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v1, v10, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v12, 1, v8
; GFX10-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, v8
; GFX10-NEXT:    global_load_dword v11, v[9:10], off
; GFX10-NEXT:    v_mov_b32_e32 v8, v12
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v11
; GFX10-NEXT:    global_store_dword v[9:10], v11, off
; GFX10-NEXT:    s_branch .LBB4_1
; GFX10-NEXT:  .LBB4_8: ; %exit
; GFX10-NEXT:    s_endpgm
entry:
  br label %A

A:
  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
  %a.val = load i32, ptr addrspace(1) %a.plus.counter
  %a.cond = icmp eq i32 %a.val, 0
  br i1 %a.cond, label %exit, label %B

B:
  %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter
  %b.val = load i32, ptr addrspace(1) %b.plus.counter
  %b.cond = icmp eq i32 %b.val, 0
  br i1 %b.cond, label %exit, label %C

C:
  %c.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %c, i32 %counter
  %c.val = load i32, ptr addrspace(1) %c.plus.counter
  %c.cond = icmp eq i32 %c.val, 0
  br i1 %c.cond, label %exit, label %loop.body

loop.body:
  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
  %x.val = load i32, ptr addrspace(1) %x.plus.counter
  %x.val.plus.1 = add i32 %x.val, 1
  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
  %counter.plus.1 = add i32 %counter, 1
  %x.cond = icmp ult i32 %counter, 100
  br i1 %x.cond, label %exit, label %A

exit:
  ret void
}

; Divergent condition if with a body ending in break. This is a loop with two
; exits, but the structurizer will create a phi that tracks whether the break
; exit was taken and will move break.body after the loop. The loop then has a
; single exit, and the phi is used outside of the loop by the condition that
; guards entry into break.body.
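;
; A rough C sketch of the test below, in the style of the snippet before
; @loop_with_1break (a_break is an illustrative stand-in for the %a.break
; argument; the exact exit-condition polarity follows the IR, not this sketch):
;
; counter = 0;
; do {
;   if (a[counter] == 0) {
;     a_break[0] = 10;
;     break;
;   }
;   x[counter++] += 1;
; } while (counter < 100);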
define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %a.break) {
; GFX10-LABEL: loop_with_div_break_with_body:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    v_mov_b32_e32 v6, s0
; GFX10-NEXT:    s_branch .LBB5_2
; GFX10-NEXT:  .LBB5_1: ; %Flow
; GFX10-NEXT:    ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_and_b32 s1, exec_lo, s2
; GFX10-NEXT:    s_or_b32 s0, s1, s0
; GFX10-NEXT:    s_and_b32 s1, 1, s3
; GFX10-NEXT:    v_cmp_ne_u32_e64 s1, 0, s1
; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB5_4
; GFX10-NEXT:  .LBB5_2: ; %A
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
; GFX10-NEXT:    s_mov_b32 s2, -1
; GFX10-NEXT:    s_mov_b32 s3, 1
; GFX10-NEXT:    v_lshlrev_b64 v[7:8], 2, v[6:7]
; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v2, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
; GFX10-NEXT:    global_load_dword v9, v[9:10], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB5_1
; GFX10-NEXT:  ; %bb.3: ; %loop.body
; GFX10-NEXT:    ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v6
; GFX10-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, v6
; GFX10-NEXT:    s_mov_b32 s3, 0
; GFX10-NEXT:    global_load_dword v9, v[7:8], off
; GFX10-NEXT:    v_mov_b32_e32 v6, v10
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v9
; GFX10-NEXT:    global_store_dword v[7:8], v9, off
; GFX10-NEXT:    s_branch .LBB5_1
; GFX10-NEXT:  .LBB5_4: ; %loop.exit.guard
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_and_saveexec_b32 s0, s1
; GFX10-NEXT:    s_xor_b32 s0, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB5_6
; GFX10-NEXT:  ; %bb.5: ; %break.body
; GFX10-NEXT:    v_mov_b32_e32 v0, 10
; GFX10-NEXT:    global_store_dword v[4:5], v0, off
; GFX10-NEXT:  .LBB5_6: ; %exit
; GFX10-NEXT:    s_endpgm
entry:
  br label %A

A:
  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
  %a.val = load i32, ptr addrspace(1) %a.plus.counter
  %a.cond = icmp eq i32 %a.val, 0
  br i1 %a.cond, label %break.body, label %loop.body

break.body:
  store i32 10, ptr addrspace(1) %a.break
  br label %exit

loop.body:
  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
  %x.val = load i32, ptr addrspace(1) %x.plus.counter
  %x.val.plus.1 = add i32 %x.val, 1
  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
  %counter.plus.1 = add i32 %counter, 1
  %x.cond = icmp ult i32 %counter, 100
  br i1 %x.cond, label %exit, label %A

exit:
  ret void
}

; Snippet from a test generated by the GraphicsFuzz tool; the frontend
; generates IR with an irreducible control flow graph. FixIrreducible converts
; it into a natural loop and in the process creates an i1 phi with three
; incoming values.
; int loop(int x, int y, int a0, int a1, int a2, int a3, int a4) {
;   do {
;     if (y < a2) {
;       do {
;       } while (x < a2);
;     }
;     if (x < a3) {
;       return a1;
;     }
;   } while (y < a2);
;   return a0;
; }
;
; This test is also interesting because it has a phi with three incoming
; values.
;define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
;.entry:
;  %.y_lt_a2 = icmp sgt i32 %a2, %y
;  %.x_lt_a2 = icmp sgt i32 %a2, %x
;  %.x_lt_a3 = icmp sgt i32 %a3, %x
;  br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)'
;
;.preheader: ; if (y < a2)
;  br label %.inner_loop
;
;.inner_loop: ; do while x < a2
;  br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit
;
;.loopexit: ; if x < a3
;  %not.inner_loop = xor i1 %.y_lt_a2, true
;  %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)'
;  %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select return value: a1 if 'x < a3', a0 if 'loop ends'
;  br i1 %brmerge, label %.exit, label %.preheader
;
;.exit:
;  ret i32 %.ret
;}