; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Dynamically indexed extractelement tests. The function name suffix gives
; the source vector kind followed by the index kind: "const_s" is a constant
; vector, "s" is an inreg argument (SGPRs in the checks) and "v" is a regular
; argument (VGPRs in the checks).

; Constant <8 x float> vector, index in a VGPR (v0 in the checks).
define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_const_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
; GCN-NEXT:    v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; Constant <8 x float> vector, index passed inreg (s2 in the checks).
define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
; GPRIDX-NEXT:    s_cselect_b32 s0, 2.0, 1.0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40400000, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
; GPRIDX-NEXT:    s_cselect_b32 s0, 4.0, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 4
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40a00000, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 5
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40c00000, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 6
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40e00000, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 7
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x41000000, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s4, 1.0
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT:    s_mov_b32 s7, 4.0
; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
; MOVREL-NEXT:    s_mov_b32 s5, 2.0
; MOVREL-NEXT:    s_movrels_b32 s0, s4
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; <8 x float> vector passed inreg (s2-s9 in the checks), index in v0.
define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    v_mov_b32_e32 v1, s2
; GCN-NEXT:    v_mov_b32_e32 v2, s3
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v3, s4
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v4, s5
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    v_mov_b32_e32 v5, s6
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    v_mov_b32_e32 v6, s7
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    v_mov_b32_e32 v7, s8
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v8, s9
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v8, vcc
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_s_v:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s3
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s7, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s9, vcc_lo
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector in v0-v7, index in v8.
define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_v_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector in v0-v7, index passed inreg (s2 in the checks).
define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
; MOVREL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_v_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector passed inreg (s2-s9), index passed inreg (s10).
define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 m0, s10
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_movrels_b32 s0, s0
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 m0, s10
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    s_movrels_b32 s0, s0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Constant <8 x i64> vector, index in a VGPR (v0 in the checks).
define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_const_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b64 s[16:17], 2
; GCN-NEXT:    s_mov_b64 s[18:19], 1
; GCN-NEXT:    s_mov_b64 s[14:15], 3
; GCN-NEXT:    v_mov_b32_e32 v1, s18
; GCN-NEXT:    v_mov_b32_e32 v2, s19
; GCN-NEXT:    v_mov_b32_e32 v3, s16
; GCN-NEXT:    v_mov_b32_e32 v4, s17
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    s_mov_b64 s[12:13], 4
; GCN-NEXT:    v_mov_b32_e32 v5, s14
; GCN-NEXT:    v_mov_b32_e32 v6, s15
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    s_mov_b64 s[10:11], 5
; GCN-NEXT:    v_mov_b32_e32 v7, s12
; GCN-NEXT:    v_mov_b32_e32 v8, s13
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    s_mov_b64 s[8:9], 6
; GCN-NEXT:    v_mov_b32_e32 v9, s10
; GCN-NEXT:    v_mov_b32_e32 v10, s11
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    s_mov_b64 s[6:7], 7
; GCN-NEXT:    v_mov_b32_e32 v11, s8
; GCN-NEXT:    v_mov_b32_e32 v12, s9
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    s_mov_b64 s[4:5], 8
; GCN-NEXT:    v_mov_b32_e32 v13, s6
; GCN-NEXT:    v_mov_b32_e32 v14, s7
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v15, s4
; GCN-NEXT:    v_mov_b32_e32 v16, s5
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b64 s[4:5], 2
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, s4
; GFX10-NEXT:    v_mov_b32_e32 v2, s5
; GFX10-NEXT:    s_mov_b64 s[6:7], 1
; GFX10-NEXT:    s_mov_b64 s[4:5], 3
; GFX10-NEXT:    v_cndmask_b32_e32 v1, s6, v1, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v2, s7, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT:    s_mov_b64 s[6:7], 4
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT:    s_mov_b64 s[4:5], 5
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT:    s_mov_b64 s[6:7], 6
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT:    s_mov_b64 s[4:5], 7
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT:    s_mov_b64 s[6:7], 8
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s7, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: dyn_extract_v8i64_const_s_v:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b64 s[0:1], 2
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT:    v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
; GFX11-NEXT:    s_mov_b64 s[2:3], 1
; GFX11-NEXT:    s_mov_b64 s[0:1], 3
; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX11-NEXT:    s_mov_b64 s[2:3], 4
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], 5
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX11-NEXT:    s_mov_b64 s[2:3], 6
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], 7
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX11-NEXT:    s_mov_b64 s[2:3], 8
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s2, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  ret i64 %ext
}

; Constant <8 x i64> vector, index passed inreg (s2 in the checks). The
; extracted element is stored through an undef addrspace(1) pointer.
define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
; GPRIDX-NEXT:    s_mov_b32 m0, s2
; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b64 s[4:5], 1
; GFX10-NEXT:    s_mov_b32 m0, s2
; GFX10-NEXT:    s_mov_b64 s[18:19], 8
; GFX10-NEXT:    s_mov_b64 s[16:17], 7
; GFX10-NEXT:    s_mov_b64 s[14:15], 6
; GFX10-NEXT:    s_mov_b64 s[12:13], 5
; GFX10-NEXT:    s_mov_b64 s[10:11], 4
; GFX10-NEXT:    s_mov_b64 s[8:9], 3
; GFX10-NEXT:    s_mov_b64 s[6:7], 2
; GFX10-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_const_s_s:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_mov_b64 s[4:5], 1
; GFX11-NEXT:    s_mov_b32 m0, s2
; GFX11-NEXT:    s_mov_b64 s[18:19], 8
; GFX11-NEXT:    s_mov_b64 s[16:17], 7
; GFX11-NEXT:    s_mov_b64 s[14:15], 6
; GFX11-NEXT:    s_mov_b64 s[12:13], 5
; GFX11-NEXT:    s_mov_b64 s[10:11], 4
; GFX11-NEXT:    s_mov_b64 s[8:9], 3
; GFX11-NEXT:    s_mov_b64 s[6:7], 2
; GFX11-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  store i64 %ext, ptr addrspace(1) undef
  ret void
}

; <8 x i64> vector passed inreg (s2-s17 in the checks), index in v0.
define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s2
; GPRIDX-NEXT:    v_mov_b32_e32 v2, s3
; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v7, s8
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s10
; GPRIDX-NEXT:    v_mov_b32_e32 v10, s11
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v11, s12
; GPRIDX-NEXT:    v_mov_b32_e32 v12, s13
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    v_mov_b32_e32 v1, s2
; MOVREL-NEXT:    v_mov_b32_e32 v2, s3
; MOVREL-NEXT:    v_mov_b32_e32 v3, s4
; MOVREL-NEXT:    v_mov_b32_e32 v4, s5
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; MOVREL-NEXT:    v_mov_b32_e32 v5, s6
; MOVREL-NEXT:    v_mov_b32_e32 v6, s7
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; MOVREL-NEXT:    v_mov_b32_e32 v7, s8
; MOVREL-NEXT:    v_mov_b32_e32 v8, s9
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; MOVREL-NEXT:    v_mov_b32_e32 v9, s10
; MOVREL-NEXT:    v_mov_b32_e32 v10, s11
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; MOVREL-NEXT:    v_mov_b32_e32 v11, s12
; MOVREL-NEXT:    v_mov_b32_e32 v12, s13
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; MOVREL-NEXT:    v_mov_b32_e32 v13, s14
; MOVREL-NEXT:    v_mov_b32_e32 v14, s15
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; MOVREL-NEXT:    v_mov_b32_e32 v15, s16
; MOVREL-NEXT:    v_mov_b32_e32 v16, s17
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_mov_b32_e32 v1, s4
; GFX10-NEXT:    v_mov_b32_e32 v2, s5
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s16, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s17, vcc_lo
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_s_v:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s16, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s17, vcc_lo
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, ptr addrspace(1) undef
  ret void
}

; <8 x i64> vector in v0-v15, index in v16.
define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_v_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_v_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: dyn_extract_v8i64_v_v:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  ret i64 %ext
}

; <8 x i64> vector in v0-v15, index passed inreg (s2 in the checks). The
; checks shift the index left by one before using it for the 32-bit moves.
define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v17, v1
; GPRIDX-NEXT:    s_set_gpr_idx_off
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
; MOVREL-NEXT:    v_movrels_b32_e32 v16, v0
; MOVREL-NEXT:    v_movrels_b32_e32 v17, v1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_v_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
; GFX10-NEXT:    v_movrels_b32_e32 v16, v0
; GFX10-NEXT:    v_movrels_b32_e32 v17, v1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_v_s:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_lshl_b32 m0, s2, 1
; GFX11-NEXT:    v_movrels_b32_e32 v16, v0
; GFX11-NEXT:    v_movrels_b32_e32 v17, v1
; GFX11-NEXT:    global_store_b64 v[0:1], v[16:17], off
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, ptr addrspace(1) undef
  ret void
}

; <8 x i64> vector passed inreg (s2-s17), index passed inreg (s18).
define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 m0, s18
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b32 s8, s10
; GPRIDX-NEXT:    s_mov_b32 s9, s11
; GPRIDX-NEXT:    s_mov_b32 s10, s12
; GPRIDX-NEXT:    s_mov_b32 s11, s13
; GPRIDX-NEXT:    s_mov_b32 s12, s14
; GPRIDX-NEXT:    s_mov_b32 s13, s15
; GPRIDX-NEXT:    s_mov_b32 s14, s16
; GPRIDX-NEXT:    s_mov_b32 s15, s17
; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 m0, s18
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b32 s8, s10
; MOVREL-NEXT:    s_mov_b32 s9, s11
; MOVREL-NEXT:    s_mov_b32 s10, s12
; MOVREL-NEXT:    s_mov_b32 s11, s13
; MOVREL-NEXT:    s_mov_b32 s12, s14
; MOVREL-NEXT:    s_mov_b32 s13, s15
; MOVREL-NEXT:    s_mov_b32 s14, s16
; MOVREL-NEXT:    s_mov_b32 s15, s17
; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 m0, s18
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    s_mov_b32 s12, s14
; GFX10-NEXT:    s_mov_b32 s13, s15
; GFX10-NEXT:    s_mov_b32 s14, s16
; GFX10-NEXT:    s_mov_b32 s15, s17
; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_s_s:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_mov_b32 m0, s18
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    s_mov_b32 s8, s10
; GFX11-NEXT:    s_mov_b32 s9, s11
; GFX11-NEXT:    s_mov_b32 s10, s12
; GFX11-NEXT:    s_mov_b32 s11, s13
; GFX11-NEXT:    s_mov_b32 s12, s14
; GFX11-NEXT:    s_mov_b32 s13, s15
; GFX11-NEXT:    s_mov_b32 s14, s16
; GFX11-NEXT:    s_mov_b32 s15, s17
; GFX11-NEXT:    s_movrels_b64 s[0:1], s[0:1]
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, ptr addrspace(1) undef
  ret void
}

; Same operands as dyn_extract_v8f32_s_s, but the index is %sel + 3.
define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_add_i32 s10, s10, 3
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 m0, s10
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_movrels_b32 s0, s3
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s_offset3:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 m0, s10
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    s_movrels_b32 s0, s3
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %add = add i32 %sel, 3
  %ext = extractelement <8 x float> %vec, i32 %add
  ret float %ext
}

define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    v_add_u32_e32 v8, 3, v8
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
;
GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: v_add_u32_e32 v8, vcc, 3, v8 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; MOVREL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_add_nc_u32_e32 v8, 3, v8 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; 
GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 %ext = extractelement <8 x float> %vec, i32 %add ret float %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset1: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset1: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_mov_b32 s15, 
; --- dyn_extract_v8f64_s_s_offset2 (defined further along this physical line) ---
; IR under test: %add = %sel + 2, then extractelement of <8 x double> %vec at
; %add; both %vec and %sel are inreg arguments of an amdgpu_ps function.
; The GCN and GFX10PLUS expectations are the same sequence: s2..s17 are copied
; down to s0..s15, m0 is loaded straight from s18, and s_movrels_b64 reads from
; s[4:5]. No add instruction is expected, so the constant 2 appears only as the
; s_movrels_b64 source pair (the offset1 and offset3 siblings use s[2:3] and
; s[6:7] the same way).
; This line also carries the tail of dyn_extract_v8f64_s_s_offset1 and the head
; of dyn_extract_v8f64_s_s_offset3; both are reproduced unchanged.
; The check lines are autogenerated (utils/update_llc_test_checks.py, per the
; file header): regenerate them instead of editing by hand.
s17 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[2:3] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 1 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset2: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset2: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[4:5] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 2 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset3: ; GCN: ; 
%bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset3: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[6:7] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 3 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset4: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: 
s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset4: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[8:9] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 4 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset5: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[10:11] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset5: ; GFX10PLUS: ; %bb.0: ; %entry ; 
GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[10:11] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 5 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offset6: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 m0, s18 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset6: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 
; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[12:13] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 6 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_nop 0 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] ; MOVREL-NEXT: ; 
return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset7: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17 ; GFX10PLUS-NEXT: s_mov_b32 m0, s18 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[14:15] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 7 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_add_i32 m0, s18, -1 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_mov_b32 s15, s17 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offsetm1: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_add_i32 m0, s18, -1 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: 
s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_mov_b32 s15, s17 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, -1 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: v_add_u32_e32 v16, 3, v16 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
MOVREL-NEXT: v_add_u32_e32 v16, vcc, 3, v16 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; MOVREL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v16, 3, v16 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo ; 
GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 %ext = extractelement <8 x double> %vec, i32 %add ret double %ext } define ptr addrspace(3) @dyn_extract_v8p3_v_v(<8 x ptr addrspace(3)> %vec, i32 %idx) { ; GCN-LABEL: dyn_extract_v8p3_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, 
v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx ret ptr addrspace(3) %ext } define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x ptr addrspace(3)> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p3_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 ; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 ; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 ; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 ; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 ; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 ; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 ; GPRIDX-NEXT: 
s_cselect_b32 s0, s9, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: ds_write_b32 v0, v0 ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8p3_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 m0, s10 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_movrels_b32 s0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: s_mov_b32 m0, -1 ; MOVREL-NEXT: ds_write_b32 v0, v0 ; MOVREL-NEXT: s_endpgm ; ; GFX10-LABEL: dyn_extract_v8p3_s_s: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_mov_b32 s0, s2 ; GFX10-NEXT: s_mov_b32 m0, s10 ; GFX10-NEXT: s_mov_b32 s1, s3 ; GFX10-NEXT: s_mov_b32 s2, s4 ; GFX10-NEXT: s_mov_b32 s3, s5 ; GFX10-NEXT: s_mov_b32 s4, s6 ; GFX10-NEXT: s_mov_b32 s5, s7 ; GFX10-NEXT: s_mov_b32 s6, s8 ; GFX10-NEXT: s_mov_b32 s7, s9 ; GFX10-NEXT: s_movrels_b32 s0, s0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: ds_write_b32 v0, v0 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: dyn_extract_v8p3_s_s: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: s_mov_b32 m0, s10 ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_mov_b32 s2, s4 ; GFX11-NEXT: s_mov_b32 s3, s5 ; GFX11-NEXT: s_mov_b32 s4, s6 ; GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 ; GFX11-NEXT: s_movrels_b32 s0, s0 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: ds_store_b32 v0, v0 ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx store ptr addrspace(3) %ext, ptr addrspace(3) undef ret void } define ptr addrspace(1) @dyn_extract_v8p1_v_v(<8 x ptr addrspace(1)> %vec, i32 %idx) { ; GCN-LABEL: dyn_extract_v8p1_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, 
vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: dyn_extract_v8p1_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo ; 
GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v8p1_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx ret ptr addrspace(1) %ext } define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x ptr addrspace(1)> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p1_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 m0, s18 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 ; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: s_mov_b32 s6, s8 ; GPRIDX-NEXT: s_mov_b32 s7, s9 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s11, s13 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s13, s15 ; GPRIDX-NEXT: s_mov_b32 s14, s16 ; GPRIDX-NEXT: s_mov_b32 s15, s17 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GPRIDX-NEXT: v_mov_b32_e32 
v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v8p1_s_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 m0, s18 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 ; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: s_mov_b32 s6, s8 ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: s_mov_b32 s10, s12 ; MOVREL-NEXT: s_mov_b32 s11, s13 ; MOVREL-NEXT: s_mov_b32 s12, s14 ; MOVREL-NEXT: s_mov_b32 s13, s15 ; MOVREL-NEXT: s_mov_b32 s14, s16 ; MOVREL-NEXT: s_mov_b32 s15, s17 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; MOVREL-NEXT: s_endpgm ; ; GFX10-LABEL: dyn_extract_v8p1_s_s: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_mov_b32 s0, s2 ; GFX10-NEXT: s_mov_b32 s1, s3 ; GFX10-NEXT: s_mov_b32 m0, s18 ; GFX10-NEXT: s_mov_b32 s2, s4 ; GFX10-NEXT: s_mov_b32 s3, s5 ; GFX10-NEXT: s_mov_b32 s4, s6 ; GFX10-NEXT: s_mov_b32 s5, s7 ; GFX10-NEXT: s_mov_b32 s6, s8 ; GFX10-NEXT: s_mov_b32 s7, s9 ; GFX10-NEXT: s_mov_b32 s8, s10 ; GFX10-NEXT: s_mov_b32 s9, s11 ; GFX10-NEXT: s_mov_b32 s10, s12 ; GFX10-NEXT: s_mov_b32 s11, s13 ; GFX10-NEXT: s_mov_b32 s12, s14 ; GFX10-NEXT: s_mov_b32 s13, s15 ; GFX10-NEXT: s_mov_b32 s14, s16 ; GFX10-NEXT: s_mov_b32 s15, s17 ; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: dyn_extract_v8p1_s_s: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: s_mov_b32 s1, s3 ; GFX11-NEXT: s_mov_b32 m0, s18 ; GFX11-NEXT: s_mov_b32 s2, s4 ; GFX11-NEXT: s_mov_b32 s3, s5 ; GFX11-NEXT: s_mov_b32 s4, s6 ; 
GFX11-NEXT: s_mov_b32 s5, s7 ; GFX11-NEXT: s_mov_b32 s6, s8 ; GFX11-NEXT: s_mov_b32 s7, s9 ; GFX11-NEXT: s_mov_b32 s8, s10 ; GFX11-NEXT: s_mov_b32 s9, s11 ; GFX11-NEXT: s_mov_b32 s10, s12 ; GFX11-NEXT: s_mov_b32 s11, s13 ; GFX11-NEXT: s_mov_b32 s12, s14 ; GFX11-NEXT: s_mov_b32 s13, s15 ; GFX11-NEXT: s_mov_b32 s14, s16 ; GFX11-NEXT: s_mov_b32 s15, s17 ; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx store ptr addrspace(1) %ext, ptr addrspace(1) undef ret void } define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v16f32_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v16f32_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 m0, s2 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 ; MOVREL-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v16f32_v_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 m0, s2 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <16 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v32f32_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v32f32_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 m0, s2 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 ; MOVREL-NEXT: ; return 
to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v32f32_v_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <32 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extract of one double from a <16 x double> argument using an
; 'inreg' index. Every expected sequence doubles the index (s_lshl_b32 ..., 1)
; and then performs two indexed 32-bit moves, one per half of the element.
define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT:    v_mov_b32_e32 v32, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
; GPRIDX-NEXT:    s_set_gpr_idx_off
; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v32
; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v16f64_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
; MOVREL-NEXT:    v_movrels_b32_e32 v32, v0
; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
; MOVREL-NEXT:    v_readfirstlane_b32 s0, v32
; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
; MOVREL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v16f64_v_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_lshl_b32 m0, s2, 1
; GFX10PLUS-NEXT:    v_movrels_b32_e32 v32, v0
; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v1
; GFX10PLUS-NEXT:    v_readfirstlane_b32 s0, v32
; GFX10PLUS-NEXT:    v_readfirstlane_b32 s1, v0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <16 x double> %vec, i32 %sel
  ret double %ext
}

; Dynamic extract from a constant <16 x float> vector using an 'inreg' index.
; The constant operand must stay in sync with the immediates in the CHECK
; lines: element i is the float value i+1 (1.0 in s4 ... 16.0 = 0x41800000 in
; s19), selected with s_movrels_b32.
define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v16f32_s_s:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_mov_b32 s4, 1.0
; GCN-NEXT:    s_mov_b32 m0, s2
; GCN-NEXT:    s_mov_b32 s19, 0x41800000
; GCN-NEXT:    s_mov_b32 s18, 0x41700000
; GCN-NEXT:    s_mov_b32 s17, 0x41600000
; GCN-NEXT:    s_mov_b32 s16, 0x41500000
; GCN-NEXT:    s_mov_b32 s15, 0x41400000
; GCN-NEXT:    s_mov_b32 s14, 0x41300000
; GCN-NEXT:    s_mov_b32 s13, 0x41200000
; GCN-NEXT:    s_mov_b32 s12, 0x41100000
; GCN-NEXT:    s_mov_b32 s11, 0x41000000
; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
; GCN-NEXT:    s_mov_b32 s7, 4.0
; GCN-NEXT:    s_mov_b32 s6, 0x40400000
; GCN-NEXT:    s_mov_b32 s5, 2.0
; GCN-NEXT:    s_movrels_b32 s0, s4
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v16f32_s_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s19, 0x41800000
; GFX10PLUS-NEXT:    s_mov_b32 s18, 0x41700000
; GFX10PLUS-NEXT:    s_mov_b32 s17, 0x41600000
; GFX10PLUS-NEXT:    s_mov_b32 s16, 0x41500000
; GFX10PLUS-NEXT:    s_mov_b32 s15, 0x41400000
; GFX10PLUS-NEXT:    s_mov_b32 s14, 0x41300000
; GFX10PLUS-NEXT:    s_mov_b32 s13, 0x41200000
; GFX10PLUS-NEXT:    s_mov_b32 s12, 0x41100000
; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
  ret float %ext
}

; Dynamic extract from a constant <32 x float> vector using an 'inreg' index.
; The constant operand must stay in sync with the immediates in the CHECK
; lines: element i is the float value i+1 (1.0 in s36 ... 32.0 = 0x42000000 in
; s67), selected with s_movrels_b32.
define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v32f32_s_s:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_mov_b32 s36, 1.0
; GCN-NEXT:    s_mov_b32 m0, s2
; GCN-NEXT:    s_mov_b32 s67, 0x42000000
; GCN-NEXT:    s_mov_b32 s66, 0x41f80000
; GCN-NEXT:    s_mov_b32 s65, 0x41f00000
; GCN-NEXT:    s_mov_b32 s64, 0x41e80000
; GCN-NEXT:    s_mov_b32 s63, 0x41e00000
; GCN-NEXT:    s_mov_b32 s62, 0x41d80000
; GCN-NEXT:    s_mov_b32 s61, 0x41d00000
; GCN-NEXT:    s_mov_b32 s60, 0x41c80000
; GCN-NEXT:    s_mov_b32 s59, 0x41c00000
; GCN-NEXT:    s_mov_b32 s58, 0x41b80000
; GCN-NEXT:    s_mov_b32 s57, 0x41b00000
; GCN-NEXT:    s_mov_b32 s56, 0x41a80000
; GCN-NEXT:    s_mov_b32 s55, 0x41a00000
; GCN-NEXT:    s_mov_b32 s54, 0x41980000
; GCN-NEXT:    s_mov_b32 s53, 0x41900000
; GCN-NEXT:    s_mov_b32 s52, 0x41880000
; GCN-NEXT:    s_mov_b32 s51, 0x41800000
; GCN-NEXT:    s_mov_b32 s50, 0x41700000
; GCN-NEXT:    s_mov_b32 s49, 0x41600000
; GCN-NEXT:    s_mov_b32 s48, 0x41500000
; GCN-NEXT:    s_mov_b32 s47, 0x41400000
; GCN-NEXT:    s_mov_b32 s46, 0x41300000
; GCN-NEXT:    s_mov_b32 s45, 0x41200000
; GCN-NEXT:    s_mov_b32 s44, 0x41100000
; GCN-NEXT:    s_mov_b32 s43, 0x41000000
; GCN-NEXT:    s_mov_b32 s42, 0x40e00000
; GCN-NEXT:    s_mov_b32 s41, 0x40c00000
; GCN-NEXT:    s_mov_b32 s40, 0x40a00000
; GCN-NEXT:    s_mov_b32 s39, 4.0
; GCN-NEXT:    s_mov_b32 s38, 0x40400000
; GCN-NEXT:    s_mov_b32 s37, 2.0
; GCN-NEXT:    s_movrels_b32 s0, s36
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v32f32_s_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 s36, 1.0
; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s67, 0x42000000
; GFX10PLUS-NEXT:    s_mov_b32 s66, 0x41f80000
; GFX10PLUS-NEXT:    s_mov_b32 s65, 0x41f00000
; GFX10PLUS-NEXT:    s_mov_b32 s64, 0x41e80000
; GFX10PLUS-NEXT:    s_mov_b32 s63, 0x41e00000
; GFX10PLUS-NEXT:    s_mov_b32 s62, 0x41d80000
; GFX10PLUS-NEXT:    s_mov_b32 s61, 0x41d00000
; GFX10PLUS-NEXT:    s_mov_b32 s60, 0x41c80000
; GFX10PLUS-NEXT:    s_mov_b32 s59, 0x41c00000
; GFX10PLUS-NEXT:    s_mov_b32 s58, 0x41b80000
; GFX10PLUS-NEXT:    s_mov_b32 s57, 0x41b00000
; GFX10PLUS-NEXT:    s_mov_b32 s56, 0x41a80000
; GFX10PLUS-NEXT:    s_mov_b32 s55, 0x41a00000
; GFX10PLUS-NEXT:    s_mov_b32 s54, 0x41980000
; GFX10PLUS-NEXT:    s_mov_b32 s53, 0x41900000
; GFX10PLUS-NEXT:    s_mov_b32 s52, 0x41880000
; GFX10PLUS-NEXT:    s_mov_b32 s51, 0x41800000
; GFX10PLUS-NEXT:    s_mov_b32 s50, 0x41700000
; GFX10PLUS-NEXT:    s_mov_b32 s49, 0x41600000
; GFX10PLUS-NEXT:    s_mov_b32 s48, 0x41500000
; GFX10PLUS-NEXT:    s_mov_b32 s47, 0x41400000
; GFX10PLUS-NEXT:    s_mov_b32 s46, 0x41300000
; GFX10PLUS-NEXT:    s_mov_b32 s45, 0x41200000
; GFX10PLUS-NEXT:    s_mov_b32 s44, 0x41100000
; GFX10PLUS-NEXT:    s_mov_b32 s43, 0x41000000
; GFX10PLUS-NEXT:    s_mov_b32 s42, 0x40e00000
; GFX10PLUS-NEXT:    s_mov_b32 s41, 0x40c00000
; GFX10PLUS-NEXT:    s_mov_b32 s40, 0x40a00000
; GFX10PLUS-NEXT:    s_mov_b32 s39, 4.0
; GFX10PLUS-NEXT:    s_mov_b32 s38, 0x40400000
; GFX10PLUS-NEXT:    s_mov_b32 s37, 2.0
; GFX10PLUS-NEXT:    s_movrels_b32 s0, s36
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
  ret float %ext
}

; Dynamic extract from a constant <16 x double> vector using an 'inreg' index.
; The constant operand must stay in sync with the immediates in the CHECK
; lines: element i is the double value i+1, held in the SGPR pair
; s[36+2i:37+2i] (high word 0x40300000 in s67 is 16.0), selected with
; s_movrels_b64.
define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v16f64_s_s:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_mov_b32 s66, 0
; GCN-NEXT:    s_mov_b32 s64, 0
; GCN-NEXT:    s_mov_b32 s62, 0
; GCN-NEXT:    s_mov_b32 s60, 0
; GCN-NEXT:    s_mov_b32 s58, 0
; GCN-NEXT:    s_mov_b32 s56, 0
; GCN-NEXT:    s_mov_b32 s54, 0
; GCN-NEXT:    s_mov_b32 s52, 0
; GCN-NEXT:    s_mov_b32 s50, 0
; GCN-NEXT:    s_mov_b32 s48, 0
; GCN-NEXT:    s_mov_b32 s46, 0
; GCN-NEXT:    s_mov_b32 s44, 0
; GCN-NEXT:    s_mov_b32 s40, 0
; GCN-NEXT:    s_mov_b64 s[36:37], 1.0
; GCN-NEXT:    s_mov_b32 m0, s2
; GCN-NEXT:    s_mov_b32 s67, 0x40300000
; GCN-NEXT:    s_mov_b32 s65, 0x402e0000
; GCN-NEXT:    s_mov_b32 s63, 0x402c0000
; GCN-NEXT:    s_mov_b32 s61, 0x402a0000
; GCN-NEXT:    s_mov_b32 s59, 0x40280000
; GCN-NEXT:    s_mov_b32 s57, 0x40260000
; GCN-NEXT:    s_mov_b32 s55, 0x40240000
; GCN-NEXT:    s_mov_b32 s53, 0x40220000
; GCN-NEXT:    s_mov_b32 s51, 0x40200000
; GCN-NEXT:    s_mov_b32 s49, 0x401c0000
; GCN-NEXT:    s_mov_b32 s47, 0x40180000
; GCN-NEXT:    s_mov_b32 s45, 0x40140000
; GCN-NEXT:    s_mov_b64 s[42:43], 4.0
; GCN-NEXT:    s_mov_b32 s41, 0x40080000
; GCN-NEXT:    s_mov_b64 s[38:39], 2.0
; GCN-NEXT:    s_movrels_b64 s[0:1], s[36:37]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v16f64_s_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b64 s[36:37], 1.0
; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s66, 0
; GFX10PLUS-NEXT:    s_mov_b32 s64, 0
; GFX10PLUS-NEXT:    s_mov_b32 s62, 0
; GFX10PLUS-NEXT:    s_mov_b32 s60, 0
; GFX10PLUS-NEXT:    s_mov_b32 s58, 0
; GFX10PLUS-NEXT:    s_mov_b32 s56, 0
; GFX10PLUS-NEXT:    s_mov_b32 s54, 0
; GFX10PLUS-NEXT:    s_mov_b32 s52, 0
; GFX10PLUS-NEXT:    s_mov_b32 s50, 0
; GFX10PLUS-NEXT:    s_mov_b32 s48, 0
; GFX10PLUS-NEXT:    s_mov_b32 s46, 0
; GFX10PLUS-NEXT:    s_mov_b32 s44, 0
; GFX10PLUS-NEXT:    s_mov_b32 s40, 0
; GFX10PLUS-NEXT:    s_mov_b32 s67, 0x40300000
; GFX10PLUS-NEXT:    s_mov_b32 s65, 0x402e0000
; GFX10PLUS-NEXT:    s_mov_b32 s63, 0x402c0000
; GFX10PLUS-NEXT:    s_mov_b32 s61, 0x402a0000
; GFX10PLUS-NEXT:    s_mov_b32 s59, 0x40280000
; GFX10PLUS-NEXT:    s_mov_b32 s57, 0x40260000
; GFX10PLUS-NEXT:    s_mov_b32 s55, 0x40240000
; GFX10PLUS-NEXT:    s_mov_b32 s53, 0x40220000
; GFX10PLUS-NEXT:    s_mov_b32 s51, 0x40200000
; GFX10PLUS-NEXT:    s_mov_b32 s49, 0x401c0000
; GFX10PLUS-NEXT:    s_mov_b32 s47, 0x40180000
; GFX10PLUS-NEXT:    s_mov_b32 s45, 0x40140000
; GFX10PLUS-NEXT:    s_mov_b64 s[42:43], 4.0
; GFX10PLUS-NEXT:    s_mov_b32 s41, 0x40080000
; GFX10PLUS-NEXT:    s_mov_b64 s[38:39], 2.0
; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[36:37]
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
  ret double %ext
}

define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v6f32_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    v_mov_b32_e32 v1, s2
; GCN-NEXT:    v_mov_b32_e32 v2, s3
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v3, s4
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v4, s5
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    v_mov_b32_e32 v5, s6
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    v_mov_b32_e32 v6, s7
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v6, vcc
; GCN-NEXT:    ; return to shader part epilog
;
;
GFX10PLUS-LABEL: dyn_extract_v6f32_s_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v6f32_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v6 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v6 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v6 ; 
GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v6f32_v_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v6f32_v_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v6f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_cmp_eq_u32 s8, 1 ; GCN-NEXT: s_cselect_b32 s0, s3, s2 ; GCN-NEXT: s_cmp_eq_u32 s8, 2 ; GCN-NEXT: s_cselect_b32 s0, s4, s0 ; GCN-NEXT: s_cmp_eq_u32 s8, 3 ; GCN-NEXT: s_cselect_b32 s0, s5, s0 ; GCN-NEXT: s_cmp_eq_u32 s8, 4 ; GCN-NEXT: s_cselect_b32 s0, s6, s0 ; GCN-NEXT: s_cmp_eq_u32 s8, 5 ; GCN-NEXT: s_cselect_b32 s0, s7, s0 ; GCN-NEXT: 
v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v6f32_s_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 1 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 2 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 3 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 4 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 5 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f32_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_mov_b32_e32 v7, s8 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v7f32_s_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 
3, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s8, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f32_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v7 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v7 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v7 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v7 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10PLUS-NEXT: 
s_setpc_b64 s[30:31] entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v7f32_v_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v7f32_v_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v7f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_cmp_eq_u32 s9, 1 ; GCN-NEXT: s_cselect_b32 s0, s3, s2 ; GCN-NEXT: s_cmp_eq_u32 s9, 2 ; GCN-NEXT: s_cselect_b32 s0, s4, s0 ; GCN-NEXT: s_cmp_eq_u32 s9, 3 ; GCN-NEXT: s_cselect_b32 s0, s5, s0 ; GCN-NEXT: s_cmp_eq_u32 s9, 
4 ; GCN-NEXT: s_cselect_b32 s0, s6, s0 ; GCN-NEXT: s_cmp_eq_u32 s9, 5 ; GCN-NEXT: s_cselect_b32 s0, s7, s0 ; GCN-NEXT: s_cmp_eq_u32 s9, 6 ; GCN-NEXT: s_cselect_b32 s0, s8, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v7f32_s_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 1 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 2 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 3 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 4 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 5 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 6 ; GFX10PLUS-NEXT: s_cselect_b32 s0, s8, s0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v6f64_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v7, s8 ; GCN-NEXT: v_mov_b32_e32 v8, s9 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v9, s10 ; GCN-NEXT: v_mov_b32_e32 v10, s11 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v11, s12 ; GCN-NEXT: v_mov_b32_e32 v12, s13 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; 
GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc ; GCN-NEXT: v_readfirstlane_b32 s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s1, v1 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: dyn_extract_v6f64_s_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: v_mov_b32_e32 v2, s5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: dyn_extract_v6f64_s_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo ; GFX11-NEXT: 
v_cndmask_b32_e64 v2, v2, s11, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v6f64_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: dyn_extract_v6f64_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo ; GFX10-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 5, v12 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v6f64_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v6f64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v12, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v6f64_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v12, v0 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v12 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 ; MOVREL-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v6f64_v_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v12, v0 ; GFX10PLUS-NEXT: 
v_movrels_b32_e32 v0, v1 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v12 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v6f64_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 m0, s14 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v6f64_s_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 m0, s14 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f64_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; 
GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v7, s8 ; GCN-NEXT: v_mov_b32_e32 v8, s9 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v9, s10 ; GCN-NEXT: v_mov_b32_e32 v10, s11 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v11, s12 ; GCN-NEXT: v_mov_b32_e32 v12, s13 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_mov_b32_e32 v13, s14 ; GCN-NEXT: v_mov_b32_e32 v14, s15 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr2 killed $exec ; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr3 killed $exec ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc ; GCN-NEXT: v_readfirstlane_b32 s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s1, v1 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: dyn_extract_v7f64_s_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: v_mov_b32_e32 v2, s5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: s_mov_b32 s0, s14 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 
vcc_lo, 4, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: dyn_extract_v7f64_s_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: s_mov_b32 s0, s14 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader 
part epilog entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext } define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v7f64_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v14 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: dyn_extract_v7f64_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14 ; 
GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v7f64_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v7f64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v14, v0 ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14 ; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 ; GPRIDX-NEXT: ; return to shader 
part epilog ; ; MOVREL-LABEL: dyn_extract_v7f64_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v14, v0 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v14 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 ; MOVREL-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v7f64_v_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v14, v0 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v14 ; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext } define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v7f64_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 m0, s16 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v7f64_s_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 m0, s16 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 
; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x double> %vec, i32 %sel ret double %ext } define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v5f64_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 ; GPRIDX-NEXT: amd_code_version_minor = 2 ; GPRIDX-NEXT: amd_machine_kind = 1 ; GPRIDX-NEXT: amd_machine_version_major = 9 ; GPRIDX-NEXT: amd_machine_version_minor = 0 ; GPRIDX-NEXT: amd_machine_version_stepping = 0 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 ; GPRIDX-NEXT: priority = 0 ; GPRIDX-NEXT: float_mode = 240 ; GPRIDX-NEXT: priv = 0 ; GPRIDX-NEXT: enable_dx10_clamp = 1 ; GPRIDX-NEXT: debug_mode = 0 ; GPRIDX-NEXT: enable_ieee_mode = 1 ; GPRIDX-NEXT: enable_wgp_mode = 0 ; GPRIDX-NEXT: enable_mem_ordered = 0 ; GPRIDX-NEXT: enable_fwd_progress = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GPRIDX-NEXT: user_sgpr_count = 6 ; GPRIDX-NEXT: enable_trap_handler = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 ; GPRIDX-NEXT: enable_exception_msb = 0 ; GPRIDX-NEXT: granulated_lds_size = 0 ; GPRIDX-NEXT: enable_exception = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; 
GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GPRIDX-NEXT: enable_wavefront_size32 = 0 ; GPRIDX-NEXT: enable_ordered_append_gds = 0 ; GPRIDX-NEXT: private_element_size = 1 ; GPRIDX-NEXT: is_ptr64 = 1 ; GPRIDX-NEXT: is_dynamic_callstack = 0 ; GPRIDX-NEXT: is_debug_enabled = 0 ; GPRIDX-NEXT: is_xnack_enabled = 1 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 ; GPRIDX-NEXT: gds_segment_byte_size = 0 ; GPRIDX-NEXT: kernarg_segment_byte_size = 12 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0 ; GPRIDX-NEXT: wavefront_sgpr_count = 13 ; GPRIDX-NEXT: workitem_vgpr_count = 3 ; GPRIDX-NEXT: reserved_vgpr_first = 0 ; GPRIDX-NEXT: reserved_vgpr_count = 0 ; GPRIDX-NEXT: reserved_sgpr_first = 0 ; GPRIDX-NEXT: reserved_sgpr_count = 0 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 ; GPRIDX-NEXT: kernarg_segment_alignment = 4 ; GPRIDX-NEXT: group_segment_alignment = 4 ; GPRIDX-NEXT: private_segment_alignment = 4 ; GPRIDX-NEXT: wavefront_size = 6 ; GPRIDX-NEXT: call_convention = -1 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8 ; GPRIDX-NEXT: s_mov_b32 s4, 0 ; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000 ; GPRIDX-NEXT: s_mov_b32 s2, 0 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1 ; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 
; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v5f64_s_s: ; MOVREL: .amd_kernel_code_t ; MOVREL-NEXT: amd_code_version_major = 1 ; MOVREL-NEXT: amd_code_version_minor = 2 ; MOVREL-NEXT: amd_machine_kind = 1 ; MOVREL-NEXT: amd_machine_version_major = 8 ; MOVREL-NEXT: amd_machine_version_minor = 0 ; MOVREL-NEXT: amd_machine_version_stepping = 3 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1 ; MOVREL-NEXT: priority = 0 ; MOVREL-NEXT: float_mode = 240 ; MOVREL-NEXT: priv = 0 ; MOVREL-NEXT: enable_dx10_clamp = 1 ; MOVREL-NEXT: debug_mode = 0 ; MOVREL-NEXT: enable_ieee_mode = 1 ; MOVREL-NEXT: enable_wgp_mode = 0 ; MOVREL-NEXT: enable_mem_ordered = 0 ; MOVREL-NEXT: enable_fwd_progress = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; MOVREL-NEXT: user_sgpr_count = 6 ; MOVREL-NEXT: enable_trap_handler = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0 ; MOVREL-NEXT: enable_exception_msb = 0 ; MOVREL-NEXT: granulated_lds_size = 0 ; MOVREL-NEXT: enable_exception = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; MOVREL-NEXT: enable_wavefront_size32 = 0 ; MOVREL-NEXT: enable_ordered_append_gds = 0 ; MOVREL-NEXT: 
private_element_size = 1 ; MOVREL-NEXT: is_ptr64 = 1 ; MOVREL-NEXT: is_dynamic_callstack = 0 ; MOVREL-NEXT: is_debug_enabled = 0 ; MOVREL-NEXT: is_xnack_enabled = 0 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 ; MOVREL-NEXT: gds_segment_byte_size = 0 ; MOVREL-NEXT: kernarg_segment_byte_size = 12 ; MOVREL-NEXT: workgroup_fbarrier_count = 0 ; MOVREL-NEXT: wavefront_sgpr_count = 9 ; MOVREL-NEXT: workitem_vgpr_count = 4 ; MOVREL-NEXT: reserved_vgpr_first = 0 ; MOVREL-NEXT: reserved_vgpr_count = 0 ; MOVREL-NEXT: reserved_sgpr_first = 0 ; MOVREL-NEXT: reserved_sgpr_count = 0 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 ; MOVREL-NEXT: kernarg_segment_alignment = 4 ; MOVREL-NEXT: group_segment_alignment = 4 ; MOVREL-NEXT: private_segment_alignment = 4 ; MOVREL-NEXT: wavefront_size = 6 ; MOVREL-NEXT: call_convention = -1 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; MOVREL-NEXT: s_load_dword s8, s[4:5], 0x8 ; MOVREL-NEXT: s_mov_b32 s4, 0 ; MOVREL-NEXT: s_mov_b32 s5, 0x40080000 ; MOVREL-NEXT: s_mov_b32 s2, 0 ; MOVREL-NEXT: s_mov_b32 s3, 0x40140000 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 ; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 ; MOVREL-NEXT: s_cmp_eq_u32 s8, 2 ; MOVREL-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] ; MOVREL-NEXT: s_cmp_eq_u32 s8, 3 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] ; MOVREL-NEXT: s_cmp_eq_u32 s8, 4 ; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, s2 ; MOVREL-NEXT: v_mov_b32_e32 v3, s1 ; MOVREL-NEXT: v_mov_b32_e32 v1, s3 ; MOVREL-NEXT: v_mov_b32_e32 v2, s0 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; MOVREL-NEXT: s_endpgm ; ; GFX10-LABEL: dyn_extract_v5f64_s_s: ; GFX10: .amd_kernel_code_t ; GFX10-NEXT: 
amd_code_version_major = 1 ; GFX10-NEXT: amd_code_version_minor = 2 ; GFX10-NEXT: amd_machine_kind = 1 ; GFX10-NEXT: amd_machine_version_major = 10 ; GFX10-NEXT: amd_machine_version_minor = 1 ; GFX10-NEXT: amd_machine_version_stepping = 0 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 ; GFX10-NEXT: priority = 0 ; GFX10-NEXT: float_mode = 240 ; GFX10-NEXT: priv = 0 ; GFX10-NEXT: enable_dx10_clamp = 1 ; GFX10-NEXT: debug_mode = 0 ; GFX10-NEXT: enable_ieee_mode = 1 ; GFX10-NEXT: enable_wgp_mode = 1 ; GFX10-NEXT: enable_mem_ordered = 1 ; GFX10-NEXT: enable_fwd_progress = 0 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX10-NEXT: user_sgpr_count = 6 ; GFX10-NEXT: enable_trap_handler = 0 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0 ; GFX10-NEXT: enable_vgpr_workitem_id = 0 ; GFX10-NEXT: enable_exception_msb = 0 ; GFX10-NEXT: granulated_lds_size = 0 ; GFX10-NEXT: enable_exception = 0 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GFX10-NEXT: enable_sgpr_dispatch_id = 0 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GFX10-NEXT: enable_wavefront_size32 = 1 ; GFX10-NEXT: enable_ordered_append_gds = 0 ; GFX10-NEXT: private_element_size = 1 ; GFX10-NEXT: is_ptr64 = 1 ; GFX10-NEXT: is_dynamic_callstack = 0 ; GFX10-NEXT: is_debug_enabled = 0 ; GFX10-NEXT: is_xnack_enabled = 1 ; GFX10-NEXT: workitem_private_segment_byte_size = 0 ; 
GFX10-NEXT: workgroup_group_segment_byte_size = 0 ; GFX10-NEXT: gds_segment_byte_size = 0 ; GFX10-NEXT: kernarg_segment_byte_size = 12 ; GFX10-NEXT: workgroup_fbarrier_count = 0 ; GFX10-NEXT: wavefront_sgpr_count = 7 ; GFX10-NEXT: workitem_vgpr_count = 3 ; GFX10-NEXT: reserved_vgpr_first = 0 ; GFX10-NEXT: reserved_vgpr_count = 0 ; GFX10-NEXT: reserved_sgpr_first = 0 ; GFX10-NEXT: reserved_sgpr_count = 0 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 ; GFX10-NEXT: kernarg_segment_alignment = 4 ; GFX10-NEXT: group_segment_alignment = 4 ; GFX10-NEXT: private_segment_alignment = 4 ; GFX10-NEXT: wavefront_size = 5 ; GFX10-NEXT: call_convention = -1 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0 ; GFX10-NEXT: .end_amd_kernel_code_t ; GFX10-NEXT: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: s_mov_b32 s3, 0x40080000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_eq_u32 s6, 1 ; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; GFX10-NEXT: s_cmp_eq_u32 s6, 2 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; GFX10-NEXT: s_cmp_eq_u32 s6, 3 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_mov_b32 s5, 0x40140000 ; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] ; GFX10-NEXT: s_cmp_eq_u32 s6, 4 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: dyn_extract_v5f64_s_s: ; GFX11: .amd_kernel_code_t ; GFX11-NEXT: amd_code_version_major = 1 ; GFX11-NEXT: amd_code_version_minor = 2 ; GFX11-NEXT: amd_machine_kind = 1 ; GFX11-NEXT: amd_machine_version_major = 11 ; GFX11-NEXT: amd_machine_version_minor = 0 ; GFX11-NEXT: amd_machine_version_stepping = 0 ; GFX11-NEXT: kernel_code_entry_byte_offset 
= 256 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 ; GFX11-NEXT: priority = 0 ; GFX11-NEXT: float_mode = 240 ; GFX11-NEXT: priv = 0 ; GFX11-NEXT: enable_dx10_clamp = 1 ; GFX11-NEXT: debug_mode = 0 ; GFX11-NEXT: enable_ieee_mode = 1 ; GFX11-NEXT: enable_wgp_mode = 1 ; GFX11-NEXT: enable_mem_ordered = 1 ; GFX11-NEXT: enable_fwd_progress = 0 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX11-NEXT: user_sgpr_count = 15 ; GFX11-NEXT: enable_trap_handler = 0 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0 ; GFX11-NEXT: enable_vgpr_workitem_id = 0 ; GFX11-NEXT: enable_exception_msb = 0 ; GFX11-NEXT: granulated_lds_size = 0 ; GFX11-NEXT: enable_exception = 0 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GFX11-NEXT: enable_sgpr_dispatch_id = 0 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GFX11-NEXT: enable_wavefront_size32 = 1 ; GFX11-NEXT: enable_ordered_append_gds = 0 ; GFX11-NEXT: private_element_size = 1 ; GFX11-NEXT: is_ptr64 = 1 ; GFX11-NEXT: is_dynamic_callstack = 0 ; GFX11-NEXT: is_debug_enabled = 0 ; GFX11-NEXT: is_xnack_enabled = 0 ; GFX11-NEXT: workitem_private_segment_byte_size = 0 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0 ; GFX11-NEXT: gds_segment_byte_size = 0 ; GFX11-NEXT: kernarg_segment_byte_size = 12 ; GFX11-NEXT: workgroup_fbarrier_count = 0 ; GFX11-NEXT: wavefront_sgpr_count = 7 ; GFX11-NEXT: workitem_vgpr_count = 3 ; GFX11-NEXT: 
reserved_vgpr_first = 0 ; GFX11-NEXT: reserved_vgpr_count = 0 ; GFX11-NEXT: reserved_sgpr_first = 0 ; GFX11-NEXT: reserved_sgpr_count = 0 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0 ; GFX11-NEXT: kernarg_segment_alignment = 4 ; GFX11-NEXT: group_segment_alignment = 4 ; GFX11-NEXT: private_segment_alignment = 4 ; GFX11-NEXT: wavefront_size = 5 ; GFX11-NEXT: call_convention = -1 ; GFX11-NEXT: runtime_loader_kernel_symbol = 0 ; GFX11-NEXT: .end_amd_kernel_code_t ; GFX11-NEXT: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b32 s6, s[0:1], 0x8 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s2, 0 ; GFX11-NEXT: s_mov_b32 s3, 0x40080000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_cmp_eq_u32 s6, 1 ; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; GFX11-NEXT: s_cmp_eq_u32 s6, 2 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; GFX11-NEXT: s_cmp_eq_u32 s6, 3 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_mov_b32 s5, 0x40140000 ; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] ; GFX11-NEXT: s_cmp_eq_u32 s6, 4 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; The indexed vector is the constants 1.0 through 5.0 (3.0 and 5.0 are the pairs with high dwords 0x40080000 and 0x40140000 in the checks above). entry: %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel store double %ext, ptr addrspace(1) %out ret void } define float @dyn_extract_v15f32_const_s_v(i32 %sel) { ; GCN-LABEL: dyn_extract_v15f32_const_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 ; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 ; 
GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 ; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 ; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 ; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 ; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 ; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 ; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: dyn_extract_v15f32_const_s_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 
4, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v15f32_const_s_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo ; GFX11-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 7, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; The indexed vector is the constants 1.0 through 15.0, matching the select immediates 1.0 up to 0x41700000 checked above. entry: %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v15f32_const_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s4, 1.0 ; GCN-NEXT: s_mov_b32 m0, s2 ; GCN-NEXT: s_mov_b32 s18, 0x41700000 ; GCN-NEXT: s_mov_b32 s17, 0x41600000 ; GCN-NEXT: s_mov_b32 s16, 0x41500000 ; GCN-NEXT: s_mov_b32 s15, 0x41400000 ; GCN-NEXT: s_mov_b32 s14, 0x41300000 ; GCN-NEXT: s_mov_b32 s13, 0x41200000 ; GCN-NEXT: s_mov_b32 s12, 0x41100000 ; GCN-NEXT: s_mov_b32 s11, 0x41000000 ; GCN-NEXT: s_mov_b32 s10, 0x40e00000 ; GCN-NEXT: s_mov_b32 s9, 0x40c00000 ; GCN-NEXT: s_mov_b32 s8, 0x40a00000 ; GCN-NEXT: s_mov_b32 s7, 4.0 ; GCN-NEXT: s_mov_b32 s6, 0x40400000 ; GCN-NEXT: s_mov_b32 s5, 2.0 ; GCN-NEXT: s_movrels_b32 s0, s4 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2 ; 
GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000 ; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000 ; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000 ; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000 ; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000 ; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000 ; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000 ; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000 ; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000 ; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000 ; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000 ; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0 ; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000 ; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s4 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog ; The indexed vector is the constants 1.0 through 15.0, the same values the checks above load into s4 through s18. entry: %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v15f32_s_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_mov_b32_e32 v2, s3 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_mov_b32_e32 v7, s8 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_mov_b32_e32 v8, s9 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: v_mov_b32_e32 v9, s10 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 ; GCN-NEXT: v_mov_b32_e32 v10, s11 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 
; GCN-NEXT: v_mov_b32_e32 v11, s12 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 ; GCN-NEXT: v_mov_b32_e32 v12, s13 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 ; GCN-NEXT: v_mov_b32_e32 v13, s14 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 ; GCN-NEXT: v_mov_b32_e32 v14, s15 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 ; GCN-NEXT: v_mov_b32_e32 v15, s16 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, 
v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s15, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel ret float %ext } define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { ; GCN-LABEL: dyn_extract_v15f32_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 ; GCN-NEXT: v_cndmask_b32_e32 
v0, v0, v14, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: dyn_extract_v15f32_v_v: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v15f32_v_v: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX11-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 2, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <15 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) { ; GPRIDX-LABEL: dyn_extract_v15f32_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 ; GPRIDX-NEXT: s_set_gpr_idx_off ; GPRIDX-NEXT: ; return to shader part epilog ; ; MOVREL-LABEL: dyn_extract_v15f32_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 m0, s2 ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 ; MOVREL-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: 
dyn_extract_v15f32_v_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 m0, s2 ; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v15f32_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 m0, s17 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_movrels_b32 s0, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 m0, s17 ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s0 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel ret float %ext } define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v15f32_s_s_offset3: ; GCN: ; 
%bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 m0, s17 ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s4, s6 ; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: s_mov_b32 s6, s8 ; GCN-NEXT: s_mov_b32 s7, s9 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: s_mov_b32 s10, s12 ; GCN-NEXT: s_mov_b32 s11, s13 ; GCN-NEXT: s_mov_b32 s12, s14 ; GCN-NEXT: s_mov_b32 s13, s15 ; GCN-NEXT: s_mov_b32 s14, s16 ; GCN-NEXT: s_movrels_b32 s0, s3 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s_offset3: ; GFX10PLUS: ; %bb.0: ; %entry ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: s_mov_b32 m0, s17 ; GFX10PLUS-NEXT: s_mov_b32 s0, s2 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s4, s6 ; GFX10PLUS-NEXT: s_mov_b32 s5, s7 ; GFX10PLUS-NEXT: s_mov_b32 s6, s8 ; GFX10PLUS-NEXT: s_mov_b32 s7, s9 ; GFX10PLUS-NEXT: s_mov_b32 s8, s10 ; GFX10PLUS-NEXT: s_mov_b32 s9, s11 ; GFX10PLUS-NEXT: s_mov_b32 s10, s12 ; GFX10PLUS-NEXT: s_mov_b32 s11, s13 ; GFX10PLUS-NEXT: s_mov_b32 s12, s14 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 ; GFX10PLUS-NEXT: s_mov_b32 s14, s16 ; GFX10PLUS-NEXT: s_movrels_b32 s0, s3 ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %add = add i32 %sel, 3 %ext = extractelement <15 x float> %vec, i32 %add ret float %ext } define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: v_add_u32_e32 v15, 3, v15 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, 
v0, v3, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15 ; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; ; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: v_add_u32_e32 v15, vcc, 3, v15 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, 
v7, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc ; MOVREL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 ; 
GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 ; GFX11-NEXT: 
v_cndmask_b32_e32 v0, v0, v14, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 %ext = extractelement <15 x float> %vec, i32 %add ret float %ext } define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 ; GPRIDX-NEXT: amd_code_version_minor = 2 ; GPRIDX-NEXT: amd_machine_kind = 1 ; GPRIDX-NEXT: amd_machine_version_major = 9 ; GPRIDX-NEXT: amd_machine_version_minor = 0 ; GPRIDX-NEXT: amd_machine_version_stepping = 0 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 ; GPRIDX-NEXT: priority = 0 ; GPRIDX-NEXT: float_mode = 240 ; GPRIDX-NEXT: priv = 0 ; GPRIDX-NEXT: enable_dx10_clamp = 1 ; GPRIDX-NEXT: debug_mode = 0 ; GPRIDX-NEXT: enable_ieee_mode = 1 ; GPRIDX-NEXT: enable_wgp_mode = 0 ; GPRIDX-NEXT: enable_mem_ordered = 0 ; GPRIDX-NEXT: enable_fwd_progress = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GPRIDX-NEXT: user_sgpr_count = 6 ; GPRIDX-NEXT: enable_trap_handler = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 ; GPRIDX-NEXT: enable_exception_msb = 0 ; GPRIDX-NEXT: granulated_lds_size = 0 ; GPRIDX-NEXT: enable_exception = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 ; 
GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GPRIDX-NEXT: enable_wavefront_size32 = 0 ; GPRIDX-NEXT: enable_ordered_append_gds = 0 ; GPRIDX-NEXT: private_element_size = 1 ; GPRIDX-NEXT: is_ptr64 = 1 ; GPRIDX-NEXT: is_dynamic_callstack = 0 ; GPRIDX-NEXT: is_debug_enabled = 0 ; GPRIDX-NEXT: is_xnack_enabled = 1 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 ; GPRIDX-NEXT: gds_segment_byte_size = 0 ; GPRIDX-NEXT: kernarg_segment_byte_size = 12 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0 ; GPRIDX-NEXT: wavefront_sgpr_count = 10 ; GPRIDX-NEXT: workitem_vgpr_count = 2 ; GPRIDX-NEXT: reserved_vgpr_first = 0 ; GPRIDX-NEXT: reserved_vgpr_count = 0 ; GPRIDX-NEXT: reserved_sgpr_first = 0 ; GPRIDX-NEXT: reserved_sgpr_count = 0 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 ; GPRIDX-NEXT: kernarg_segment_alignment = 4 ; GPRIDX-NEXT: group_segment_alignment = 4 ; GPRIDX-NEXT: private_segment_alignment = 4 ; GPRIDX-NEXT: wavefront_size = 6 ; GPRIDX-NEXT: call_convention = -1 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_load_dword s2, s[4:5], 0x8 ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, 0 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 ; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 ; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3 ; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 ; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 ; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v4f32_s_s_s: ; MOVREL: .amd_kernel_code_t ; MOVREL-NEXT: amd_code_version_major = 1 ; MOVREL-NEXT: amd_code_version_minor 
= 2 ; MOVREL-NEXT: amd_machine_kind = 1 ; MOVREL-NEXT: amd_machine_version_major = 8 ; MOVREL-NEXT: amd_machine_version_minor = 0 ; MOVREL-NEXT: amd_machine_version_stepping = 3 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 ; MOVREL-NEXT: priority = 0 ; MOVREL-NEXT: float_mode = 240 ; MOVREL-NEXT: priv = 0 ; MOVREL-NEXT: enable_dx10_clamp = 1 ; MOVREL-NEXT: debug_mode = 0 ; MOVREL-NEXT: enable_ieee_mode = 1 ; MOVREL-NEXT: enable_wgp_mode = 0 ; MOVREL-NEXT: enable_mem_ordered = 0 ; MOVREL-NEXT: enable_fwd_progress = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; MOVREL-NEXT: user_sgpr_count = 6 ; MOVREL-NEXT: enable_trap_handler = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0 ; MOVREL-NEXT: enable_exception_msb = 0 ; MOVREL-NEXT: granulated_lds_size = 0 ; MOVREL-NEXT: enable_exception = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; MOVREL-NEXT: enable_wavefront_size32 = 0 ; MOVREL-NEXT: enable_ordered_append_gds = 0 ; MOVREL-NEXT: private_element_size = 1 ; MOVREL-NEXT: is_ptr64 = 1 ; MOVREL-NEXT: is_dynamic_callstack = 0 ; MOVREL-NEXT: is_debug_enabled = 0 ; MOVREL-NEXT: is_xnack_enabled = 0 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0 ; MOVREL-NEXT: 
workgroup_group_segment_byte_size = 0 ; MOVREL-NEXT: gds_segment_byte_size = 0 ; MOVREL-NEXT: kernarg_segment_byte_size = 12 ; MOVREL-NEXT: workgroup_fbarrier_count = 0 ; MOVREL-NEXT: wavefront_sgpr_count = 6 ; MOVREL-NEXT: workitem_vgpr_count = 3 ; MOVREL-NEXT: reserved_vgpr_first = 0 ; MOVREL-NEXT: reserved_vgpr_count = 0 ; MOVREL-NEXT: reserved_sgpr_first = 0 ; MOVREL-NEXT: reserved_sgpr_count = 0 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 ; MOVREL-NEXT: kernarg_segment_alignment = 4 ; MOVREL-NEXT: group_segment_alignment = 4 ; MOVREL-NEXT: private_segment_alignment = 4 ; MOVREL-NEXT: wavefront_size = 6 ; MOVREL-NEXT: call_convention = -1 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry ; MOVREL-NEXT: s_load_dword s2, s[4:5], 0x8 ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; MOVREL-NEXT: s_cmp_eq_u32 s2, 1 ; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 2 ; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3 ; MOVREL-NEXT: s_cmp_eq_u32 s2, 3 ; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v2, s2 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 ; MOVREL-NEXT: flat_store_dword v[0:1], v2 ; MOVREL-NEXT: s_endpgm ; ; GFX10-LABEL: dyn_extract_v4f32_s_s_s: ; GFX10: .amd_kernel_code_t ; GFX10-NEXT: amd_code_version_major = 1 ; GFX10-NEXT: amd_code_version_minor = 2 ; GFX10-NEXT: amd_machine_kind = 1 ; GFX10-NEXT: amd_machine_version_major = 10 ; GFX10-NEXT: amd_machine_version_minor = 1 ; GFX10-NEXT: amd_machine_version_stepping = 0 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 ; GFX10-NEXT: priority = 0 ; GFX10-NEXT: float_mode = 240 ; GFX10-NEXT: priv = 0 ; GFX10-NEXT: 
enable_dx10_clamp = 1 ; GFX10-NEXT: debug_mode = 0 ; GFX10-NEXT: enable_ieee_mode = 1 ; GFX10-NEXT: enable_wgp_mode = 1 ; GFX10-NEXT: enable_mem_ordered = 1 ; GFX10-NEXT: enable_fwd_progress = 0 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX10-NEXT: user_sgpr_count = 6 ; GFX10-NEXT: enable_trap_handler = 0 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0 ; GFX10-NEXT: enable_vgpr_workitem_id = 0 ; GFX10-NEXT: enable_exception_msb = 0 ; GFX10-NEXT: granulated_lds_size = 0 ; GFX10-NEXT: enable_exception = 0 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GFX10-NEXT: enable_sgpr_dispatch_id = 0 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GFX10-NEXT: enable_wavefront_size32 = 1 ; GFX10-NEXT: enable_ordered_append_gds = 0 ; GFX10-NEXT: private_element_size = 1 ; GFX10-NEXT: is_ptr64 = 1 ; GFX10-NEXT: is_dynamic_callstack = 0 ; GFX10-NEXT: is_debug_enabled = 0 ; GFX10-NEXT: is_xnack_enabled = 1 ; GFX10-NEXT: workitem_private_segment_byte_size = 0 ; GFX10-NEXT: workgroup_group_segment_byte_size = 0 ; GFX10-NEXT: gds_segment_byte_size = 0 ; GFX10-NEXT: kernarg_segment_byte_size = 12 ; GFX10-NEXT: workgroup_fbarrier_count = 0 ; GFX10-NEXT: wavefront_sgpr_count = 6 ; GFX10-NEXT: workitem_vgpr_count = 2 ; GFX10-NEXT: reserved_vgpr_first = 0 ; GFX10-NEXT: reserved_vgpr_count = 0 ; GFX10-NEXT: reserved_sgpr_first = 0 ; GFX10-NEXT: reserved_sgpr_count = 0 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 ; 
GFX10-NEXT: kernarg_segment_alignment = 4 ; GFX10-NEXT: group_segment_alignment = 4 ; GFX10-NEXT: private_segment_alignment = 4 ; GFX10-NEXT: wavefront_size = 5 ; GFX10-NEXT: call_convention = -1 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0 ; GFX10-NEXT: .end_amd_kernel_code_t ; GFX10-NEXT: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_eq_u32 s2, 1 ; GFX10-NEXT: s_cselect_b32 s3, 2.0, 1.0 ; GFX10-NEXT: s_cmp_eq_u32 s2, 2 ; GFX10-NEXT: s_cselect_b32 s3, 0x40400000, s3 ; GFX10-NEXT: s_cmp_eq_u32 s2, 3 ; GFX10-NEXT: s_cselect_b32 s2, 4.0, s3 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: dyn_extract_v4f32_s_s_s: ; GFX11: .amd_kernel_code_t ; GFX11-NEXT: amd_code_version_major = 1 ; GFX11-NEXT: amd_code_version_minor = 2 ; GFX11-NEXT: amd_machine_kind = 1 ; GFX11-NEXT: amd_machine_version_major = 11 ; GFX11-NEXT: amd_machine_version_minor = 0 ; GFX11-NEXT: amd_machine_version_stepping = 0 ; GFX11-NEXT: kernel_code_entry_byte_offset = 256 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 ; GFX11-NEXT: priority = 0 ; GFX11-NEXT: float_mode = 240 ; GFX11-NEXT: priv = 0 ; GFX11-NEXT: enable_dx10_clamp = 1 ; GFX11-NEXT: debug_mode = 0 ; GFX11-NEXT: enable_ieee_mode = 1 ; GFX11-NEXT: enable_wgp_mode = 1 ; GFX11-NEXT: enable_mem_ordered = 1 ; GFX11-NEXT: enable_fwd_progress = 0 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX11-NEXT: user_sgpr_count = 15 ; GFX11-NEXT: enable_trap_handler = 0 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0 ; GFX11-NEXT: enable_vgpr_workitem_id = 
0 ; GFX11-NEXT: enable_exception_msb = 0 ; GFX11-NEXT: granulated_lds_size = 0 ; GFX11-NEXT: enable_exception = 0 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GFX11-NEXT: enable_sgpr_dispatch_id = 0 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GFX11-NEXT: enable_wavefront_size32 = 1 ; GFX11-NEXT: enable_ordered_append_gds = 0 ; GFX11-NEXT: private_element_size = 1 ; GFX11-NEXT: is_ptr64 = 1 ; GFX11-NEXT: is_dynamic_callstack = 0 ; GFX11-NEXT: is_debug_enabled = 0 ; GFX11-NEXT: is_xnack_enabled = 0 ; GFX11-NEXT: workitem_private_segment_byte_size = 0 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0 ; GFX11-NEXT: gds_segment_byte_size = 0 ; GFX11-NEXT: kernarg_segment_byte_size = 12 ; GFX11-NEXT: workgroup_fbarrier_count = 0 ; GFX11-NEXT: wavefront_sgpr_count = 4 ; GFX11-NEXT: workitem_vgpr_count = 2 ; GFX11-NEXT: reserved_vgpr_first = 0 ; GFX11-NEXT: reserved_vgpr_count = 0 ; GFX11-NEXT: reserved_sgpr_first = 0 ; GFX11-NEXT: reserved_sgpr_count = 0 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0 ; GFX11-NEXT: kernarg_segment_alignment = 4 ; GFX11-NEXT: group_segment_alignment = 4 ; GFX11-NEXT: private_segment_alignment = 4 ; GFX11-NEXT: wavefront_size = 5 ; GFX11-NEXT: call_convention = -1 ; GFX11-NEXT: runtime_loader_kernel_symbol = 0 ; GFX11-NEXT: .end_amd_kernel_code_t ; GFX11-NEXT: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_cmp_eq_u32 s2, 1 ; GFX11-NEXT: s_cselect_b32 s3, 
2.0, 1.0 ; GFX11-NEXT: s_cmp_eq_u32 s2, 2 ; GFX11-NEXT: s_cselect_b32 s3, 0x40400000, s3 ; GFX11-NEXT: s_cmp_eq_u32 s2, 3 ; GFX11-NEXT: s_cselect_b32 s2, 4.0, s3 ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; Dynamic index into the constant vector 1.0, 2.0, 3.0, 4.0; the checks above select 2.0, 0x40400000 (3.0) and 4.0 for indices 1, 2 and 3, with 1.0 as the fallthrough. entry: %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel store float %ext, ptr addrspace(1) %out ret void } define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 ; GPRIDX-NEXT: amd_code_version_minor = 2 ; GPRIDX-NEXT: amd_machine_kind = 1 ; GPRIDX-NEXT: amd_machine_version_major = 9 ; GPRIDX-NEXT: amd_machine_version_minor = 0 ; GPRIDX-NEXT: amd_machine_version_stepping = 0 ; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 ; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 ; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 ; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 ; GPRIDX-NEXT: priority = 0 ; GPRIDX-NEXT: float_mode = 240 ; GPRIDX-NEXT: priv = 0 ; GPRIDX-NEXT: enable_dx10_clamp = 1 ; GPRIDX-NEXT: debug_mode = 0 ; GPRIDX-NEXT: enable_ieee_mode = 1 ; GPRIDX-NEXT: enable_wgp_mode = 0 ; GPRIDX-NEXT: enable_mem_ordered = 0 ; GPRIDX-NEXT: enable_fwd_progress = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GPRIDX-NEXT: user_sgpr_count = 6 ; GPRIDX-NEXT: enable_trap_handler = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 0 ; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 ; GPRIDX-NEXT: enable_vgpr_workitem_id = 0 ; GPRIDX-NEXT: enable_exception_msb = 0 ; GPRIDX-NEXT: granulated_lds_size = 0 ; GPRIDX-NEXT: enable_exception = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 0 ; GPRIDX-NEXT: enable_sgpr_queue_ptr = 0 ; GPRIDX-NEXT: 
enable_sgpr_kernarg_segment_ptr = 1 ; GPRIDX-NEXT: enable_sgpr_dispatch_id = 0 ; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 ; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GPRIDX-NEXT: enable_wavefront_size32 = 0 ; GPRIDX-NEXT: enable_ordered_append_gds = 0 ; GPRIDX-NEXT: private_element_size = 1 ; GPRIDX-NEXT: is_ptr64 = 1 ; GPRIDX-NEXT: is_dynamic_callstack = 0 ; GPRIDX-NEXT: is_debug_enabled = 0 ; GPRIDX-NEXT: is_xnack_enabled = 1 ; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 ; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 ; GPRIDX-NEXT: gds_segment_byte_size = 0 ; GPRIDX-NEXT: kernarg_segment_byte_size = 12 ; GPRIDX-NEXT: workgroup_fbarrier_count = 0 ; GPRIDX-NEXT: wavefront_sgpr_count = 11 ; GPRIDX-NEXT: workitem_vgpr_count = 3 ; GPRIDX-NEXT: reserved_vgpr_first = 0 ; GPRIDX-NEXT: reserved_vgpr_count = 0 ; GPRIDX-NEXT: reserved_sgpr_first = 0 ; GPRIDX-NEXT: reserved_sgpr_count = 0 ; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 ; GPRIDX-NEXT: kernarg_segment_alignment = 4 ; GPRIDX-NEXT: group_segment_alignment = 4 ; GPRIDX-NEXT: private_segment_alignment = 4 ; GPRIDX-NEXT: wavefront_size = 6 ; GPRIDX-NEXT: call_convention = -1 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_load_dword s6, s[4:5], 0x8 ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GPRIDX-NEXT: s_mov_b32 s2, 0 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3 ; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, 
s[2:3] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 ; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v4f64_s_s_s: ; MOVREL: .amd_kernel_code_t ; MOVREL-NEXT: amd_code_version_major = 1 ; MOVREL-NEXT: amd_code_version_minor = 2 ; MOVREL-NEXT: amd_machine_kind = 1 ; MOVREL-NEXT: amd_machine_version_major = 8 ; MOVREL-NEXT: amd_machine_version_minor = 0 ; MOVREL-NEXT: amd_machine_version_stepping = 3 ; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 ; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 ; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 ; MOVREL-NEXT: granulated_wavefront_sgpr_count = 0 ; MOVREL-NEXT: priority = 0 ; MOVREL-NEXT: float_mode = 240 ; MOVREL-NEXT: priv = 0 ; MOVREL-NEXT: enable_dx10_clamp = 1 ; MOVREL-NEXT: debug_mode = 0 ; MOVREL-NEXT: enable_ieee_mode = 1 ; MOVREL-NEXT: enable_wgp_mode = 0 ; MOVREL-NEXT: enable_mem_ordered = 0 ; MOVREL-NEXT: enable_fwd_progress = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; MOVREL-NEXT: user_sgpr_count = 6 ; MOVREL-NEXT: enable_trap_handler = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 ; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 0 ; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 ; MOVREL-NEXT: enable_vgpr_workitem_id = 0 ; MOVREL-NEXT: enable_exception_msb = 0 ; MOVREL-NEXT: granulated_lds_size = 0 ; MOVREL-NEXT: enable_exception = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 0 ; MOVREL-NEXT: enable_sgpr_queue_ptr = 0 ; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; MOVREL-NEXT: enable_sgpr_dispatch_id = 0 ; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 ; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; MOVREL-NEXT: 
enable_wavefront_size32 = 0 ; MOVREL-NEXT: enable_ordered_append_gds = 0 ; MOVREL-NEXT: private_element_size = 1 ; MOVREL-NEXT: is_ptr64 = 1 ; MOVREL-NEXT: is_dynamic_callstack = 0 ; MOVREL-NEXT: is_debug_enabled = 0 ; MOVREL-NEXT: is_xnack_enabled = 0 ; MOVREL-NEXT: workitem_private_segment_byte_size = 0 ; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 ; MOVREL-NEXT: gds_segment_byte_size = 0 ; MOVREL-NEXT: kernarg_segment_byte_size = 12 ; MOVREL-NEXT: workgroup_fbarrier_count = 0 ; MOVREL-NEXT: wavefront_sgpr_count = 7 ; MOVREL-NEXT: workitem_vgpr_count = 4 ; MOVREL-NEXT: reserved_vgpr_first = 0 ; MOVREL-NEXT: reserved_vgpr_count = 0 ; MOVREL-NEXT: reserved_sgpr_first = 0 ; MOVREL-NEXT: reserved_sgpr_count = 0 ; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 ; MOVREL-NEXT: kernarg_segment_alignment = 4 ; MOVREL-NEXT: group_segment_alignment = 4 ; MOVREL-NEXT: private_segment_alignment = 4 ; MOVREL-NEXT: wavefront_size = 6 ; MOVREL-NEXT: call_convention = -1 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry ; MOVREL-NEXT: s_load_dword s6, s[4:5], 0x8 ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; MOVREL-NEXT: s_mov_b32 s2, 0 ; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; MOVREL-NEXT: s_cmp_eq_u32 s6, 1 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; MOVREL-NEXT: s_cmp_eq_u32 s6, 2 ; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; MOVREL-NEXT: s_cmp_eq_u32 s6, 3 ; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] ; MOVREL-NEXT: v_mov_b32_e32 v0, s2 ; MOVREL-NEXT: v_mov_b32_e32 v3, s1 ; MOVREL-NEXT: v_mov_b32_e32 v1, s3 ; MOVREL-NEXT: v_mov_b32_e32 v2, s0 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; MOVREL-NEXT: s_endpgm ; ; GFX10-LABEL: dyn_extract_v4f64_s_s_s: ; GFX10: .amd_kernel_code_t ; GFX10-NEXT: amd_code_version_major = 1 ; GFX10-NEXT: amd_code_version_minor = 2 ; GFX10-NEXT: 
amd_machine_kind = 1 ; GFX10-NEXT: amd_machine_version_major = 10 ; GFX10-NEXT: amd_machine_version_minor = 1 ; GFX10-NEXT: amd_machine_version_stepping = 0 ; GFX10-NEXT: kernel_code_entry_byte_offset = 256 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0 ; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 ; GFX10-NEXT: priority = 0 ; GFX10-NEXT: float_mode = 240 ; GFX10-NEXT: priv = 0 ; GFX10-NEXT: enable_dx10_clamp = 1 ; GFX10-NEXT: debug_mode = 0 ; GFX10-NEXT: enable_ieee_mode = 1 ; GFX10-NEXT: enable_wgp_mode = 1 ; GFX10-NEXT: enable_mem_ordered = 1 ; GFX10-NEXT: enable_fwd_progress = 0 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX10-NEXT: user_sgpr_count = 6 ; GFX10-NEXT: enable_trap_handler = 0 ; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 ; GFX10-NEXT: enable_sgpr_workgroup_id_y = 0 ; GFX10-NEXT: enable_sgpr_workgroup_id_z = 0 ; GFX10-NEXT: enable_sgpr_workgroup_info = 0 ; GFX10-NEXT: enable_vgpr_workitem_id = 0 ; GFX10-NEXT: enable_exception_msb = 0 ; GFX10-NEXT: granulated_lds_size = 0 ; GFX10-NEXT: enable_exception = 0 ; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 ; GFX10-NEXT: enable_sgpr_dispatch_ptr = 0 ; GFX10-NEXT: enable_sgpr_queue_ptr = 0 ; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GFX10-NEXT: enable_sgpr_dispatch_id = 0 ; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 ; GFX10-NEXT: enable_sgpr_private_segment_size = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GFX10-NEXT: enable_wavefront_size32 = 1 ; GFX10-NEXT: enable_ordered_append_gds = 0 ; GFX10-NEXT: private_element_size = 1 ; GFX10-NEXT: is_ptr64 = 1 ; GFX10-NEXT: is_dynamic_callstack = 0 ; GFX10-NEXT: is_debug_enabled = 0 ; GFX10-NEXT: is_xnack_enabled = 1 ; GFX10-NEXT: workitem_private_segment_byte_size = 0 ; GFX10-NEXT: workgroup_group_segment_byte_size = 0 ; GFX10-NEXT: 
gds_segment_byte_size = 0 ; GFX10-NEXT: kernarg_segment_byte_size = 12 ; GFX10-NEXT: workgroup_fbarrier_count = 0 ; GFX10-NEXT: wavefront_sgpr_count = 7 ; GFX10-NEXT: workitem_vgpr_count = 3 ; GFX10-NEXT: reserved_vgpr_first = 0 ; GFX10-NEXT: reserved_vgpr_count = 0 ; GFX10-NEXT: reserved_sgpr_first = 0 ; GFX10-NEXT: reserved_sgpr_count = 0 ; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 ; GFX10-NEXT: kernarg_segment_alignment = 4 ; GFX10-NEXT: group_segment_alignment = 4 ; GFX10-NEXT: private_segment_alignment = 4 ; GFX10-NEXT: wavefront_size = 5 ; GFX10-NEXT: call_convention = -1 ; GFX10-NEXT: runtime_loader_kernel_symbol = 0 ; GFX10-NEXT: .end_amd_kernel_code_t ; GFX10-NEXT: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: s_mov_b32 s3, 0x40080000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_eq_u32 s6, 1 ; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; GFX10-NEXT: s_cmp_eq_u32 s6, 2 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; GFX10-NEXT: s_cmp_eq_u32 s6, 3 ; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: dyn_extract_v4f64_s_s_s: ; GFX11: .amd_kernel_code_t ; GFX11-NEXT: amd_code_version_major = 1 ; GFX11-NEXT: amd_code_version_minor = 2 ; GFX11-NEXT: amd_machine_kind = 1 ; GFX11-NEXT: amd_machine_version_major = 11 ; GFX11-NEXT: amd_machine_version_minor = 0 ; GFX11-NEXT: amd_machine_version_stepping = 0 ; GFX11-NEXT: kernel_code_entry_byte_offset = 256 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0 ; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 ; GFX11-NEXT: priority = 0 ; GFX11-NEXT: float_mode = 240 ; 
GFX11-NEXT: priv = 0 ; GFX11-NEXT: enable_dx10_clamp = 1 ; GFX11-NEXT: debug_mode = 0 ; GFX11-NEXT: enable_ieee_mode = 1 ; GFX11-NEXT: enable_wgp_mode = 1 ; GFX11-NEXT: enable_mem_ordered = 1 ; GFX11-NEXT: enable_fwd_progress = 0 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX11-NEXT: user_sgpr_count = 15 ; GFX11-NEXT: enable_trap_handler = 0 ; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 ; GFX11-NEXT: enable_sgpr_workgroup_id_y = 0 ; GFX11-NEXT: enable_sgpr_workgroup_id_z = 0 ; GFX11-NEXT: enable_sgpr_workgroup_info = 0 ; GFX11-NEXT: enable_vgpr_workitem_id = 0 ; GFX11-NEXT: enable_exception_msb = 0 ; GFX11-NEXT: granulated_lds_size = 0 ; GFX11-NEXT: enable_exception = 0 ; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 ; GFX11-NEXT: enable_sgpr_dispatch_ptr = 0 ; GFX11-NEXT: enable_sgpr_queue_ptr = 0 ; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 ; GFX11-NEXT: enable_sgpr_dispatch_id = 0 ; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 ; GFX11-NEXT: enable_sgpr_private_segment_size = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 ; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 ; GFX11-NEXT: enable_wavefront_size32 = 1 ; GFX11-NEXT: enable_ordered_append_gds = 0 ; GFX11-NEXT: private_element_size = 1 ; GFX11-NEXT: is_ptr64 = 1 ; GFX11-NEXT: is_dynamic_callstack = 0 ; GFX11-NEXT: is_debug_enabled = 0 ; GFX11-NEXT: is_xnack_enabled = 0 ; GFX11-NEXT: workitem_private_segment_byte_size = 0 ; GFX11-NEXT: workgroup_group_segment_byte_size = 0 ; GFX11-NEXT: gds_segment_byte_size = 0 ; GFX11-NEXT: kernarg_segment_byte_size = 12 ; GFX11-NEXT: workgroup_fbarrier_count = 0 ; GFX11-NEXT: wavefront_sgpr_count = 7 ; GFX11-NEXT: workitem_vgpr_count = 3 ; GFX11-NEXT: reserved_vgpr_first = 0 ; GFX11-NEXT: reserved_vgpr_count = 0 ; GFX11-NEXT: reserved_sgpr_first = 0 ; GFX11-NEXT: reserved_sgpr_count = 0 ; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 ; GFX11-NEXT: 
debug_private_segment_buffer_sgpr = 0
; GFX11-NEXT:     kernarg_segment_alignment = 4
; GFX11-NEXT:     group_segment_alignment = 4
; GFX11-NEXT:     private_segment_alignment = 4
; GFX11-NEXT:     wavefront_size = 5
; GFX11-NEXT:     call_convention = -1
; GFX11-NEXT:     runtime_loader_kernel_symbol = 0
; GFX11-NEXT:    .end_amd_kernel_code_t
; GFX11-NEXT:  ; %bb.0: ; %entry
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s6, s[0:1], 0x8
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s2, 0
; GFX11-NEXT:    s_mov_b32 s3, 0x40080000
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_cmp_eq_u32 s6, 1
; GFX11-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
; GFX11-NEXT:    s_cmp_eq_u32 s6, 2
; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX11-NEXT:    s_cmp_eq_u32 s6, 3
; GFX11-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
entry:
  ; Constant operand restored to match the checked selects: 1.0, 2.0,
  ; 3.0 (high dword 0x40080000) and 4.0.
  %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}

; Constant-index extract of element 7 from a <64 x i32> loaded through %ptr.
; The checks expect a single dword load at byte offset 28 (7 * 4) instead of
; a load of the whole vector.
define i32 @v_extract_v64i32_7(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_7:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:28
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_7:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 28, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_7:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:28
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_7:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:28
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 7
  ret i32 %elt
}

; Constant-index extract of element 32 from a <64 x i32> loaded through %ptr.
; The checks expect a single dword load at byte offset 128 (0x80 = 32 * 4).
define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_32:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:128
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_32:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x80, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:128
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 32
  ret i32 %elt
}

; Constant-index extract of element 33 from a <64 x i32> loaded through %ptr.
; The checks expect a single dword load at byte offset 132 (0x84 = 33 * 4).
define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_33:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:132
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_33:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x84, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:132
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:132
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 33
  ret i32 %elt
}

; Constant-index extract of element 37 from a <64 x i32> loaded through %ptr.
; The checks expect a single dword load at byte offset 148 (0x94 = 37 * 4).
define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_37:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:148
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_37:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x94, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_37:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:148
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_37:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:148
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 37
  ret i32 %elt
}