; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s define i8 @v_sext_inreg_i8_4(i8 %value) { ; GCN-LABEL: v_sext_inreg_i8_4: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i8_4: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 4 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i8 %value, 4 %ashr = ashr i8 %shl, 4 ret i8 %ashr } define i8 @v_sext_inreg_i8_7(i8 %value) { ; GCN-LABEL: v_sext_inreg_i8_7: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i8_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i8 %value, 7 %ashr = ashr i8 %shl, 7 ret i8 %ashr } define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) { ; GFX6-LABEL: s_sext_inreg_i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x50000 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_sext_inreg_i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshl_b32 s0, s0, 3 ; GFX8-NEXT: s_sext_i32_i8 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 3 ; GFX8-NEXT: ; return to shader part epilog ; ; 
GFX9-LABEL: s_sext_inreg_i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 3 ; GFX9-NEXT: s_sext_i32_i8 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 3 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 3 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i8 %value, 3 %ashr = ashr i8 %shl, 3 ret i8 %ashr } define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) { ; GFX6-LABEL: s_sext_inreg_i8_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_sext_inreg_i8_6: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshl_b32 s0, s0, 6 ; GFX8-NEXT: s_sext_i32_i8 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 6 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_sext_inreg_i8_6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 6 ; GFX9-NEXT: s_sext_i32_i8 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 6 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i8_6: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 6 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 6 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i8 %value, 6 %ashr = ashr i8 %shl, 6 ret i8 %ashr } define i24 @v_sext_inreg_i24_12(i24 %value) { ; GCN-LABEL: v_sext_inreg_i24_12: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 12 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i24_12: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i24 %value, 12 %ashr = ashr i24 %shl, 12 ret i24 %ashr } define i24 @v_sext_inreg_i24_7(i24 %value) { ; GCN-LABEL: 
v_sext_inreg_i24_7: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 17 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i24_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 17 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i24 %value, 7 %ashr = ashr i24 %shl, 7 ret i24 %ashr } define amdgpu_ps i24 @s_sext_inreg_i24_8(i24 inreg %value) { ; GCN-LABEL: s_sext_inreg_i24_8: ; GCN: ; %bb.0: ; GCN-NEXT: s_sext_i32_i16 s0, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i24_8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i24 %value, 8 %ashr = ashr i24 %shl, 8 ret i24 %ashr } define amdgpu_ps i24 @s_sext_inreg_i24_7(i24 inreg %value) { ; GCN-LABEL: s_sext_inreg_i24_7: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i32 s0, s0, 0x110000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i24_7: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x110000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i24 %value, 7 %ashr = ashr i24 %shl, 7 ret i24 %ashr } define i32 @v_sext_inreg_i32_3(i32 %value) { ; GCN-LABEL: v_sext_inreg_i32_3: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 29 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i32_3: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 29 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %value, 3 %ashr = ashr i32 %shl, 3 ret i32 %ashr } define i32 @v_sext_inreg_i32_31(i32 %value) { ; GCN-LABEL: v_sext_inreg_i32_31: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i32_31: ; 
GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %value, 31 %ashr = ashr i32 %shl, 31 ret i32 %ashr } define amdgpu_ps i32 @s_sext_inreg_i32_2(i32 inreg %value) { ; GCN-LABEL: s_sext_inreg_i32_2: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i32 s0, s0, 0x1e0000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i32_2: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x1e0000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i32 %value, 2 %ashr = ashr i32 %shl, 2 ret i32 %ashr } define amdgpu_ps i32 @s_sext_inreg_i32_31(i32 inreg %value) { ; GCN-LABEL: s_sext_inreg_i32_31: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i32_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i32 %value, 31 %ashr = ashr i32 %shl, 31 ret i32 %ashr } define <2 x i32> @v_sext_inreg_v2i32_14(<2 x i32> %value) { ; GCN-LABEL: v_sext_inreg_v2i32_14: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 18 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 18 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v2i32_14: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 18 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 18 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <2 x i32> %value, <i32 14, i32 14> %ashr = ashr <2 x i32> %shl, <i32 14, i32 14> ret <2 x i32> %ashr } define <2 x i32> @v_sext_inreg_v2i32_31(<2 x i32> %value) { ; GCN-LABEL: v_sext_inreg_v2i32_31: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 1 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v2i32_31: ; GFX10PLUS: ; %bb.0: ; 
GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <2 x i32> %value, <i32 31, i32 31> %shr = ashr <2 x i32> %shl, <i32 31, i32 31> ret <2 x i32> %shr } define amdgpu_ps <2 x i32> @s_sext_inreg_v2i32_22(<2 x i32> inreg %value) { ; GCN-LABEL: s_sext_inreg_v2i32_22: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i32 s0, s0, 0xa0000 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xa0000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v2i32_22: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xa0000 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xa0000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl <2 x i32> %value, <i32 22, i32 22> %ashr = ashr <2 x i32> %shl, <i32 22, i32 22> ret <2 x i32> %ashr } define <3 x i32> @v_sext_inreg_v3i32_16(<3 x i32> %value, <3 x i32> %amount) { ; GCN-LABEL: v_sext_inreg_v3i32_16: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v3i32_16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 16 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <3 x i32> %value, <i32 16, i32 16, i32 16> %ashr = ashr <3 x i32> %shl, <i32 16, i32 16, i32 16> ret <3 x i32> %ashr } define amdgpu_ps <3 x i32> @s_sext_inreg_v3i32_22(<3 x i32> inreg %value) { ; GCN-LABEL: s_sext_inreg_v3i32_22: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i32 s0, s0, 0xa0000 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xa0000 ; GCN-NEXT: s_bfe_i32 s2, s2, 0xa0000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v3i32_22: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xa0000 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xa0000 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0xa0000 ; GFX10PLUS-NEXT: ; return to shader 
part epilog %shl = shl <3 x i32> %value, <i32 22, i32 22, i32 22> %ashr = ashr <3 x i32> %shl, <i32 22, i32 22, i32 22> ret <3 x i32> %ashr } define <4 x i32> @v_sext_inreg_v4i32_6(<4 x i32> %value) { ; GCN-LABEL: v_sext_inreg_v4i32_6: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 26 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 26 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 26 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 26 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v4i32_6: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 26 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 26 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 26 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 26 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <4 x i32> %value, <i32 6, i32 6, i32 6, i32 6> %ashr = ashr <4 x i32> %shl, <i32 6, i32 6, i32 6, i32 6> ret <4 x i32> %ashr } define amdgpu_ps <4 x i32> @s_sext_inreg_v4i32_13(<4 x i32> inreg %value) { ; GCN-LABEL: s_sext_inreg_v4i32_13: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i32 s0, s0, 0x130000 ; GCN-NEXT: s_bfe_i32 s1, s1, 0x130000 ; GCN-NEXT: s_bfe_i32 s2, s2, 0x130000 ; GCN-NEXT: s_bfe_i32 s3, s3, 0x130000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v4i32_13: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x130000 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x130000 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x130000 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x130000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl <4 x i32> %value, <i32 13, i32 13, i32 13, i32 13> %ashr = ashr <4 x i32> %shl, <i32 13, i32 13, i32 13, i32 13> ret <4 x i32> %ashr } define <5 x i32> @v_sext_inreg_v5i32_30(<5 x i32> %value) { ; GCN-LABEL: v_sext_inreg_v5i32_30: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 2 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 2 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 2 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 2 ; GCN-NEXT: v_bfe_i32 v4, v4, 0, 2 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v5i32_30: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 2 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 2 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 2 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 2 ; GFX10PLUS-NEXT: v_bfe_i32 v4, v4, 0, 2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <5 x i32> %value, <i32 30, i32 30, i32 30, i32 30, i32 30> %ashr = ashr <5 x i32> %shl, <i32 30, i32 30, i32 30, i32 30, i32 30> ret <5 x i32> %ashr } define amdgpu_ps <5 x i32> @s_sext_inreg_v5i32_19(<5 x i32> inreg %value) { ; GCN-LABEL: s_sext_inreg_v5i32_19: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i32 s0, s0, 0xd0000 ; GCN-NEXT: s_bfe_i32 s1, s1, 0xd0000 ; GCN-NEXT: s_bfe_i32 s2, s2, 0xd0000 ; GCN-NEXT: s_bfe_i32 s3, s3, 0xd0000 ; GCN-NEXT: s_bfe_i32 s4, s4, 0xd0000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v5i32_19: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xd0000 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xd0000 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0xd0000 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0xd0000 ; GFX10PLUS-NEXT: s_bfe_i32 s4, s4, 0xd0000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl <5 x i32> %value, <i32 19, i32 19, i32 19, i32 19, i32 19> %ashr = ashr <5 x i32> %shl, <i32 19, i32 19, i32 19, i32 19, i32 19> ret <5 x i32> %ashr } define <16 x i32> @v_sext_inreg_v16i32_27(<16 x i32> %value) { ; GCN-LABEL: v_sext_inreg_v16i32_27: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 5 ; GCN-NEXT: v_bfe_i32 v1, v1, 0, 5 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 5 ; GCN-NEXT: v_bfe_i32 v3, v3, 0, 5 ; GCN-NEXT: v_bfe_i32 v4, v4, 0, 5 ; GCN-NEXT: v_bfe_i32 v5, v5, 0, 5 ; GCN-NEXT: v_bfe_i32 v6, v6, 0, 5 ; GCN-NEXT: v_bfe_i32 v7, v7, 0, 5 ; GCN-NEXT: v_bfe_i32 v8, v8, 0, 5 ; GCN-NEXT: v_bfe_i32 v9, v9, 0, 5 ; GCN-NEXT: v_bfe_i32 v10, v10, 0, 5 ; GCN-NEXT: v_bfe_i32 v11, v11, 0, 5 ; GCN-NEXT: v_bfe_i32 v12, v12, 0, 5 ; GCN-NEXT: v_bfe_i32 v13, v13, 0, 5 ; GCN-NEXT: v_bfe_i32 v14, v14, 0, 5 ; GCN-NEXT: v_bfe_i32 v15, v15, 0, 5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v16i32_27: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v4, v4, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v5, v5, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v6, v6, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v7, v7, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v8, v8, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v9, v9, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v10, v10, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v11, v11, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v12, v12, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v13, v13, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v14, v14, 0, 5 ; GFX10PLUS-NEXT: v_bfe_i32 v15, v15, 0, 5 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <16 x i32> %value, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27> %ashr = ashr <16 x i32> %shl, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27> ret <16 x i32> %ashr } define amdgpu_ps <16 x i32> @s_sext_inreg_v16i32_3(<16 x i32> inreg %value) { ; GCN-LABEL: s_sext_inreg_v16i32_3: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i32 s0, s0, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s1, s1, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s2, s2, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s3, s3, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s4, s4, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s5, s5, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s6, s6, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s7, s7, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s8, s8, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s9, s9, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s10, s10, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s11, s11, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s12, s12, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s13, s13, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s14, s14, 0x1d0000 ; GCN-NEXT: s_bfe_i32 s15, s15, 0x1d0000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v16i32_3: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s4, s4, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s5, s5, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s6, s6, 0x1d0000 ; GFX10PLUS-NEXT: 
s_bfe_i32 s7, s7, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s8, s8, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s9, s9, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s10, s10, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s11, s11, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s12, s12, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s13, s13, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s14, s14, 0x1d0000 ; GFX10PLUS-NEXT: s_bfe_i32 s15, s15, 0x1d0000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl <16 x i32> %value, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> %ashr = ashr <16 x i32> %shl, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ret <16 x i32> %ashr } define i16 @v_sext_inreg_i16_4(i16 %value) { ; GFX6-LABEL: v_sext_inreg_i16_4: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 12 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_sext_inreg_i16_4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 4, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_i16_4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 12 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i16_4: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i16 %value, 4 %ashr = ashr i16 %shl, 4 ret i16 %ashr } define i16 @v_sext_inreg_i16_15(i16 %value) { ; GFX6-LABEL: v_sext_inreg_i16_15: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_sext_inreg_i16_15: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 15, v0 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_i16_15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_bfe_i32 v0, 
v0, 0, 1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i16 %value, 15 %ashr = ashr i16 %shl, 15 ret i16 %ashr } define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) { ; GFX6-LABEL: s_sext_inreg_i16_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x70000 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_sext_inreg_i16_9: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshl_b32 s0, s0, 9 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 9 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_sext_inreg_i16_9: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 9 ; GFX9-NEXT: s_sext_i32_i16 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 9 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i16_9: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 9 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i16 %value, 9 %ashr = ashr i16 %shl, 9 ret i16 %ashr } define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) { ; GFX6-LABEL: s_sext_inreg_i16_15: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_sext_inreg_i16_15: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshl_b32 s0, s0, 15 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 15 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_sext_inreg_i16_15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 15 ; GFX9-NEXT: s_sext_i32_i16 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 15 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15 ; 
GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i16 %value, 15 %ashr = ashr i16 %shl, 15 ret i16 %ashr } define <2 x i16> @v_sext_inreg_v2i16_8(<2 x i16> %value) { ; GFX6-LABEL: v_sext_inreg_v2i16_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_sext_inreg_v2i16_8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 8, v1 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX8-NEXT: v_or_b32_sdwa v0, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_v2i16_8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v2i16_8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <2 x i16> %value, <i16 8, i16 8> %ashr = ashr <2 x i16> %shl, <i16 8, i16 8> ret <2 x i16> %ashr } define <2 x i16> @v_sext_inreg_v2i16_15(<2 x i16> %value) { ; GFX6-LABEL: v_sext_inreg_v2i16_15: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_sext_inreg_v2i16_15: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 15 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 15, v0 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v1 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_v2i16_15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v2i16_15: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <2 x i16> %value, <i16 15, i16 15> %ashr = ashr <2 x i16> %shl, <i16 15, i16 15> ret <2 x i16> %ashr } define amdgpu_ps i32 @s_sext_inreg_v2i16_11(<2 x i16> inreg %value) { ; GFX6-LABEL: s_sext_inreg_v2i16_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i32 s1, s1, 0x50000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x50000 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_sext_inreg_v2i16_11: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s1, s0, 16 ; GFX8-NEXT: s_lshl_b32 s0, s0, 11 ; GFX8-NEXT: s_lshl_b32 s1, s1, 11 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_ashr_i32 s0, s0, 11 ; GFX8-NEXT: s_ashr_i32 s1, s1, 11 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_or_b32 s0, s1, s0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_sext_inreg_v2i16_11: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s1, s0, 16 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0xb000b ; GFX9-NEXT: s_lshl_b32 s1, s1, 11 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 ; GFX9-NEXT: s_sext_i32_i16 s1, s0 ; GFX9-NEXT: 
s_ashr_i32 s0, s0, 16 ; GFX9-NEXT: s_sext_i32_i16 s2, 0xb000b ; GFX9-NEXT: s_ashr_i32 s1, s1, s2 ; GFX9-NEXT: s_ashr_i32 s0, s0, 11 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s1, s0 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v2i16_11: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshr_b32 s1, s0, 16 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0xb000b ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 11 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s1 ; GFX10PLUS-NEXT: s_sext_i32_i16 s1, 0xb000b ; GFX10PLUS-NEXT: s_sext_i32_i16 s2, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s2, s1 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 11 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s1, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl <2 x i16> %value, <i16 11, i16 11> %ashr = ashr <2 x i16> %shl, <i16 11, i16 11> %cast = bitcast <2 x i16> %ashr to i32 ret i32 %cast } ; FIXME ; define <3 x i16> @v_sext_inreg_v3i16_4(<3 x i16> %value) { ; %shl = shl <3 x i16> %value, <i16 4, i16 4, i16 4> ; %ashr = ashr <3 x i16> %shl, <i16 4, i16 4, i16 4> ; ret <3 x i16> %ashr ; } ; define amdgpu_ps <3 x i16> @s_sext_inreg_v3i16_4(<3 x i16> inreg %value) { ; %shl = shl <3 x i16> %value, <i16 4, i16 4, i16 4> ; %ashr = ashr <3 x i16> %shl, <i16 4, i16 4, i16 4> ; ret <3 x i16> %ashr ; } define <2 x float> @v_sext_inreg_v4i16_3(<4 x i16> %value) { ; GFX6-LABEL: v_sext_inreg_v4i16_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 13 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 13 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 13 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 13 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_sext_inreg_v4i16_3: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: 
v_mov_b32_e32 v3, 3 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 3, v0 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, 3, v1 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v2 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v4 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_v4i16_3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 3, v1 op_sel_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v4i16_3: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, 3, v1 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <4 x i16> %value, <i16 3, i16 3, i16 3, i16 3> %ashr = ashr <4 x i16> %shl, <i16 3, i16 3, i16 3, i16 3> %cast = bitcast <4 x i16> %ashr to <2 x float> ret <2 x float> %cast } define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) { ; GFX6-LABEL: s_sext_inreg_v4i16_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i32 s1, s1, 0x20000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff ; GFX6-NEXT: s_bfe_i32 s2, s2, 0x20000 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff ; 
GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 ; GFX6-NEXT: s_or_b32 s1, s1, s2 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_sext_inreg_v4i16_14: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s2, s0, 16 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16 ; GFX8-NEXT: s_lshl_b32 s0, s0, 14 ; GFX8-NEXT: s_lshl_b32 s2, s2, 14 ; GFX8-NEXT: s_lshl_b32 s1, s1, 14 ; GFX8-NEXT: s_lshl_b32 s3, s3, 14 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_ashr_i32 s0, s0, 14 ; GFX8-NEXT: s_ashr_i32 s2, s2, 14 ; GFX8-NEXT: s_ashr_i32 s1, s1, 14 ; GFX8-NEXT: s_ashr_i32 s3, s3, 14 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_or_b32 s0, s2, s0 ; GFX8-NEXT: s_lshl_b32 s2, s3, 16 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff ; GFX8-NEXT: s_or_b32 s1, s2, s1 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_sext_inreg_v4i16_14: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s2, s0, 16 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0xe000e ; GFX9-NEXT: s_lshl_b32 s2, s2, 14 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 ; GFX9-NEXT: s_sext_i32_i16 s2, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16 ; GFX9-NEXT: s_sext_i32_i16 s3, 0xe000e ; GFX9-NEXT: s_ashr_i32 s2, s2, s3 ; GFX9-NEXT: s_ashr_i32 s0, s0, 14 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s2, s0 ; GFX9-NEXT: s_lshr_b32 s2, s1, 16 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0xe000e ; GFX9-NEXT: s_lshl_b32 s2, s2, 14 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 ; GFX9-NEXT: s_sext_i32_i16 s2, s1 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16 ; GFX9-NEXT: s_ashr_i32 s2, s2, s3 ; GFX9-NEXT: s_ashr_i32 s1, s1, 14 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s2, s1 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v4i16_14: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16 ; GFX10PLUS-NEXT: s_lshr_b32 
s4, s1, 16 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0xe000e ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 14 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 0xe000e ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, 14 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s2 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX10PLUS-NEXT: s_sext_i32_i16 s2, 0xe000e ; GFX10PLUS-NEXT: s_sext_i32_i16 s3, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s2 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 14 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s4, s2 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 14 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s3, s0 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s2, s1 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl <4 x i16> %value, <i16 14, i16 14, i16 14, i16 14> %ashr = ashr <4 x i16> %shl, <i16 14, i16 14, i16 14, i16 14> %cast = bitcast <4 x i16> %ashr to <2 x i32> ret <2 x i32> %cast } ; FIXME ; define <5 x i16> @v_sext_inreg_v5i16(<5 x i16> %value) { ; %shl = shl <5 x i16> %value, %amount ; ret <5 x i16> %result ; } ; define amdgpu_ps <5 x i16> @s_sext_inreg_v5i16(<5 x i16> inreg %value) { ; %shl = shl <5 x i16> %value, %amount ; ret <5 x i16> %result ; } ; define <3 x float> @v_sext_inreg_v6i16(<6 x i16> %value) { ; %shl = shl <6 x i16> %value, %amount ; %cast = bitcast <6 x i16> %result to <3 x float> ; ret <3 x float> %cast ; } ; define amdgpu_ps <3 x i32> @s_sext_inreg_v6i16(<6 x i16> inreg %value) { ; %shl = shl <6 x i16> %value, %amount ; %cast = bitcast <6 x i16> %result to <3 x i32> ; ret <3 x i32> %cast ; } define <4 x float> @v_sext_inreg_v8i16_11(<8 x i16> %value) { ; GFX6-LABEL: v_sext_inreg_v8i16_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 5 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 5 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 5 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 5 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; 
GFX6-NEXT: v_bfe_i32 v5, v5, 0, 5 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 5 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 5 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v5 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 5 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v6 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_sext_inreg_v8i16_11: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v5, 11 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, 11, v0 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 11, v1 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v4 ; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b16_e32 v7, 11, v2 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v6 ; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b16_e32 v8, 11, v3 ; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v7 ; GFX8-NEXT: v_ashrrev_i16_sdwa v2, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: 
v_or_b32_e32 v2, v4, v2 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v8 ; GFX8-NEXT: v_ashrrev_i16_sdwa v3, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_v8i16_11: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v3, 11, v3 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 11, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 11, v1 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v2, 11, v2 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v3, 11, v3 op_sel_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v8i16_11: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v3, 11, v3 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 11, v0 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, 11, v1 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v2, 11, v2 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v3, 11, v3 op_sel_hi:[0,1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <8 x i16> %value, %ashr = ashr <8 x i16> %shl, %cast = bitcast <8 x i16> %ashr to <4 x float> ret <4 x float> %cast } define amdgpu_ps <4 x i32> @s_sext_inreg_v8i16_5(<8 x i16> inreg %value) { ; GFX6-LABEL: s_sext_inreg_v8i16_5: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i32 s1, s1, 0xb0000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0xb0000 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff ; GFX6-NEXT: s_bfe_i32 s2, s2, 0xb0000 ; GFX6-NEXT: s_bfe_i32 s3, s3, 0xb0000 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff ; 
GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_bfe_i32 s5, s5, 0xb0000 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff ; GFX6-NEXT: s_and_b32 s2, s3, 0xffff ; GFX6-NEXT: s_bfe_i32 s4, s4, 0xb0000 ; GFX6-NEXT: s_bfe_i32 s7, s7, 0xb0000 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 ; GFX6-NEXT: s_and_b32 s3, s5, 0xffff ; GFX6-NEXT: s_bfe_i32 s6, s6, 0xb0000 ; GFX6-NEXT: s_or_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 ; GFX6-NEXT: s_and_b32 s4, s7, 0xffff ; GFX6-NEXT: s_or_b32 s2, s2, s3 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 ; GFX6-NEXT: s_or_b32 s3, s3, s4 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_sext_inreg_v8i16_5: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s4, s0, 16 ; GFX8-NEXT: s_lshr_b32 s5, s1, 16 ; GFX8-NEXT: s_lshl_b32 s0, s0, 5 ; GFX8-NEXT: s_lshl_b32 s4, s4, 5 ; GFX8-NEXT: s_lshr_b32 s6, s2, 16 ; GFX8-NEXT: s_lshl_b32 s1, s1, 5 ; GFX8-NEXT: s_lshl_b32 s5, s5, 5 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_lshr_b32 s7, s3, 16 ; GFX8-NEXT: s_lshl_b32 s2, s2, 5 ; GFX8-NEXT: s_lshl_b32 s6, s6, 5 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 ; GFX8-NEXT: s_ashr_i32 s0, s0, 5 ; GFX8-NEXT: s_ashr_i32 s4, s4, 5 ; GFX8-NEXT: s_lshl_b32 s3, s3, 5 ; GFX8-NEXT: s_lshl_b32 s7, s7, 5 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_ashr_i32 s1, s1, 5 ; GFX8-NEXT: s_ashr_i32 s5, s5, 5 ; GFX8-NEXT: s_lshl_b32 s4, s4, 16 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s7, s7 ; GFX8-NEXT: s_ashr_i32 s2, s2, 5 ; GFX8-NEXT: s_ashr_i32 s6, s6, 5 ; GFX8-NEXT: s_or_b32 s0, s4, s0 ; GFX8-NEXT: s_lshl_b32 s4, s5, 16 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffff ; GFX8-NEXT: s_ashr_i32 s3, s3, 5 ; GFX8-NEXT: s_ashr_i32 s7, s7, 5 ; GFX8-NEXT: s_or_b32 s1, s4, s1 ; GFX8-NEXT: s_lshl_b32 s4, s6, 16 ; GFX8-NEXT: s_and_b32 s2, s2, 
0xffff ; GFX8-NEXT: s_or_b32 s2, s4, s2 ; GFX8-NEXT: s_lshl_b32 s4, s7, 16 ; GFX8-NEXT: s_and_b32 s3, s3, 0xffff ; GFX8-NEXT: s_or_b32 s3, s4, s3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_sext_inreg_v8i16_5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s4, s0, 16 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x50005 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 ; GFX9-NEXT: s_sext_i32_i16 s4, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 16 ; GFX9-NEXT: s_sext_i32_i16 s5, 0x50005 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5 ; GFX9-NEXT: s_ashr_i32 s0, s0, 5 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s4, s0 ; GFX9-NEXT: s_lshr_b32 s4, s1, 16 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0x50005 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX9-NEXT: s_sext_i32_i16 s4, s1 ; GFX9-NEXT: s_ashr_i32 s1, s1, 16 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5 ; GFX9-NEXT: s_ashr_i32 s1, s1, 5 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s4, s1 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16 ; GFX9-NEXT: s_lshl_b32 s2, s2, 0x50005 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 ; GFX9-NEXT: s_sext_i32_i16 s4, s2 ; GFX9-NEXT: s_ashr_i32 s2, s2, 16 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5 ; GFX9-NEXT: s_ashr_i32 s2, s2, 5 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s4, s2 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16 ; GFX9-NEXT: s_lshl_b32 s3, s3, 0x50005 ; GFX9-NEXT: s_lshl_b32 s4, s4, 5 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4 ; GFX9-NEXT: s_sext_i32_i16 s4, s3 ; GFX9-NEXT: s_ashr_i32 s3, s3, 16 ; GFX9-NEXT: s_ashr_i32 s4, s4, s5 ; GFX9-NEXT: s_ashr_i32 s3, s3, 5 ; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v8i16_5: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshr_b32 s4, s0, 16 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s1, 16 ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0x50005 ; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, 5 ; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 0x50005 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5 ; GFX10PLUS-NEXT: 
s_pack_ll_b32_b16 s0, s0, s4 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s6 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s2, 16 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s0 ; GFX10PLUS-NEXT: s_sext_i32_i16 s5, 0x50005 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 0x50005 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 5 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s2, s6 ; GFX10PLUS-NEXT: s_lshr_b32 s6, s3, 16 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s4, s0 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, 0x50005 ; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5 ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 5 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s3, s6 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s4, s1 ; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s2 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 16 ; GFX10PLUS-NEXT: s_sext_i32_i16 s6, s3 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 16 ; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5 ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 5 ; GFX10PLUS-NEXT: s_ashr_i32 s5, s6, s5 ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 5 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s4, s2 ; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s5, s3 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl <8 x i16> %value, %ashr = ashr <8 x i16> %shl, %cast = bitcast <8 x i16> %ashr to <4 x i32> ret <4 x i32> %cast } define i64 @v_sext_inreg_i64_23(i64 %value) { ; GCN-LABEL: v_sext_inreg_i64_23: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 9 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i64_23: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 9 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i64 %value, 23 %ashr = ashr i64 %shl, 23 ret i64 %ashr } define i64 @v_sext_inreg_i64_40(i64 %value) { ; 
GCN-LABEL: v_sext_inreg_i64_40: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i64_40: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i64 %value, 40 %ashr = ashr i64 %shl, 40 ret i64 %ashr } define i64 @v_sext_inreg_i64_63(i64 %value) { ; GCN-LABEL: v_sext_inreg_i64_63: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i64_63: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i64 %value, 63 %ashr = ashr i64 %shl, 63 ret i64 %ashr } define i64 @v_sext_inreg_i64_33(i64 %value) { ; GCN-LABEL: v_sext_inreg_i64_33: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 31 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i64_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 31 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i64 %value, 33 %ashr = ashr i64 %shl, 33 ret i64 %ashr } define i64 @v_sext_inreg_i64_32(i64 %value) { ; GCN-LABEL: v_sext_inreg_i64_32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i64_32: ; GFX10PLUS: ; %bb.0: ; 
GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i64 %value, 32 %ashr = ashr i64 %value, 32 ret i64 %ashr } define i64 @v_sext_inreg_i64_31(i64 %value) { ; GCN-LABEL: v_sext_inreg_i64_31: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 1 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i64 %value, 31 %ashr = ashr i64 %shl, 31 ret i64 %ashr } define amdgpu_ps i64 @s_sext_inreg_i64_3(i64 inreg %value) { ; GCN-LABEL: s_sext_inreg_i64_3: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x3d0000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i64_3: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x3d0000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i64 %value, 3 %ashr = ashr i64 %shl, 3 ret i64 %ashr } define amdgpu_ps i64 @s_sext_inreg_i64_63(i64 inreg %value) { ; GCN-LABEL: s_sext_inreg_i64_63: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x10000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i64_63: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x10000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i64 %value, 63 %ashr = ashr i64 %shl, 63 ret i64 %ashr } define amdgpu_ps i64 @s_sext_inreg_i64_33(i64 inreg %value) { ; GCN-LABEL: s_sext_inreg_i64_33: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x1f0000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i64_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x1f0000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i64 %value, 33 
%ashr = ashr i64 %shl, 33 ret i64 %ashr } define amdgpu_ps i64 @s_sext_inreg_i64_32(i64 inreg %value) { ; GCN-LABEL: s_sext_inreg_i64_32: ; GCN: ; %bb.0: ; GCN-NEXT: s_ashr_i32 s1, s0, 31 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i64_32: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_ashr_i32 s1, s0, 31 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i64 %value, 32 %ashr = ashr i64 %shl, 32 ret i64 %ashr } define amdgpu_ps i64 @s_sext_inreg_i64_31(i64 inreg %value) { ; GCN-LABEL: s_sext_inreg_i64_31: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x210000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x210000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i64 %value, 31 %ashr = ashr i64 %shl, 31 ret i64 %ashr } define <2 x i64> @v_sext_inreg_v2i64_16(<2 x i64> %value) { ; GCN-LABEL: v_sext_inreg_v2i64_16: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 16 ; GCN-NEXT: v_bfe_i32 v3, v2, 0, 16 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v2i64_16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 16 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 16 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl <2 x i64> %value, %ashr = ashr <2 x i64> %shl, ret <2 x i64> %ashr } define <2 x i64> @v_sext_inreg_v2i64_31(<2 x i64> %value) { ; GCN-LABEL: v_sext_inreg_v2i64_31: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_bfe_i32 v1, v0, 0, 1 ; GCN-NEXT: v_bfe_i32 v3, v2, 0, 1 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_v2i64_31: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1 ; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 
%shl = shl <2 x i64> %value, %ashr = ashr <2 x i64> %shl, ret <2 x i64> %ashr } define amdgpu_ps <2 x i64> @s_sext_inreg_v2i64_30(<2 x i64> inreg %value) { ; GCN-LABEL: s_sext_inreg_v2i64_30: ; GCN: ; %bb.0: ; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x220000 ; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x220000 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_v2i64_30: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x220000 ; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x220000 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl <2 x i64> %value, %ashr = ashr <2 x i64> %shl, ret <2 x i64> %ashr } define i65 @v_sext_inreg_i65_22(i65 %value) { ; GFX6-LABEL: v_sext_inreg_i65_22: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 22 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 10, v1 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 0 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 10 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 10, v2 ; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 22 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_sext_inreg_i65_22: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3] ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 10, v1 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1] ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 10 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 10, v2 ; GFX8-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3] ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_i65_22: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3] ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 10, v1 ; GFX9-NEXT: 
v_or_b32_e32 v2, v2, v3 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1] ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 1 ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX9-NEXT: v_bfe_u32 v1, v1, 0, 10 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 10, v1 ; GFX9-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i65_22: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3] ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1 ; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1] ; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX10PLUS-NEXT: v_bfe_u32 v1, v1, 0, 10 ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 1 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v2, 10, v1 ; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i65 %value, 22 %ashr = ashr i65 %shl, 22 ret i65 %ashr } define i65 @v_sext_inreg_i65_33(i65 %value) { ; GFX6-LABEL: v_sext_inreg_i65_33: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v3, v1 ; GFX6-NEXT: v_bfe_i32 v1, v2, 0, 1 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], 31 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v3, v0 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 1, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_sext_inreg_i65_33: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v3, v1 ; GFX8-NEXT: v_bfe_i32 v1, v2, 0, 1 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2] ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 1, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_i65_33: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-NEXT: v_bfe_i32 v1, 
v2, 0, 1 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2] ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3 ; GFX9-NEXT: v_or_b32_e32 v0, v3, v0 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: v_sext_inreg_i65_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v1 ; GFX10PLUS-NEXT: v_bfe_i32 v1, v2, 0, 1 ; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 1, v3 ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, 31, v1 ; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2] ; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, 1, v2 ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v3, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %shl = shl i65 %value, 33 %ashr = ashr i65 %value, 33 ret i65 %ashr } define amdgpu_ps i65 @s_sext_inreg_i65_18(i65 inreg %value) { ; GCN-LABEL: s_sext_inreg_i65_18: ; GCN: ; %bb.0: ; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], 18 ; GCN-NEXT: s_lshr_b32 s4, s1, 14 ; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] ; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 ; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000 ; GCN-NEXT: s_lshl_b32 s7, s2, 14 ; GCN-NEXT: s_mov_b32 s6, s5 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] ; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 18 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i65_18: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[2:3], 18 ; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 14 ; GFX10PLUS-NEXT: s_mov_b32 s5, 0 ; GFX10PLUS-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000 ; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] ; GFX10PLUS-NEXT: s_mov_b32 s6, s5 ; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 ; GFX10PLUS-NEXT: s_lshl_b32 s7, s2, 14 ; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[2:3], 18 ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i65 %value, 18 %ashr = ashr i65 %shl, 18 ret i65 %ashr } define amdgpu_ps i65 
@s_sext_inreg_i65_33(i65 inreg %value) { ; GCN-LABEL: s_sext_inreg_i65_33: ; GCN: ; %bb.0: ; GCN-NEXT: s_lshl_b32 s3, s2, 1 ; GCN-NEXT: s_mov_b32 s2, 0 ; GCN-NEXT: s_lshr_b64 s[4:5], s[0:1], 31 ; GCN-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5] ; GCN-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 ; GCN-NEXT: s_bfe_u32 s0, s0, 0x1f0000 ; GCN-NEXT: s_mov_b32 s1, s2 ; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], 31 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] ; GCN-NEXT: s_ashr_i32 s2, s5, 1 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_sext_inreg_i65_33: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_lshl_b32 s3, s2, 1 ; GFX10PLUS-NEXT: s_mov_b32 s2, 0 ; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[0:1], 31 ; GFX10PLUS-NEXT: s_bfe_u32 s0, s0, 0x1f0000 ; GFX10PLUS-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5] ; GFX10PLUS-NEXT: s_mov_b32 s1, s2 ; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 ; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[4:5], 31 ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] ; GFX10PLUS-NEXT: s_ashr_i32 s2, s5, 1 ; GFX10PLUS-NEXT: ; return to shader part epilog %shl = shl i65 %value, 33 %ashr = ashr i65 %shl, 33 ret i65 %ashr } ; FIXME: Argument lowering asserts ; define <2 x i65> @v_sext_inreg_v2i65_36(<2 x i65> %value) { ; %shl = shl <2 x i65> %value, ; %ashr = ashr <2 x i65> %shl, ; ret <2 x i65> %ashr ; } ; define amdgpu_ps <2 x i65> @s_sext_inreg_v2i65_36(<2 x i65> inreg %valuex) { ; %shl = shl <2 x i65> %value, ; %ashr = ashrshl <2 x i65> %shl, ; ret <2 x i65> %ashr ; } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX10: {{.*}} ; GFX11: {{.*}}