; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s

; Generic llvm.exp2 on an f32 inreg argument. Both selectors are expected to
; wrap v_s_exp_f32 in a compare/select/add pre-scale and a multiply post-scale.
define amdgpu_cs float @v_s_exp_f32(float inreg %src) {
; GFX12-LABEL: v_s_exp_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
; GFX12-NEXT: s_cselect_b32 s1, 0x42800000, 0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT: s_add_f32 s0, s0, s1
; GFX12-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0
; GFX12-NEXT: v_s_exp_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX12-NEXT: s_mul_f32 s0, s0, s1
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %exp = call float @llvm.exp2.f32(float %src)
  ret float %exp
}

; Generic llvm.exp2 on an f16 inreg argument: expected to be a single
; v_s_exp_f16 with no scaling code around it.
define amdgpu_cs half @v_s_exp_f16(half inreg %src) {
; GFX12-LABEL: v_s_exp_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_exp_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %exp = call half @llvm.exp2.f16(half %src)
  ret half %exp
}

; Target intrinsic llvm.amdgcn.exp2 (f32): expected to map directly onto
; v_s_exp_f32 without the scaling sequence seen for the generic intrinsic.
define amdgpu_cs float @v_s_amdgcn_exp_f32(float inreg %src) {
; GFX12-LABEL: v_s_amdgcn_exp_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_exp_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %exp = call float @llvm.amdgcn.exp2.f32(float %src)
  ret float %exp
}

; Target intrinsic llvm.amdgcn.exp2 (f16): expected to map directly onto
; v_s_exp_f16.
define amdgpu_cs half @v_s_amdgcn_exp_f16(half inreg %src) {
; GFX12-LABEL: v_s_amdgcn_exp_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_exp_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %exp = call half @llvm.amdgcn.exp2.f16(half %src)
  ret half %exp
}
; Generic llvm.log2 on an f32 inreg argument. Both selectors are expected to
; wrap v_s_log_f32 in a compare/select/multiply pre-scale and a subtract
; post-adjustment.
define amdgpu_cs float @v_s_log_f32(float inreg %src) {
; GFX12-LABEL: v_s_log_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_cmp_lt_f32 s0, 0x800000
; GFX12-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT: s_mul_f32 s0, s0, s1
; GFX12-NEXT: s_cselect_b32 s1, 0x42000000, 0
; GFX12-NEXT: v_s_log_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX12-NEXT: s_sub_f32 s0, s0, s1
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %log = call float @llvm.log2.f32(float %src)
  ret float %log
}

; Generic llvm.log2 on an f16 inreg argument: expected to be a single
; v_s_log_f16.
define amdgpu_cs half @v_s_log_f16(half inreg %src) {
; GFX12-LABEL: v_s_log_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %log = call half @llvm.log2.f16(half %src)
  ret half %log
}

; Target intrinsic llvm.amdgcn.log (f32): expected to map directly onto
; v_s_log_f32 without the scaling sequence seen for the generic intrinsic.
define amdgpu_cs float @v_s_amdgcn_log_f32(float inreg %src) {
; GFX12-LABEL: v_s_amdgcn_log_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %log = call float @llvm.amdgcn.log.f32(float %src)
  ret float %log
}

; Target intrinsic llvm.amdgcn.log (f16): expected to map directly onto
; v_s_log_f16.
define amdgpu_cs half @v_s_amdgcn_log_f16(half inreg %src) {
; GFX12-LABEL: v_s_amdgcn_log_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %log = call half @llvm.amdgcn.log.f16(half %src)
  ret half %log
}

; llvm.amdgcn.rcp (f32), called with the `fast` flag: expected to select
; v_s_rcp_f32.
define amdgpu_cs float @v_s_rcp_f32(float inreg %src) {
; GFX12-LABEL: v_s_rcp_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_rcp_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %rcp = call fast float @llvm.amdgcn.rcp.f32(float %src)
  ret float %rcp
}

; llvm.amdgcn.rcp (f16), called with the `fast` flag: expected to select
; v_s_rcp_f16.
define amdgpu_cs half @v_s_rcp_f16(half inreg %src) {
; GFX12-LABEL: v_s_rcp_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_rcp_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %rcp = call fast half @llvm.amdgcn.rcp.f16(half %src)
  ret half %rcp
}

; TODO-GFX12: GlobalISel should generate v_s_rsq.
; Fast 1.0 / sqrt(x) on f32. SelectionDAG is expected to fold the pair into
; v_s_rsq_f32; GlobalISel currently emits v_s_sqrt_f32 followed by v_s_rcp_f32.
define amdgpu_cs float @v_s_rsq_f32(float inreg %src) {
; GFX12-SDAG-LABEL: v_s_rsq_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_s_rsq_f32 s0, s0
; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: v_s_rsq_f32:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: v_s_sqrt_f32 s0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
; GFX12-GISEL-NEXT: v_s_rcp_f32 s0, s0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: ; return to shader part epilog
  %root = call fast float @llvm.sqrt.f32(float %src)
  %recip = fdiv fast float 1.0, %root
  ret float %recip
}

; Fast 1.0 / sqrt(x) on f16: both selectors are expected to fold the pair into
; v_s_rsq_f16.
define amdgpu_cs half @v_s_rsq_f16(half inreg %src) {
; GFX12-LABEL: v_s_rsq_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_rsq_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %root = call fast half @llvm.sqrt.f16(half %src)
  %recip = fdiv fast half 1.0, %root
  ret half %recip
}

; TODO-GFX12: Should not use any VALU instructions.
; Generic llvm.sqrt on an f32 inreg argument (no fast-math flags). Both
; selectors expand this into a scalar refinement sequence around v_s_sqrt_f32,
; and both still contain a VALU v_cmp_class_f32_e64 (GlobalISel additionally
; uses v_cndmask_b32_e64).
define amdgpu_cs float @v_s_sqrt_f32(float inreg %src) {
; GFX12-SDAG-LABEL: v_s_sqrt_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_mul_f32 s1, s0, 0x4f800000
; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xf800000
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: s_cselect_b32 s1, s1, s0
; GFX12-SDAG-NEXT: v_s_sqrt_f32 s2, s1
; GFX12-SDAG-NEXT: s_mov_b32 s4, s1
; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: s_add_co_i32 s3, s2, -1
; GFX12-SDAG-NEXT: s_xor_b32 s5, s3, 0x80000000
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT: s_fmac_f32 s4, s5, s2
; GFX12-SDAG-NEXT: s_mov_b32 s5, s1
; GFX12-SDAG-NEXT: s_cmp_le_f32 s4, 0
; GFX12-SDAG-NEXT: s_cselect_b32 s3, s3, s2
; GFX12-SDAG-NEXT: s_add_co_i32 s4, s2, 1
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: s_xor_b32 s6, s4, 0x80000000
; GFX12-SDAG-NEXT: s_fmac_f32 s5, s6, s2
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT: s_cmp_gt_f32 s5, 0
; GFX12-SDAG-NEXT: s_cselect_b32 s2, s4, s3
; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xf800000
; GFX12-SDAG-NEXT: s_mul_f32 s0, s2, 0x37800000
; GFX12-SDAG-NEXT: v_cmp_class_f32_e64 s3, s1, 0x260
; GFX12-SDAG-NEXT: s_cselect_b32 s0, s0, s2
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: s_and_b32 s2, s3, exec_lo
; GFX12-SDAG-NEXT: s_cselect_b32 s0, s1, s0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: v_s_sqrt_f32:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_cmp_gt_f32 0xf800000, s0
; GFX12-GISEL-NEXT: s_mul_f32 s2, s0, 0x4f800000
; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_cselect_b32 s0, s2, s0
; GFX12-GISEL-NEXT: v_s_sqrt_f32 s2, s0
; GFX12-GISEL-NEXT: s_mov_b32 s4, s0
; GFX12-GISEL-NEXT: s_mov_b32 s6, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_add_co_i32 s3, s2, -1
; GFX12-GISEL-NEXT: s_xor_b32 s5, s3, 0x80000000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_fmac_f32 s4, s5, s2
; GFX12-GISEL-NEXT: s_add_co_i32 s5, s2, 1
; GFX12-GISEL-NEXT: s_xor_b32 s7, s5, 0x80000000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_2)
; GFX12-GISEL-NEXT: s_cmp_le_f32 s4, 0
; GFX12-GISEL-NEXT: s_fmac_f32 s6, s7, s2
; GFX12-GISEL-NEXT: s_cselect_b32 s2, s3, s2
; GFX12-GISEL-NEXT: s_cmp_gt_f32 s6, 0
; GFX12-GISEL-NEXT: s_cselect_b32 s2, s5, s2
; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX12-GISEL-NEXT: s_mul_f32 s3, s2, 0x37800000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_cselect_b32 s1, s3, s2
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s1
; GFX12-GISEL-NEXT: v_cmp_class_f32_e64 s1, s0, 0x260
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
; GFX12-GISEL-NEXT: ; return to shader part epilog
  %root = call float @llvm.sqrt.f32(float %src)
  ret float %root
}

; Generic llvm.sqrt on an f16 inreg argument: expected to be a single
; v_s_sqrt_f16.
define amdgpu_cs half @v_s_sqrt_f16(half inreg %src) {
; GFX12-LABEL: v_s_sqrt_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_sqrt_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %root = call half @llvm.sqrt.f16(half %src)
  ret half %root
}

; Target intrinsic llvm.amdgcn.sqrt (f32): expected to map directly onto
; v_s_sqrt_f32 without the refinement sequence seen for the generic intrinsic.
define amdgpu_cs float @v_amdgcn_sqrt_f32(float inreg %src) {
; GFX12-LABEL: v_amdgcn_sqrt_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_sqrt_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %root = call float @llvm.amdgcn.sqrt.f32(float %src)
  ret float %root
}

; Target intrinsic llvm.amdgcn.sqrt (f16): expected to map directly onto
; v_s_sqrt_f16.
define amdgpu_cs half @v_amdgcn_sqrt_f16(half inreg %src) {
; GFX12-LABEL: v_amdgcn_sqrt_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_sqrt_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %root = call half @llvm.amdgcn.sqrt.f16(half %src)
  ret half %root
}

; fabs feeding the generic f32 llvm.log2. The expected code clears the sign bit
; with a separate s_bitset0_b32 before the scaled v_s_log_f32 sequence, rather
; than using a |s0| operand as the f16 variant (srcmods_abs_f16) does.
define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
; GFX12-LABEL: srcmods_abs_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_bitset0_b32 s0, 31
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lt_f32 s0, 0x800000
; GFX12-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-NEXT: s_mul_f32 s0, s0, s1
; GFX12-NEXT: s_cselect_b32 s1, 0x42000000, 0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
; GFX12-NEXT: v_s_log_f32 s0, s0
; GFX12-NEXT: s_sub_f32 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %magnitude = call float @llvm.fabs.f32(float %src)
  %log = call float @llvm.log2.f32(float %magnitude)
  ret float %log
}

; fneg feeding the generic f32 llvm.log2. The two selectors differ: SelectionDAG
; compares the un-negated input against 0x80800000 with s_cmp_gt_f32, while
; GlobalISel negates first (s_xor_b32) and then uses the same compare as the
; plain log test.
define amdgpu_cs float @srcmods_neg_f32(float inreg %src) {
; GFX12-SDAG-LABEL: srcmods_neg_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_xor_b32 s1, s0, 0x80000000
; GFX12-SDAG-NEXT: s_cmp_gt_f32 s0, 0x80800000
; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x4f800000, 1.0
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT: s_mul_f32 s0, s1, s0
; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0
; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0
; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: srcmods_neg_f32:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_xor_b32 s0, s0, 0x80000000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000
; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0
; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: ; return to shader part epilog
  %negated = fneg float %src
  %log = call float @llvm.log2.f32(float %negated)
  ret float %log
}

; fabs feeding the f16 llvm.log2: the abs is expected to fold into the
; instruction as a |s0| source operand.
define amdgpu_cs half @srcmods_abs_f16(half inreg %src) {
; GFX12-LABEL: srcmods_abs_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f16 s0, |s0|
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %magnitude = call half @llvm.fabs.f16(half %src)
  %log = call half @llvm.log2.f16(half %magnitude)
  ret half %log
}

; fneg feeding the f16 llvm.log2: the negation is expected to fold into the
; instruction as a -s0 source operand.
define amdgpu_cs half @srcmods_neg_f16(half inreg %src) {
; GFX12-LABEL: srcmods_neg_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f16 s0, -s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %negated = fneg half %src
  %log = call half @llvm.log2.f16(half %negated)
  ret half %log
}

; Intrinsic declarations used by the tests in this file.
declare half @llvm.exp2.f16(half)
declare float @llvm.exp2.f32(float)
declare half @llvm.amdgcn.exp2.f16(half)
declare float @llvm.amdgcn.exp2.f32(float)
declare half @llvm.log2.f16(half)
declare float @llvm.log2.f32(float)
declare half @llvm.amdgcn.log.f16(half)
declare float @llvm.amdgcn.log.f32(float)
declare half @llvm.amdgcn.rcp.f16(half)
declare float @llvm.amdgcn.rcp.f32(float)
declare half @llvm.sqrt.f16(half)
declare float @llvm.sqrt.f32(float)
declare half @llvm.amdgcn.sqrt.f16(half)
declare float @llvm.amdgcn.sqrt.f32(float)
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)