; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SDAG,GCN-IEEE,SDAG-IEEE %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GISEL,GCN-IEEE,GISEL-IEEE %s

; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,SDAG,GCN-DAZ,SDAG-DAZ %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,GISEL,GCN-DAZ,GISEL-DAZ %s

; Baseline case. llvm.sqrt.f32 is applied to the unmodified argument with no
; fast-math flags. There is one expected sequence per llc configuration
; (SelectionDAG or GlobalISel, each with the default f32 denormal mode and with
; -denormal-fp-math-f32=preserve-sign). In all four, the expected code selects
; the input multiplied by 0x4f800000 when the input compares below 0xf800000,
; and in that case multiplies the computed root by 0x37800000. The default
; denormal mode sequences start from v_sqrt_f32 and adjust it by +/-1 in the
; integer domain; the preserve-sign sequences start from v_rsq_f32 and refine
; with FMAs. The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define float @v_sqrt_f32(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x)
  ret float %result
}

; llvm.sqrt.f32 of a negated argument (fneg feeding the sqrt). The expected
; code has no separate negate instruction. The SelectionDAG sequences use the
; sign-flipped constants 0x8f800000 / 0xcf800000 with a v_cmp_lt compare and a
; -v0 source modifier on the select. The GlobalISel sequences keep the
; 0xf800000 / 0x4f800000 constants and apply the -v0 source modifier to the
; multiply, the compare and the select. The rest of each sequence operates on
; the selected value in v0. The assertions are autogenerated, so regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
define float @v_sqrt_f32_fneg(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_fneg:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x8f800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0xcf800000, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_fneg:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-IEEE-NEXT: v_mul_f32_e64 v2, -v0, v2
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e64 vcc, v1, -v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_fneg:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x8f800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0xcf800000, v0
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_fneg:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-DAZ-NEXT: v_mul_f32_e64 v2, -v0, v2
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, v1, -v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %x.neg = fneg float %x
  %result = call float @llvm.sqrt.f32(float %x.neg)
  ret float %result
}

; llvm.sqrt.f32 of llvm.fabs.f32(x). The expected code has no separate
; absolute-value instruction. In every configuration the |v0| source modifier
; appears on the scaling multiply, the threshold compare and the select that
; picks the (possibly scaled) input. The SelectionDAG sequences keep both
; constants in SGPRs (s4, s5); the GlobalISel sequences keep them in VGPRs
; (v1, v2). The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define float @v_sqrt_f32_fabs(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_fabs:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: s_mov_b32 s5, 0x4f800000
; SDAG-IEEE-NEXT: v_mul_f32_e64 v1, |v0|, s5
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_fabs:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-IEEE-NEXT: v_mul_f32_e64 v2, |v0|, v2
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e64 vcc, v1, |v0|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_fabs:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: s_mov_b32 s5, 0x4f800000
; SDAG-DAZ-NEXT: v_mul_f32_e64 v1, |v0|, s5
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_fabs:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-DAZ-NEXT: v_mul_f32_e64 v2, |v0|, v2
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, v1, |v0|
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %x.fabs = call float @llvm.fabs.f32(float %x)
  %result = call float @llvm.sqrt.f32(float %x.fabs)
  ret float %result
}

; llvm.sqrt.f32 of fneg(llvm.fabs.f32(x)). The expected code has no separate
; negate or absolute-value instruction. The SelectionDAG sequences use the
; sign-flipped constants 0x8f800000 / 0xcf800000 with |v0| on the multiply and
; compare, and -|v0| on the select. The GlobalISel sequences keep the
; 0xf800000 / 0x4f800000 constants and apply -|v0| to the multiply, the compare
; and the select. The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define float @v_sqrt_f32_fneg_fabs(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_fneg_fabs:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x8f800000
; SDAG-IEEE-NEXT: s_mov_b32 s5, 0xcf800000
; SDAG-IEEE-NEXT: v_mul_f32_e64 v1, |v0|, s5
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, -|v0|, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_fneg_fabs:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-IEEE-NEXT: v_mul_f32_e64 v2, -|v0|, v2
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e64 vcc, v1, -|v0|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, -|v0|, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_fneg_fabs:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x8f800000
; SDAG-DAZ-NEXT: s_mov_b32 s5, 0xcf800000
; SDAG-DAZ-NEXT: v_mul_f32_e64 v1, |v0|, s5
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, -|v0|, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_fneg_fabs:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-DAZ-NEXT: v_mul_f32_e64 v2, -|v0|, v2
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, v1, -|v0|
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, -|v0|, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %x.fabs = call float @llvm.fabs.f32(float %x)
  %x.fabs.neg = fneg float %x.fabs
  %result = call float @llvm.sqrt.f32(float %x.fabs.neg)
  ret float %result
}

; llvm.sqrt.f32 with the ninf fast-math flag on the call. The expected
; sequences recorded here still end with the v_cmp_class_f32 (mask 0x260) /
; v_cndmask_b32 pair that selects between the computed value and the input, so
; the flag does not shorten the expected code in any of the four
; configurations. The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define float @v_sqrt_f32_ninf(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_ninf:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_ninf:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_ninf:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_ninf:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call ninf float @llvm.sqrt.f32(float %x)
  ret float %result
}

; llvm.sqrt.f32 inside a function that carries attribute group #5. The group
; is defined outside this part of the file; going by the test name it
; presumably sets a no-infs function attribute (TODO confirm against the
; attribute definitions at the end of the file). The expected sequences still
; end with the v_cmp_class_f32 (mask 0x260) / v_cndmask_b32 pair in all four
; configurations.
; NOTE(review): the call below also carries the ninf flag, so this test does not
; isolate the effect of the function attribute from the call-site flag. Confirm
; whether that is intentional before relying on it as attribute-only coverage.
; The assertions are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define float @v_sqrt_f32_no_infs_attribute(float %x) #5 {
; SDAG-IEEE-LABEL: v_sqrt_f32_no_infs_attribute:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_no_infs_attribute:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_no_infs_attribute:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_no_infs_attribute:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call ninf float @llvm.sqrt.f32(float %x)
  ret float %result
}

define float @v_sqrt_f32_nnan(float %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_nnan:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_nnan:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: v_sqrt_f32_nnan:
|
||
|
; SDAG-DAZ: ; %bb.0:
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: v_sqrt_f32_nnan:
|
||
|
; GISEL-DAZ: ; %bb.0:
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call nnan float @llvm.sqrt.f32(float %x)
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; Uniform (inreg) f32 sqrt with no fast-math flags in an amdgpu_ps function.
; The result is bitcast to i32 and returned through llvm.amdgcn.readfirstlane.
;
; Expected lowering, per the autogenerated assertions below:
;  * inputs below 0xf800000 (2^-96) are pre-scaled by 0x4f800000 (2^32) and
;    the result is rescaled by 0x37800000 (2^-16);
;  * the IEEE configurations start from v_sqrt_f32 and choose between the
;    estimate and its integer -1 / +1 neighbours using v_fma_f32 residuals;
;  * the DAZ configurations (-denormal-fp-math-f32=preserve-sign) start from
;    v_rsq_f32 and refine it with a chain of v_fma_f32;
;  * a final v_cmp_class_f32 with mask 0x260 selects the (possibly scaled)
;    input itself instead of the refined value for the matched classes.
; Do not hand-edit the assertions; regenerate them with
; utils/update_llc_test_checks.py.
define amdgpu_ps i32 @s_sqrt_f32(float inreg %x) {
; SDAG-IEEE-LABEL: s_sqrt_f32:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s0, v1
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1]
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: v_readfirstlane_b32 s0, v0
; SDAG-IEEE-NEXT: ; return to shader part epilog
;
; GISEL-IEEE-LABEL: s_sqrt_f32:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, s0
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, s0, v2
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-IEEE-NEXT: ; return to shader part epilog
;
; SDAG-DAZ-LABEL: s_sqrt_f32:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s0, v1
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s0
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: v_readfirstlane_b32 s0, v0
; SDAG-DAZ-NEXT: ; return to shader part epilog
;
; GISEL-DAZ-LABEL: s_sqrt_f32:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, s0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, s0, v2
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-DAZ-NEXT: ; return to shader part epilog
  %result = call float @llvm.sqrt.f32(float %x)
  %cast = bitcast float %result to i32
  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
  ret i32 %firstlane
}
|
||
|
|
||
|
; Uniform (inreg) f32 sqrt carrying only the `ninf` fast-math flag, returned
; as i32 through llvm.amdgcn.readfirstlane.
;
; The assertions below expect the full expansion: input scaling around the
; 0xf800000 threshold, v_sqrt_f32 plus -1 / +1 neighbour selection for the
; IEEE configurations, v_rsq_f32 plus v_fma_f32 refinement for the DAZ
; configurations, and the trailing v_cmp_class_f32 (mask 0x260) select.
; The instruction sequences match those asserted for @s_sqrt_f32 in this
; file, i.e. `ninf` by itself does not simplify the lowering here.
define amdgpu_ps i32 @s_sqrt_f32_ninf(float inreg %x) {
; SDAG-IEEE-LABEL: s_sqrt_f32_ninf:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s0, v1
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1]
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: v_readfirstlane_b32 s0, v0
; SDAG-IEEE-NEXT: ; return to shader part epilog
;
; GISEL-IEEE-LABEL: s_sqrt_f32_ninf:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, s0
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, s0, v2
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-IEEE-NEXT: ; return to shader part epilog
;
; SDAG-DAZ-LABEL: s_sqrt_f32_ninf:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s0, v1
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s0
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: v_readfirstlane_b32 s0, v0
; SDAG-DAZ-NEXT: ; return to shader part epilog
;
; GISEL-DAZ-LABEL: s_sqrt_f32_ninf:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, s0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, s0, v2
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-DAZ-NEXT: ; return to shader part epilog
  %result = call ninf float @llvm.sqrt.f32(float %x)
  %cast = bitcast float %result to i32
  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
  ret i32 %firstlane
}
|
||
|
|
||
|
; Uniform (inreg) f32 sqrt with the `afn` fast-math flag, returned as i32
; through llvm.amdgcn.readfirstlane.
;
; All run configurations share one set of assertions: the call is expected
; to become a single v_sqrt_f32 reading s0 directly, with no input scaling,
; refinement, or class-based fixup.
define amdgpu_ps i32 @s_sqrt_f32_afn(float inreg %x) {
; GCN-LABEL: s_sqrt_f32_afn:
; GCN: ; %bb.0:
; GCN-NEXT: v_sqrt_f32_e32 v0, s0
; GCN-NEXT: v_readfirstlane_b32 s0, v0
; GCN-NEXT: ; return to shader part epilog
  %result = call afn float @llvm.sqrt.f32(float %x)
  %cast = bitcast float %result to i32
  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
  ret i32 %firstlane
}
|
||
|
|
||
|
; Uniform (inreg) f32 sqrt with `afn nnan ninf`, returned as i32 through
; llvm.amdgcn.readfirstlane.
;
; All run configurations share one set of assertions: a single v_sqrt_f32
; on s0 followed by v_readfirstlane_b32, the same output asserted for the
; `afn`-only variant @s_sqrt_f32_afn in this file.
define amdgpu_ps i32 @s_sqrt_f32_afn_nnan_ninf(float inreg %x) {
; GCN-LABEL: s_sqrt_f32_afn_nnan_ninf:
; GCN: ; %bb.0:
; GCN-NEXT: v_sqrt_f32_e32 v0, s0
; GCN-NEXT: v_readfirstlane_b32 s0, v0
; GCN-NEXT: ; return to shader part epilog
  %result = call afn nnan ninf float @llvm.sqrt.f32(float %x)
  %cast = bitcast float %result to i32
  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
  ret i32 %firstlane
}
|
||
|
|
||
|
; Per-lane f32 sqrt carrying only the `nsz` fast-math flag.
;
; The assertions below expect the full expansion: inputs below 0xf800000
; (2^-96) are pre-scaled by 0x4f800000 (2^32) and the result rescaled by
; 0x37800000 (2^-16); the IEEE configurations refine v_sqrt_f32 by choosing
; between the estimate and its integer -1 / +1 neighbours, the DAZ
; configurations refine v_rsq_f32 with v_fma_f32; a final v_cmp_class_f32
; (mask 0x260) selects the input itself for the matched classes.
; `nsz` alone therefore does not shorten the sequence in these assertions.
define float @v_sqrt_f32_nsz(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_nsz:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_nsz:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_nsz:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_nsz:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call nsz float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt with `nnan ninf` (no `afn`).
;
; The assertions below still expect the full expansion: input scaling
; around the 0xf800000 threshold, v_sqrt_f32 with -1 / +1 neighbour
; selection for the IEEE configurations, v_rsq_f32 with v_fma_f32
; refinement for the DAZ configurations, and the trailing v_cmp_class_f32
; (mask 0x260) select. The sequences match those asserted for
; @v_sqrt_f32_nsz in this file.
define float @v_sqrt_f32_nnan_ninf(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_nnan_ninf:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_nnan_ninf:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_nnan_ninf:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_nnan_ninf:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan ninf float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt with `nnan ninf nsz` (no `afn`).
;
; The assertions below still expect the full expansion: input scaling
; around the 0xf800000 threshold, v_sqrt_f32 with -1 / +1 neighbour
; selection for the IEEE configurations, v_rsq_f32 with v_fma_f32
; refinement for the DAZ configurations, and the trailing v_cmp_class_f32
; (mask 0x260) select. The sequences match those asserted for
; @v_sqrt_f32_nnan_ninf in this file, so adding `nsz` changes nothing here.
define float @v_sqrt_f32_nnan_ninf_nsz(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_nnan_ninf_nsz:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_nnan_ninf_nsz:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_nnan_ninf_nsz:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_nnan_ninf_nsz:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan ninf nsz float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt with the `afn` fast-math flag.
;
; All run configurations share one set of assertions: the call is expected
; to become a single in-place v_sqrt_f32 on v0, with no scaling or
; refinement sequence.
define float @v_sqrt_f32_afn(float %x) {
; GCN-LABEL: v_sqrt_f32_afn:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call afn float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt with `afn nsz`.
;
; All run configurations share one set of assertions: a single in-place
; v_sqrt_f32 on v0, the same output asserted for the `afn`-only variant
; @v_sqrt_f32_afn in this file.
define float @v_sqrt_f32_afn_nsz(float %x) {
; GCN-LABEL: v_sqrt_f32_afn_nsz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call afn nsz float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Two-element vector f32 sqrt with the `afn` fast-math flag.
;
; All run configurations share one set of assertions: the vector call is
; expected to be split into one in-place v_sqrt_f32 per element (v0, then
; v1), with no scaling or refinement sequence.
define <2 x float> @v_sqrt_v2f32_afn(<2 x float> %x) {
; GCN-LABEL: v_sqrt_v2f32_afn:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: v_sqrt_f32_e32 v1, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt with `afn nnan`.
;
; All run configurations share one set of assertions: a single in-place
; v_sqrt_f32 on v0, the same output asserted for the `afn`-only variant
; @v_sqrt_f32_afn in this file.
define float @v_sqrt_f32_afn_nnan(float %x) {
; GCN-LABEL: v_sqrt_f32_afn_nnan:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call afn nnan float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt with `afn ninf` applied to llvm.fabs of the argument.
;
; All run configurations share one set of assertions: the fabs is expected
; to fold into the sqrt as an |v0| source modifier, which is why the
; instruction is asserted in its _e64 encoding rather than _e32. No separate
; instruction for the fabs appears in the expected output.
define float @v_sqrt_f32_fabs_afn_ninf(float %x) {
; GCN-LABEL: v_sqrt_f32_fabs_afn_ninf:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e64 v0, |v0|
; GCN-NEXT: s_setpc_b64 s[30:31]
  %fabs = call float @llvm.fabs.f32(float %x)
  %result = call afn ninf float @llvm.sqrt.f32(float %fabs)
  ret float %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt with `afn nnan ninf`.
;
; All run configurations share one set of assertions: a single in-place
; v_sqrt_f32 on v0. Contrast with @v_sqrt_f32_nnan_ninf in this file, which
; lacks `afn` and is asserted to use the full scaling/refinement expansion.
define float @v_sqrt_f32_afn_nnan_ninf(float %x) {
; GCN-LABEL: v_sqrt_f32_afn_nnan_ninf:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call afn nnan ninf float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Two-element vector f32 sqrt with `afn nnan ninf`.
;
; All run configurations share one set of assertions: one in-place
; v_sqrt_f32 per element (v0, then v1), the same output asserted for the
; `afn`-only vector variant @v_sqrt_v2f32_afn in this file.
define <2 x float> @v_sqrt_v2f32_afn_nnan_ninf(<2 x float> %x) {
; GCN-LABEL: v_sqrt_v2f32_afn_nnan_ninf:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: v_sqrt_f32_e32 v1, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call afn nnan ninf <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt with `afn nnan ninf nsz`.
;
; All run configurations share one set of assertions: a single in-place
; v_sqrt_f32 on v0. Contrast with @v_sqrt_f32_nnan_ninf_nsz in this file,
; which lacks `afn` and is asserted to use the full expansion.
define float @v_sqrt_f32_afn_nnan_ninf_nsz(float %x) {
; GCN-LABEL: v_sqrt_f32_afn_nnan_ninf_nsz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call afn nnan ninf nsz float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt where the call carries only `nsz` but the function uses
; attribute group #2.
;
; All run configurations share one set of assertions: a single in-place
; v_sqrt_f32 on v0. @v_sqrt_f32_nsz in this file has the same call without
; an attribute group and is asserted to use the full expansion, so the
; relaxed lowering here comes from #2.
; NOTE(review): #2 is defined outside this part of the file; going by the
; test name it presumably sets "approx-func-fp-math" - confirm against the
; attribute definitions.
define float @v_sqrt_f32__approx_func_fp_math(float %x) #2 {
; GCN-LABEL: v_sqrt_f32__approx_func_fp_math:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call nsz float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Per-lane f32 sqrt where the call carries only `nsz` but the function uses
; attribute group #3.
;
; All run configurations share one set of assertions: a single in-place
; v_sqrt_f32 on v0. @v_sqrt_f32_nsz in this file has the same call without
; an attribute group and is asserted to use the full expansion, so the
; relaxed lowering here comes from #3.
; NOTE(review): #3 is defined outside this part of the file; going by the
; test name it presumably combines several fast-math function attributes -
; confirm which ones against the attribute definitions.
define float @v_sqrt_f32__enough_unsafe_attrs(float %x) #3 {
; GCN-LABEL: v_sqrt_f32__enough_unsafe_attrs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call nsz float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Only nsz is present on the call; the function is tagged with attribute
; group #4.
; NOTE(review): #4 is defined outside this part of the file -- presumably the
; unsafe-fp-math function attribute the test name refers to; confirm against
; the attribute list at the end of the file.
; All configurations (shared GCN prefix) expect a bare v_sqrt_f32.
define float @v_sqrt_f32__unsafe_attr(float %x) #4 {
; GCN-LABEL: v_sqrt_f32__unsafe_attr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call nsz float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; <2 x float> sqrt with no fast-math flags and no !fpmath metadata.
; Each element is expanded on its own, and the four RUN configurations have
; separate expectations:
;   * the two IEEE configurations start from v_sqrt_f32, form the neighbouring
;     values with integer -1 / +1 adds on the result, and pick between them
;     using v_fma_f32 residuals compared against 0;
;   * the two DAZ configurations start from v_rsq_f32 and refine the estimate
;     with a chain of v_fma_f32.
; In every configuration an input that compares below 0xf800000 is first
; multiplied by 0x4f800000 and the result is then multiplied by 0x37800000;
; finally a v_cmp_class_f32 with mask 0x260 selects the (possibly scaled)
; input itself instead of the computed value.
define <2 x float> @v_sqrt_v2f32(<2 x float> %x) {
; SDAG-IEEE-LABEL: v_sqrt_v2f32:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s6, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v2, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2
; SDAG-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2
; SDAG-IEEE-NEXT: v_fma_f32 v2, -v4, v2, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v4, v1
; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v4
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v2, v4, v1
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v4
; SDAG-IEEE-NEXT: v_fma_f32 v4, -v5, v4, v1
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_v2f32:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v2, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v3
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v3
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v3, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; GISEL-IEEE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v1
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v2, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v2, v1
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5]
; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v3, v2, v1
; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v2
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v2, v1
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_v2f32:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v2, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, v0, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0.5, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v3, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v4, v3
; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v3, v0
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v4, v2
; SDAG-DAZ-NEXT: v_fma_f32 v2, v5, v2, v3
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x4f800000, v1
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v3, v1
; SDAG-DAZ-NEXT: v_mov_b32_e32 v4, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v1, v3
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3
; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v5, v2
; SDAG-DAZ-NEXT: v_fma_f32 v6, -v2, v2, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
; SDAG-DAZ-NEXT: v_fma_f32 v2, v6, v3, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v4
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_v2f32:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v2, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v3, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, v0, v3
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4
; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v4, v4, v0
; GISEL-DAZ-NEXT: v_fma_f32 v3, v5, v3, v4
; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v2, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v2, v1
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5]
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0.5, v2
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v2, v3, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v5, v2
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v2, v5, v2, v3
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v4
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %result
}
|
||
|
|
||
|
; <3 x float> sqrt with no fast-math flags and no !fpmath metadata.
; The same per-element expansion as the two-element test is expected three
; times (v0, v1, v2): v_sqrt_f32 plus integer -1 / +1 neighbour selection in
; the IEEE configurations, v_rsq_f32 plus v_fma_f32 refinement in the DAZ
; configurations, each wrapped in the 0xf800000 / 0x4f800000 / 0x37800000
; scaling and the 0x260 v_cmp_class_f32 select.
; In the DAZ expectations the third element's range compare is written to
; s[4:5] instead of vcc, because vcc still holds the second element's compare
; at that point in the schedule.
define <3 x float> @v_sqrt_v3f32(<3 x float> %x) {
; SDAG-IEEE-LABEL: v_sqrt_v3f32:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s6, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v3, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v3
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v3
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v5, v3, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v4, v5, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v1
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v5, v1
; SDAG-IEEE-NEXT: v_mov_b32_e32 v4, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v5
; SDAG-IEEE-NEXT: v_fma_f32 v6, -v3, v5, v1
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v5
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v6, v5, v1
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v5, 0x37800000, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v2
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v5, v2
; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v5
; SDAG-IEEE-NEXT: v_fma_f32 v6, -v3, v5, v2
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v5
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v6, v5, v2
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v5, 0x37800000, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v2, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_v3f32:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v3, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v4, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], -1, v4
; GISEL-IEEE-NEXT: v_fma_f32 v6, -v5, v4, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v4
; GISEL-IEEE-NEXT: v_fma_f32 v8, -v7, v4, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v8
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GISEL-IEEE-NEXT: v_mul_f32_e32 v6, 0x4f800000, v1
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v3, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v6, v1
; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v6
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v4, v6, v1
; GISEL-IEEE-NEXT: v_add_i32_e64 v8, s[4:5], 1, v6
; GISEL-IEEE-NEXT: v_fma_f32 v9, -v8, v6, v1
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v7
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v9
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v6, 0x37800000, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GISEL-IEEE-NEXT: v_mul_f32_e32 v6, 0x4f800000, v2
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v3, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v2
; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5]
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v3
; GISEL-IEEE-NEXT: v_fma_f32 v6, -v4, v3, v2
; GISEL-IEEE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v3
; GISEL-IEEE-NEXT: v_fma_f32 v8, -v7, v3, v2
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v8
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v2, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_v3f32:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s6, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v3, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, v0, v3
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3
; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4
; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v4, v0
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
; SDAG-DAZ-NEXT: v_fma_f32 v3, v6, v3, v4
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v4, v1
; SDAG-DAZ-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v5
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5]
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, v1, v4
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0.5, v4
; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v3, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v6, v3
; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v6, v4
; SDAG-DAZ-NEXT: v_mul_f32_e32 v6, 0x4f800000, v2
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 s[4:5], s6, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
; SDAG-DAZ-NEXT: v_fma_f32 v7, -v3, v3, v1
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v6, v2
; SDAG-DAZ-NEXT: v_fma_f32 v3, v7, v4, v3
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v5
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, v2, v6
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0.5, v6
; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v3, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v6, v3
; SDAG-DAZ-NEXT: v_fma_f32 v7, -v3, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v6, v4
; SDAG-DAZ-NEXT: v_fma_f32 v3, v7, v4, v3
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v2, v5
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_v3f32:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v3, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v4, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, v0, v4
; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0.5, v4
; GISEL-DAZ-NEXT: v_fma_f32 v6, -v4, v5, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v5, v5, v6, v5
; GISEL-DAZ-NEXT: v_fma_f32 v4, v4, v6, v4
; GISEL-DAZ-NEXT: v_fma_f32 v6, -v5, v5, v0
; GISEL-DAZ-NEXT: v_fma_f32 v4, v6, v4, v5
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x37800000, v4
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x4f800000, v1
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v3, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v5, v1
; GISEL-DAZ-NEXT: v_mov_b32_e32 v6, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v6
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, v1, v5
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0.5, v5
; GISEL-DAZ-NEXT: v_fma_f32 v7, -v5, v4, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v4, v4, v7, v4
; GISEL-DAZ-NEXT: v_fma_f32 v5, v5, v7, v5
; GISEL-DAZ-NEXT: v_fma_f32 v7, -v4, v4, v1
; GISEL-DAZ-NEXT: v_fma_f32 v4, v7, v5, v4
; GISEL-DAZ-NEXT: v_mul_f32_e32 v7, 0x4f800000, v2
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 s[4:5], v3, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5]
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v3, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x37800000, v4
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v6
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, v2, v3
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4
; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v4, v4, v2
; GISEL-DAZ-NEXT: v_fma_f32 v3, v5, v3, v4
; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v2, v6
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call <3 x float> @llvm.sqrt.v3f32(<3 x float> %x)
  ret <3 x float> %result
}
|
||
|
|
||
|
; fpmath should be ignored.
; The call carries !fpmath !0 and no fast-math flags.
; NOTE(review): !0 is defined outside this part of the file -- the test name
; suggests a 0.5 ulp bound; confirm against the metadata at the end of the file.
; All four configurations still expect the full expansion (input scaling,
; refinement after v_sqrt_f32 or v_rsq_f32, and the 0x260 class select) rather
; than a bare v_sqrt_f32.
define float @v_sqrt_f32_ulp05(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp05:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp05:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_ulp05:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_ulp05:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x), !fpmath !0
  ret float %result
}
|
||
|
|
||
|
; fpmath should be used with DAZ only.
; The call carries !fpmath !1 and no fast-math flags.
; NOTE(review): !1 is defined outside this part of the file -- the test name
; suggests a 1 ulp bound; confirm against the metadata at the end of the file.
; The two IEEE configurations still expect the full v_sqrt_f32-based expansion;
; both DAZ configurations share one expectation, a bare v_sqrt_f32.
define float @v_sqrt_f32_ulp1(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp1:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp1:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_ulp1:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x), !fpmath !1
  ret float %result
}
|
||
|
|
||
|
; fpmath should always be used.
; The call carries !fpmath !2 and no fast-math flags.
; NOTE(review): !2 is defined outside this part of the file -- the test name
; suggests a 2 ulp bound; confirm against the metadata at the end of the file.
; The two IEEE configurations expect the short form: compare the input against
; 0x800000, v_ldexp_f32 by 0 or 32 (the 0/1 select shifted left by 5),
; v_sqrt_f32, then v_ldexp_f32 by 0 or -16.
; Both DAZ configurations share one expectation, a bare v_sqrt_f32.
define float @v_sqrt_f32_ulp2(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_ulp2:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
  ret float %result
}
|
||
|
|
||
|
; fpmath should always be used.
; The call carries !fpmath !3 and no fast-math flags.
; NOTE(review): !3 is defined outside this part of the file -- the test name
; suggests a 2.5 ulp bound; confirm against the metadata at the end of the file.
; The two IEEE configurations expect the short form: compare the input against
; 0x800000, v_ldexp_f32 by 0 or 32 (the 0/1 select shifted left by 5),
; v_sqrt_f32, then v_ldexp_f32 by 0 or -16.
; Both DAZ configurations share one expectation, a bare v_sqrt_f32.
define float @v_sqrt_f32_ulp25(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp25:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp25:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_ulp25:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x), !fpmath !3
  ret float %result
}
|
||
|
|
||
|
; fpmath should always be used.
; The call carries !fpmath !4 and no fast-math flags.
; NOTE(review): !4 is defined outside this part of the file -- the test name
; suggests a 3 ulp bound; confirm against the metadata at the end of the file.
; The two IEEE configurations expect the short form: compare the input against
; 0x800000, v_ldexp_f32 by 0 or 32 (the 0/1 select shifted left by 5),
; v_sqrt_f32, then v_ldexp_f32 by 0 or -16.
; Both DAZ configurations share one expectation, a bare v_sqrt_f32.
define float @v_sqrt_f32_ulp3(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp3:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp3:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_ulp3:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x), !fpmath !4
  ret float %result
}
|
||
|
|
||
|
; sqrt(fabs(x)) tagged with !fpmath !2 (node defined outside this chunk;
; presumably 2.0 ulp -- TODO confirm).
; The fabs is expected to fold into |v0| source modifiers: on the compare and
; the first v_ldexp_f32 in the IEEE-mode sequences, and directly on
; v_sqrt_f32_e64 in the DAZ runs.
define float @v_sqrt_f32_ulp2_fabs(float %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_fabs:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v1
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_fabs:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v1
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_fabs:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e64 v0, |v0|
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%x.fabs = call float @llvm.fabs.f32(float %x)
|
||
|
%result = call float @llvm.sqrt.f32(float %x.fabs), !fpmath !2
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; <2 x float> sqrt tagged with !fpmath !1 (node defined outside this chunk;
; presumably 1.0 ulp -- TODO confirm).
; IEEE-mode runs expect the long expansion once per lane: inputs below
; 0xf800000 (2^-96) are pre-multiplied by 0x4f800000 (2^32), the v_sqrt_f32
; estimate is compared against its -1/+1 integer-adjacent neighbours using
; v_fma_f32 residuals, the pick is rescaled by 0x37800000 (2^-16) when the
; input was scaled, and v_cmp_class_f32 with mask 0x260 passes the (scaled)
; input through unchanged.
; DAZ runs expect one bare v_sqrt_f32 per lane.
define <2 x float> @v_sqrt_v2f32_ulp1(<2 x float> %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp1:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s6, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v2, v0
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v2, -v4, v2, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v4, v1
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v4
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v2, v4, v1
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v4
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v4, -v5, v4, v1
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp1:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v2, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v3
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v3
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v3, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v1
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v3, v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v2
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v4
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp1:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v1, v1
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1
|
||
|
ret <2 x float> %result
|
||
|
}
|
||
|
|
||
|
; fpmath should always be used
; <2 x float> sqrt tagged with !fpmath !2 (node defined outside this chunk;
; presumably 2.0 ulp -- TODO confirm).
; IEEE-mode runs expect, per lane, the short sequence: compare against
; 0x800000, ldexp(x, 32) when below it, v_sqrt_f32, then ldexp(r, -16).
; The two lanes are interleaved and keep their conditions in vcc and s[4:5].
; DAZ runs expect one bare v_sqrt_f32 per lane.
|
||
|
define <2 x float> @v_sqrt_v2f32_ulp2(<2 x float> %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v1, v1
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
|
||
|
ret <2 x float> %result
|
||
|
}
|
||
|
|
||
|
; <2 x float> sqrt(fabs(x)) tagged with !fpmath !1 (node defined outside this
; chunk; presumably 1.0 ulp -- TODO confirm).
; IEEE-mode runs expect the long per-lane expansion (2^32 pre-scale below
; 2^-96, v_sqrt_f32, -1/+1 neighbour checks with v_fma_f32, 2^-16 rescale,
; v_cmp_class_f32 mask 0x260 passthrough), with the fabs folded into |v0| and
; |v1| modifiers on the multiply, compare and select that prepare each input.
; DAZ runs expect v_sqrt_f32_e64 with the |...| modifier for each lane.
define <2 x float> @v_sqrt_v2f32_ulp1_fabs(<2 x float> %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp1_fabs:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s6, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s7, 0x4f800000
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e64 v2, |v0|, s7
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v2, v0
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v2, -v4, v2, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e64 v4, |v1|, s7
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s6
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, |v1|, v4, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v4, v1
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v4
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v2, v4, v1
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v4
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v4, -v5, v4, v1
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp1_fabs:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x4f800000
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e64 v4, |v0|, v3
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e64 vcc, v2, |v0|
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, |v0|, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v4, v0
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e64 v3, |v1|, v3
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], -1, v4
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v6, -v5, v4, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v4
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v8, -v7, v4, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v8
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e64 vcc, v2, |v1|
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, |v1|, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v2
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp1_fabs:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e64 v0, |v0|
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e64 v1, |v1|
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
|
||
|
%result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x.fabs), !fpmath !1
|
||
|
ret <2 x float> %result
|
||
|
}
|
||
|
|
||
|
; fpmath should always be used
; <2 x float> sqrt(fabs(x)) tagged with !fpmath !2 (node defined outside this
; chunk; presumably 2.0 ulp -- TODO confirm).
; IEEE-mode runs expect the short ldexp-scaled sequence per lane, with the
; fabs folded into |v0| / |v1| modifiers on the compare and the first
; v_ldexp_f32; lane conditions are kept in s[4:5] and s[6:7].
; DAZ runs expect v_sqrt_f32_e64 with the |...| modifier for each lane.
|
||
|
define <2 x float> @v_sqrt_v2f32_ulp2_fabs(<2 x float> %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_fabs:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s6, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s6
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[6:7], |v1|, s6
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[6:7]
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e64 v1, |v1|, v2
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[6:7]
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_fabs:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[6:7], |v1|, v2
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[6:7]
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v3
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v1, |v1|, v2
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[6:7]
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_fabs:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e64 v0, |v0|
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e64 v1, |v1|
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
|
||
|
%result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x.fabs), !fpmath !2
|
||
|
ret <2 x float> %result
|
||
|
}
|
||
|
|
||
|
; afn is stronger than the fpmath
; The call carries both the afn flag and !fpmath !1. All run configurations
; share the common GCN check prefix here and expect a single bare v_sqrt_f32
; with no scaling or refinement around it.
|
||
|
define float @v_sqrt_f32_afn_ulp1(float %x) {
|
||
|
; GCN-LABEL: v_sqrt_f32_afn_ulp1:
|
||
|
; GCN: ; %bb.0:
|
||
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call afn float @llvm.sqrt.f32(float %x), !fpmath !1
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; afn is stronger than the fpmath
; The call carries both the afn flag and !fpmath !2. All run configurations
; share the common GCN check prefix here and expect a single bare v_sqrt_f32;
; the IEEE-mode ldexp scaling is not expected when afn is present.
|
||
|
define float @v_sqrt_f32_afn_ulp2(float %x) {
|
||
|
; GCN-LABEL: v_sqrt_f32_afn_ulp2:
|
||
|
; GCN: ; %bb.0:
|
||
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call afn float @llvm.sqrt.f32(float %x), !fpmath !2
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; Vector form of the afn + !fpmath !1 case: every run configuration (common
; GCN check prefix) expects one bare v_sqrt_f32 per lane, i.e. the long
; IEEE-mode expansion is not expected when afn is present.
define <2 x float> @v_sqrt_v2f32_afn_ulp1(<2 x float> %x) {
|
||
|
; GCN-LABEL: v_sqrt_v2f32_afn_ulp1:
|
||
|
; GCN: ; %bb.0:
|
||
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-NEXT: v_sqrt_f32_e32 v1, v1
|
||
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1
|
||
|
ret <2 x float> %result
|
||
|
}
|
||
|
|
||
|
; afn is stronger than the fpmath
; Vector form of the afn + !fpmath !2 case: every run configuration (common
; GCN check prefix) expects one bare v_sqrt_f32 per lane. The same vector sqrt
; with !fpmath !2 but without afn expects ldexp scaling in IEEE mode, so the
; unscaled output here comes from afn rather than from the fpmath metadata.
|
||
|
define <2 x float> @v_sqrt_v2f32_afn_ulp2(<2 x float> %x) {
|
||
|
; GCN-LABEL: v_sqrt_v2f32_afn_ulp2:
|
||
|
; GCN: ; %bb.0:
|
||
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-NEXT: v_sqrt_f32_e32 v1, v1
|
||
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
|
||
|
ret <2 x float> %result
|
||
|
}
|
||
|
|
||
|
; 1.0 / sqrt(x) where only the sqrt call has the contract flag; the fdiv has
; !fpmath !3 and no fast-math flags. The sqrt uses !fpmath !4 (both nodes are
; defined outside this chunk -- NOTE(review): the test name says ulp2, confirm
; that !4 is the intended node).
; Expected output keeps a separate square root and reciprocal (no v_rsq_f32):
; IEEE-mode runs show the ldexp-scaled v_sqrt_f32 followed by a
; v_frexp_mant_f32 / v_rcp_f32 / v_frexp_exp_i32_f32 / v_ldexp_f32 reciprocal;
; DAZ runs show v_sqrt_f32 followed by v_rcp_f32.
define float @v_sqrt_f32_ulp2_noncontractable_rcp(float %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v1, v1
|
||
|
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v1, v0
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7f800000
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v1, v1
|
||
|
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v1, v0
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-DAZ-NEXT: v_rcp_f32_e32 v0, v0
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
|
||
|
%result = fdiv float 1.0, %sqrt, !fpmath !3
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; 1.0 / sqrt(x) where both the sqrt call (!fpmath !4) and the fdiv
; (!fpmath !3) carry the contract flag (metadata nodes are defined outside
; this chunk).
; Expected output fuses the pair into v_rsq_f32. IEEE-mode runs additionally
; multiply inputs below 0x800000 by 0x4b800000 (2^24) before v_rsq_f32 and
; multiply the result by 0x45800000 (2^12) afterwards; DAZ runs expect a bare
; v_rsq_f32.
define float @v_sqrt_f32_ulp2_contractable_rcp(float %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_rcp:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4b800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: v_rsq_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x45800000
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_rcp:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4b800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_rsq_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x45800000
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_rcp:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
|
||
|
%result = fdiv contract float 1.0, %sqrt, !fpmath !3
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; y / sqrt(x) where only the sqrt call (!fpmath !4) has the contract flag;
; the fdiv has !fpmath !3 and no fast-math flags (metadata nodes are defined
; outside this chunk).
; Expected output keeps the square root and the division separate (no
; v_rsq_f32). IEEE-mode runs show the ldexp-scaled v_sqrt_f32 followed by a
; division built from v_frexp_mant_f32, v_rcp_f32, v_frexp_exp_i32_f32 and a
; final v_ldexp_f32. DAZ runs show v_sqrt_f32 followed by a v_rcp_f32 based
; division whose denominator is pre-multiplied by 0x2f800000 (2^-32) when its
; magnitude exceeds 0x6f800000 (2^96), with the quotient multiplied by the
; same factor afterwards.
define float @v_sqrt_f32_ulp2_noncontractable_fdiv(float %x, float %y) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000
|
||
|
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v1
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4
|
||
|
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
|
||
|
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2
|
||
|
; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000
|
||
|
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v1
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3
|
||
|
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2
|
||
|
; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv:
|
||
|
; SDAG-DAZ: ; %bb.0:
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x6f800000
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x2f800000
|
||
|
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v0, v2
|
||
|
; SDAG-DAZ-NEXT: v_rcp_f32_e32 v0, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0
|
||
|
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv:
|
||
|
; GISEL-DAZ: ; %bb.0:
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x6f800000
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x2f800000
|
||
|
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v0, v2
|
||
|
; GISEL-DAZ-NEXT: v_rcp_f32_e32 v0, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0
|
||
|
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
|
||
|
%result = fdiv float %y, %sqrt, !fpmath !3
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; y / sqrt(x) where both the sqrt call (!fpmath !4) and the fdiv (!fpmath !3)
; carry the contract flag (metadata nodes are defined outside this chunk).
; Even with contract on both operations the expected output contains a
; separate v_sqrt_f32 and v_rcp_f32 in every configuration; no v_rsq_f32 is
; formed for a non-constant numerator. IEEE-mode runs use the ldexp-scaled
; sqrt plus the frexp/ldexp based division; DAZ runs use v_sqrt_f32 plus the
; v_rcp_f32 division with the 0x6f800000 / 0x2f800000 range scaling.
define float @v_sqrt_f32_ulp2_contractable_fdiv(float %x, float %y) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000
|
||
|
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v1
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4
|
||
|
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
|
||
|
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2
|
||
|
; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000
|
||
|
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v1
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3
|
||
|
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2
|
||
|
; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv:
|
||
|
; SDAG-DAZ: ; %bb.0:
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x6f800000
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x2f800000
|
||
|
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v0, v2
|
||
|
; SDAG-DAZ-NEXT: v_rcp_f32_e32 v0, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0
|
||
|
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv:
|
||
|
; GISEL-DAZ: ; %bb.0:
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x6f800000
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x2f800000
|
||
|
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v0, v2
|
||
|
; GISEL-DAZ-NEXT: v_rcp_f32_e32 v0, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0
|
||
|
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
|
||
|
%result = fdiv contract float %y, %sqrt, !fpmath !3
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; Scalar %y / sqrt(%x) where both the sqrt call and the fdiv carry `contract`
; and the fdiv additionally carries `arcp`.
; The sqrt uses !fpmath !4 and the fdiv !fpmath !3; both metadata nodes are
; defined outside this view (TODO confirm their accuracy bounds).
; Expected output, as recorded below:
;  - IEEE-denormal runs: the input is scaled with ldexp by 32 when it compares
;    below 0x800000, v_sqrt_f32 is applied, the result is rescaled by -16, and
;    the reciprocal is built from frexp_mant/rcp/frexp_exp/ldexp before the
;    final multiply by %y.
;  - preserve-sign runs: plain v_sqrt_f32, v_rcp_f32, v_mul_f32.
define float @v_sqrt_f32_ulp2_contractable_fdiv_arcp(float %x, float %y) {
; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0
; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v1, v0
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0
; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v1, v0
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: v_rcp_f32_e32 v0, v0
; GCN-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4
  %result = fdiv arcp contract float %y, %sqrt, !fpmath !3
  ret float %result
}
|
||
|
|
||
|
; <2 x float> reciprocal of sqrt where only the sqrt call carries `contract`;
; the fdiv of <1.0, 1.0> by the sqrt result has no fast-math flags.
; The sqrt uses !fpmath !4 and the fdiv !fpmath !3; both metadata nodes are
; defined outside this view (TODO confirm their accuracy bounds).
; Expected output, as recorded below:
;  - IEEE-denormal runs: per lane, a scaled v_sqrt_f32 (ldexp by 32 before,
;    by -16 after, selected by a compare against 0x800000) followed by a
;    frexp_mant/rcp/frexp_exp/ldexp reciprocal. No v_rsq_f32 is expected.
;  - preserve-sign runs: per lane, v_sqrt_f32 followed by v_rcp_f32.
define <2 x float> @v_sqrt_v2f32_ulp2_noncontractable_rcp(<2 x float> %x) {
; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5]
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; SDAG-IEEE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5]
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v1, v1
; GCN-DAZ-NEXT: v_rcp_f32_e32 v0, v0
; GCN-DAZ-NEXT: v_rcp_f32_e32 v1, v1
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
  %result = fdiv <2 x float> <float 1.0, float 1.0>, %sqrt, !fpmath !3
  ret <2 x float> %result
}
|
||
|
|
||
|
; <2 x float> reciprocal of sqrt where both the sqrt call and the fdiv of
; <1.0, 1.0> carry `contract`.
; The sqrt uses !fpmath !4 and the fdiv !fpmath !3; both metadata nodes are
; defined outside this view (TODO confirm their accuracy bounds).
; Expected output, as recorded below:
;  - IEEE-denormal runs: per lane, the input is multiplied by either 1.0 or
;    0x4b800000 (selected by a compare against 0x800000), v_rsq_f32 is applied,
;    and the result is multiplied by either 1.0 or 0x45800000.
;  - preserve-sign runs: a single v_rsq_f32 per lane.
define <2 x float> @v_sqrt_v2f32_ulp2_contractable_rcp(<2 x float> %x) {
; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x4b800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, 1.0, v2, vcc
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[4:5]
; SDAG-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2
; SDAG-IEEE-NEXT: v_rsq_f32_e32 v1, v1
; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, 0x45800000
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x4b800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
; GISEL-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: v_rsq_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x45800000
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0
; GCN-DAZ-NEXT: v_rsq_f32_e32 v1, v1
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
  %result = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt, !fpmath !3
  ret <2 x float> %result
}
|
||
|
|
||
|
; <2 x float> %y / sqrt(%x) where both the sqrt call and the fdiv carry
; `contract` (no `arcp`).
; The sqrt uses !fpmath !4 and the fdiv !fpmath !3; both metadata nodes are
; defined outside this view (TODO confirm their accuracy bounds).
; Expected output, as recorded below:
;  - IEEE-denormal runs: per lane, a scaled v_sqrt_f32 (ldexp by 32 before,
;    by -16 after) and then a division built from frexp_mant/frexp_exp of both
;    operands, v_rcp_f32, v_mul_f32 and a final ldexp by the exponent
;    difference.
;  - preserve-sign runs: per lane, v_sqrt_f32, then the divisor is multiplied
;    by either 1.0 or 0x2f800000 (selected by |sqrt| > 0x6f800000) before
;    v_rcp_f32, and the quotient is multiplied by the same factor again.
define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv(<2 x float> %x, <2 x float> %y) {
; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, s[4:5]
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v5, v2
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, s4
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v5, v2, v5, vcc
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v2, v2
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v4, v4
; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, v5, v4
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v4, v0
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v3
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, s4
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v3, v3
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, v4, v2
; SDAG-IEEE-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v5, 5, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v5
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, s[4:5]
; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, 0x7f800000
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v6, v2
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, v5
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v6, v2, v6, vcc
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v2, v2
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v4, v4
; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v2, v0
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, v6, v4
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v4, v0
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v3
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v3, v3
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, v4, v2
; GISEL-IEEE-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v1, v1
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x6f800000
; SDAG-DAZ-NEXT: v_mov_b32_e32 v4, 0x2f800000
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v4, 1.0, v4, vcc
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v0, v5
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, v1, v4
; SDAG-DAZ-NEXT: v_rcp_f32_e32 v0, v0
; SDAG-DAZ-NEXT: v_rcp_f32_e32 v1, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, v3, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v5, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, v4, v1
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v1, v1
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x6f800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v5, 0x2f800000
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v4
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, v4
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v4, 1.0, v5, vcc
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v0, v6
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, v1, v4
; GISEL-DAZ-NEXT: v_rcp_f32_e32 v0, v0
; GISEL-DAZ-NEXT: v_rcp_f32_e32 v1, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, v3, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v6, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, v4, v1
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
  %result = fdiv contract <2 x float> %y, %sqrt, !fpmath !3
  ret <2 x float> %result
}
|
||
|
|
||
|
; <2 x float> %y / sqrt(%x) where the sqrt call carries `contract` and the
; fdiv carries `arcp contract`.
; The sqrt uses !fpmath !4 and the fdiv !fpmath !3; both metadata nodes are
; defined outside this view (TODO confirm their accuracy bounds).
; Expected output, as recorded below:
;  - IEEE-denormal runs: per lane, a scaled v_sqrt_f32 (ldexp by 32 before,
;    by -16 after), a frexp_mant/rcp/frexp_exp/ldexp reciprocal, and a
;    v_mul_f32 by the matching lane of %y.
;  - preserve-sign runs: per lane, v_sqrt_f32, v_rcp_f32, v_mul_f32.
define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv_arcp(<2 x float> %x, <2 x float> %y) {
; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, s[4:5]
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v4, v4
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v4, v0
; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v2, v0
; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; SDAG-IEEE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, v3, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v5, 5, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v5
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, s[4:5]
; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, 0x7f800000
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v4, v4
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v4, v0
; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v2, v0
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2
; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1
; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, v3, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v1, v1
; GCN-DAZ-NEXT: v_rcp_f32_e32 v0, v0
; GCN-DAZ-NEXT: v_rcp_f32_e32 v1, v1
; GCN-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0
; GCN-DAZ-NEXT: v_mul_f32_e32 v1, v3, v1
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
  %result = fdiv arcp contract <2 x float> %y, %sqrt, !fpmath !3
  ret <2 x float> %result
}
|
||
|
|
||
|
; amdgpu_ps shader taking %x as an `inreg` argument: `afn` sqrt with
; !fpmath !1 (metadata defined outside this view; presumably a 1 ulp bound per
; the test name, TODO confirm).
; The result is bitcast to i32 and passed through llvm.amdgcn.readfirstlane.
; All four run configurations share one expectation: a single v_sqrt_f32
; reading s0, followed by v_readfirstlane_b32.
define amdgpu_ps i32 @s_sqrt_f32_ulp1(float inreg %x) {
; GCN-LABEL: s_sqrt_f32_ulp1:
; GCN: ; %bb.0:
; GCN-NEXT: v_sqrt_f32_e32 v0, s0
; GCN-NEXT: v_readfirstlane_b32 s0, v0
; GCN-NEXT: ; return to shader part epilog
  %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !1
  %cast = bitcast float %result to i32
  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
  ret i32 %firstlane
}
|
||
|
|
||
|
; amdgpu_ps shader taking %x as an `inreg` argument: `afn` sqrt with
; !fpmath !2 (metadata defined outside this view; presumably a 2 ulp bound per
; the test name, TODO confirm).
; The result is bitcast to i32 and passed through llvm.amdgcn.readfirstlane.
; All four run configurations share one expectation: a single v_sqrt_f32
; reading s0, followed by v_readfirstlane_b32.
define amdgpu_ps i32 @s_sqrt_f32_ulp2(float inreg %x) {
; GCN-LABEL: s_sqrt_f32_ulp2:
; GCN: ; %bb.0:
; GCN-NEXT: v_sqrt_f32_e32 v0, s0
; GCN-NEXT: v_readfirstlane_b32 s0, v0
; GCN-NEXT: ; return to shader part epilog
  %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !2
  %cast = bitcast float %result to i32
  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
  ret i32 %firstlane
}
|
||
|
|
||
|
; amdgpu_ps shader taking %x as an `inreg` argument: `afn` sqrt with
; !fpmath !4 (metadata defined outside this view; presumably a 3 ulp bound per
; the test name, TODO confirm that !4 rather than !3 is the intended node).
; The result is bitcast to i32 and passed through llvm.amdgcn.readfirstlane.
; All four run configurations share one expectation: a single v_sqrt_f32
; reading s0, followed by v_readfirstlane_b32.
define amdgpu_ps i32 @s_sqrt_f32_ulp3(float inreg %x) {
; GCN-LABEL: s_sqrt_f32_ulp3:
; GCN: ; %bb.0:
; GCN-NEXT: v_sqrt_f32_e32 v0, s0
; GCN-NEXT: v_readfirstlane_b32 s0, v0
; GCN-NEXT: ; return to shader part epilog
  %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !4
  %cast = bitcast float %result to i32
  %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
  ret i32 %firstlane
}
|
||
|
|
||
|
; sqrt with !fpmath !2 (metadata defined outside this view; presumably a 2 ulp
; bound per the test name, TODO confirm) on an argument marked
; nofpclass(psub). The call carries no fast-math flags.
; Expected output, as recorded below:
;  - IEEE-denormal runs: the input-scaling sequence is still present (compare
;    against 0x800000, ldexp by 32 before v_sqrt_f32, ldexp by -16 after).
;  - preserve-sign runs: a bare v_sqrt_f32.
define float @v_sqrt_f32_known_never_posdenormal_ulp2(float nofpclass(psub) %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
  ret float %result
}
|
||
|
|
||
|
; `nsz` sqrt with !fpmath !2 (metadata defined outside this view; presumably a
; 2 ulp bound per the test name, TODO confirm) on an argument marked
; nofpclass(psub).
; Expected output, as recorded below:
;  - IEEE-denormal runs: the input-scaling sequence is still present (compare
;    against 0x800000, ldexp by 32 before v_sqrt_f32, ldexp by -16 after).
;  - preserve-sign runs: a bare v_sqrt_f32.
define float @v_sqrt_f32_nsz_known_never_posdenormal_ulp2(float nofpclass(psub) %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call nsz float @llvm.sqrt.f32(float %x), !fpmath !2
  ret float %result
}
|
||
|
|
||
|
; sqrt with !fpmath !2 (metadata defined outside this view; TODO confirm its
; accuracy bound) on an argument marked nofpclass(nsub). The call carries no
; fast-math flags.
; Expected output, as recorded below:
;  - IEEE-denormal runs: the input-scaling sequence is still present (compare
;    against 0x800000, ldexp by 32 before v_sqrt_f32, ldexp by -16 after).
;  - preserve-sign runs: a bare v_sqrt_f32.
define float @v_sqrt_f32_known_never_negdenormal(float nofpclass(nsub) %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_negdenormal:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_negdenormal:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_known_never_negdenormal:
; GCN-DAZ: ; %bb.0:
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
  ret float %result
}
|
||
|
|
||
|
; sqrt with !fpmath !2 (metadata defined outside this view; TODO confirm its
; accuracy bound) on an argument marked nofpclass(sub). The call carries no
; fast-math flags.
; All four run configurations share one expectation: a bare v_sqrt_f32 with no
; input scaling.
define float @v_sqrt_f32_known_never_denormal(float nofpclass(sub) %x) {
; GCN-LABEL: v_sqrt_f32_known_never_denormal:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sqrt_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x), !fpmath !2
  ret float %result
}
|
||
|
|
||
|
; `ninf` sqrt with no !fpmath metadata on an argument marked nofpclass(zero).
; Expected output, as recorded below:
;  - IEEE-denormal runs: the input is multiplied by 0x4f800000 when it
;    compares below 0xf800000, v_sqrt_f32 is applied, the result is adjusted
;    by integer +/-1 steps selected from v_fma_f32 residuals, and the scaled
;    case is multiplied by 0x37800000.
;  - preserve-sign runs: the same input scaling, then v_rsq_f32 followed by a
;    chain of v_mul_f32/v_fma_f32 refinements and the same 0x37800000 rescale.
; NOTE(review): every variant still ends with v_cmp_class_f32 against mask
; 0x260 selecting the (possibly scaled) input over the computed result,
; despite `ninf` and nofpclass(zero). The mask presumably covers +/-0 and
; +inf; confirm against the ISA class-mask encoding.
define float @v_sqrt_f32_ninf_known_never_zero(float nofpclass(zero) %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32_ninf_known_never_zero:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_ninf_known_never_zero:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_ninf_known_never_zero:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_ninf_known_never_zero:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %result = call ninf float @llvm.sqrt.f32(float %x)
  ret float %result
}
|
||
|
|
||
|
; Plain llvm.sqrt.f32 (no fast-math flags, no !fpmath) on an argument marked
; nofpclass(zero). Checked for both instruction selectors under the default
; and the preserve-sign f32 denormal modes (see the RUN lines).
;  * Default-denormal runs expect v_sqrt_f32 on an input that is multiplied by
;    0x4f800000 (2^32) when it is below 0xf800000 (2^-96); the result's two
;    neighbours (bit pattern -1 / +1) are then tested with fma residuals, and
;    the result is rescaled by 0x37800000 (2^-16) when the input was scaled.
;  * Preserve-sign runs expect v_rsq_f32 refined by an fma sequence, with the
;    same input/result scaling.
; All four variants still end with a v_cmp_class_f32 (mask 0x260) select
; between the computed root and the input. The 0x260 mask presumably covers
; the +/-0 and +inf classes, in which case nofpclass(zero) does not simplify
; the expansion here - TODO confirm against the AMDGPU class-mask encoding.
define float @v_sqrt_f32_known_never_zero(float nofpclass(zero) %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero:
|
||
|
; SDAG-DAZ: ; %bb.0:
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero:
|
||
|
; GISEL-DAZ: ; %bb.0:
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call float @llvm.sqrt.f32(float %x)
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; Plain llvm.sqrt.f32 (no fast-math flags, no !fpmath) on an argument marked
; nofpclass(zero inf). Checked for both instruction selectors under the
; default and the preserve-sign f32 denormal modes (see the RUN lines).
;  * Default-denormal runs expect v_sqrt_f32 on a conditionally 2^32-scaled
;    input, a test of the result's -1 / +1 bit-pattern neighbours via fma
;    residuals, and a 2^-16 rescale of the result.
;  * Preserve-sign runs expect v_rsq_f32 refined by an fma sequence, with the
;    same scaling.
; All four variants still end with a v_cmp_class_f32 (mask 0x260) select
; between the computed root and the input. The 0x260 mask presumably covers
; the +/-0 and +inf classes, which this argument is declared never to be, so
; the select looks removable - TODO confirm whether this records a missed
; optimization.
define float @v_sqrt_f32_known_never_zero_never_inf(float nofpclass(zero inf) %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_inf:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_inf:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_inf:
|
||
|
; SDAG-DAZ: ; %bb.0:
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_inf:
|
||
|
; GISEL-DAZ: ; %bb.0:
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call float @llvm.sqrt.f32(float %x)
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; Plain llvm.sqrt.f32 (no fast-math flags, no !fpmath) on an argument marked
; nofpclass(zero ninf). Checked for both instruction selectors under the
; default and the preserve-sign f32 denormal modes (see the RUN lines).
;  * Default-denormal runs expect v_sqrt_f32 on a conditionally 2^32-scaled
;    input, a test of the result's -1 / +1 bit-pattern neighbours via fma
;    residuals, and a 2^-16 rescale of the result.
;  * Preserve-sign runs expect v_rsq_f32 refined by an fma sequence, with the
;    same scaling.
; All four variants end with a v_cmp_class_f32 (mask 0x260) select between
; the computed root and the input. The 0x260 mask presumably covers the +/-0
; and +inf classes; +inf is still possible for this argument, so the select
; would remain necessary - TODO confirm the mask encoding.
define float @v_sqrt_f32_known_never_zero_never_ninf(float nofpclass(zero ninf) %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf:
|
||
|
; SDAG-DAZ: ; %bb.0:
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf:
|
||
|
; GISEL-DAZ: ; %bb.0:
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call float @llvm.sqrt.f32(float %x)
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; Plain llvm.sqrt.f32 (no fast-math flags, no !fpmath) on an argument marked
; nofpclass(zero pinf). Checked for both instruction selectors under the
; default and the preserve-sign f32 denormal modes (see the RUN lines).
;  * Default-denormal runs expect v_sqrt_f32 on a conditionally 2^32-scaled
;    input, a test of the result's -1 / +1 bit-pattern neighbours via fma
;    residuals, and a 2^-16 rescale of the result.
;  * Preserve-sign runs expect v_rsq_f32 refined by an fma sequence, with the
;    same scaling.
; All four variants still end with a v_cmp_class_f32 (mask 0x260) select
; between the computed root and the input. The 0x260 mask presumably covers
; the +/-0 and +inf classes, which this argument is declared never to be, so
; the select looks removable - TODO confirm whether this records a missed
; optimization.
define float @v_sqrt_f32_known_never_zero_never_pinf(float nofpclass(zero pinf) %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_pinf:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_pinf:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_pinf:
|
||
|
; SDAG-DAZ: ; %bb.0:
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_pinf:
|
||
|
; GISEL-DAZ: ; %bb.0:
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call float @llvm.sqrt.f32(float %x)
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; Plain llvm.sqrt.f32 whose operand is element 0 (the fraction) of
; llvm.frexp.f32.i32. The checks use the per-selector prefixes that the RUN
; lines share between the default and preserve-sign denormal runs, so both
; modes are expected to emit identical code:
;  * the frexp fraction is formed with v_frexp_mant_f32 plus a select on
;    |x| < 0x7f800000 (the +inf bit pattern), which keeps the original input
;    when that compare fails;
;  * the sqrt uses the v_rsq_f32 + fma refinement with the usual 2^32 input
;    scaling (threshold 0xf800000), 2^-16 result rescale and the final
;    v_cmp_class_f32 (mask 0x260) select.
; Presumably the rsq-based form is usable in the default-denormal runs too
; because a frexp fraction is never denormal - TODO confirm against the
; lowering code.
define float @v_sqrt_f32_frexp_src(float %x) {
|
||
|
; SDAG-LABEL: v_sqrt_f32_frexp_src:
|
||
|
; SDAG: ; %bb.0:
|
||
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-NEXT: s_mov_b32 s4, 0x7f800000
|
||
|
; SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0
|
||
|
; SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
|
||
|
; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; SDAG-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; SDAG-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; SDAG-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; SDAG-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; SDAG-NEXT: v_fma_f32 v4, -v2, v2, v0
|
||
|
; SDAG-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; SDAG-NEXT: v_fma_f32 v1, v4, v1, v2
|
||
|
; SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-LABEL: v_sqrt_f32_frexp_src:
|
||
|
; GISEL: ; %bb.0:
|
||
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
|
||
|
; GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0
|
||
|
; GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
|
||
|
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||
|
; GISEL-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
|
||
|
; GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
|
||
|
; GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; GISEL-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||
|
%frexp = call { float, i32 } @llvm.frexp.f32.i32(float %x)
|
||
|
%frexp.mant = extractvalue { float, i32 } %frexp, 0
|
||
|
%result = call float @llvm.sqrt.f32(float %frexp.mant)
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; llvm.sqrt.f32 carrying !fpmath !4, applied to element 0 (the fraction) of
; llvm.frexp.f32.i32. Metadata node !4 is defined outside this view;
; presumably it allows 3 ulp of error, per the function name - TODO confirm.
; The checks use the per-selector prefixes shared by the default and
; preserve-sign denormal runs, so both modes are expected to emit the same
; code: the frexp fraction (v_frexp_mant_f32 plus a select on
; |x| < 0x7f800000) is fed straight into a single v_sqrt_f32, with no input
; scaling and no refinement sequence.
define float @v_sqrt_f32_ulp3_frexp_src(float %x) {
|
||
|
; SDAG-LABEL: v_sqrt_f32_ulp3_frexp_src:
|
||
|
; SDAG: ; %bb.0:
|
||
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-NEXT: s_mov_b32 s4, 0x7f800000
|
||
|
; SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0
|
||
|
; SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
|
||
|
; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-LABEL: v_sqrt_f32_ulp3_frexp_src:
|
||
|
; GISEL: ; %bb.0:
|
||
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
|
||
|
; GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0
|
||
|
; GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
|
||
|
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; GISEL-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||
|
%frexp = call { float, i32 } @llvm.frexp.f32.i32(float %x)
|
||
|
%frexp.mant = extractvalue { float, i32 } %frexp, 0
|
||
|
%result = call float @llvm.sqrt.f32(float %frexp.mant), !fpmath !4
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; llvm.sqrt.f32 carrying !fpmath !2 on an argument marked
; nofpclass(zero ninf). Metadata node !2 is defined outside this view;
; presumably it allows 2 ulp of error, per the function name - TODO confirm.
;  * Default-denormal runs expect a single v_sqrt_f32 wrapped in ldexp
;    scaling: inputs below 0x800000 (bit pattern of the smallest normal f32)
;    are scaled by 2^32 (exponent built as 1 << 5) before the sqrt and the
;    result is scaled by 2^-16 afterwards; other inputs use exponent 0.
;  * Preserve-sign runs of both selectors share one check block and expect a
;    bare v_sqrt_f32 with no scaling.
define float @v_sqrt_f32_known_never_zero_never_ninf_ulp2(float nofpclass(zero ninf) %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call float @llvm.sqrt.f32(float %x), !fpmath !2
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; llvm.sqrt.f32 carrying !fpmath !2 on an argument marked nofpclass(ninf)
; only. Metadata node !2 is defined outside this view; presumably it allows
; 2 ulp of error, per the function name - TODO confirm.
;  * Default-denormal runs expect a single v_sqrt_f32 wrapped in ldexp
;    scaling: inputs below 0x800000 (bit pattern of the smallest normal f32)
;    are scaled by 2^32 (exponent built as 1 << 5) before the sqrt and the
;    result is scaled by 2^-16 afterwards; other inputs use exponent 0.
;  * Preserve-sign runs of both selectors share one check block and expect a
;    bare v_sqrt_f32 with no scaling.
define float @v_sqrt_f32_known_never_ninf_ulp2(float nofpclass(ninf) %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_ninf_ulp2:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_ninf_ulp2:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_f32_known_never_ninf_ulp2:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call float @llvm.sqrt.f32(float %x), !fpmath !2
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
; llvm.sqrt.f32 carrying the nsz fast-math flag and !fpmath !2, on an
; argument marked nofpclass(ninf). Metadata node !2 is defined outside this
; view; presumably it allows 2 ulp of error, per the function name - TODO
; confirm.
;  * Default-denormal runs expect a single v_sqrt_f32 wrapped in ldexp
;    scaling: inputs below 0x800000 (bit pattern of the smallest normal f32)
;    are scaled by 2^32 (exponent built as 1 << 5) before the sqrt and the
;    result is scaled by 2^-16 afterwards; other inputs use exponent 0.
;  * Preserve-sign runs of both selectors share one check block and expect a
;    bare v_sqrt_f32 with no scaling.
define float @v_sqrt_f32_nsz_known_never_ninf_ulp2(float nofpclass(ninf) %x) {
|
||
|
; SDAG-IEEE-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
|
||
|
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
|
||
|
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GCN-DAZ-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2:
|
||
|
; GCN-DAZ: ; %bb.0:
|
||
|
; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
|
||
|
; GCN-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%result = call nsz float @llvm.sqrt.f32(float %x), !fpmath !2
|
||
|
ret float %result
|
||
|
}
|
||
|
|
||
|
define float @v_elim_redun_check_ult_sqrt(float %in) {
|
||
|
; SDAG-IEEE-LABEL: v_elim_redun_check_ult_sqrt:
|
||
|
; SDAG-IEEE: ; %bb.0:
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: v_elim_redun_check_ult_sqrt:
|
||
|
; GISEL-IEEE: ; %bb.0:
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v0, v2, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v2
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v6, -v5, v2, v1
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v6
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v2, 1
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7fc00000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, v0, v2
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: v_elim_redun_check_ult_sqrt:
|
||
|
; SDAG-DAZ: ; %bb.0:
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
|
||
|
; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||
|
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: v_elim_redun_check_ult_sqrt:
|
||
|
; GISEL-DAZ: ; %bb.0:
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
|
||
|
; GISEL-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v0, v2, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v2, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, v1, v2
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0.5, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v4, -v2, v3, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v4, v3
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v4, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v4, -v3, v3, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v2, v4, v2, v3
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x260
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v3
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||
|
; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v2, 1
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x7fc00000
|
||
|
; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, v0, v2
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
|
||
|
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
|
||
|
%sqrt = call float @llvm.sqrt.f32(float %in)
|
||
|
%cmp = fcmp ult float %in, -0.000000e+00
|
||
|
%res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
|
||
|
ret float %res
|
||
|
}
|
||
|
|
||
|
; Returns NaN when %in compares unordered-or-less-than -0.0, otherwise
; llvm.sqrt.f32(%in). The sqrt call carries !fpmath !4, which is defined
; elsewhere in this file (presumably a 3 ulp accuracy bound, going by the
; function name -- confirm against the metadata node).
;
; What the expected output below shows:
;  * All four configurations keep the explicit compare against -0.0
;    (0x80000000, materialized by bit-reversing 1) and the select of the
;    NaN constant 0x7fc00000; the range check is not folded away here.
;  * The runs without -denormal-fp-math-f32=preserve-sign scale inputs
;    below 0x800000 by 2^32 with ldexp before v_sqrt_f32 and rescale the
;    result by 2^-16 afterwards.
;  * The preserve-sign runs feed the input straight to v_sqrt_f32.
;
; Keep free-form comments here free of any check prefix followed by a
; colon, otherwise FileCheck will parse them as directives.
define float @v_elim_redun_check_ult_sqrt_ulp3(float %in) {
; SDAG-IEEE-LABEL: v_elim_redun_check_ult_sqrt_ulp3:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000
; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v0, v1
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
; SDAG-IEEE-NEXT: s_brev_b32 s4, 1
; SDAG-IEEE-NEXT: v_cmp_le_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_elim_redun_check_ult_sqrt_ulp3:
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v2, 1
; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_elim_redun_check_ult_sqrt_ulp3:
; SDAG-DAZ: ; %bb.0:
; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-DAZ-NEXT: s_brev_b32 s4, 1
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; SDAG-DAZ-NEXT: v_cmp_le_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_elim_redun_check_ult_sqrt_ulp3:
; GISEL-DAZ: ; %bb.0:
; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v2, 1
; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31]
  %sqrt = call float @llvm.sqrt.f32(float %in), !fpmath !4
  %cmp = fcmp ult float %in, -0.000000e+00
  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
  ret float %res
}
|
||
|
|
||
|
; Kernel: stores NaN to %out when %in is ordered-less-than -0.0, otherwise
; stores llvm.sqrt.f32(%in).
;
; What the expected output below shows:
;  * SelectionDAG (both denormal modes): no compare against -0.0 and no
;    0x7fc00000 constant are emitted; the value stored is the result of the
;    sqrt expansion itself, i.e. the guarding select has been eliminated.
;  * GlobalISel (both denormal modes): the compare (v_cmp_lt_f32 against
;    bit-reversed 1 == 0x80000000) and the select of 0x7fc00000 are still
;    emitted after the sqrt expansion.
;
; Keep free-form comments here free of any check prefix followed by a
; colon, otherwise FileCheck will parse them as directives.
define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %in) {
; SDAG-IEEE-LABEL: elim_redun_check_neg0:
; SDAG-IEEE: ; %bb.0: ; %entry
; SDAG-IEEE-NEXT: s_load_dword s2, s[0:1], 0xb
; SDAG-IEEE-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000
; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s2
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1]
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: s_mov_b32 s6, -1
; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SDAG-IEEE-NEXT: s_endpgm
;
; GISEL-IEEE-LABEL: elim_redun_check_neg0:
; GISEL-IEEE: ; %bb.0: ; %entry
; GISEL-IEEE-NEXT: s_load_dword s2, s[0:1], 0xb
; GISEL-IEEE-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GISEL-IEEE-NEXT: s_mov_b32 s6, -1
; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s2
; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v1, 1
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GISEL-IEEE-NEXT: s_endpgm
;
; SDAG-DAZ-LABEL: elim_redun_check_neg0:
; SDAG-DAZ: ; %bb.0: ; %entry
; SDAG-DAZ-NEXT: s_load_dword s2, s[0:1], 0xb
; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-DAZ-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SDAG-DAZ-NEXT: s_mov_b32 s3, 0xf000
; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s2
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: s_mov_b32 s2, -1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SDAG-DAZ-NEXT: s_endpgm
;
; GISEL-DAZ-LABEL: elim_redun_check_neg0:
; GISEL-DAZ: ; %bb.0: ; %entry
; GISEL-DAZ-NEXT: s_load_dword s2, s[0:1], 0xb
; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GISEL-DAZ-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GISEL-DAZ-NEXT: s_mov_b32 s3, 0xf000
; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v1, 1
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: s_mov_b32 s2, -1
; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GISEL-DAZ-NEXT: s_endpgm
entry:
  %sqrt = call float @llvm.sqrt.f32(float %in)
  %cmp = fcmp olt float %in, -0.000000e+00
  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
  store float %res, ptr addrspace(1) %out
  ret void
}
|
||
|
|
||
|
; Kernel: stores NaN to %out when %in is ordered-less-than +0.0, otherwise
; stores llvm.sqrt.f32(%in). Same shape as the -0.0 variant of this test,
; but with a positive-zero threshold.
;
; What the expected output below shows:
;  * SelectionDAG (both denormal modes): no compare against zero and no
;    0x7fc00000 constant are emitted; the value stored is the result of the
;    sqrt expansion itself, i.e. the guarding select has been eliminated.
;  * GlobalISel (both denormal modes): the compare (v_cmp_lt_f32 with an
;    inline 0 operand) and the select of 0x7fc00000 are still emitted
;    after the sqrt expansion.
;
; Keep free-form comments here free of any check prefix followed by a
; colon, otherwise FileCheck will parse them as directives.
define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %in) {
; SDAG-IEEE-LABEL: elim_redun_check_pos0:
; SDAG-IEEE: ; %bb.0: ; %entry
; SDAG-IEEE-NEXT: s_load_dword s2, s[0:1], 0xb
; SDAG-IEEE-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000
; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s2
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1]
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: s_mov_b32 s6, -1
; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SDAG-IEEE-NEXT: s_endpgm
;
; GISEL-IEEE-LABEL: elim_redun_check_pos0:
; GISEL-IEEE: ; %bb.0: ; %entry
; GISEL-IEEE-NEXT: s_load_dword s2, s[0:1], 0xb
; GISEL-IEEE-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GISEL-IEEE-NEXT: s_mov_b32 s6, -1
; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s2
; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, s2, 0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GISEL-IEEE-NEXT: s_endpgm
;
; SDAG-DAZ-LABEL: elim_redun_check_pos0:
; SDAG-DAZ: ; %bb.0: ; %entry
; SDAG-DAZ-NEXT: s_load_dword s2, s[0:1], 0xb
; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-DAZ-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SDAG-DAZ-NEXT: s_mov_b32 s3, 0xf000
; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s2
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: s_mov_b32 s2, -1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SDAG-DAZ-NEXT: s_endpgm
;
; GISEL-DAZ-LABEL: elim_redun_check_pos0:
; GISEL-DAZ: ; %bb.0: ; %entry
; GISEL-DAZ-NEXT: s_load_dword s2, s[0:1], 0xb
; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GISEL-DAZ-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GISEL-DAZ-NEXT: s_mov_b32 s3, 0xf000
; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, s2, 0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-DAZ-NEXT: s_mov_b32 s2, -1
; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GISEL-DAZ-NEXT: s_endpgm
entry:
  %sqrt = call float @llvm.sqrt.f32(float %in)
  %cmp = fcmp olt float %in, 0.000000e+00
  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
  store float %res, ptr addrspace(1) %out
  ret void
}
|
||
|
|
||
|
; Kernel: stores NaN to %out when %in compares unordered-or-less-than -0.0
; (so a NaN input also takes the NaN arm), otherwise stores
; llvm.sqrt.f32(%in).
;
; What the expected output below shows:
;  * SelectionDAG (both denormal modes): no compare against -0.0 and no
;    0x7fc00000 constant are emitted; the value stored is the result of the
;    sqrt expansion itself, i.e. the guarding select has been eliminated.
;  * GlobalISel (both denormal modes): the compare (v_cmp_nge_f32 against
;    bit-reversed 1 == 0x80000000) and the select of 0x7fc00000 are still
;    emitted after the sqrt expansion.
;
; Keep free-form comments here free of any check prefix followed by a
; colon, otherwise FileCheck will parse them as directives.
define amdgpu_kernel void @elim_redun_check_ult(ptr addrspace(1) %out, float %in) {
; SDAG-IEEE-LABEL: elim_redun_check_ult:
; SDAG-IEEE: ; %bb.0: ; %entry
; SDAG-IEEE-NEXT: s_load_dword s2, s[0:1], 0xb
; SDAG-IEEE-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000
; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s2
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1]
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT: s_mov_b32 s6, -1
; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1
; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SDAG-IEEE-NEXT: s_endpgm
;
; GISEL-IEEE-LABEL: elim_redun_check_ult:
; GISEL-IEEE: ; %bb.0: ; %entry
; GISEL-IEEE-NEXT: s_load_dword s2, s[0:1], 0xb
; GISEL-IEEE-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GISEL-IEEE-NEXT: s_mov_b32 s6, -1
; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s2
; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000
; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1]
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1]
; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v1, 1
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, s2, v1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GISEL-IEEE-NEXT: s_endpgm
;
; SDAG-DAZ-LABEL: elim_redun_check_ult:
; SDAG-DAZ: ; %bb.0: ; %entry
; SDAG-DAZ-NEXT: s_load_dword s2, s[0:1], 0xb
; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
; SDAG-DAZ-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SDAG-DAZ-NEXT: s_mov_b32 s3, 0xf000
; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s2
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT: s_mov_b32 s2, -1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SDAG-DAZ-NEXT: s_endpgm
;
; GISEL-DAZ-LABEL: elim_redun_check_ult:
; GISEL-DAZ: ; %bb.0: ; %entry
; GISEL-DAZ-NEXT: s_load_dword s2, s[0:1], 0xb
; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GISEL-DAZ-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GISEL-DAZ-NEXT: s_mov_b32 s3, 0xf000
; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v1, 1
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, s2, v1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT: s_mov_b32 s2, -1
; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GISEL-DAZ-NEXT: s_endpgm
entry:
  %sqrt = call float @llvm.sqrt.f32(float %in)
  %cmp = fcmp ult float %in, -0.000000e+00
  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
  store float %res, ptr addrspace(1) %out
  ret void
}
|
||
|
|
||
|
define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float> %in) {
|
||
|
; SDAG-IEEE-LABEL: elim_redun_check_v2:
|
||
|
; SDAG-IEEE: ; %bb.0: ; %entry
|
||
|
; SDAG-IEEE-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s6, -1
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, s11, v1
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, s11
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1]
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v3, v2
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s10, v1
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, s8
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s5, s9
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e32 v4, vcc, -1, v3
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v2
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v5
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e32 v5, vcc, 1, v3
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v5, v3, v2
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v5, s10
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v5, v1, s[0:1]
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v5, v0
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v4, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v2, v4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v5
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v5, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v5
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v3, v5, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v5
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||
|
; SDAG-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||
|
; SDAG-IEEE-NEXT: s_endpgm
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: elim_redun_check_v2:
|
||
|
; GISEL-IEEE: ; %bb.0: ; %entry
|
||
|
; GISEL-IEEE-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s6
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, s6, v1
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v2
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s7, v1
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], -1, v3
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[0:1], 1, v3
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v6, -v4, v3, v2
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v5, v3, v2
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v6
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v7
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, s7
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v4
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[0:1], -1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v3, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[0:1], 1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v7
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x80000000
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x7fc00000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s7, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: s_mov_b32 s6, -1
|
||
|
; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000
|
||
|
; GISEL-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||
|
; GISEL-IEEE-NEXT: s_endpgm
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: elim_redun_check_v2:
|
||
|
; SDAG-DAZ: ; %bb.0: ; %entry
|
||
|
; SDAG-DAZ-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s7, 0xf000
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s6, -1
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, s3, v1
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v3, s3
|
||
|
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, s0
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s5, s1
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, v2, v3
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v4, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, v6, v3, v4
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v4, s2
|
||
|
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v4, v0
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v5, 0x260
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v5
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v1, v3, v2, s[0:1]
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v4
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v4
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v3, v2, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v4, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v6, -v2, v2, v0
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v4, v3
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v6, v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v5
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||
|
; SDAG-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||
|
; SDAG-DAZ-NEXT: s_endpgm
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: elim_redun_check_v2:
|
||
|
; GISEL-DAZ: ; %bb.0: ; %entry
|
||
|
; GISEL-DAZ-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s6
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, s6, v1
|
||
|
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v3, v2
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, s7
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s7, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, v2, v3
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v6, -v3, v5, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v5, v5, v6, v5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v6, v3
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v6, -v5, v5, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, v6, v3, v5
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x37800000, v3
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
|
||
|
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x260
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v4
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1]
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, v0, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v1, v3, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v3, v0
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v5, v1, v3
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v1
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v4
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x80000000
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x7fc00000
|
||
|
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
|
||
|
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s7, v3
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
|
||
|
; GISEL-DAZ-NEXT: s_mov_b32 s6, -1
|
||
|
; GISEL-DAZ-NEXT: s_mov_b32 s7, 0xf000
|
||
|
; GISEL-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||
|
; GISEL-DAZ-NEXT: s_endpgm
|
||
|
entry:
|
||
|
%sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
|
||
|
%cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
|
||
|
%res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
|
||
|
store <2 x float> %res, ptr addrspace(1) %out
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
; Kernel test: <2 x float> sqrt guarded by an unordered "less than -0.0" check.
;
; The IR at the bottom of this function computes sqrt(%in), compares %in
; against -0.0 with `fcmp ult` (true when an element is less than -0.0 or is
; unordered, i.e. NaN), selects a quiet NaN for elements where the compare is
; true and the sqrt result otherwise, and stores the vector to %out.
; The NaN constant is written in LLVM's 64-bit hex float notation
; (0x7FF8000000000000); the GlobalISel checks below materialize it as the f32
; bit pattern 0x7fc00000.
;
; What the recorded output shows:
;  * SelectionDAG (both denormal modes): no compare against 0x80000000 and no
;    select of 0x7fc00000 is emitted, so the guard the test name calls
;    redundant does not appear in the final code.
;  * GlobalISel (both denormal modes): the guard is still present as
;    v_cmp_nge_f32 against 0x80000000 followed by v_cndmask_b32 with
;    0x7fc00000, once per vector element.
;  * The IEEE runs expand sqrt around v_sqrt_f32, the preserve-sign runs
;    around v_rsq_f32 with an FMA refinement sequence.
;
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script rather than editing them by hand.
define amdgpu_kernel void @elim_redun_check_v2_ult(ptr addrspace(1) %out, <2 x float> %in) {
|
||
|
; SDAG-IEEE-LABEL: elim_redun_check_v2_ult:
|
||
|
; SDAG-IEEE: ; %bb.0: ; %entry
|
||
|
; SDAG-IEEE-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s6, -1
|
||
|
; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, s11, v1
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, s11
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1]
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v3, v2
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s10, v1
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s4, s8
|
||
|
; SDAG-IEEE-NEXT: s_mov_b32 s5, s9
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e32 v4, vcc, -1, v3
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v2
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v5
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e32 v5, vcc, 1, v3
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v5, v3, v2
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v5, s10
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v5, v1, s[0:1]
|
||
|
; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v5, v0
|
||
|
; SDAG-IEEE-NEXT: v_mov_b32_e32 v4, 0x260
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v2, v4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v5
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v5, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
|
||
|
; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v5
|
||
|
; SDAG-IEEE-NEXT: v_fma_f32 v5, -v3, v5, v0
|
||
|
; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v5
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
|
||
|
; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4
|
||
|
; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||
|
; SDAG-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||
|
; SDAG-IEEE-NEXT: s_endpgm
|
||
|
;
|
||
|
; GISEL-IEEE-LABEL: elim_redun_check_v2_ult:
|
||
|
; GISEL-IEEE: ; %bb.0: ; %entry
|
||
|
; GISEL-IEEE-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000
|
||
|
; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s6
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, s6, v1
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v2
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s7, v1
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], -1, v3
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[0:1], 1, v3
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v6, -v4, v3, v2
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v5, v3, v2
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v6
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v7
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, s7
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
|
||
|
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x260
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v4
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[0:1], -1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v5, -v3, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[0:1], 1, v1
|
||
|
; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v1, v0
|
||
|
; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v5
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v7
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1]
|
||
|
; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v1
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||
|
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x80000000
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
|
||
|
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x7fc00000
|
||
|
; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, s6, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, s7, v3
|
||
|
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
|
||
|
; GISEL-IEEE-NEXT: s_mov_b32 s6, -1
|
||
|
; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000
|
||
|
; GISEL-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||
|
; GISEL-IEEE-NEXT: s_endpgm
|
||
|
;
|
||
|
; SDAG-DAZ-LABEL: elim_redun_check_v2_ult:
|
||
|
; SDAG-DAZ: ; %bb.0: ; %entry
|
||
|
; SDAG-DAZ-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s7, 0xf000
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s6, -1
|
||
|
; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, s3, v1
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v3, s3
|
||
|
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s4, s0
|
||
|
; SDAG-DAZ-NEXT: s_mov_b32 s5, s1
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, v2, v3
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v4, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, v6, v3, v4
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v4, s2
|
||
|
; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||
|
; SDAG-DAZ-NEXT: v_rsq_f32_e32 v4, v0
|
||
|
; SDAG-DAZ-NEXT: v_mov_b32_e32 v5, 0x260
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v5
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v1, v3, v2, s[0:1]
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v4
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v4
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v4, -v3, v2, 0.5
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v4, v2
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v6, -v2, v2, v0
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v4, v3
|
||
|
; SDAG-DAZ-NEXT: v_fma_f32 v2, v6, v3, v2
|
||
|
; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v5
|
||
|
; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||
|
; SDAG-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||
|
; SDAG-DAZ-NEXT: s_endpgm
|
||
|
;
|
||
|
; GISEL-DAZ-LABEL: elim_redun_check_v2_ult:
|
||
|
; GISEL-DAZ: ; %bb.0: ; %entry
|
||
|
; GISEL-DAZ-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000
|
||
|
; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s6
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, s6, v1
|
||
|
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v3, v2
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, s7
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s7, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, v2, v3
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v6, -v3, v5, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v5, v5, v6, v5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v6, v3
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v6, -v5, v5, v2
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, v6, v3, v5
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x37800000, v3
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
|
||
|
; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||
|
; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x260
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v4
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1]
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, v0, v1
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v1, v3, 0.5
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v5, v1
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v3, v0
|
||
|
; GISEL-DAZ-NEXT: v_fma_f32 v1, v5, v1, v3
|
||
|
; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v1
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||
|
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v4
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x80000000
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
|
||
|
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x7fc00000
|
||
|
; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, s6, v3
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
|
||
|
; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, s7, v3
|
||
|
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
|
||
|
; GISEL-DAZ-NEXT: s_mov_b32 s6, -1
|
||
|
; GISEL-DAZ-NEXT: s_mov_b32 s7, 0xf000
|
||
|
; GISEL-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||
|
; GISEL-DAZ-NEXT: s_endpgm
|
||
|
entry:
|
||
|
%sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
|
||
|
%cmp = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
|
||
|
%res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
|
||
|
store <2 x float> %res, ptr addrspace(1) %out
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
; Intrinsic declarations for the tests in this file. @llvm.sqrt.v2f32 is
; called by the elim_redun_check_v2* kernels; the remaining intrinsics are
; presumably called by tests earlier in the file (verify before removing any).
; The trailing #N on each line selects one of the attribute groups defined
; further down.
declare float @llvm.fabs.f32(float) #0
|
||
|
declare float @llvm.sqrt.f32(float) #0
|
||
|
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
|
||
|
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
|
||
|
declare <3 x float> @llvm.sqrt.v3f32(<3 x float>) #0
|
||
|
declare i32 @llvm.amdgcn.readfirstlane(i32) #1
|
||
|
|
||
|
declare { float, i32 } @llvm.frexp.f32.i32(float) #0
|
||
|
|
||
|
; Attribute groups.
;   #0 and #1 are the groups attached to the intrinsic declarations in this
;   file (#1 is the one marked convergent, used by readfirstlane).
;   #2 through #5 set floating-point math function attributes as strings;
;   they are presumably attached to individual test functions earlier in the
;   file (not visible from here, so confirm before renumbering or deleting).
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||
|
attributes #1 = { convergent nounwind willreturn memory(none) }
|
||
|
attributes #2 = { "approx-func-fp-math"="true" }
|
||
|
attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
|
||
|
attributes #4 = { "unsafe-fp-math"="true" }
|
||
|
attributes #5 = { "no-infs-fp-math"="true" }
|
||
|
|
||
|
; Metadata nodes each wrapping a single float constant.
; NOTE(review): presumably these are the !fpmath accuracy operands (maximum
; error in ULPs) attached to sqrt calls earlier in the file; no use is visible
; in this part of the file, so confirm before changing values or numbering.
!0 = !{float 0.5}
|
||
|
!1 = !{float 1.0}
|
||
|
!2 = !{float 2.0}
|
||
|
!3 = !{float 2.5}
|
||
|
!4 = !{float 3.0}
|
||
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||
|
; GCN-IEEE: {{.*}}
|