; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S --passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Sink the GEP to make use of scalar+vector addressing modes.
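; Keeping the GEP next to its user lets ISel fold the vector offsets into the
; load's addressing mode, e.g. an SVE gather roughly of the form
;   ld1w { z0.s }, p0/z, [x0, z1.s, sxtw #2]
; (illustrative only; register names are made up and this test only checks the
; IR produced by codegenprepare, not the final assembly).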
define <vscale x 4 x float> @gather_offsets_sink_gep(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_gep(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Sink sext to make use of scalar+sxtw(vector) addressing modes.
define <vscale x 4 x float> @gather_offsets_sink_sext(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; As above, but ensure both the GEP and the sext are sunk.
define <vscale x 4 x float> @gather_offsets_sink_sext_gep(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext_gep(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink GEPs that cannot benefit from SVE's scalar+vector addressing modes.
define <vscale x 4 x float> @gather_no_scalar_base(<vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_no_scalar_base(
; CHECK-SAME: <vscale x 4 x ptr> [[BASES:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, <vscale x 4 x ptr> [[BASES]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %ptrs = getelementptr float, <vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink extends whose result type is already favourable for SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_small(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_small(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i32>
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES_SEXT]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i8> %indices to <vscale x 4 x i32>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink extends that cannot benefit from SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_big(ptr %base, <vscale x 4 x i48> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_big(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i48> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i48> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[INDICES_SEXT]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i48> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Sink zext to make use of scalar+uxtw(vector) addressing modes.
; TODO: There's an argument here to split the extend into i8->i32 and i32->i64,
; which would be especially useful if the i8s are the result of a load because
; it would maintain the use of extending loads.
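; For example (a sketch only; value names here are hypothetical):
;   %indices.zext32 = zext <vscale x 4 x i8> %indices to <vscale x 4 x i32>          ; could fold into an extending load
;   %indices.zext64 = zext <vscale x 4 x i32> %indices.zext32 to <vscale x 4 x i64>  ; free via uxtw addressing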
define <vscale x 4 x float> @gather_offset_sink_zext(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_sink_zext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = zext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.zext = zext <vscale x 4 x i8> %indices to <vscale x 4 x i64>
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.zext
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Ensure we support scatters as well as gathers.
define void @scatter_offsets_sink_sext_gep(<vscale x 4 x float> %data, ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define void @scatter_offsets_sink_sext_gep(
; CHECK-SAME: <vscale x 4 x float> [[DATA:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    tail call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[DATA]], <vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK]])
; CHECK-NEXT:    ret void
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  tail call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask)
  br label %exit

exit:
  ret void
}

declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)