264 lines
23 KiB
LLVM
264 lines
23 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: opt < %s -interleaved-access -S | FileCheck %s
|
|
; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s
|
|
|
|
target triple = "aarch64-linux-gnu"
|
|
|
|
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @deinterleave_nxi8_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 16 x i8>, <vscale x 16 x i8> } @deinterleave_nxi8_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[LDN]]
|
|
;
|
|
%load = load <vscale x 32 x i8>, ptr %ptr, align 1
|
|
%deinterleave = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %load)
|
|
ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %deinterleave
|
|
}
|
|
|
|
define { <vscale x 8 x i16>, <vscale x 8 x i16> } @deinterleave_nxi16_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 8 x i16>, <vscale x 8 x i16> } @deinterleave_nxi16_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]]
|
|
;
|
|
%load = load <vscale x 16 x i16>, ptr %ptr, align 2
|
|
%deinterleave = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %load)
|
|
ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %deinterleave
|
|
}
|
|
|
|
define { <vscale x 4 x i32>, <vscale x 4 x i32> } @deinterleave_nx8xi32_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32> } @deinterleave_nx8xi32_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]]
|
|
;
|
|
%load = load <vscale x 8 x i32>, ptr %ptr, align 4
|
|
%deinterleave = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %load)
|
|
ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %deinterleave
|
|
}
|
|
|
|
define { <vscale x 2 x i64>, <vscale x 2 x i64> } @deinterleave_nxi64_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 2 x i64>, <vscale x 2 x i64> } @deinterleave_nxi64_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]]
|
|
;
|
|
%load = load <vscale x 4 x i64>, ptr %ptr, align 8
|
|
%deinterleave = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %load)
|
|
ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %deinterleave
|
|
}
|
|
|
|
define { <vscale x 4 x float>, <vscale x 4 x float> } @deinterleave_nxfloat_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 4 x float>, <vscale x 4 x float> } @deinterleave_nxfloat_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]]
|
|
;
|
|
%load = load <vscale x 8 x float>, ptr %ptr, align 4
|
|
%deinterleave = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %load)
|
|
ret { <vscale x 4 x float>, <vscale x 4 x float> } %deinterleave
|
|
}
|
|
|
|
define { <vscale x 2 x double>, <vscale x 2 x double> } @deinterleave_nxdouble_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 2 x double>, <vscale x 2 x double> } @deinterleave_nxdouble_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]]
|
|
;
|
|
%load = load <vscale x 4 x double>, ptr %ptr, align 8
|
|
%deinterleave = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %load)
|
|
ret { <vscale x 2 x double>, <vscale x 2 x double> } %deinterleave
|
|
}
|
|
|
|
define { <vscale x 2 x ptr>, <vscale x 2 x ptr> } @deinterleave_nxptr_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 2 x ptr>, <vscale x 2 x ptr> } @deinterleave_nxptr_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x ptr>, <vscale x 2 x ptr> } @llvm.aarch64.sve.ld2.sret.nxv2p0(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret { <vscale x 2 x ptr>, <vscale x 2 x ptr> } [[LDN]]
|
|
;
|
|
%load = load <vscale x 4 x ptr>, ptr %ptr, align 8
|
|
%deinterleave = tail call { <vscale x 2 x ptr>, <vscale x 2 x ptr> } @llvm.experimental.vector.deinterleave2.nxv4p0(<vscale x 4 x ptr> %load)
|
|
ret { <vscale x 2 x ptr>, <vscale x 2 x ptr> } %deinterleave
|
|
}
|
|
|
|
define void @interleave_nxi8_factor2(ptr %ptr, <vscale x 16 x i8> %l, <vscale x 16 x i8> %r) #0 {
|
|
; CHECK-LABEL: define void @interleave_nxi8_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 16 x i8> [[L:%.*]], <vscale x 16 x i8> [[R:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[L]], <vscale x 16 x i8> [[R]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%interleave = tail call <vscale x 32 x i8> @llvm.experimental.vector.interleave2.nxv32i8(<vscale x 16 x i8> %l, <vscale x 16 x i8> %r)
|
|
store <vscale x 32 x i8> %interleave, ptr %ptr, align 1
|
|
ret void
|
|
}
|
|
|
|
define void @interleave_nxi16_factor2(ptr %ptr, <vscale x 8 x i16> %l, <vscale x 8 x i16> %r) #0 {
|
|
; CHECK-LABEL: define void @interleave_nxi16_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 8 x i16> [[L:%.*]], <vscale x 8 x i16> [[R:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[L]], <vscale x 8 x i16> [[R]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%interleave = tail call <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16> %l, <vscale x 8 x i16> %r)
|
|
store <vscale x 16 x i16> %interleave, ptr %ptr, align 2
|
|
ret void
|
|
}
|
|
|
|
define void @interleave_nxi32_factor2(ptr %ptr, <vscale x 4 x i32> %l, <vscale x 4 x i32> %r) #0 {
|
|
; CHECK-LABEL: define void @interleave_nxi32_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 4 x i32> [[L:%.*]], <vscale x 4 x i32> [[R:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[L]], <vscale x 4 x i32> [[R]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%interleave = tail call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %l, <vscale x 4 x i32> %r)
|
|
store <vscale x 8 x i32> %interleave, ptr %ptr, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @interleave_nxi64_factor2(ptr %ptr, <vscale x 2 x i64> %l, <vscale x 2 x i64> %r) #0 {
|
|
; CHECK-LABEL: define void @interleave_nxi64_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 2 x i64> [[L:%.*]], <vscale x 2 x i64> [[R:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[L]], <vscale x 2 x i64> [[R]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%interleave = tail call <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64> %l, <vscale x 2 x i64> %r)
|
|
store <vscale x 4 x i64> %interleave, ptr %ptr, align 8
|
|
ret void
|
|
}
|
|
|
|
define void @interleave_nxfloat_factor2(ptr %ptr, <vscale x 4 x float> %l, <vscale x 4 x float> %r) #0 {
|
|
; CHECK-LABEL: define void @interleave_nxfloat_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 4 x float> [[L:%.*]], <vscale x 4 x float> [[R:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[L]], <vscale x 4 x float> [[R]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%interleave = tail call <vscale x 8 x float> @llvm.experimental.vector.interleave2.nxv8f32(<vscale x 4 x float> %l, <vscale x 4 x float> %r)
|
|
store <vscale x 8 x float> %interleave, ptr %ptr, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @interleave_nxdouble_factor2(ptr %ptr, <vscale x 2 x double> %l, <vscale x 2 x double> %r) #0 {
|
|
; CHECK-LABEL: define void @interleave_nxdouble_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 2 x double> [[L:%.*]], <vscale x 2 x double> [[R:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[L]], <vscale x 2 x double> [[R]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%interleave = tail call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %l, <vscale x 2 x double> %r)
|
|
store <vscale x 4 x double> %interleave, ptr %ptr, align 4
|
|
ret void
|
|
}
|
|
|
|
define void @interleave_nxptr_factor2(ptr %ptr, <vscale x 2 x ptr> %l, <vscale x 2 x ptr> %r) #0 {
|
|
; CHECK-LABEL: define void @interleave_nxptr_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 2 x ptr> [[L:%.*]], <vscale x 2 x ptr> [[R:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr> [[L]], <vscale x 2 x ptr> [[R]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%interleave = tail call <vscale x 4 x ptr> @llvm.experimental.vector.interleave2.nxv4p0(<vscale x 2 x ptr> %l, <vscale x 2 x ptr> %r)
|
|
store <vscale x 4 x ptr> %interleave, ptr %ptr, align 4
|
|
ret void
|
|
}
|
|
|
|
;;; Check that we 'legalize' operations that are wider than the target supports.
|
|
|
|
define { <vscale x 16 x i32>, <vscale x 16 x i32> } @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 16 x i32>, <vscale x 16 x i32> } @deinterleave_wide_nxi32_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i64 0
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP1]])
|
|
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
|
|
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i64 2
|
|
; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP6]])
|
|
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 0
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP7]], i64 4)
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 1
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP9]], i64 4)
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i64 4
|
|
; CHECK-NEXT: [[LDN2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP11]])
|
|
; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN2]], 0
|
|
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP12]], i64 8)
|
|
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN2]], 1
|
|
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP14]], i64 8)
|
|
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i64 6
|
|
; CHECK-NEXT: [[LDN3:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP16]])
|
|
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN3]], 0
|
|
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP13]], <vscale x 4 x i32> [[TMP17]], i64 12)
|
|
; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN3]], 1
|
|
; CHECK-NEXT: [[TMP20:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP15]], <vscale x 4 x i32> [[TMP19]], i64 12)
|
|
; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <vscale x 16 x i32>, <vscale x 16 x i32> } poison, <vscale x 16 x i32> [[TMP18]], 0
|
|
; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <vscale x 16 x i32>, <vscale x 16 x i32> } [[TMP21]], <vscale x 16 x i32> [[TMP20]], 1
|
|
; CHECK-NEXT: ret { <vscale x 16 x i32>, <vscale x 16 x i32> } [[TMP22]]
|
|
;
|
|
%load = load <vscale x 32 x i32>, ptr %ptr, align 4
|
|
%deinterleave = tail call { <vscale x 16 x i32>, <vscale x 16 x i32> } @llvm.experimental.vector.deinterleave2.nxv32i32(<vscale x 32 x i32> %load)
|
|
ret { <vscale x 16 x i32>, <vscale x 16 x i32> } %deinterleave
|
|
}
|
|
|
|
define { <vscale x 4 x double>, <vscale x 4 x double> } @deinterleave_wide_nxdouble_factor2(ptr %ptr) #0 {
|
|
; CHECK-LABEL: define { <vscale x 4 x double>, <vscale x 4 x double> } @deinterleave_wide_nxdouble_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 0
|
|
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP1]])
|
|
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
|
|
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0)
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 2
|
|
; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP6]])
|
|
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 0
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP7]], i64 2)
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 1
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP5]], <vscale x 2 x double> [[TMP9]], i64 2)
|
|
; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 4 x double>, <vscale x 4 x double> } poison, <vscale x 4 x double> [[TMP8]], 0
|
|
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP11]], <vscale x 4 x double> [[TMP10]], 1
|
|
; CHECK-NEXT: ret { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP12]]
|
|
;
|
|
%load = load <vscale x 8 x double>, ptr %ptr, align 8
|
|
%deinterleave = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.experimental.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %load)
|
|
ret { <vscale x 4 x double>, <vscale x 4 x double> } %deinterleave
|
|
}
|
|
|
|
define void @interleave_wide_nxdouble_factor2(ptr %ptr, <vscale x 4 x double> %l, <vscale x 4 x double> %r) #0 {
|
|
; CHECK-LABEL: define void @interleave_wide_nxdouble_factor2
|
|
; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 4 x double> [[L:%.*]], <vscale x 4 x double> [[R:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[L]], i64 0)
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[R]], i64 0)
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP1]])
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 2
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[L]], i64 2)
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[R]], i64 2)
|
|
; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP4]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%interleave = tail call <vscale x 8 x double> @llvm.experimental.vector.interleave2.nxv8f64(<vscale x 4 x double> %l, <vscale x 4 x double> %r)
|
|
store <vscale x 8 x double> %interleave, ptr %ptr, align 4
|
|
ret void
|
|
}
|
|
|
|
declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
|
|
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
|
|
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
|
|
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)
|
|
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
|
|
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
|
|
declare { <vscale x 2 x ptr>, <vscale x 2 x ptr> } @llvm.experimental.vector.deinterleave2.nxv4p0(<vscale x 4 x ptr>)
|
|
|
|
; Larger deinterleaves to test 'legalization'
|
|
declare { <vscale x 16 x i32>, <vscale x 16 x i32> } @llvm.experimental.vector.deinterleave2.nxv32i32(<vscale x 32 x i32>)
|
|
declare { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.experimental.vector.deinterleave2.nxv8f64(<vscale x 8 x double>)
|
|
|
|
declare <vscale x 32 x i8> @llvm.experimental.vector.interleave2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
|
|
declare <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
|
|
declare <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
|
|
declare <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
|
|
declare <vscale x 8 x float> @llvm.experimental.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
|
|
declare <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
|
|
declare <vscale x 4 x ptr> @llvm.experimental.vector.interleave2.nxv4p0(<vscale x 2 x ptr>, <vscale x 2 x ptr>)
|
|
|
|
; Larger interleaves to test 'legalization'
|
|
declare <vscale x 8 x double> @llvm.experimental.vector.interleave2.nxv8f64(<vscale x 4 x double>, <vscale x 4 x double>)
|
|
|
|
attributes #0 = { vscale_range(1,16) "target-features"="+sve" }
|