; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
|
|
||
|
; Byte-element tile-slice loads: one horizontal, one vertical, both into tile 0.
; The horizontal load uses slice index %sliceidx + 15; the expected output folds
; the +15 into the instruction's immediate slice offset. The vertical load uses
; a constant-zero slice index, which is expected to be materialised from wzr.
define void @ld1b(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: ld1b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w1
; CHECK-NEXT:    mov w13, wzr
; CHECK-NEXT:    ld1b {za0h.b[w12, 15]}, p0/z, [x0]
; CHECK-NEXT:    ld1b {za0v.b[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ret
  %tileslice = add i32 %sliceidx, 15
  call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
  call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 0)
  ret void
}
|
||
|
|
||
|
; Same as the plain byte-element case, but the address is %ptr + %index bytes.
; The expected output uses the register-offset addressing form [x0, x1] for
; both loads instead of a separate address computation. Here the vertical load
; carries the +15 slice offset and the horizontal load uses slice index zero.
define void @ld1b_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: ld1b_with_addr_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w13, wzr
; CHECK-NEXT:    mov w12, w2
; CHECK-NEXT:    ld1b {za0h.b[w13, 0]}, p0/z, [x0, x1]
; CHECK-NEXT:    ld1b {za0v.b[w12, 15]}, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %ptr, i64 %index
  %tileslice = add i32 %sliceidx, 15
  call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
  ret void
}
|
||
|
|
||
|
; Halfword-element tile-slice loads covering tiles 0 and 1 in both horizontal
; and vertical form. Calls whose slice index is %sliceidx + 7 are expected to
; fold the +7 into the immediate slice offset (w12, 7); calls with a constant
; zero slice index are expected to use the zeroed register (w13, 0).
define void @ld1h(<vscale x 8 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: ld1h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w1
; CHECK-NEXT:    mov w13, wzr
; CHECK-NEXT:    ld1h {za0h.h[w12, 7]}, p0/z, [x0]
; CHECK-NEXT:    ld1h {za1h.h[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1h {za0v.h[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1h {za1v.h[w12, 7]}, p0/z, [x0]
; CHECK-NEXT:    ret
  %tileslice = add i32 %sliceidx, 7
  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 0)
  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 %tileslice)
  ret void
}
|
||
|
|
||
|
; Halfword-element loads from %ptr indexed by %index i16 elements. The expected
; output folds the element-scaled index into the addressing mode as
; [x0, x1, lsl #1] for both the horizontal (tile 0, slice %sliceidx + 7) and the
; vertical (tile 1, slice 0) load.
define void @ld1h_with_addr_offset(<vscale x 8 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: ld1h_with_addr_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w2
; CHECK-NEXT:    mov w13, wzr
; CHECK-NEXT:    ld1h {za0h.h[w12, 7]}, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ld1h {za1v.h[w13, 0]}, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %ptr, i64 %index
  %tileslice = add i32 %sliceidx, 7
  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %base, i32 1, i32 0)
  ret void
}
|
||
|
|
||
|
; Word-element tile-slice loads covering tiles 0-3 in both horizontal and
; vertical form. One horizontal call (tile 3) and one vertical call (tile 2) use
; slice index %sliceidx + 3, which the expected output encodes as (w12, 3); all
; other calls use a constant-zero slice index encoded as (w13, 0).
define void @ld1w(<vscale x 4 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: ld1w:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w1
; CHECK-NEXT:    mov w13, wzr
; CHECK-NEXT:    ld1w {za0h.s[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1w {za1h.s[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1w {za2h.s[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1w {za3h.s[w12, 3]}, p0/z, [x0]
; CHECK-NEXT:    ld1w {za0v.s[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1w {za1v.s[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1w {za2v.s[w12, 3]}, p0/z, [x0]
; CHECK-NEXT:    ld1w {za3v.s[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ret
  %tileslice = add i32 %sliceidx, 3
  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 0)
  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 %tileslice)
  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 %tileslice)
  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 0)
  ret void
}
|
||
|
|
||
|
; Word-element loads from %ptr indexed by %index i32 elements. The expected
; output folds the element-scaled index into the addressing mode as
; [x0, x1, lsl #2] for both the horizontal (tile 0, slice 0) and the vertical
; (tile 3, slice %sliceidx + 3) load.
define void @ld1w_with_addr_offset(<vscale x 4 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: ld1w_with_addr_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w2
; CHECK-NEXT:    mov w13, wzr
; CHECK-NEXT:    ld1w {za0h.s[w13, 0]}, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ld1w {za3v.s[w12, 3]}, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %ptr, i64 %index
  %tileslice = add i32 %sliceidx, 3
  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %base, i32 3, i32 %tileslice)
  ret void
}
|
||
|
|
||
|
; Doubleword-element tile-slice loads covering tiles 0-7 in both horizontal and
; vertical form. One horizontal call (tile 4) and one vertical call (tile 7) use
; slice index %sliceidx + 1, which the expected output encodes as (w12, 1); all
; other calls use a constant-zero slice index encoded as (w13, 0).
define void @ld1d(<vscale x 2 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: ld1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w1
; CHECK-NEXT:    mov w13, wzr
; CHECK-NEXT:    ld1d {za0h.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za1h.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za2h.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za3h.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za4h.d[w12, 1]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za5h.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za6h.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za7h.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za0v.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za1v.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za2v.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za3v.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za4v.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za5v.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za6v.d[w13, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1d {za7v.d[w12, 1]}, p0/z, [x0]
; CHECK-NEXT:    ret
  %tileslice = add i32 %sliceidx, 1
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 %tileslice)
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 0)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 0)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 %tileslice)
  ret void
}
|
||
|
|
||
|
; Doubleword-element loads from %ptr indexed by %index i64 elements. The
; expected output folds the element-scaled index into the addressing mode as
; [x0, x1, lsl #3] for both the horizontal (tile 0, slice %sliceidx + 1) and the
; vertical (tile 7, slice 0) load.
define void @ld1d_with_addr_offset(<vscale x 2 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: ld1d_with_addr_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w2
; CHECK-NEXT:    mov w13, wzr
; CHECK-NEXT:    ld1d {za0h.d[w12, 1]}, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ld1d {za7v.d[w13, 0]}, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %base = getelementptr i64, ptr %ptr, i64 %index
  %tileslice = add i32 %sliceidx, 1
  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %base, i32 7, i32 0)
  ret void
}
|
||
|
|
||
|
; Quadword-element tile-slice loads covering tiles 0-15 in both horizontal and
; vertical form. Every call passes a constant-zero slice index, so the expected
; output uses a single zeroed slice register (w12) with immediate 0 throughout.
define void @ld1q(<vscale x 1 x i1> %pg, ptr %ptr) {
; CHECK-LABEL: ld1q:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, wzr
; CHECK-NEXT:    ld1q {za0h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za1h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za2h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za3h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za4h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za5h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za6h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za7h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za8h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za9h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za10h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za11h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za12h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za13h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za14h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za15h.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za0v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za1v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za2v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za3v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za4v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za5v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za6v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za7v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za8v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za9v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za10v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za11v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za12v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za13v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za14v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ld1q {za15v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
  ret void
}
|
||
|
|
||
|
; Quadword-element loads from %ptr indexed by %index i128 elements. The expected
; output folds the element-scaled index into the addressing mode as
; [x0, x1, lsl #4] for both the horizontal (tile 0) and vertical (tile 15) load;
; both use a constant-zero slice index.
define void @ld1q_with_addr_offset(<vscale x 1 x i1> %pg, ptr %ptr, i64 %index) {
; CHECK-LABEL: ld1q_with_addr_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, wzr
; CHECK-NEXT:    ld1q {za0h.q[w12, 0]}, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ld1q {za15v.q[w12, 0]}, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %base = getelementptr i128, ptr %ptr, i64 %index
  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i32 0, i32 0)
  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %base, i32 15, i32 0)
  ret void
}
|
||
|
|
||
|
; Baseline ZA array-vector load: slice operand 0, offset operand 0. The expected
; output zeroes the slice register and uses the plain [x0] address form.
define void @ldr(ptr %ptr) {
; CHECK-LABEL: ldr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, wzr
; CHECK-NEXT:    ldr za[w12, 0], [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.ldr(i32 0, ptr %ptr, i32 0)
  ret void
}
|
||
|
|
||
|
; ZA load with slice operand 15 from an address that is %ptr plus a plain
; 15-byte offset. The expected output keeps the byte offset as a separate
; `add x8, x0, #15` and leaves the instruction's immediate offset at 0.
define void @ldr_with_off_15(ptr %ptr) {
; CHECK-LABEL: ldr_with_off_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, #15 // =0xf
; CHECK-NEXT:    add x8, x0, #15
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %ptr, i64 15
  call void @llvm.aarch64.sme.ldr(i32 15, ptr %base, i32 0)
  ret void
}
|
||
|
|
||
|
; ZA load with slice operand 15 from %ptr + vscale * 240 bytes. The expected
; output computes the address with `addvl x8, x0, #15` and leaves the
; instruction's immediate offset at 0.
define void @ldr_with_off_15mulvl(ptr %ptr) {
; CHECK-LABEL: ldr_with_off_15mulvl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, #15 // =0xf
; CHECK-NEXT:    addvl x8, x0, #15
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mulvl = mul i64 %vscale, 240
  %base = getelementptr i8, ptr %ptr, i64 %mulvl
  call void @llvm.aarch64.sme.ldr(i32 15, ptr %base, i32 0)
  ret void
}
|
||
|
|
||
|
; ZA load with slice operand 16 from %ptr + vscale * 256 bytes. The expected
; output computes the address with `addvl x8, x0, #16` and leaves the
; instruction's immediate offset at 0.
define void @ldr_with_off_16mulvl(ptr %ptr) {
; CHECK-LABEL: ldr_with_off_16mulvl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, #16 // =0x10
; CHECK-NEXT:    addvl x8, x0, #16
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mulvl = mul i64 %vscale, 256
  %base = getelementptr i8, ptr %ptr, i64 %mulvl
  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 0)
  ret void
}
|
||
|
|
||
|
; ZA load with slice operand 16 and a non-constant offset operand %off. In the
; expected output the variable offset is added to the slice base
; (`add w12, w1, #16`) and, sign-extended and multiplied by the rdsvl result,
; added to the base address (`madd`); the instruction's immediate stays 0.
define void @ldr_with_off_var(ptr %base, i32 %off) {
; CHECK-LABEL: ldr_with_off_var:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT:    sxtw x8, w1
; CHECK-NEXT:    rdsvl x9, #1
; CHECK-NEXT:    add w12, w1, #16
; CHECK-NEXT:    madd x8, x9, x8, x0
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 %off)
  ret void
}
|
||
|
|
||
|
; ZA load with slice operand 16 and constant offset operand 15. The expected
; output encodes the offset directly in the instruction, both as the slice
; immediate (w12, 15) and as the address immediate (#15, mul vl).
define void @ldr_with_off_15imm(ptr %base) {
; CHECK-LABEL: ldr_with_off_15imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, #16 // =0x10
; CHECK-NEXT:    ldr za[w12, 15], [x0, #15, mul vl]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 15)
  ret void
}
|
||
|
|
||
|
; ZA load with slice operand 16 and constant offset operand 16. Unlike the
; offset-15 case, the expected output does not use the instruction immediate:
; the offset is folded into the slice register (16 + 16 = 32) and into the
; address (x0 + rdsvl result shifted left by 4), with immediate 0.
define void @ldr_with_off_16imm(ptr %base) {
; CHECK-LABEL: ldr_with_off_16imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov w12, #32 // =0x20
; CHECK-NEXT:    add x8, x0, x8, lsl #4
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 16)
  ret void
}
|
||
|
|
||
|
; Four ZA loads sharing the same slice base and pointer with constant offsets
; 1..4. The expected output sets up the slice register once and encodes each
; offset as the instruction immediate (slice and `mul vl` address offset).
define void @ldr_with_off_many_imm(i32 %tile_slice, ptr %ptr) {
; CHECK-LABEL: ldr_with_off_many_imm:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    ldr za[w12, 1], [x1, #1, mul vl]
; CHECK-NEXT:    ldr za[w12, 2], [x1, #2, mul vl]
; CHECK-NEXT:    ldr za[w12, 3], [x1, #3, mul vl]
; CHECK-NEXT:    ldr za[w12, 4], [x1, #4, mul vl]
; CHECK-NEXT:    ret
entry:
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 1)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 2)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 3)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 4)
  ret void
}
|
||
|
|
||
|
; Four ZA loads with constant offsets 15..18, straddling the 15/16 boundary.
; In the expected output offset 15 is encoded as an immediate against the
; original slice base and pointer, while offsets 16..18 are rebased: the slice
; register becomes w0 + 16, the address becomes x1 + (rdsvl << 4), and the
; remaining immediates are 0..2.
define void @ldr_with_off_many_imm_15_18(i32 %tile_slice, ptr %ptr) {
; CHECK-LABEL: ldr_with_off_many_imm_15_18:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    add x8, x1, x8, lsl #4
; CHECK-NEXT:    ldr za[w12, 15], [x1, #15, mul vl]
; CHECK-NEXT:    add w12, w0, #16
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ldr za[w12, 1], [x8, #1, mul vl]
; CHECK-NEXT:    ldr za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT:    ret
entry:
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 15)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 16)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 17)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 18)
  ret void
}
|
||
|
|
||
|
; Four ZA loads with constant offsets 16..19. The expected output rebases all
; of them once: slice register w0 + 16, address x1 + (rdsvl << 4), and
; immediates 0..3 on the individual loads.
define void @ldr_with_off_many_imm_16_19(i32 %tile_slice, ptr %ptr) {
; CHECK-LABEL: ldr_with_off_many_imm_16_19:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    add w12, w0, #16
; CHECK-NEXT:    add x8, x1, x8, lsl #4
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ldr za[w12, 1], [x8, #1, mul vl]
; CHECK-NEXT:    ldr za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT:    ldr za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT:    ret
entry:
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 16)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 17)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 18)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 19)
  ret void
}
|
||
|
|
||
|
; Four ZA loads with constant offsets 31..34, straddling the 31/32 boundary.
; In the expected output offset 31 is expressed as base +16 with immediate 15
; (slice w0 + 16, address x1 + (rdsvl << 4)), and offsets 32..34 as base +32
; with immediates 0..2 (slice w0 + 32, address x1 + (rdsvl << 5)).
define void @ldr_with_off_many_imm_31_34(i32 %tile_slice, ptr %ptr) {
; CHECK-LABEL: ldr_with_off_many_imm_31_34:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    add w12, w0, #16
; CHECK-NEXT:    add x9, x1, x8, lsl #4
; CHECK-NEXT:    add x8, x1, x8, lsl #5
; CHECK-NEXT:    ldr za[w12, 15], [x9, #15, mul vl]
; CHECK-NEXT:    add w12, w0, #32
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ldr za[w12, 1], [x8, #1, mul vl]
; CHECK-NEXT:    ldr za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT:    ret
entry:
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 31)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 32)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 33)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 34)
  ret void
}
|
||
|
|
||
|
; Four ZA loads with constant offsets 32..35. The expected output rebases all
; of them once: slice register w0 + 32, address x1 + (rdsvl << 5), and
; immediates 0..3 on the individual loads.
; Note: the %vnum parameter is not referenced by the body; it is kept so the
; function signature stays as it was.
define void @ldr_with_off_many_imm_32_35(i32 %tile_slice, ptr %ptr, i64 %vnum) {
; CHECK-LABEL: ldr_with_off_many_imm_32_35:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    add w12, w0, #32
; CHECK-NEXT:    add x8, x1, x8, lsl #5
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ldr za[w12, 1], [x8, #1, mul vl]
; CHECK-NEXT:    ldr za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT:    ldr za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT:    ret
entry:
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 32)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 33)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 34)
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 35)
  ret void
}
|
||
|
|
||
|
; Four ZA loads whose offsets are a truncated variable %vnum plus 0..3. The
; expected output computes the variable part once (slice register w0 + w2,
; address x1 + rdsvl * sign-extended w2) and encodes the constant 0..3 parts
; as instruction immediates.
define void @ldr_with_off_many_var(i32 %tile_slice, ptr %ptr, i64 %vnum) {
; CHECK-LABEL: ldr_with_off_many_var:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sxtw x8, w2
; CHECK-NEXT:    rdsvl x9, #1
; CHECK-NEXT:    add w12, w0, w2
; CHECK-NEXT:    madd x8, x9, x8, x1
; CHECK-NEXT:    ldr za[w12, 0], [x8]
; CHECK-NEXT:    ldr za[w12, 1], [x8, #1, mul vl]
; CHECK-NEXT:    ldr za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT:    ldr za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT:    ret
entry:
  %0 = trunc i64 %vnum to i32
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %0)
  %1 = add i32 %0, 1
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %1)
  %2 = add i32 %0, 2
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %2)
  %3 = add i32 %0, 3
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %3)
  ret void
}
|
||
|
|
||
|
; Four ZA loads whose offsets are a truncated variable %vnum plus 33..36. The
; expected output splits each constant into a shared +32 that is added to the
; variable part once (`add w8, w2, #32`, then used for both the slice register
; and the address) and a per-load remainder 1..4 encoded as the immediate.
define void @ldr_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) {
; CHECK-LABEL: ldr_with_off_many_var_high:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    add w8, w2, #32
; CHECK-NEXT:    rdsvl x10, #1
; CHECK-NEXT:    sxtw x9, w8
; CHECK-NEXT:    add w12, w0, w8
; CHECK-NEXT:    madd x9, x10, x9, x1
; CHECK-NEXT:    ldr za[w12, 1], [x9, #1, mul vl]
; CHECK-NEXT:    ldr za[w12, 2], [x9, #2, mul vl]
; CHECK-NEXT:    ldr za[w12, 3], [x9, #3, mul vl]
; CHECK-NEXT:    ldr za[w12, 4], [x9, #4, mul vl]
; CHECK-NEXT:    ret
entry:
  %0 = trunc i64 %vnum to i32
  %1 = add i32 %0, 33
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %1)
  %2 = add i32 %0, 34
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %2)
  %3 = add i32 %0, 35
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %3)
  %4 = add i32 %0, 36
  tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %4)
  ret void
}
|
||
|
|
||
|
; Ensure that the tile offset is sunk, given that this is likely to be an 'add'
; that's decomposed into a base + offset in ISel.
;
; %add1 and %add2 are computed in the entry block but only used as slice
; indices inside the loop. The expected output contains no add for them: the
; slice base is copied into w12 once before the loop, and the +1 / +2 appear
; as the immediate slice offsets of the loads inside the loop body.
define void @test_ld1_sink_tile0_offset_operand(<vscale x 4 x i1> %pg, ptr %src, i32 %base, i32 %N) {
; CHECK-LABEL: test_ld1_sink_tile0_offset_operand:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w12, w1
; CHECK-NEXT:  .LBB24_1: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld1w {za0h.s[w12, 0]}, p0/z, [x0]
; CHECK-NEXT:    subs w2, w2, #1
; CHECK-NEXT:    ld1w {za0h.s[w12, 1]}, p0/z, [x0]
; CHECK-NEXT:    ld1w {za0h.s[w12, 2]}, p0/z, [x0]
; CHECK-NEXT:    b.ne .LBB24_1
; CHECK-NEXT:  // %bb.2: // %exit
; CHECK-NEXT:    ret
entry:
  %add1 = add i32 %base, 1
  %add2 = add i32 %base, 2
  br label %for.body

for.body:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %base)
  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add1)
  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add2)
  %inc = add nuw nsw i32 %i, 1
  %exitcond.not = icmp eq i32 %inc, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}
|
||
|
|
||
|
|
||
|
; Intrinsic declarations used by the tests above.
; Tile-slice loads take (predicate, pointer, i32, i32); as used in this file,
; the first i32 selects the tile and the second is the slice index.
declare void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1>, ptr, i32, i32)

; ZA array-vector load: (i32, pointer, i32); as used in this file, the first
; i32 is the slice base and the second is the offset operand.
declare void @llvm.aarch64.sme.ldr(i32, ptr, i32)
declare i64 @llvm.vscale.i64()
|