; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s

; Each function below passes a group of two or four scalable vectors to one of
; the llvm.aarch64.sme.write.* intrinsics. Where a function makes a second
; call, its slice index is the incoming %slice plus a constant, and the
; assertions expect that constant to show up as the immediate slice offset of
; the emitted MOV rather than as a separate add instruction.

;
; Move Multi-Vector To Tile (Write) x 2
;

; Horizontal

define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.b[w12, 0:1], { z0.b, z1.b }
; CHECK-NEXT:    mov za0h.b[w12, 14:15], { z0.b, z1.b }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
  %slice.14 = add i32 %slice, 14
  call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 %slice.14, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
  ret void
}

define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 %slice.6, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
  ret void
}

define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 %slice.6, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  ret void
}

define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 %slice.6, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  ret void
}

define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  %slice.2 = add i32 %slice, 2
  call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 %slice.2, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  ret void
}

define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  %slice.2 = add i32 %slice, 2
  call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 %slice.2, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  ret void
}

; The 64-bit element variants only exercise the zero-offset form.
define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
  ret void
}

define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
  ret void
}

; Vertical

define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.b[w12, 0:1], { z0.b, z1.b }
; CHECK-NEXT:    mov za0v.b[w12, 14:15], { z0.b, z1.b }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
  %slice.14 = add i32 %slice, 14
  call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 %slice.14, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
  ret void
}

define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 %slice.6, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
  ret void
}

define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 %slice.6, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  ret void
}

define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 %slice.6, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  ret void
}

define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  %slice.2 = add i32 %slice, 2
  call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 %slice.2, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  ret void
}

define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  %slice.2 = add i32 %slice, 2
  call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 %slice.2, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  ret void
}

define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
  ret void
}

define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
  ret void
}

;
; Move Multi-Vector To Tile (Write) x 4
;

; Horizontal

define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.b[w12, 0:3], { z0.b - z3.b }
; CHECK-NEXT:    mov za0h.b[w12, 12:15], { z0.b - z3.b }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
  %slice.12 = add i32 %slice, 12
  call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 %slice.12, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
  ret void
}

define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 %slice.4, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
  ret void
}

define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 %slice.4, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  ret void
}

define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 %slice.4, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  ret void
}

; The 32-bit and 64-bit element variants only exercise the zero-offset form.
define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret void
}

define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
  ret void
}

define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret void
}

define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
  ret void
}

; Vertical

define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.b[w12, 0:3], { z0.b - z3.b }
; CHECK-NEXT:    mov za0v.b[w12, 12:15], { z0.b - z3.b }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
  %slice.12 = add i32 %slice, 12
  call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 %slice.12, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
  ret void
}

define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 %slice.4, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
  ret void
}

define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 %slice.4, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  ret void
}

define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 %slice.4, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  ret void
}

define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret void
}

define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
  ret void
}

define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret void
}

define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
  ret void
}

;
; Move Multi-Vector To ZA (Write) x2
;

; For every element type in this group the assertions expect the same
; za.d[..., vgx2] form, with offsets 0 and 7.

define void @za_write_vg1x2_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2) {
; CHECK-LABEL: za_write_vg1x2_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 %slice.7, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2)
  ret void
}

define void @za_write_vg1x2_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2) {
; CHECK-LABEL: za_write_vg1x2_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 %slice.7, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2)
  ret void
}

define void @za_write_vg1x2_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2) {
; CHECK-LABEL: za_write_vg1x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 %slice.7, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2)
  ret void
}

define void @za_write_vg1x2_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2) {
; CHECK-LABEL: za_write_vg1x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 %slice.7, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2)
  ret void
}

define void @za_write_vg1x2_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2) {
; CHECK-LABEL: za_write_vg1x2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2)
  ret void
}

define void @za_write_vg1x2_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2) {
; CHECK-LABEL: za_write_vg1x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 %slice.7, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2)
  ret void
}

define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2) {
; CHECK-LABEL: za_write_vg1x2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2)
  ret void
}

define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2) {
; CHECK-LABEL: za_write_vg1x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 %slice.7, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2)
  ret void
}

;
; Move Multi-Vector To ZA (Write) x4
;

; For every element type in this group the assertions expect the same
; za.d[..., vgx4] form, with offsets 0 and 7.

define void @za_write_vg1x4_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4) {
; CHECK-LABEL: za_write_vg1x4_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 %slice.7, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4)
  ret void
}

define void @za_write_vg1x4_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4) {
; CHECK-LABEL: za_write_vg1x4_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 %slice.7, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4)
  ret void
}

define void @za_write_vg1x4_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4) {
; CHECK-LABEL: za_write_vg1x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 %slice.7, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4)
  ret void
}

define void @za_write_vg1x4_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4) {
; CHECK-LABEL: za_write_vg1x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 %slice.7, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4)
  ret void
}

define void @za_write_vg1x4_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4) {
; CHECK-LABEL: za_write_vg1x4_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4)
  ret void
}

define void @za_write_vg1x4_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4) {
; CHECK-LABEL: za_write_vg1x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 %slice.7, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4)
  ret void
}

define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4) {
; CHECK-LABEL: za_write_vg1x4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4)
  ret void
}

define void @za_write_vg1x4_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4) {
; CHECK-LABEL: za_write_vg1x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 %slice.7, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4)
  ret void
}

; Intrinsic declarations. The tile forms take (tile, slice, vectors...); the
; vg1x2/vg1x4 forms take (slice, vectors...).

declare void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)