; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -verify-machineinstrs < %s | FileCheck %s

;
; ADD Multi-Single x2
;

define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm)
  ret void
}

define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm)
  ret void
}

;
; ADD Multi-Single x4
;

define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT:    ret
                                                        <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zm) {
  call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zm)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zm)
  ret void
}

define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT:    ret
                                                        <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zm) {
  call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zm)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zm)
  ret void
}
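
; Note: every test in this file issues its intrinsic twice, at slice offsets
; 0 and 7, to exercise both bounds of the 3-bit slice-offset immediate
; accepted by the underlying ADD (to ZA) instructions.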

;
; ADD Multi-Multi x2
;

define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT:    ret
                                                 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
  call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret void
}

define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT:    ret
                                                 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
  call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret void
}

;
; ADD Multi-Multi x4
;

define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT:    ret
                                                 <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) {
  call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3)
  ret void
}

define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
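
; Note: the "// kill:" lines in the checks are register-allocation comments,
; not instructions; they mark where the incoming values are re-tagged as parts
; of the consecutive register tuples (e.g. $z0_z1_z2_z3) that the multi-vector
; instructions operate on.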
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT:    ret
                                                 <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) {
  call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3)
  ret void
}

;
; ADD and accumulate into ZA
;

; x2

define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
; CHECK-LABEL: multi_vector_add_za_vg1x2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }
; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
  ret void
}

define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) {
; CHECK-LABEL: multi_vector_add_za_vg1x2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
  ret void
}

define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
; CHECK-LABEL: multi_vector_add_za_vg1x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    fadd za.s[w8, 0, vgx2], { z0.s, z1.s }
; CHECK-NEXT:    fadd za.s[w8, 7, vgx2], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice.7, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
  ret void
}

define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) {
; CHECK-LABEL: multi_vector_add_za_vg1x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    fadd za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    fadd za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice.7, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1)
  ret void
}
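
; Note: the za.d variants above depend on the +sme-i16i64 (integer) and
; +sme-f64f64 (floating-point) features given in the RUN line; base SME2 only
; provides the 32-bit za.s accumulators for these operations.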
; x4

define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) {
; CHECK-LABEL: multi_vector_add_za_vg1x4_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }
; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
  ret void
}

define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) {
; CHECK-LABEL: multi_vector_add_za_vg1x4_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
  ret void
}

define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
; CHECK-LABEL: multi_vector_add_za_vg1x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    fadd za.s[w8, 0, vgx4], { z0.s - z3.s }
; CHECK-NEXT:    fadd za.s[w8, 7, vgx4], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice.7, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
  ret void
}

define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) {
; CHECK-LABEL: multi_vector_add_za_vg1x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    fadd za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    fadd za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice.7, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
  ret void
}
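
; The tests below cover the multi-vector ADD forms that write Z registers
; rather than ZA. These instructions are destructive on their first operand
; tuple, so codegen first copies the inputs into a tuple the instruction can
; encode.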
;
; ADD Vectors Multi-Single x2
;

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_add_single_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    add { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_add_single_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    add { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_add_single_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    add { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_add_single_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    add { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

;
; ADD Vectors Multi-Single x4
;

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_add_single_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    add { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_x4_single_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_add_x4_single_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    add { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_x4_single_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_add_x4_single_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    add { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_x4_single_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_add_x4_single_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    add { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
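
; Note: a destructive x2 tuple must start at an even-numbered Z register and
; an x4 tuple at a multiple of four, which is why the register allocator
; copies the inputs into z4/z5 and z24-z27 in the tests above.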
declare void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>)
declare void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)