; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

;
; Masked Loads
;

define <vscale x 2 x i64> @masked_zload_nxv2i8(<vscale x 2 x i8>* %src, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_zload_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>* %src, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %ext = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

define <vscale x 2 x i64> @masked_zload_nxv2i16(<vscale x 2 x i16>* %src, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_zload_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %src, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %ext = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

define <vscale x 2 x i64> @masked_zload_nxv2i32(<vscale x 2 x i32>* %src, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_zload_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>* %src, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %ext = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

define <vscale x 4 x i32> @masked_zload_nxv4i8(<vscale x 4 x i8>* %src, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_zload_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>* %src, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
  %ext = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

define <vscale x 4 x i32> @masked_zload_nxv4i16(<vscale x 4 x i16>* %src, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_zload_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16>* %src, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %ext = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

define <vscale x 8 x i16> @masked_zload_nxv8i8(<vscale x 8 x i8>* %src, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: masked_zload_nxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8>* %src, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> undef)
  %ext = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %ext
}

define <vscale x 2 x i64> @masked_zload_passthru(<vscale x 2 x i32>* %src, <vscale x 2 x i1> %mask, <vscale x 2 x i32> %passthru) {
; CHECK-LABEL: masked_zload_passthru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z1.d }, p0/z, [x0]
; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>* %src, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i32> %passthru)
  %ext = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

; Return type requires splitting
define <vscale x 8 x i64> @masked_zload_nxv8i16(<vscale x 8 x i16>* %a, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: masked_zload_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z3.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpklo z2.d, z3.s
; CHECK-NEXT:    uunpkhi z3.d, z3.s
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>* %a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> undef)
  %ext = zext <vscale x 8 x i16> %load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %ext
}

; Masked load requires promotion
define <vscale x 2 x double> @masked_zload_2i16_2f64(<vscale x 2 x i16>* noalias %in, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_zload_2i16_2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ucvtf z0.d, p1/m, z0.d
; CHECK-NEXT:    ret
  %wide.load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %in, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %zext = zext <vscale x 2 x i16> %wide.load to <vscale x 2 x i64>
  %res = uitofp <vscale x 2 x i64> %zext to <vscale x 2 x double>
  ret <vscale x 2 x double> %res
}

; Extending loads from unpacked to wide illegal types

define <vscale x 4 x i64> @masked_zload_4i8_4i64(ptr %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: masked_zload_4i8_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z0.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    ret
  %aval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %b, <vscale x 4 x i8> zeroinitializer)
  %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 4 x i64> @masked_zload_4i16_4i64(ptr %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: masked_zload_4i16_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z0.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    ret
  %aval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%a, i32 16, <vscale x 4 x i1> %b, <vscale x 4 x i16> zeroinitializer)
  %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 8 x i32> @masked_zload_8i8_8i32(ptr %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: masked_zload_8i8_8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z0.s, z1.h
; CHECK-NEXT:    uunpkhi z1.s, z1.h
; CHECK-NEXT:    ret
  %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %b, <vscale x 8 x i8> zeroinitializer)
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %aext
}

define <vscale x 8 x i64> @masked_zload_8i8_8i64(ptr %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: masked_zload_8i8_8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z3.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpklo z2.d, z3.s
; CHECK-NEXT:    uunpkhi z3.d, z3.s
; CHECK-NEXT:    ret
  %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %b, <vscale x 8 x i8> zeroinitializer)
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %aext
}

define <vscale x 4 x i64> @masked_zload_x2_4i8_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
; CHECK-LABEL: masked_zload_x2_4i8_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    punpkhi p1.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ld1b { z1.d }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.d }, p1/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z3.d }, p0/z, [x1]
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    add z0.d, z0.d, z3.d
; CHECK-NEXT:    ret
  %aval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
  %bval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
  %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
  %bext = zext <vscale x 4 x i8> %bval to <vscale x 4 x i64>
  %res = add <vscale x 4 x i64> %aext, %bext
  ret <vscale x 4 x i64> %res
}

define <vscale x 4 x i64> @masked_zload_x2_4i16_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
; CHECK-LABEL: masked_zload_x2_4i16_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    punpkhi p1.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ld1h { z1.d }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1h { z2.d }, p1/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z3.d }, p0/z, [x1]
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    add z0.d, z0.d, z3.d
; CHECK-NEXT:    ret
  %aval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
  %bval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
  %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
  %bext = zext <vscale x 4 x i16> %bval to <vscale x 4 x i64>
  %res = add <vscale x 4 x i64> %aext, %bext
  ret <vscale x 4 x i64> %res
}

define <vscale x 8 x i32> @masked_zload_x2_8i8_8i32(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
; CHECK-LABEL: masked_zload_x2_8i8_8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    punpkhi p1.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ld1b { z1.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.s }, p1/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z3.s }, p0/z, [x1]
; CHECK-NEXT:    add z1.s, z1.s, z2.s
; CHECK-NEXT:    add z0.s, z0.s, z3.s
; CHECK-NEXT:    ret
  %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
  %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
  %bext = zext <vscale x 8 x i8> %bval to <vscale x 8 x i32>
  %res = add <vscale x 8 x i32> %aext, %bext
  ret <vscale x 8 x i32> %res
}

define <vscale x 8 x i64> @masked_zload_x2_8i8_8i64(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
; CHECK-LABEL: masked_zload_x2_8i8_8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    punpkhi p1.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpkhi p2.h, p1.b
; CHECK-NEXT:    punpklo p1.h, p1.b
; CHECK-NEXT:    punpkhi p3.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ld1b { z3.d }, p2/z, [x0, #3, mul vl]
; CHECK-NEXT:    ld1b { z5.d }, p2/z, [x1, #3, mul vl]
; CHECK-NEXT:    ld1b { z2.d }, p1/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z6.d }, p1/z, [x1, #2, mul vl]
; CHECK-NEXT:    ld1b { z1.d }, p3/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z7.d }, p3/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z4.d }, p0/z, [x1]
; CHECK-NEXT:    add z3.d, z3.d, z5.d
; CHECK-NEXT:    add z2.d, z2.d, z6.d
; CHECK-NEXT:    add z1.d, z1.d, z7.d
; CHECK-NEXT:    add z0.d, z0.d, z4.d
; CHECK-NEXT:    ret
  %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
  %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
  %bext = zext <vscale x 8 x i8> %bval to <vscale x 8 x i64>
  %res = add <vscale x 8 x i64> %aext, %bext
  ret <vscale x 8 x i64> %res
}

declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>*, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16>*, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8>*, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)