; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LD1R
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+no-sve-fp-ld1r < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-LD1R
;
; Check that an ld1r* instruction is generated to splat the scalar during the load,
; rather than a mov from a scalar to a vector register (which would require the vector unit).
;
; One-off: ld1r_stack checks that ld1rb works with stack objects.
;
; Test axes:
;   types = [i8, i16, i32, i64, half, float, double]
;   methods = [direct load, gep upper bound - 1, gep out of range x {neg,pos}, sext..., zext..., unpacked_floats...]
;

@g8 = external global i8

; One-off test for a splatted value coming from a stack load.
define <vscale x 16 x i8> @ld1r_stack() {
; CHECK-LABEL: ld1r_stack:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    adrp x8, :got:g8
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:g8]
; CHECK-NEXT:    ldrb w8, [x8]
; CHECK-NEXT:    strb w8, [sp, #12]
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [sp, #14]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
  %valp = alloca i8
  %valp2 = load volatile i8, ptr @g8
  store volatile i8 %valp2, ptr %valp
  %valp3 = getelementptr i8, ptr %valp, i32 2
  %val = load i8, ptr %valp3
  %1 = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %2 = shufflevector <vscale x 16 x i8> %1, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @ld1rb(ptr %valp) {
; CHECK-LABEL: ld1rb:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %shf
}

define <vscale x 16 x i8> @ld1rb_gep(ptr %valp) {
; CHECK-LABEL: ld1rb_gep:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x0, #63]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i8, ptr %valp, i32 63
  %val = load i8, ptr %valp2
  %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %shf
}

define <vscale x 16 x i8> @ld1rb_gep_out_of_range_up(ptr %valp) {
; CHECK-LABEL: ld1rb_gep_out_of_range_up:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #64
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i8, ptr %valp, i32 64
  %val = load i8, ptr %valp2
  %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %shf
}

define <vscale x 16 x i8> @ld1rb_gep_out_of_range_down(ptr %valp) {
; CHECK-LABEL: ld1rb_gep_out_of_range_down:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sub x8, x0, #1
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i8, ptr %valp, i32 -1
  %val = load i8, ptr %valp2
  %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %shf
}

define <vscale x 8 x i16> @ld1rb_i8_i16_zext(ptr %valp) {
; CHECK-LABEL: ld1rb_i8_i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ext = zext i8 %val to i16
  %ins = insertelement <vscale x 8 x i16> undef, i16 %ext, i32 0
  %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %shf
}

define <vscale x 8 x i16> @ld1rb_i8_i16_sext(ptr %valp) {
; CHECK-LABEL: ld1rb_i8_i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rsb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ext = sext i8 %val to i16
  %ins = insertelement <vscale x 8 x i16> undef, i16 %ext, i32 0
  %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %shf
}

define <vscale x 4 x i32>
@ld1rb_i8_i32_zext(ptr %valp) { ; CHECK-LABEL: ld1rb_i8_i32_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1rb { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i8, ptr %valp %ext = zext i8 %val to i32 %ins = insertelement undef, i32 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rb_i8_i32_sext(ptr %valp) { ; CHECK-LABEL: ld1rb_i8_i32_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1rsb { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i8, ptr %valp %ext = sext i8 %val to i32 %ins = insertelement undef, i32 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rb_i8_i64_zext(ptr %valp) { ; CHECK-LABEL: ld1rb_i8_i64_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rb { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i8, ptr %valp %ext = zext i8 %val to i64 %ins = insertelement undef, i64 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rb_i8_i64_sext(ptr %valp) { ; CHECK-LABEL: ld1rb_i8_i64_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i8, ptr %valp %ext = sext i8 %val to i64 %ins = insertelement undef, i64 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh(ptr %valp) { ; CHECK-LABEL: ld1rh: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i16, ptr %valp %ins = insertelement undef, i16 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_gep(ptr %valp) { ; CHECK-LABEL: ld1rh_gep: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0, #126] ; CHECK-NEXT: ret %valp2 = getelementptr i16, ptr %valp, i32 63 %val = load i16, ptr %valp2 %ins = insertelement undef, i16 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rh_gep_out_of_range_up: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i16, ptr %valp, i32 64 %val = load i16, ptr %valp2 %ins = insertelement undef, i16 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rh_gep_out_of_range_down: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i16, ptr %valp, i32 -1 %val = load i16, ptr %valp2 %ins = insertelement undef, i16 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_i16_i32_zext(ptr %valp) { ; CHECK-LABEL: ld1rh_i16_i32_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i16, ptr %valp %ext = zext i16 %val to i32 %ins = insertelement undef, i32 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_i16_i32_sext(ptr %valp) { ; CHECK-LABEL: ld1rh_i16_i32_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1rsh { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i16, ptr %valp %ext = sext i16 %val to i32 %ins = insertelement undef, i32 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_i16_i64_zext(ptr %valp) { ; CHECK-LABEL: ld1rh_i16_i64_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: 
ptrue p0.d ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i16, ptr %valp %ext = zext i16 %val to i64 %ins = insertelement undef, i64 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_i16_i64_sext(ptr %valp) { ; CHECK-LABEL: ld1rh_i16_i64_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rsh { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i16, ptr %valp %ext = sext i16 %val to i64 %ins = insertelement undef, i64 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw(ptr %valp) { ; CHECK-LABEL: ld1rw: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i32, ptr %valp %ins = insertelement undef, i32 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_gep(ptr %valp) { ; CHECK-LABEL: ld1rw_gep: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0, #252] ; CHECK-NEXT: ret %valp2 = getelementptr i32, ptr %valp, i32 63 %val = load i32, ptr %valp2 %ins = insertelement undef, i32 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rw_gep_out_of_range_up: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i32, ptr %valp, i32 64 %val = load i32, ptr %valp2 %ins = insertelement undef, i32 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rw_gep_out_of_range_down: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i32, ptr %valp, i32 -1 %val = load i32, ptr %valp2 %ins = insertelement undef, i32 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_i32_i64_zext(ptr %valp) { ; CHECK-LABEL: ld1rw_i32_i64_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i32, ptr %valp %ext = zext i32 %val to i64 %ins = insertelement undef, i64 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_i32_i64_sext(ptr %valp) { ; CHECK-LABEL: ld1rw_i32_i64_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rsw { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i32, ptr %valp %ext = sext i32 %val to i64 %ins = insertelement undef, i64 %ext, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rd(ptr %valp) { ; CHECK-LABEL: ld1rd: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %val = load i64, ptr %valp %ins = insertelement undef, i64 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rd_gep(ptr %valp) { ; CHECK-LABEL: ld1rd_gep: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0, #504] ; CHECK-NEXT: ret %valp2 = getelementptr i64, ptr %valp, i32 63 %val = load i64, ptr %valp2 %ins = insertelement undef, i64 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rd_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rd_gep_out_of_range_up: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = 
getelementptr i64, ptr %valp, i32 64 %val = load i64, ptr %valp2 %ins = insertelement undef, i64 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rd_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rd_gep_out_of_range_down: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i64, ptr %valp, i32 -1 %val = load i64, ptr %valp2 %ins = insertelement undef, i64 %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.h ; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %val = load half, ptr %valp %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_neoverse(ptr %valp) #1 { ; CHECK-LABEL: ld1rh_half_neoverse: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: ret %val = load half, ptr %valp %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_gep(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_gep: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.h ; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x0, #126] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_gep: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #126] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 63 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_gep_out_of_range_up(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_gep_out_of_range_up: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.h ; CHECK-LD1R-NEXT: add x8, x0, #128 ; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_gep_out_of_range_up: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #128] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 64 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_gep_out_of_range_down(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_gep_out_of_range_down: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.h ; CHECK-LD1R-NEXT: sub x8, x0, #2 ; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_gep_out_of_range_down: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldur h0, [x0, #-2] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 -1 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_unpacked4(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_unpacked4: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.s ; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: 
ret %val = load half, ptr %valp %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_unpacked4_gep(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.s ; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x0, #126] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #126] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 63 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_unpacked4_gep_out_of_range_up(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.s ; CHECK-LD1R-NEXT: add x8, x0, #128 ; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #128] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 64 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_unpacked4_gep_out_of_range_down(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.s ; CHECK-LD1R-NEXT: sub x8, x0, #2 ; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldur h0, [x0, #-2] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 -1 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_unpacked2(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_unpacked2: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %val = load half, ptr %valp %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_unpacked2_gep(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x0, #126] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #126] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 63 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_unpacked2_gep_out_of_range_up(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: add x8, x0, #128 ; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0, 
#128] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 64 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rh_half_unpacked2_gep_out_of_range_down(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: sub x8, x0, #2 ; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldur h0, [x0, #-2] ; CHECK-NO-LD1R-NEXT: mov z0.h, h0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 -1 %val = load half, ptr %valp2 %ins = insertelement undef, half %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_float(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rw_float: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.s ; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rw_float: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr s0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.s, s0 ; CHECK-NO-LD1R-NEXT: ret %val = load float, ptr %valp %ins = insertelement undef, float %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_float_gep(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rw_float_gep: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.s ; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x0, #252] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rw_float_gep: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr s0, [x0, #252] ; CHECK-NO-LD1R-NEXT: mov z0.s, s0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 63 %val = load float, ptr %valp2 %ins = insertelement undef, float %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_float_gep_out_of_range_up(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rw_float_gep_out_of_range_up: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.s ; CHECK-LD1R-NEXT: add x8, x0, #256 ; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rw_float_gep_out_of_range_up: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr s0, [x0, #256] ; CHECK-NO-LD1R-NEXT: mov z0.s, s0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 64 %val = load float, ptr %valp2 %ins = insertelement undef, float %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_float_gep_out_of_range_down(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rw_float_gep_out_of_range_down: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.s ; CHECK-LD1R-NEXT: sub x8, x0, #4 ; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rw_float_gep_out_of_range_down: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldur s0, [x0, #-4] ; CHECK-NO-LD1R-NEXT: mov z0.s, s0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 -1 %val = load float, ptr %valp2 %ins = insertelement undef, float %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_float_unpacked2(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rw_float_unpacked2: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr s0, 
[x0] ; CHECK-NO-LD1R-NEXT: mov z0.s, s0 ; CHECK-NO-LD1R-NEXT: ret %val = load float, ptr %valp %ins = insertelement undef, float %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_float_unpacked2_gep(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x0, #252] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr s0, [x0, #252] ; CHECK-NO-LD1R-NEXT: mov z0.s, s0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 63 %val = load float, ptr %valp2 %ins = insertelement undef, float %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_float_unpacked2_gep_out_of_range_up(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: add x8, x0, #256 ; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr s0, [x0, #256] ; CHECK-NO-LD1R-NEXT: mov z0.s, s0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 64 %val = load float, ptr %valp2 %ins = insertelement undef, float %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rw_float_unpacked2_gep_out_of_range_down(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: sub x8, x0, #4 ; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldur s0, [x0, #-4] ; CHECK-NO-LD1R-NEXT: mov z0.s, s0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 -1 %val = load float, ptr %valp2 %ins = insertelement undef, float %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rd_double(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rd_double: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rd_double: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr d0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.d, d0 ; CHECK-NO-LD1R-NEXT: ret %val = load double, ptr %valp %ins = insertelement undef, double %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rd_double_gep(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rd_double_gep: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x0, #504] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rd_double_gep: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr d0, [x0, #504] ; CHECK-NO-LD1R-NEXT: mov z0.d, d0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr double, ptr %valp, i32 63 %val = load double, ptr %valp2 %ins = insertelement undef, double %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rd_double_gep_out_of_range_up(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rd_double_gep_out_of_range_up: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: add x8, x0, #512 ; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rd_double_gep_out_of_range_up: ; CHECK-NO-LD1R: // %bb.0: ; 
CHECK-NO-LD1R-NEXT: ldr d0, [x0, #512] ; CHECK-NO-LD1R-NEXT: mov z0.d, d0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr double, ptr %valp, i32 64 %val = load double, ptr %valp2 %ins = insertelement undef, double %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @ld1rd_double_gep_out_of_range_down(ptr %valp) { ; CHECK-LD1R-LABEL: ld1rd_double_gep_out_of_range_down: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ptrue p0.d ; CHECK-LD1R-NEXT: sub x8, x0, #8 ; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: ld1rd_double_gep_out_of_range_down: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldur d0, [x0, #-8] ; CHECK-NO-LD1R-NEXT: mov z0.d, d0 ; CHECK-NO-LD1R-NEXT: ret %valp2 = getelementptr double, ptr %valp, i32 -1 %val = load double, ptr %valp2 %ins = insertelement undef, double %val, i32 0 %shf = shufflevector %ins, undef, zeroinitializer ret %shf } define @dupq_ld1rqd_f64(ptr %a) { ; CHECK-LABEL: dupq_ld1rqd_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rqd { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %1 = load <2 x double>, ptr %a %2 = tail call fast @llvm.vector.insert.nxv2f64.v2f64( undef, <2 x double> %1, i64 0) %3 = tail call fast @llvm.aarch64.sve.dupq.lane.nxv2f64( %2, i64 0) ret %3 } define @dupq_ld1rqw_f32(ptr %a) { ; CHECK-LABEL: dupq_ld1rqw_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1rqw { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %1 = load <4 x float>, ptr %a %2 = tail call fast @llvm.vector.insert.nxv4f32.v4f32( undef, <4 x float> %1, i64 0) %3 = tail call fast @llvm.aarch64.sve.dupq.lane.nxv4f32( %2, i64 0) ret %3 } define @dupq_ld1rqh_f16(ptr %a) { ; CHECK-LABEL: dupq_ld1rqh_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ld1rqh { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret %1 = load <8 x half>, ptr %a %2 = tail call fast @llvm.vector.insert.nxv8f16.v8f16( undef, <8 x half> %1, i64 0) %3 = tail call fast @llvm.aarch64.sve.dupq.lane.nxv8f16( %2, i64 0) ret %3 } define @dupq_ld1rqh_bf16(ptr %a) #0 { ; CHECK-LABEL: dupq_ld1rqh_bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ld1rqh { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret %1 = load <8 x bfloat>, ptr %a %2 = tail call fast @llvm.vector.insert.nxv8bf16.v8bf16( undef, <8 x bfloat> %1, i64 0) %3 = tail call fast @llvm.aarch64.sve.dupq.lane.nxv8bf16( %2, i64 0) ret %3 } define @dupq_ld1rqd_i64(ptr %a) #0 { ; CHECK-LABEL: dupq_ld1rqd_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rqd { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %1 = load <2 x i64>, ptr %a %2 = tail call @llvm.vector.insert.nxv2i64.v2i64( undef, <2 x i64> %1, i64 0) %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv2i64( %2, i64 0) ret %3 } define @dupq_ld1rqw_i32(ptr %a) #0 { ; CHECK-LABEL: dupq_ld1rqw_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1rqw { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %1 = load <4 x i32>, ptr %a %2 = tail call @llvm.vector.insert.nxv4i32.v4i32( undef, <4 x i32> %1, i64 0) %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv4i32( %2, i64 0) ret %3 } define @dupq_ld1rqw_i16(ptr %a) #0 { ; CHECK-LABEL: dupq_ld1rqw_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ld1rqh { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret %1 = load <8 x i16>, ptr %a %2 = tail call @llvm.vector.insert.nxv8i16.v8i16( undef, <8 x i16> %1, i64 0) %3 = tail call @llvm.aarch64.sve.dupq.lane.nxv8i16( %2, i64 0) ret %3 } define @dupq_ld1rqw_i8(ptr %a) #0 { ; CHECK-LABEL: dupq_ld1rqw_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue 
p0.b
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %1 = load <16 x i8>, ptr %a
  %2 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> %1, i64 0)
  %3 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %2, i64 0)
  ret <vscale x 16 x i8> %3
}

;
;
; Tests for dup:
;
; Positive tests:
;   * dup with passthru=undef or passthru=zero.
;   * sign/zero extending.
;   * unpacked types.
;
; Negative tests:
;   * dup with passthru as a parameter.
;
;

define <vscale x 16 x i8> @dup_ld1rb_i8_passthruundef_nxv16i8(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rb_i8_passthruundef_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, i8 %ld)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @dup_ld1rh_i16_passthruundef_nxv8i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rh_i16_passthruundef_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, i16 %ld)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @dup_ld1rh_i8_passthruundef_nxv8i16_sext(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rh_i8_passthruundef_nxv8i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = sext i8 %ld to i16
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, i16 %ext)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @dup_ld1rh_i8_passthruundef_nxv8i16_zext(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rh_i8_passthruundef_nxv8i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = zext i8 %ld to i16
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, i16 %ext)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @dup_ld1rs_i32_passthruundef_nxv4i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i32_passthruundef_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rw { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i32, ptr %addr
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ld)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @dup_ld1rs_i8_passthruundef_nxv4i32_sext(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i8_passthruundef_nxv4i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = sext i8 %ld to i32
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ext)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @dup_ld1rs_i8_passthruundef_nxv4i32_zext(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i8_passthruundef_nxv4i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = zext i8 %ld to i32
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ext)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @dup_ld1rs_i16_passthruundef_nxv4i32_sext(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i16_passthruundef_nxv4i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %ext = sext i16 %ld to i32
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ext)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @dup_ld1rs_i16_passthruundef_nxv4i32_zext(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i16_passthruundef_nxv4i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %ext = zext i16 %ld to i32
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ext)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @dup_ld1rd_i64_passthruundef_nxv2i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rd_i64_passthruundef_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i64, ptr %addr
  %res = call <vscale x 2 x i64>
@llvm.aarch64.sve.dup.nxv2i64( undef, %pg, i64 %ld) ret %res } define @dup_ld1rs_i8_passthruundef_nxv2i64_sext( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rs_i8_passthruundef_nxv2i64_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i8, ptr %addr %ext = sext i8 %ld to i64 %res = call @llvm.aarch64.sve.dup.nxv2i64( undef, %pg, i64 %ext) ret %res } define @dup_ld1rs_i8_passthruundef_nxv2i64_zext( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rs_i8_passthruundef_nxv2i64_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rb { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i8, ptr %addr %ext = zext i8 %ld to i64 %res = call @llvm.aarch64.sve.dup.nxv2i64( undef, %pg, i64 %ext) ret %res } define @dup_ld1rs_i16_passthruundef_nxv2i64_sext( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rs_i16_passthruundef_nxv2i64_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rsh { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i16, ptr %addr %ext = sext i16 %ld to i64 %res = call @llvm.aarch64.sve.dup.nxv2i64( undef, %pg, i64 %ext) ret %res } define @dup_ld1rs_i16_passthruundef_nxv2i64_zext( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rs_i16_passthruundef_nxv2i64_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i16, ptr %addr %ext = zext i16 %ld to i64 %res = call @llvm.aarch64.sve.dup.nxv2i64( undef, %pg, i64 %ext) ret %res } define @dup_ld1rs_i32_passthruundef_nxv2i64_sext( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rs_i32_passthruundef_nxv2i64_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rsw { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i32, ptr %addr %ext = sext i32 %ld to i64 %res = call @llvm.aarch64.sve.dup.nxv2i64( undef, %pg, i64 %ext) ret %res } define @dup_ld1rs_i32_passthruundef_nxv2i64_zext( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rs_i32_passthruundef_nxv2i64_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i32, ptr %addr %ext = zext i32 %ld to i64 %res = call @llvm.aarch64.sve.dup.nxv2i64( undef, %pg, i64 %ext) ret %res } define @dup_ld1rh_half_passthruundef_nxv8f16( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv8f16: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv8f16: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h0 ; CHECK-NO-LD1R-NEXT: ret %ld = load half, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv8f16( undef, %pg, half %ld) ret %res } define @dup_ld1rs_float_passthruundef_nxv4f32( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruundef_nxv4f32: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruundef_nxv4f32: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr s0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.s, p0/m, s0 ; CHECK-NO-LD1R-NEXT: ret %ld = load float, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv4f32( undef, %pg, float %ld) ret %res } define @dup_ld1rd_double_passthruundef_nxv2f64( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rd_double_passthruundef_nxv2f64: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rd_double_passthruundef_nxv2f64: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr d0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.d, p0/m, d0 ; CHECK-NO-LD1R-NEXT: ret %ld = load double, ptr %addr %res = call 
@llvm.aarch64.sve.dup.nxv2f64( undef, %pg, double %ld) ret %res } define @dup_ld1rh_half_passthruundef_nxv4f16( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv4f16: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv4f16: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: ldr h0, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h0 ; CHECK-NO-LD1R-NEXT: ret %ld = load half, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv4f16( undef, %pg, half %ld) ret %res } define @dup_ld1rb_i8_passthruzero_nxv16i8( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rb_i8_passthruzero_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i8, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv16i8( zeroinitializer, %pg, i8 %ld) ret %res } define @dup_ld1rh_i16_passthruzero_nxv8i16( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rh_i16_passthruzero_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i16, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv8i16( zeroinitializer, %pg, i16 %ld) ret %res } define @dup_ld1rs_i32_passthruzero_nxv4i32( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rs_i32_passthruzero_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i32, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv4i32( zeroinitializer, %pg, i32 %ld) ret %res } define @dup_ld1rd_i64_passthruzero_nxv2i64( %pg, ptr %addr) { ; CHECK-LABEL: dup_ld1rd_i64_passthruzero_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret %ld = load i64, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv2i64( zeroinitializer, %pg, i64 %ld) ret %res } define @dup_ld1rh_half_passthruzero_nxv8f16( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv8f16: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv8f16: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NO-LD1R-NEXT: ldr h1, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h1 ; CHECK-NO-LD1R-NEXT: ret %ld = load half, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv8f16( zeroinitializer, %pg, half %ld) ret %res } define @dup_ld1rs_float_passthruzero_nxv4f32( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv4f32: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv4f32: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NO-LD1R-NEXT: ldr s1, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.s, p0/m, s1 ; CHECK-NO-LD1R-NEXT: ret %ld = load float, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv4f32( zeroinitializer, %pg, float %ld) ret %res } define @dup_ld1rd_double_passthruzero_nxv2f64( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rd_double_passthruzero_nxv2f64: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rd_double_passthruzero_nxv2f64: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NO-LD1R-NEXT: ldr d1, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.d, p0/m, d1 ; CHECK-NO-LD1R-NEXT: ret %ld = load double, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv2f64( zeroinitializer, %pg, double %ld) ret %res } define @dup_ld1rh_half_passthruzero_nxv4f16( %pg, ptr %addr) { ; 
CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv4f16: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv4f16: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NO-LD1R-NEXT: ldr h1, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h1 ; CHECK-NO-LD1R-NEXT: ret %ld = load half, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv4f16( zeroinitializer, %pg, half %ld) ret %res } define @dup_ld1rh_half_passthruzero_nxv2f16( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv2f16: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv2f16: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NO-LD1R-NEXT: ldr h1, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h1 ; CHECK-NO-LD1R-NEXT: ret %ld = load half, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv2f16( zeroinitializer, %pg, half %ld) ret %res } define @dup_ld1rs_float_passthruzero_nxv2f32( %pg, ptr %addr) { ; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv2f32: ; CHECK-LD1R: // %bb.0: ; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x0] ; CHECK-LD1R-NEXT: ret ; ; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv2f32: ; CHECK-NO-LD1R: // %bb.0: ; CHECK-NO-LD1R-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NO-LD1R-NEXT: ldr s1, [x0] ; CHECK-NO-LD1R-NEXT: mov z0.s, p0/m, s1 ; CHECK-NO-LD1R-NEXT: ret %ld = load float, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv2f32( zeroinitializer, %pg, float %ld) ret %res } define @negtest_dup_ld1rb_i8_passthru_nxv16i8( %pt, %pg, ptr %addr) { ; CHECK-LABEL: negtest_dup_ld1rb_i8_passthru_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %ld = load i8, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv16i8( %pt, %pg, i8 %ld) ret %res } define @negtest_dup_ld1rh_i16_passthru_nxv8i16( %pt, %pg, ptr %addr) { ; CHECK-LABEL: negtest_dup_ld1rh_i16_passthru_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: ret %ld = load i16, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv8i16( %pt, %pg, i16 %ld) ret %res } define @negtest_dup_ld1rs_i32_passthru_nxv4i32( %pt, %pg, ptr %addr) { ; CHECK-LABEL: negtest_dup_ld1rs_i32_passthru_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: ret %ld = load i32, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv4i32( %pt, %pg, i32 %ld) ret %res } define @negtest_dup_ld1rd_i64_passthru_nxv2i64( %pt, %pg, ptr %addr) { ; CHECK-LABEL: negtest_dup_ld1rd_i64_passthru_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %ld = load i64, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv2i64( %pt, %pg, i64 %ld) ret %res } define @negtest_dup_ld1rh_half_passthru_nxv8f16( %pt, %pg, ptr %addr) { ; CHECK-LABEL: negtest_dup_ld1rh_half_passthru_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr h1, [x0] ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret %ld = load half, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv8f16( %pt, %pg, half %ld) ret %res } define @negtest_dup_ld1rs_float_passthru_nxv4f32( %pt, %pg, ptr %addr) { ; CHECK-LABEL: negtest_dup_ld1rs_float_passthru_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: ret %ld = load float, ptr %addr %res = call @llvm.aarch64.sve.dup.nxv4f32( %pt, %pg, float %ld) ret 
%res
}

define <vscale x 2 x double> @negtest_dup_ld1rd_double_passthru_nxv2f64(<vscale x 2 x double> %pt, <vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: negtest_dup_ld1rd_double_passthru_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov z0.d, p0/m, d1
; CHECK-NEXT:    ret
  %ld = load double, ptr %addr
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %pt, <vscale x 2 x i1> %pg, double %ld)
  ret <vscale x 2 x double> %res
}

; Check that a load consumed by a scalable splat prefers a replicating load.
define i8* @avoid_preindex_load(i8* %src, <vscale x 2 x i64>* %out) {
; CHECK-LABEL: avoid_preindex_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rsb { z0.d }, p0/z, [x0, #1]
; CHECK-NEXT:    add x0, x0, #1
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, i8* %src, i64 1
  %tmp = load i8, i8* %ptr, align 4
  %ext = sext i8 %tmp to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %dup = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  store <vscale x 2 x i64> %dup, <vscale x 2 x i64>* %out
  ret i8* %ptr
}

; Check that a load consumed by a scalable splat prefers a replicating
; load over a pre-indexed load.
define i8* @avoid_preindex_load_dup(i8* %src, <vscale x 2 x i1> %pg, <vscale x 2 x i64>* %out) {
; CHECK-LABEL: avoid_preindex_load_dup:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1rsb { z0.d }, p0/z, [x0, #1]
; CHECK-NEXT:    add x0, x0, #1
; CHECK-NEXT:    st1d { z0.d }, p1, [x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, i8* %src, i64 1
  %tmp = load i8, i8* %ptr, align 4
  %ext = sext i8 %tmp to i64
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ext)
  store <vscale x 2 x i64> %dup, <vscale x 2 x i64>* %out
  ret i8* %ptr
}

; Same as avoid_preindex_load_dup, but with zero passthru.
define i8* @avoid_preindex_load_dup_passthru_zero(i8* %src, <vscale x 2 x i1> %pg, <vscale x 2 x i64>* %out) {
; CHECK-LABEL: avoid_preindex_load_dup_passthru_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1rsb { z0.d }, p0/z, [x0, #1]
; CHECK-NEXT:    add x0, x0, #1
; CHECK-NEXT:    st1d { z0.d }, p1, [x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, i8* %src, i64 1
  %tmp = load i8, i8* %ptr, align 4
  %ext = sext i8 %tmp to i64
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, i64 %ext)
  store <vscale x 2 x i64> %dup, <vscale x 2 x i64>* %out
  ret i8* %ptr
}

; If a dup has a non-undef passthru, stick with the pre-indexed load.
define i8* @preindex_load_dup_passthru(<vscale x 2 x i64> %passthru, i8* %src, <vscale x 2 x i1> %pg, <vscale x 2 x i64>* %out) {
; CHECK-LABEL: preindex_load_dup_passthru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ldrsb x8, [x0, #1]!
; CHECK-NEXT:    mov z0.d, p0/m, x8
; CHECK-NEXT:    st1d { z0.d }, p1, [x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, i8* %src, i64 1
  %tmp = load i8, i8* %ptr, align 4
  %ext = sext i8 %tmp to i64
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> %passthru, <vscale x 2 x i1> %pg, i64 %ext)
  store <vscale x 2 x i64> %dup, <vscale x 2 x i64>* %out
  ret i8* %ptr
}

; Show that a second user of the load prevents the replicating-load check,
; which would ordinarily inhibit indexed loads from firing.
define i8* @preidx8sext64_instead_of_ld1r(i8* %src, <vscale x 2 x i64>* %out, i64* %dst) {
; CHECK-LABEL: preidx8sext64_instead_of_ld1r:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ldrsb x8, [x0, #1]!
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    str x8, [x2]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, i8* %src, i64 1
  %tmp = load i8, i8* %ptr, align 4
  %ext = sext i8 %tmp to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %dup = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  store <vscale x 2 x i64> %dup, <vscale x 2 x i64>* %out
  store i64 %ext, i64* %dst
  ret i8* %ptr
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)

declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)
declare <vscale x 4 x half> @llvm.aarch64.sve.dup.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, half)
declare <vscale x 2 x half> @llvm.aarch64.sve.dup.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, half)
declare <vscale x 2 x float> @llvm.aarch64.sve.dup.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, float)

attributes #0 = { "target-features"="+sve,+bf16" }
attributes #1 = { "target-cpu"="neoverse-v1" }