; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s

;
; LD1RQB
;

define <vscale x 16 x i8> @ld1rqb_i8(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqb_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, ptr %addr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @ld1rqb_i8_imm(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqb_i8_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, #16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %addr, i8 16
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, ptr %ptr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @ld1rqb_i8_scalar(<vscale x 16 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqb_i8_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %addr, i64 %idx
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, ptr %ptr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @ld1rqb_i8_imm_lower_bound(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqb_i8_imm_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, #-128]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %addr, i8 -128
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, ptr %ptr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @ld1rqb_i8_imm_upper_bound(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqb_i8_imm_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, #112]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %addr, i8 112
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, ptr %ptr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @ld1rqb_i8_imm_out_of_lower_bound(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqb_i8_imm_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #-129
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, x8]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %addr, i64 -129
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, ptr %ptr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @ld1rqb_i8_imm_out_of_upper_bound(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqb_i8_imm_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #113
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, x8]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %addr, i64 113
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, ptr %ptr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @ld1rqb_i8_imm_dupqlane(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqb_i8_imm_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, #-16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds <16 x i8>, ptr %addr, i16 -1
  %load = load <16 x i8>, ptr %ptr
  %1 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> %load, i64 0)
  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %1, i64 0)
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @ld1rqb_i8_scalar_dupqlane(<vscale x 16 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqb_i8_scalar_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %addr, i64 %idx
  %load = load <16 x i8>, ptr %ptr
  %1 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> %load, i64 0)
  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %1, i64 0)
  ret <vscale x 16 x i8> %2
}

;
; LD1RQH
;

define <vscale x 8 x i16> @ld1rqh_i16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x half> @ld1rqh_f16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x i16> @ld1rqh_i16_imm(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_i16_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-64]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i16, ptr %addr, i16 -32
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> %pred, ptr %ptr)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x half> @ld1rqh_f16_imm(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_f16_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds half, ptr %addr, i16 -8
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1> %pred, ptr %ptr)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x i16> @ld1rqh_i16_scalar(<vscale x 8 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqh_i16_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i16, ptr %addr, i64 %idx
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> %pred, ptr %ptr)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x half> @ld1rqh_f16_scalar(<vscale x 8 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqh_f16_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds half, ptr %addr, i64 %idx
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1> %pred, ptr %ptr)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x bfloat> @ld1rqh_bf16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x bfloat> @ld1rqh_bf16_imm(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_bf16_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds bfloat, ptr %addr, i16 -8
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, ptr %ptr)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x bfloat> @ld1rqh_bf16_scalar(<vscale x 8 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqh_bf16_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds bfloat, ptr %addr, i64 %idx
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, ptr %ptr)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x i16> @ld1rqh_i16_imm_dupqlane(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_i16_imm_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds <8 x i16>, ptr %addr, i16 -1
  %load = load <8 x i16>, ptr %ptr
  %1 = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> %load, i64 0)
  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %1, i64 0)
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @ld1rqh_i16_scalar_dupqlane(<vscale x 8 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqh_i16_scalar_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i16, ptr %addr, i64 %idx
  %load = load <8 x i16>, ptr %ptr
  %1 = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> %load, i64 0)
  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %1, i64 0)
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x half> @ld1rqh_f16_imm_dupqlane(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_f16_imm_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds <8 x half>, ptr %addr, i16 -1
  %load = load <8 x half>, ptr %ptr
  %1 = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> %load, i64 0)
  %2 = tail call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %1, i64 0)
  ret <vscale x 8 x half> %2
}

define <vscale x 8 x half> @ld1rqh_f16_scalar_dupqlane(<vscale x 8 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqh_f16_scalar_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds half, ptr %addr, i64 %idx
  %load = load <8 x half>, ptr %ptr
  %1 = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> %load, i64 0)
  %2 = tail call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %1, i64 0)
  ret <vscale x 8 x half> %2
}

define <vscale x 8 x bfloat> @ld1rqh_bf16_imm_dupqlane(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqh_bf16_imm_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds <8 x bfloat>, ptr %addr, i16 -1
  %load = load <8 x bfloat>, ptr %ptr
  %1 = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> %load, i64 0)
  %2 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %1, i64 0)
  ret <vscale x 8 x bfloat> %2
}

define <vscale x 8 x bfloat> @ld1rqh_bf16_scalar_dupqlane(<vscale x 8 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqh_bf16_scalar_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds bfloat, ptr %addr, i64 %idx
  %load = load <8 x bfloat>, ptr %ptr
  %1 = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> %load, i64 0)
  %2 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %1, i64 0)
  ret <vscale x 8 x bfloat> %2
}

;
; LD1RQW
;

define <vscale x 4 x i32> @ld1rqw_i32(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqw_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> %pred, ptr %addr)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x float> @ld1rqw_f32(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqw_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1> %pred, ptr %addr)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x i32> @ld1rqw_i32_imm(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqw_i32_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, #112]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i32, ptr %addr, i32 28
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> %pred, ptr %ptr)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x float> @ld1rqw_f32_imm(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqw_f32_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, #32]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds float, ptr %addr, i32 8
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1> %pred, ptr %ptr)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x i32> @ld1rqw_i32_scalar(<vscale x 4 x i1> %pred, ptr %base, i64 %idx) {
; CHECK-LABEL: ld1rqw_i32_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i32, ptr %base, i64 %idx
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> %pred, ptr %ptr)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x float> @ld1rqw_f32_scalar(<vscale x 4 x i1> %pred, ptr %base, i64 %idx) {
; CHECK-LABEL: ld1rqw_f32_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds float, ptr %base, i64 %idx
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1> %pred, ptr %ptr)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x i32> @ld1rqw_i32_imm_dupqlane(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqw_i32_imm_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, #16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds <4 x i32>, ptr %addr, i32 1
  %load = load <4 x i32>, ptr %ptr
  %1 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> %load, i64 0)
  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %1, i64 0)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @ld1rqw_i32_scalar_dupqlane(<vscale x 4 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqw_i32_scalar_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i32, ptr %addr, i64 %idx
  %load = load <4 x i32>, ptr %ptr
  %1 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> %load, i64 0)
  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %1, i64 0)
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x float> @ld1rqw_f32_imm_dupqlane(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqw_f32_imm_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, #16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds <4 x float>, ptr %addr, i32 1
  %load = load <4 x float>, ptr %ptr
  %1 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %load, i64 0)
  %2 = tail call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %1, i64 0)
  ret <vscale x 4 x float> %2
}

define <vscale x 4 x float> @ld1rqw_f32_scalar_dupqlane(<vscale x 4 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqw_f32_scalar_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds float, ptr %addr, i64 %idx
  %load = load <4 x float>, ptr %ptr
  %1 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %load, i64 0)
  %2 = tail call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %1, i64 0)
  ret <vscale x 4 x float> %2
}

;
; LD1RQD
;

define <vscale x 2 x i64> @ld1rqd_i64(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> %pred, ptr %addr)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @ld1rqd_f64(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqd_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1> %pred, ptr %addr)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x i64> @ld1rqd_i64_imm(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqd_i64_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, #64]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i64, ptr %addr, i64 8
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> %pred, ptr %ptr)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @ld1rqd_f64_imm(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqd_f64_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, #-128]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds double, ptr %addr, i64 -16
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1> %pred, ptr %ptr)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x i64> @ld1rqd_i64_scalar(<vscale x 2 x i1> %pred, ptr %base, i64 %idx) {
; CHECK-LABEL: ld1rqd_i64_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i64, ptr %base, i64 %idx
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> %pred, ptr %ptr)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @ld1rqd_f64_scalar(<vscale x 2 x i1> %pred, ptr %base, i64 %idx) {
; CHECK-LABEL: ld1rqd_f64_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds double, ptr %base, i64 %idx
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1> %pred, ptr %ptr)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x i64> @ld1rqd_i64_imm_dupqlane(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqd_i64_imm_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, #16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds <2 x i64>, ptr %addr, i64 1
  %load = load <2 x i64>, ptr %ptr
  %1 = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> %load, i64 0)
  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %1, i64 0)
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @ld1rqd_i64_scalar_dupqlane(<vscale x 2 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqd_i64_scalar_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i64, ptr %addr, i64 %idx
  %load = load <2 x i64>, ptr %ptr
  %1 = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> %load, i64 0)
  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %1, i64 0)
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x double> @ld1rqd_f64_imm_dupqlane(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1rqd_f64_imm_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, #16]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds <2 x double>, ptr %addr, i64 1
  %load = load <2 x double>, ptr %ptr
  %1 = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> %load, i64 0)
  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %1, i64 0)
  ret <vscale x 2 x double> %2
}

define <vscale x 2 x double> @ld1rqd_f64_scalar_dupqlane(<vscale x 2 x i1> %pred, ptr %addr, i64 %idx) {
; CHECK-LABEL: ld1rqd_f64_scalar_dupqlane:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds double, ptr %addr, i64 %idx
  %load = load <2 x double>, ptr %ptr
  %1 = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> %load, i64 0)
  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %1, i64 0)
  ret <vscale x 2 x double> %2
}

;
; LDNT1B
;

define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ldnt1b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred, ptr %addr)
  ret <vscale x 16 x i8> %res
}

;
; LDNT1H
;

define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ldnt1h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ldnt1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x bfloat> @ldnt1h_bf16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ldnt1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x bfloat> %res
}

;
; LDNT1W
;

define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ldnt1w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pred, ptr %addr)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ldnt1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %pred, ptr %addr)
  ret <vscale x 4 x float> %res
}

;
; LDNT1D
;

define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ldnt1d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %pred, ptr %addr)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ldnt1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %pred, ptr %addr)
  ret <vscale x 2 x double> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1>, ptr)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, ptr)

declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)
declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)

declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)