; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s

; Code generation tests for the multi-vector quadword structure loads
; LD2Q, LD3Q and LD4Q, reached through the @llvm.aarch64.sve.ld{2,3,4}q.sret
; intrinsics. For every element type three addressing forms are exercised:
;   <op>_si_<ty>   base + immediate   -> [x0, #imm, mul vl]
;   <op>_ss_<ty>   base + register    -> [x0, x1, lsl #4]
;   <op>_<ty>      base only          -> [x0]
; The i8 variants additionally pin both ends of the immediate range used here
; (LD2Q: -16/14, LD3Q: -24/21, LD4Q: -32/28).
;
; NOTE(review): the scalable vector types in this file were reconstructed from
; the intrinsic name suffixes (nxv16i8 -> <vscale x 16 x i8>, ...). The
; governing predicate is assumed to have one i1 lane per data element; confirm
; against the intrinsic definitions before regenerating the assertions.

;; LD2Q

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2q_si_i8_off16(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_i8_off16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2q_si_i8_off14(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_i8_off14:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #14, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2q_ss_i8(<vscale x 16 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld2q_ss_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %addr2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2q_i8(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %addr)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @ld2q_si_i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 -16
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @ld2q_ss_i16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld2q_ss_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @ld2q_i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @ld2q_si_i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 -16
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @ld2q_ss_i32(<vscale x 4 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld2q_ss_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %addr2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @ld2q_i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %addr)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @ld2q_si_i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 -16
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @ld2q_ss_i64(<vscale x 2 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld2q_ss_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %addr2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @ld2q_i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %addr)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @ld2q_si_f16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 -16
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @ld2q_ss_f16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld2q_ss_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @ld2q_f16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @ld2q_si_f32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 -16
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @ld2q_ss_f32(<vscale x 4 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld2q_ss_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %addr2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @ld2q_f32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %addr)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @ld2q_si_f64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 -16
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @ld2q_ss_f64(<vscale x 2 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld2q_ss_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %addr2)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @ld2q_f64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %addr)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld2q_si_bf16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_si_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 -16
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld2q_ss_bf16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld2q_ss_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld2q_bf16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld2q_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2q { z0.q, z1.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

;; LD3Q

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3q_si_i8_off24(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_i8_off24:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3q_si_i8_off21(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_i8_off21:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #21, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3q_ss_i8(<vscale x 16 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld3q_ss_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %addr2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3q_i8(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %addr)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3q_si_i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 -24
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3q_ss_i16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld3q_ss_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3q_i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3q_si_i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 -24
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3q_ss_i32(<vscale x 4 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld3q_ss_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %addr2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3q_i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %addr)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3q_si_i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 -24
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3q_ss_i64(<vscale x 2 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld3q_ss_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %addr2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3q_i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %addr)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3q_si_f16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 -24
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3q_ss_f16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld3q_ss_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3q_f16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3q_si_f32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 -24
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3q_ss_f32(<vscale x 4 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld3q_ss_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %addr2)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3q_f32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %addr)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld3q_si_f64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 -24
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld3q_ss_f64(<vscale x 2 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld3q_ss_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %addr2)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld3q_f64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %addr)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld3q_si_bf16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_si_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 -24
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld3q_ss_bf16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld3q_ss_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld3q_bf16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld3q_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3q { z0.q - z2.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

;; LD4Q

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4q_si_i8_off32(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_i8_off32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4q_si_i8_off28(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_i8_off28:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #28, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4q_ss_i8(<vscale x 16 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld4q_ss_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %addr2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4q_i8(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> %pg, ptr %addr)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld4q_si_i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 -32
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld4q_ss_i16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld4q_ss_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld4q_i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld4q_si_i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 -32
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld4q_ss_i32(<vscale x 4 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld4q_ss_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %addr2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld4q_i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %addr)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld4q_si_i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 -32
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld4q_ss_i64(<vscale x 2 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld4q_ss_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %addr2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld4q_i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> %pg, ptr %addr)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld4q_si_f16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 -32
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld4q_ss_f16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld4q_ss_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld4q_f16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld4q_si_f32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 -32
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld4q_ss_f32(<vscale x 4 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld4q_ss_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %addr2)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld4q_f32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> %pg, ptr %addr)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld4q_si_f64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 -32
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld4q_ss_f64(<vscale x 2 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld4q_ss_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %addr2)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld4q_f64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %addr)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4q_si_bf16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_si_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 -32
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4q_ss_bf16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
; CHECK-LABEL: ld4q_ss_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT:    ret
  %addr2 = getelementptr i128, ptr %addr, i64 %a
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %addr2)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4q_bf16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: ld4q_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %addr)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

; The ldnt1.pn.x4 declarations below are not referenced by any function in this
; file; they are kept as they were so nothing is dropped.
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount"), ptr)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount"), ptr)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount"), ptr)
declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount"), ptr)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount"), ptr)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount"), ptr)
declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount"), ptr)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8bf16(target("aarch64.svcount"), ptr)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1>, ptr)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1>, ptr)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1>, ptr)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1>, ptr)
declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1>, ptr)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1>, ptr)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1>, ptr)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1>, ptr)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1>, ptr)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1>, ptr)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1>, ptr)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1>, ptr)
declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1>, ptr)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1>, ptr)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1>, ptr)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1>, ptr)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1>, ptr)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1>, ptr)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1>, ptr)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1>, ptr)
declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1>, ptr)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1>, ptr)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1>, ptr)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1>, ptr)