; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s

; Check that legacy SVE ldN, tuple.create/set/get and bfloat lane intrinsics
; are auto-upgraded to their current forms when round-tripped through
; llvm-as and llvm-dis.

define <vscale x 32 x i8> @ld2.nxv32i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr) {
; CHECK: %1 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> %Pg, ptr %base_ptr)
; CHECK-NEXT: %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
; CHECK-NEXT: %3 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %2, i64 0)
; CHECK-NEXT: %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
; CHECK-NEXT: %res = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %3, <vscale x 16 x i8> %4, i64 16)
; CHECK-NEXT: ret <vscale x 32 x i8> %res
  %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
  ret <vscale x 32 x i8> %res
}

define <vscale x 48 x i8> @ld3.nxv48i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr) {
; CHECK: %1 = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> %Pg, ptr %base_ptr)
; CHECK-NEXT: %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
; CHECK-NEXT: %3 = call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> %2, i64 0)
; CHECK-NEXT: %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
; CHECK-NEXT: %5 = call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> %3, <vscale x 16 x i8> %4, i64 16)
; CHECK-NEXT: %6 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
; CHECK-NEXT: %res = call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> %5, <vscale x 16 x i8> %6, i64 32)
; CHECK-NEXT: ret <vscale x 48 x i8> %res
  %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
  ret <vscale x 48 x i8> %res
}

define <vscale x 64 x i8> @ld4.nxv64i8_lower_bound(<vscale x 16 x i1> %Pg, i8 *%base_ptr) {
; CHECK: %1 = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> %Pg, ptr %base_ptr)
; CHECK-NEXT: %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
; CHECK-NEXT: %3 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %2, i64 0)
; CHECK-NEXT: %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
; CHECK-NEXT: %5 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %3, <vscale x 16 x i8> %4, i64 16)
; CHECK-NEXT: %6 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
; CHECK-NEXT: %7 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %5, <vscale x 16 x i8> %6, i64 32)
; CHECK-NEXT: %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
; CHECK-NEXT: %res = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %7, <vscale x 16 x i8> %8, i64 48)
; CHECK-NEXT: ret <vscale x 64 x i8> %res
  %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
  ret <vscale x 64 x i8> %res
}

; Check short mangling name
; ldN intrinsic name without any element type
define <vscale x 32 x i8> @ld2.nxv32i8_no_eltty(<vscale x 16 x i1> %Pg, i8 *%base_ptr) {
; CHECK-LABEL: @ld2.nxv32i8_no_eltty
; CHECK: %1 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> %Pg, ptr %base_ptr)
; CHECK-NEXT: %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
; CHECK-NEXT: %3 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %2, i64 0)
; CHECK-NEXT: %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
; CHECK-NEXT: %res = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %3, <vscale x 16 x i8> %4, i64 16)
; CHECK-NEXT: ret <vscale x 32 x i8> %res
  %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
  ret <vscale x 32 x i8> %res
}

; ldN intrinsic name with only output type
define <vscale x 32 x i8> @ld2.nxv32i8_no_predty_pty(<vscale x 16 x i1> %Pg, i8 *%base_ptr) {
; CHECK-LABEL: @ld2.nxv32i8_no_predty_pty
; CHECK: %1 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> %Pg, ptr %base_ptr)
; CHECK-NEXT: %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
; CHECK-NEXT: %3 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %2, i64 0)
; CHECK-NEXT: %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
; CHECK-NEXT: %res = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %3, <vscale x 16 x i8> %4, i64 16)
; CHECK-NEXT: ret <vscale x 32 x i8> %res
  %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
  ret <vscale x 32 x i8> %res
}

declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
declare <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
declare <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2(<vscale x 16 x i1>, i8 *)
declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8(<vscale x 16 x i1>, i8 *)

; aarch64.sve.tuple.create.N
define <vscale x 32 x i8> @create2_nxv32i8_nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
; CHECK-LABEL: @create2_nxv32i8_nxv16i8
; CHECK: %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
; CHECK-NEXT: %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
; CHECK-NEXT: ret <vscale x 32 x i8> %tuple
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
  ret <vscale x 32 x i8> %tuple
}

define <vscale x 24 x i16> @create3_nxv24i8_nxv16i8(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) {
; CHECK-LABEL: @create3_nxv24i8_nxv16i8
; CHECK: %1 = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> %z1, i64 0)
; CHECK-NEXT: %2 = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> %1, <vscale x 8 x i16> %z2, i64 8)
; CHECK-NEXT: %tuple = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> %2, <vscale x 8 x i16> %z3, i64 16)
; CHECK-NEXT: ret <vscale x 24 x i16> %tuple
  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
  ret <vscale x 24 x i16> %tuple
}

define <vscale x 64 x i8> @create4_nxv64i8_nxv16i8(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4) {
; CHECK-LABEL: @create4_nxv64i8_nxv16i8
; CHECK: %1 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
; CHECK-NEXT: %2 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
; CHECK-NEXT: %3 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %2, <vscale x 16 x i8> %z3, i64 32)
; CHECK-NEXT: %tuple = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %3, <vscale x 16 x i8> %z4, i64 48)
; CHECK-NEXT: ret <vscale x 64 x i8> %tuple
  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4)
  ret <vscale x 64 x i8> %tuple
}

; Accept short mangling name
define <vscale x 32 x i8> @create2_nxv32i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
; CHECK-LABEL: @create2_nxv32i8
; CHECK: %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
; CHECK-NEXT: %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
; CHECK-NEXT: ret <vscale x 32 x i8> %tuple
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
  ret <vscale x 32 x i8> %tuple
}

define <vscale x 32 x i8> @create2(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
; CHECK-LABEL: @create2
; CHECK: %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
; CHECK-NEXT: %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
; CHECK-NEXT: ret <vscale x 32 x i8> %tuple
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
  ret <vscale x 32 x i8> %tuple
}

; Negative test for create
; Should not upgrade when create is not 2, 3 or 4
define <vscale x 16 x i8> @sve_tuple_create1(<vscale x 16 x i8> %z0) {
; CHECK-LABEL: @sve_tuple_create1
; CHECK: %tuple = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8> %z0)
; CHECK-NEXT: ret <vscale x 16 x i8> %tuple
  %tuple = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8> %z0)
  ret <vscale x 16 x i8> %tuple
}

; aarch64.sve.tuple.set
define void @set_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %z0, <vscale x 4 x i32> %z1) {
; CHECK-LABEL: @set_tuple2_nxv8i32_elt1
; CHECK: %ins = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %z0, <vscale x 4 x i32> %z1, i64 4)
; CHECK-NEXT: ret void
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %z0, i32 1, <vscale x 4 x i32> %z1)
  ret void
}

; aarch64.sve.tuple.get
define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) {
; CHECK-LABEL: @get_tuple2_nxv8i32_elt1
; CHECK: %ext = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i64 4)
; CHECK-NEXT: ret <vscale x 4 x i32> %ext
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
  ret <vscale x 4 x i32> %ext
}

; bfdot
define <vscale x 4 x float> @bfdot_lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: @bfdot_lane
; CHECK: %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
; CHECK-NEXT: ret <vscale x 4 x float> %out
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
  ret <vscale x 4 x float> %out
}

; bfmlalb
define <vscale x 4 x float> @bfmlalb_lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: @bfmlalb_lane
; CHECK: %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
; CHECK-NEXT: ret <vscale x 4 x float> %out
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
  ret <vscale x 4 x float> %out
}

; bfmlalt
define <vscale x 4 x float> @bfmlalt_lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: @bfmlalt_lane
; CHECK: %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
; CHECK-NEXT: ret <vscale x 4 x float> %out
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
  ret <vscale x 4 x float> %out
}

declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)