; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-none-eabi -mattr=+v8.9a -mattr=+rcpc3 < %s | FileCheck --check-prefixes=BOTH,RCPC3 %s
; RUN: llc -mtriple=aarch64-none-eabi -mattr=+v8.9a < %s | FileCheck --check-prefixes=BOTH,NO-RCPC3 %s

; Overview: the first group of tests pairs a 64-bit atomic load-acquire with an
; insertelement, or an extractelement with a 64-bit atomic store-release. With
; +rcpc3 the expected output is a single ldap1/stl1 lane instruction; without
; it the expected output is ldapr/stlr plus a move between the general-purpose
; and vector register files. The second group checks sequences whose expected
; output is the same in both configurations.

; load-acquire an i64 and insert it into lane 0 of a <2 x i64>
define hidden <2 x i64> @test_ldap1_2xi64_lane0(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    mov v0.d[0], x8
; NO-RCPC3-NEXT:    ret
  %1 = load atomic i64, ptr %a acquire, align 8
  %ldap1 = insertelement <2 x i64> %b, i64 %1, i64 0
  ret <2 x i64> %ldap1
}

; load-acquire an i64 and insert it into lane 1 of a <2 x i64>
define hidden <2 x i64> @test_ldap1_2xi64_lane1(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xi64_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xi64_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    mov v0.d[1], x8
; NO-RCPC3-NEXT:    ret
  %1 = load atomic i64, ptr %a acquire, align 8
  %ldap1 = insertelement <2 x i64> %b, i64 %1, i64 1
  ret <2 x i64> %ldap1
}

; load-acquire a double and insert it into lane 0 of a <2 x double>
define hidden nofpclass(nan inf) <2 x double> @test_ldap1_2xdouble_lane0(ptr nocapture noundef readonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d1, x8
; NO-RCPC3-NEXT:    mov v0.d[0], v1.d[0]
; NO-RCPC3-NEXT:    ret
  %1 = load atomic double, ptr %a acquire, align 8
  %ldap1 = insertelement <2 x double> %b, double %1, i64 0
  ret <2 x double> %ldap1
}

; load-acquire a double and insert it into lane 1 of a <2 x double>
define hidden nofpclass(nan inf) <2 x double> @test_ldap1_2xdouble_lane1(ptr nocapture noundef readonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xdouble_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xdouble_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d1, x8
; NO-RCPC3-NEXT:    mov v0.d[1], v1.d[0]
; NO-RCPC3-NEXT:    ret
  %1 = load atomic double, ptr %a acquire, align 8
  %ldap1 = insertelement <2 x double> %b, double %1, i64 1
  ret <2 x double> %ldap1
}

; load-acquire an i64 and insert it into lane 0 of a poison <1 x i64>
; (%b is not used by the body)
define hidden <1 x i64> @test_ldap1_1xi64_lane0(ptr nocapture noundef readonly %a, <1 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_1xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_1xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d0, x8
; NO-RCPC3-NEXT:    ret
  %1 = load atomic i64, ptr %a acquire, align 8
  %ldap1 = insertelement <1 x i64> poison, i64 %1, i64 0
  ret <1 x i64> %ldap1
}

; load-acquire a double and insert it into lane 0 of a poison <1 x double>
; (%b is not used by the body)
define hidden nofpclass(nan inf) <1 x double> @test_ldap1_1xdouble_lane0(ptr nocapture noundef readonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_1xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_1xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d0, x8
; NO-RCPC3-NEXT:    ret
  %1 = load atomic double, ptr %a acquire, align 8
  %ldap1 = insertelement <1 x double> poison, double %1, i64 0
  ret <1 x double> %ldap1
}

; extract lane 0 of a <2 x i64> and store-release it
define hidden void @test_stl1_2xi64_lane0(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %1 = extractelement <2 x i64> %b, i64 0
  store atomic i64 %1, ptr %a release, align 8
  ret void
}

; extract lane 1 of a <2 x i64> and store-release it
define hidden void @test_stl1_2xi64_lane1(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xi64_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xi64_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    mov x8, v0.d[1]
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %1 = extractelement <2 x i64> %b, i64 1
  store atomic i64 %1, ptr %a release, align 8
  ret void
}

; extract lane 0 of a <2 x double> and store-release it
define hidden void @test_stl1_2xdouble_lane0(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %1 = extractelement <2 x double> %b, i64 0
  store atomic double %1, ptr %a release, align 8
  ret void
}

; extract lane 1 of a <2 x double> and store-release it
define hidden void @test_stl1_2xdouble_lane1(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xdouble_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xdouble_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    mov d0, v0.d[1]
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %1 = extractelement <2 x double> %b, i64 1
  store atomic double %1, ptr %a release, align 8
  ret void
}

; extract the only lane of a <1 x i64> and store-release it
define hidden void @test_stl1_1xi64_lane0(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_1xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_1xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %1 = extractelement <1 x i64> %b, i64 0
  store atomic i64 %1, ptr %a release, align 8
  ret void
}

; extract the only lane of a <1 x double> and store-release it
define hidden void @test_stl1_1xdouble_lane0(ptr nocapture noundef writeonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_1xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_1xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %1 = extractelement <1 x double> %b, i64 0
  store atomic double %1, ptr %a release, align 8
  ret void
}

; The remaining tests do not have any particular RCPC3-specific codegen:

; load-acquire a plain non-vector double value
define hidden double @test_double_load(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_double_load:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldapr x8, [x0]
; BOTH-NEXT:    fmov d0, x8
; BOTH-NEXT:    ret
  %1 = load atomic double, ptr %a acquire, align 8
  ret double %1
}

; store-release a plain non-vector double value
define hidden void @test_double_store(ptr nocapture noundef writeonly %a, double noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_double_store:
; BOTH:       // %bb.0:
; BOTH-NEXT:    fmov x8, d0
; BOTH-NEXT:    stlr x8, [x0]
; BOTH-NEXT:    ret
  store atomic double %b, ptr %a release, align 8
  ret void
}

; load-acquire an i64, followed by a bitcast to a 64-bit vector
define hidden <2 x i32> @test_load_i64_bitcast_2xi32(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_i64_bitcast_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldapr x8, [x0]
; BOTH-NEXT:    fmov d0, x8
; BOTH-NEXT:    ret
  %1 = load atomic i64, ptr %a acquire, align 8
  %2 = bitcast i64 %1 to <2 x i32>
  ret <2 x i32> %2
}

; bitcast from a 64-bit vector, followed by a store-release of the i64
define hidden void @test_bitcast_2xi32_store_i64(ptr nocapture noundef writeonly %a, <2 x i32> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_bitcast_2xi32_store_i64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    fmov x8, d0
; BOTH-NEXT:    stlr x8, [x0]
; BOTH-NEXT:    ret
  %1 = bitcast <2 x i32> %b to i64
  store atomic i64 %1, ptr %a release, align 8
  ret void
}

; (non-atomic) load a 64-bit vector (<2 x i32>)
define hidden <2 x i32> @test_load_2xi32(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldr d0, [x0]
; BOTH-NEXT:    ret
  %1 = load <2 x i32>, ptr %a, align 8
  ret <2 x i32> %1
}

; (non-atomic) store a 64-bit vector (<2 x i32>)
define hidden void @test_store_2xi32(ptr nocapture noundef writeonly %a, <2 x i32> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_store_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    str d0, [x0]
; BOTH-NEXT:    ret
  store <2 x i32> %b, ptr %a, align 8
  ret void
}

; (non-atomic) load a 64-bit vector (<1 x i64>)
define hidden <1 x i64> @test_load_1xi64(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_1xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldr d0, [x0]
; BOTH-NEXT:    ret
  %1 = load <1 x i64>, ptr %a, align 8
  ret <1 x i64> %1
}

; (non-atomic) store a 64-bit vector (<1 x i64>)
define hidden void @test_store_1xi64(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_store_1xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    str d0, [x0]
; BOTH-NEXT:    ret
  store <1 x i64> %b, ptr %a, align 8
  ret void
}

; (non-atomic) load a 64-bit value and insert into vector
define hidden <2 x i64> @test_load_insert_2xi64(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_load_insert_2xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ld1 { v0.d }[0], [x0]
; BOTH-NEXT:    ret
  %1 = load i64, ptr %a, align 8
  %2 = insertelement <2 x i64> %b, i64 %1, i64 0
  ret <2 x i64> %2
}

; extract from vector and (non-atomic) store a 64-bit value
define hidden void @test_extract_store_2xi64(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_extract_store_2xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    st1 { v0.d }[1], [x0]
; BOTH-NEXT:    ret
  %1 = extractelement <2 x i64> %b, i64 1
  store i64 %1, ptr %a, align 8
  ret void
}