; RUN: opt < %s -interleaved-access -S | FileCheck %s
; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s

; Checks the IR produced by the interleaved-access pass for fixed-length
; interleaved loads and stores in functions built with +sve and a
; vscale_range attribute (see the attribute groups at the end of the file):
;   * de-interleaving loads are expected to become llvm.aarch64.sve.ldN.sret
;     calls followed by llvm.vector.extract of each field;
;   * interleaving stores are expected to become llvm.vector.insert calls
;     feeding an llvm.aarch64.sve.stN call;
;   * pointer element vectors are expected to go through ptrtoint/inttoptr;
;   * cases where NEON ld2 is expected instead of SVE ld2 are checked too.
; Both the legacy and the new pass-manager invocations share the same checks.

target triple = "aarch64-linux-gnu"

;------------------------------------------------------------------------------
; Integer loads, factor 2/3/4
;------------------------------------------------------------------------------

define void @load_factor2(ptr %ptr) #0 {
; CHECK-LABEL: @load_factor2(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 1
; CHECK-NEXT:    [[EXT1:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv8i16(<vscale x 8 x i16> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 0
; CHECK-NEXT:    [[EXT2:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv8i16(<vscale x 8 x i16> [[TMP3]], i64 0)
; CHECK-NEXT:    ret void
  %interleaved.vec = load <32 x i16>, ptr %ptr, align 4
  ; Even lanes form field 0, odd lanes form field 1.
  %v0 = shufflevector <32 x i16> %interleaved.vec, <32 x i16> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %v1 = shufflevector <32 x i16> %interleaved.vec, <32 x i16> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ret void
}

define void @load_factor3(ptr %ptr) #0 {
; CHECK-LABEL: @load_factor3(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2
; CHECK-NEXT:    [[EXT1:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
; CHECK-NEXT:    [[EXT2:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0
; CHECK-NEXT:    [[EXT3:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP4]], i64 0)
; CHECK-NEXT:    ret void
  %interleaved.vec = load <24 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
  %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
  ret void
}

define void @load_factor4(ptr %ptr) #0 {
; CHECK-LABEL: @load_factor4(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3
; CHECK-NEXT:    [[EXT1:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2
; CHECK-NEXT:    [[EXT2:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT:    [[EXT3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
; CHECK-NEXT:    [[EXT4:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0)
; CHECK-NEXT:    ret void
  %interleaved.vec = load <16 x i64>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %v1 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %v2 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %v3 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  ret void
}

;------------------------------------------------------------------------------
; Integer stores, factor 2/3/4
;------------------------------------------------------------------------------

define void @store_factor2(ptr %ptr, <16 x i16> %v0, <16 x i16> %v1) #0 {
; CHECK-LABEL: @store_factor2(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> %v0, <16 x i16> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[INS1:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> %v0, <16 x i16> %v1, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[INS2:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> [[TMP2]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[INS1]], <vscale x 8 x i16> [[INS2]], <vscale x 8 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    ret void
  ; Lane i of %v0 is followed by lane i of %v1.
  %interleaved.vec = shufflevector <16 x i16> %v0, <16 x i16> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  store <32 x i16> %interleaved.vec, ptr %ptr, align 4
  ret void
}

define void @store_factor3(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) #0 {
; CHECK-LABEL: @store_factor3(
; CHECK:         [[PTRUE:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INS1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> undef, <8 x i32> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[INS2:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> undef, <8 x i32> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT:    [[INS3:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> undef, <8 x i32> [[TMP3]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[INS1]], <vscale x 4 x i32> [[INS2]], <vscale x 4 x i32> [[INS3]], <vscale x 4 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    ret void
  ; %s0 = v0 ++ v1; %s1 = v2 widened to 16 lanes (upper lanes unused by the
  ; interleave mask below).
  ; NOTE(review): the unused lanes are written as poison mask elements; confirm
  ; this matches the LLVM version under test (older trees spell them undef).
  %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s1 = shufflevector <8 x i32> %v2, <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
  store <24 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) #0 {
; CHECK-LABEL: @store_factor4(
; CHECK:         [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> %s0, <8 x i64> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INS1:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> %s0, <8 x i64> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INS2:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> %s0, <8 x i64> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[INS3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> %s0, <8 x i64> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[INS4:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[INS1]], <vscale x 2 x i64> [[INS2]], <vscale x 2 x i64> [[INS3]], <vscale x 2 x i64> [[INS4]], <vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    ret void
  %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i64> %interleaved.vec, ptr %ptr, align 4
  ret void
}

;------------------------------------------------------------------------------
; Pointer-element loads: expected to load i64 and convert with inttoptr
;------------------------------------------------------------------------------

define void @load_ptrvec_factor2(ptr %ptr) #0 {
; CHECK-LABEL: @load_ptrvec_factor2(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT:    [[EXT1:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TOP1:%.*]] = inttoptr <4 x i64> [[EXT1]] to <4 x ptr>
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
; CHECK-NEXT:    [[EXT2:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TOP2:%.*]] = inttoptr <4 x i64> [[EXT2]] to <4 x ptr>
; CHECK-NEXT:    ret void
  %interleaved.vec = load <8 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %v1 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret void
}

define void @load_ptrvec_factor3(ptr %ptr) #0 {
; CHECK-LABEL: @load_ptrvec_factor3(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2
; CHECK-NEXT:    [[EXT1:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TOP1:%.*]] = inttoptr <4 x i64> [[EXT1]] to <4 x ptr>
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT:    [[EXT2:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TOP2:%.*]] = inttoptr <4 x i64> [[EXT2]] to <4 x ptr>
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
; CHECK-NEXT:    [[EXT3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TOP3:%.*]] = inttoptr <4 x i64> [[EXT3]] to <4 x ptr>
; CHECK-NEXT:    ret void
  %interleaved.vec = load <12 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %v1 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %v2 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
  ret void
}

define void @load_ptrvec_factor4(ptr %ptr) #0 {
; CHECK-LABEL: @load_ptrvec_factor4(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3
; CHECK-NEXT:    [[EXT1:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TOP1:%.*]] = inttoptr <4 x i64> [[EXT1]] to <4 x ptr>
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2
; CHECK-NEXT:    [[EXT2:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TOP2:%.*]] = inttoptr <4 x i64> [[EXT2]] to <4 x ptr>
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT:    [[EXT3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TOP3:%.*]] = inttoptr <4 x i64> [[EXT3]] to <4 x ptr>
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
; CHECK-NEXT:    [[EXT4:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0)
; CHECK-NEXT:    [[TOP4:%.*]] = inttoptr <4 x i64> [[EXT4]] to <4 x ptr>
; CHECK-NEXT:    ret void
  %interleaved.vec = load <16 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %v1 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %v2 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %v3 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  ret void
}

;------------------------------------------------------------------------------
; Pointer-element stores: expected to convert with ptrtoint and store i64
;------------------------------------------------------------------------------

define void @store_ptrvec_factor2(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1) #0 {
; CHECK-LABEL: @store_ptrvec_factor2(
; CHECK-NEXT:    [[TOI1:%.*]] = ptrtoint <4 x ptr> %v0 to <4 x i64>
; CHECK-NEXT:    [[TOI2:%.*]] = ptrtoint <4 x ptr> %v1 to <4 x i64>
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[TOI1]], <4 x i64> [[TOI2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INS1:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[TOI1]], <4 x i64> [[TOI2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INS2:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[INS1]], <vscale x 2 x i64> [[INS2]], <vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    ret void
  %interleaved.vec = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

define void @store_ptrvec_factor3(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x ptr> %v2) #0 {
; CHECK-LABEL: @store_ptrvec_factor3(
; CHECK:         [[TOI1:%.*]] = ptrtoint <8 x ptr> %s0 to <8 x i64>
; CHECK-NEXT:    [[TOI2:%.*]] = ptrtoint <8 x ptr> %s1 to <8 x i64>
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[TOI1]], <8 x i64> [[TOI2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INS1:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[TOI1]], <8 x i64> [[TOI2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INS2:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[TOI1]], <8 x i64> [[TOI2]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[INS3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[INS1]], <vscale x 2 x i64> [[INS2]], <vscale x 2 x i64> [[INS3]], <vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    ret void
  ; %s1 widens %v2 to 8 lanes; its upper lanes are not referenced by the
  ; interleave mask.
  ; NOTE(review): unused lanes written as poison mask elements; confirm
  ; against the LLVM version under test.
  %s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x ptr> %v2, <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
  %interleaved.vec = shufflevector <8 x ptr> %s0, <8 x ptr> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x ptr> %v2, <4 x ptr> %v3) #0 {
; CHECK-LABEL: @store_ptrvec_factor4(
; CHECK:         [[TOI1:%.*]] = ptrtoint <8 x ptr> %s0 to <8 x i64>
; CHECK-NEXT:    [[TOI2:%.*]] = ptrtoint <8 x ptr> %s1 to <8 x i64>
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[TOI1]], <8 x i64> [[TOI2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INS1:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[TOI1]], <8 x i64> [[TOI2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INS2:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[TOI1]], <8 x i64> [[TOI2]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[INS3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> [[TOI1]], <8 x i64> [[TOI2]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[INS4:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[INS1]], <vscale x 2 x i64> [[INS2]], <vscale x 2 x i64> [[INS3]], <vscale x 2 x i64> [[INS4]], <vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    ret void
  %s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x ptr> %v2, <4 x ptr> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x ptr> %s0, <8 x ptr> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

;------------------------------------------------------------------------------
; Fields wider than one <4 x i64> part: expected to be split into two
; ld2/st2 calls, the second one addressed 8 x i64 past %ptr
;------------------------------------------------------------------------------

define void @load_factor2_wide(ptr %ptr) #0 {
; CHECK-LABEL: @load_factor2_wide(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT:    [[EXT1:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
; CHECK-NEXT:    [[EXT2:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr %ptr, i32 8
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[PTRUE]], ptr [[TMP4]])
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT:    [[EXT3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0)
; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
; CHECK-NEXT:    [[EXT4:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP6]], i64 0)
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[EXT1]], <4 x i64> [[EXT3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i64> [[EXT2]], <4 x i64> [[EXT4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    ret void
  %interleaved.vec = load <16 x i64>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %v1 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret void
}

define void @store_factor2_wide(ptr %ptr, <8 x i64> %v0, <8 x i64> %v1) #0 {
; CHECK-LABEL: @store_factor2_wide(
; CHECK-NEXT:    [[PTRUE:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> %v0, <8 x i64> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INS1:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> %v0, <8 x i64> %v1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[INS2:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP3]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[INS1]], <vscale x 2 x i64> [[INS2]], <vscale x 2 x i1> [[PTRUE]], ptr %ptr)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> %v0, <8 x i64> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INS3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i64> %v0, <8 x i64> %v1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[INS4:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP5]], i64 0)
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr %ptr, i32 8
; CHECK-NEXT:    call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[INS3]], <vscale x 2 x i64> [[INS4]], <vscale x 2 x i1> [[PTRUE]], ptr [[TMP6]])
; CHECK-NEXT:    ret void
  %interleaved.vec = shufflevector <8 x i64> %v0, <8 x i64> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i64> %interleaved.vec, ptr %ptr, align 4
  ret void
}

;------------------------------------------------------------------------------
; Cases where NEON is expected instead of SVE
;------------------------------------------------------------------------------

; Check that neon is used for illegal multiples of 128-bit types
define void @load_384bit(ptr %ptr) #0 {
; CHECK-LABEL: @load_384bit(
; CHECK: llvm.aarch64.neon.ld2
; CHECK-NOT: llvm.aarch64.sve.ld2
  %interleaved.vec = load <12 x i64>, ptr %ptr, align 4
  %v0 = shufflevector <12 x i64> %interleaved.vec, <12 x i64> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10>
  %v1 = shufflevector <12 x i64> %interleaved.vec, <12 x i64> poison, <6 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
  ret void
}

; Check that neon is used for 128-bit vectors
define void @load_128bit(ptr %ptr) #0 {
; CHECK-LABEL: @load_128bit(
; CHECK: llvm.aarch64.neon.ld2
; CHECK-NOT: llvm.aarch64.sve.ld2
  %interleaved.vec = load <4 x i64>, ptr %ptr, align 4
  %v0 = shufflevector <4 x i64> %interleaved.vec, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
  %v1 = shufflevector <4 x i64> %interleaved.vec, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
  ret void
}

;------------------------------------------------------------------------------
; ptrue pattern selection under other vscale_range settings
;------------------------------------------------------------------------------

; Check that correct ptrues are generated for min != max case
define void @load_min_not_max(ptr %ptr) #1 {
; CHECK-LABEL: @load_min_not_max(
; CHECK: call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 4)
  %interleaved.vec = load <8 x i64>, ptr %ptr, align 4
  %v0 = shufflevector <8 x i64> %interleaved.vec, <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %v1 = shufflevector <8 x i64> %interleaved.vec, <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret void
}

define void @store_min_not_max(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1) #1 {
; CHECK-LABEL: @store_min_not_max(
; CHECK: call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 4)
  %interleaved.vec = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i64> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Check that correct ptrues are generated for min > type case
define void @load_min_ge_type(ptr %ptr) #2 {
; CHECK-LABEL: @load_min_ge_type(
; CHECK: call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 4)
  %interleaved.vec = load <8 x i64>, ptr %ptr, align 4
  %v0 = shufflevector <8 x i64> %interleaved.vec, <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %v1 = shufflevector <8 x i64> %interleaved.vec, <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret void
}

define void @store_min_ge_type(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1) #2 {
; CHECK-LABEL: @store_min_ge_type(
; CHECK: call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 4)
  %interleaved.vec = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i64> %interleaved.vec, ptr %ptr, align 4
  ret void
}

;------------------------------------------------------------------------------
; Floating-point loads
;------------------------------------------------------------------------------

define void @load_double_factor4(ptr %ptr) #0 {
; CHECK-LABEL: @load_double_factor4(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 3
; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 2
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP5]], i64 0)
; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP7]], i64 0)
; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP9]], i64 0)
; CHECK-NEXT:    ret void
;
  %interleaved.vec = load <16 x double>, ptr %ptr, align 4
  %v0 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %v1 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %v2 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %v3 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  ret void
}

define void @load_float_factor3(ptr %ptr) #0 {
; CHECK-LABEL: @load_float_factor3(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP1]], ptr [[PTR:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP5]], i64 0)
; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 0
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP7]], i64 0)
; CHECK-NEXT:    ret void
;
  %interleaved.vec = load <24 x float>, ptr %ptr, align 4
  %v0 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %v1 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
  %v2 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
  ret void
}

define void @load_half_factor2(ptr %ptr) #0 {
; CHECK-LABEL: @load_half_factor2(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP1]], ptr [[PTR:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 1
; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x half> @llvm.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x half> @llvm.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP5]], i64 0)
; CHECK-NEXT:    ret void
;
  %interleaved.vec = load <32 x half>, ptr %ptr, align 4
  %v0 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %v1 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ret void
}

define void @load_bfloat_factor2(ptr %ptr) #0 {
; CHECK-LABEL: @load_bfloat_factor2(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP1]], ptr [[PTR:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 1
; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], i64 0)
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP5]], i64 0)
; CHECK-NEXT:    ret void
;
  %interleaved.vec = load <32 x bfloat>, ptr %ptr, align 4
  %v0 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %v1 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ret void
}

;------------------------------------------------------------------------------
; Floating-point stores
;------------------------------------------------------------------------------

define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) #0 {
; CHECK-LABEL: @store_double_factor4(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP6]], i64 0)
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP8]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR:%.*]])
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x double> %interleaved.vec, ptr %ptr, align 4
  ret void
}

define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8 x float> %v2) #0 {
; NOTE(review): the [[S1]] line below matches the printed form of the unused
; mask lanes; it is written as poison here. Confirm against the opt build under
; test (older trees print undef) and regenerate if it differs.
; CHECK-LABEL: @store_float_factor3(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <8 x float> [[V0:%.*]], <8 x float> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x float> [[V2:%.*]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP6]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR:%.*]])
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s1 = shufflevector <8 x float> %v2, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %interleaved.vec = shufflevector <16 x float> %s0, <16 x float> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
  store <24 x float> %interleaved.vec, ptr %ptr, align 4
  ret void
}

define void @store_half_factor2(ptr %ptr, <16 x half> %v0, <16 x half> %v1) #0 {
; CHECK-LABEL: @store_half_factor2(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x half> [[V0:%.*]], <16 x half> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> undef, <16 x half> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> undef, <16 x half> [[TMP4]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR:%.*]])
; CHECK-NEXT:    ret void
;
  %interleaved.vec = shufflevector <16 x half> %v0, <16 x half> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  store <32 x half> %interleaved.vec, ptr %ptr, align 4
  ret void
}

define void @store_bfloat_factor2(ptr %ptr, <16 x bfloat> %v0, <16 x bfloat> %v1) #0 {
; CHECK-LABEL: @store_bfloat_factor2(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x bfloat> [[V0:%.*]], <16 x bfloat> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP4]], i64 0)
; CHECK-NEXT:    call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR:%.*]])
; CHECK-NEXT:    ret void
;
  %interleaved.vec = shufflevector <16 x bfloat> %v0, <16 x bfloat> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  store <32 x bfloat> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; #0: vscale fixed at 2; #1: vscale in [2,4]; #2: vscale fixed at 4.
attributes #0 = { vscale_range(2,2) "target-features"="+sve" }
attributes #1 = { vscale_range(2,4) "target-features"="+sve" }
attributes #2 = { vscale_range(4,4) "target-features"="+sve" }