; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+v,+zvl1024b < %s | FileCheck %s --check-prefix=RV64-1024
; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+v,+zvl2048b < %s | FileCheck %s --check-prefix=RV64-2048

; Codegen test: fixed-length vector interleave on RISC-V V.
; Each function loads two i16 vectors and interleaves their elements into a
; double-length result; the CHECK lines show this lowering to the
; vwaddu.vv + vwmaccu.vx (-1) widening interleave idiom at two VLEN configs.
;
; NOTE(review): every shufflevector below is missing its mask operand — the
; literal <i32 ...> index list after the result mask type has been stripped
; (likely by whatever tool flattened this file), leaving the IR syntactically
; invalid. The masks must be restored from the original test (widen-to-2x
; masks for %2/%3 and an even/odd interleave mask for %4) before this file
; can be fed to llc — TODO confirm against upstream.

; Interleave two <128 x i16> vectors into a <256 x i16> result.
define void @interleave256(ptr %agg.result, ptr %0, ptr %1) {
; RV64-1024-LABEL: interleave256:
; RV64-1024: # %bb.0: # %entry
; RV64-1024-NEXT: li a3, 128
; RV64-1024-NEXT: vsetvli zero, a3, e16, m2, ta, ma
; RV64-1024-NEXT: vle16.v v8, (a1)
; RV64-1024-NEXT: vle16.v v10, (a2)
; RV64-1024-NEXT: vwaddu.vv v12, v8, v10
; RV64-1024-NEXT: li a1, -1
; RV64-1024-NEXT: vwmaccu.vx v12, a1, v10
; RV64-1024-NEXT: li a1, 256
; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RV64-1024-NEXT: vse16.v v12, (a0)
; RV64-1024-NEXT: ret
;
; RV64-2048-LABEL: interleave256:
; RV64-2048: # %bb.0: # %entry
; RV64-2048-NEXT: li a3, 128
; RV64-2048-NEXT: vsetvli zero, a3, e16, m1, ta, ma
; RV64-2048-NEXT: vle16.v v8, (a1)
; RV64-2048-NEXT: vle16.v v9, (a2)
; RV64-2048-NEXT: vwaddu.vv v10, v8, v9
; RV64-2048-NEXT: li a1, -1
; RV64-2048-NEXT: vwmaccu.vx v10, a1, v9
; RV64-2048-NEXT: li a1, 256
; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-2048-NEXT: vse16.v v10, (a0)
; RV64-2048-NEXT: ret
entry:
  ; Load the two 128-element source vectors ("even" and "odd" lanes).
  %ve = load <128 x i16>, ptr %0, align 256
  %vo = load <128 x i16>, ptr %1, align 256
  ; NOTE(review): masks missing — presumably widen each <128 x i16> source
  ; into <256 x i16> here; verify exact index lists against the original test.
  %2 = shufflevector <128 x i16> %ve, <128 x i16> poison, <256 x i32>
  %3 = shufflevector <128 x i16> %vo, <128 x i16> poison, <256 x i32>
  ; NOTE(review): mask missing — presumably the even/odd interleave of %2/%3.
  %4 = shufflevector <256 x i16> %2, <256 x i16> %3, <256 x i32>
  store <256 x i16> %4, ptr %agg.result, align 512
  ret void
}

; Same pattern at twice the width: interleave two <256 x i16> vectors into a
; <512 x i16> result (exercises the next LMUL step at each VLEN).
define void @interleave512(ptr %agg.result, ptr %0, ptr %1) local_unnamed_addr {
; RV64-1024-LABEL: interleave512:
; RV64-1024: # %bb.0: # %entry
; RV64-1024-NEXT: li a3, 256
; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, ma
; RV64-1024-NEXT: vle16.v v8, (a1)
; RV64-1024-NEXT: vle16.v v12, (a2)
; RV64-1024-NEXT: vwaddu.vv v16, v8, v12
; RV64-1024-NEXT: li a1, -1
; RV64-1024-NEXT: vwmaccu.vx v16, a1, v12
; RV64-1024-NEXT: li a1, 512
; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-1024-NEXT: vse16.v v16, (a0)
; RV64-1024-NEXT: ret
;
; RV64-2048-LABEL: interleave512:
; RV64-2048: # %bb.0: # %entry
; RV64-2048-NEXT: li a3, 256
; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, ta, ma
; RV64-2048-NEXT: vle16.v v8, (a1)
; RV64-2048-NEXT: vle16.v v10, (a2)
; RV64-2048-NEXT: vwaddu.vv v12, v8, v10
; RV64-2048-NEXT: li a1, -1
; RV64-2048-NEXT: vwmaccu.vx v12, a1, v10
; RV64-2048-NEXT: li a1, 512
; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RV64-2048-NEXT: vse16.v v12, (a0)
; RV64-2048-NEXT: ret
entry:
  ; Load the two 256-element source vectors.
  %ve = load <256 x i16>, ptr %0, align 512
  %vo = load <256 x i16>, ptr %1, align 512
  ; NOTE(review): masks missing — presumably widen each <256 x i16> source
  ; into <512 x i16> here; verify exact index lists against the original test.
  %2 = shufflevector <256 x i16> %ve, <256 x i16> poison, <512 x i32>
  %3 = shufflevector <256 x i16> %vo, <256 x i16> poison, <512 x i32>
  ; NOTE(review): mask missing — presumably the even/odd interleave of %2/%3.
  %4 = shufflevector <512 x i16> %2, <512 x i16> %3, <512 x i32>
  store <512 x i16> %4, ptr %agg.result, align 1024
  ret void
}