; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx2 -interleaved-access -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx512f -mattr=+avx512bw -mattr=+avx512vl -interleaved-access -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx2 -passes=interleaved-access -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx512f -mattr=+avx512bw -mattr=+avx512vl -passes=interleaved-access -S | FileCheck %s

; Stride-3 interleaved i8 loads run through the interleaved-access pass for
; x86-64 with AVX2 and with AVX512F/BW/VL. Each function loads one wide vector
; of 3*VF bytes, de-interleaves it into three VF-element vectors (elements
; 0,3,6,... / 1,4,7,... / 2,5,8,...) and returns their sum.
;
; Expected output, per the assertions below:
;   * VF = 16, 32, 64: the wide load is replaced by <16 x i8> loads addressed
;     through getelementptr, followed by a sequence of shufflevectors.
;   * VF = 8: the IR is left unchanged.
;
; NOTE(review): the shuffle-mask operands of the assertions in the vf32, vf16
; and vf64 functions were missing from this copy of the file and have not been
; reconstructed. Those assertion lines stop after the mask type, which still
; matches the leading part of each emitted instruction. Re-run
; utils/update_test_checks.py to restore full-mask assertions.

; VF = 32: <96 x i8> wide load with no explicit alignment.
define <32 x i8> @interleaved_load_vf32_i8_stride3(ptr %ptr){
; CHECK-LABEL: @interleaved_load_vf32_i8_stride3(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <16 x i8>, ptr [[PTR:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 128
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[TMP5]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 3
; CHECK-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 4
; CHECK-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr [[TMP9]], align 16
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 5
; CHECK-NEXT:    [[TMP12:%.*]] = load <16 x i8>, ptr [[TMP11]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> [[TMP8]], <32 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> [[TMP10]], <32 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP6]], <16 x i8> [[TMP12]], <32 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <32 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <32 x i8> [[TMP14]], <32 x i8> poison, <32 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP15]], <32 x i8> poison, <32 x i32>
; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <32 x i8> [[TMP18]], <32 x i8> [[TMP16]], <32 x i32>
; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> [[TMP17]], <32 x i32>
; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <32 x i8> [[TMP17]], <32 x i8> [[TMP18]], <32 x i32>
; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <32 x i8> [[TMP20]], <32 x i8> [[TMP19]], <32 x i32>
; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <32 x i8> [[TMP21]], <32 x i8> [[TMP20]], <32 x i32>
; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <32 x i8> [[TMP19]], <32 x i8> [[TMP21]], <32 x i32>
; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <32 x i8> [[TMP23]], <32 x i8> poison, <32 x i32>
; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <32 x i8> [[TMP22]], <32 x i8> poison, <32 x i32>
; CHECK-NEXT:    [[ADD1:%.*]] = add <32 x i8> [[TMP26]], [[TMP25]]
; CHECK-NEXT:    [[ADD2:%.*]] = add <32 x i8> [[TMP24]], [[ADD1]]
; CHECK-NEXT:    ret <32 x i8> [[ADD2]]
;
  %wide.vec = load <96 x i8>, ptr %ptr
  %v1 = shufflevector <96 x i8> %wide.vec, <96 x i8> undef,<32 x i32> <i32 0,i32 3,i32 6,i32 9,i32 12,i32 15,i32 18,i32 21,i32 24,i32 27,i32 30,i32 33,i32 36,i32 39,i32 42,i32 45,i32 48,i32 51,i32 54,i32 57,i32 60,i32 63,i32 66,i32 69,i32 72,i32 75,i32 78,i32 81,i32 84,i32 87,i32 90,i32 93>
  %v2 = shufflevector <96 x i8> %wide.vec, <96 x i8> undef,<32 x i32> <i32 1,i32 4,i32 7,i32 10,i32 13,i32 16,i32 19,i32 22,i32 25,i32 28,i32 31,i32 34,i32 37,i32 40,i32 43,i32 46,i32 49,i32 52,i32 55,i32 58,i32 61,i32 64,i32 67,i32 70,i32 73,i32 76,i32 79,i32 82,i32 85,i32 88,i32 91,i32 94>
  %v3 = shufflevector <96 x i8> %wide.vec, <96 x i8> undef,<32 x i32> <i32 2,i32 5,i32 8,i32 11,i32 14,i32 17,i32 20,i32 23,i32 26,i32 29,i32 32,i32 35,i32 38,i32 41,i32 44,i32 47,i32 50,i32 53,i32 56,i32 59,i32 62,i32 65,i32 68,i32 71,i32 74,i32 77,i32 80,i32 83,i32 86,i32 89,i32 92,i32 95>
  %add1 = add <32 x i8> %v1, %v2
  %add2 = add <32 x i8> %v3, %add1
  ret <32 x i8> %add2
}

; VF = 16: <48 x i8> wide load with no explicit alignment.
define <16 x i8> @interleaved_load_vf16_i8_stride3(ptr %ptr){
; CHECK-LABEL: @interleaved_load_vf16_i8_stride3(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <16 x i8>, ptr [[PTR:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[TMP5]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP6]], <16 x i8> poison, <16 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP7]], <16 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP8]], <16 x i32>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> [[TMP9]], <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> [[TMP10]], <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP11]], <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP12]], <16 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> poison, <16 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> poison, <16 x i32>
; CHECK-NEXT:    [[ADD1:%.*]] = add <16 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[ADD2:%.*]] = add <16 x i8> [[TMP15]], [[ADD1]]
; CHECK-NEXT:    ret <16 x i8> [[ADD2]]
;
  %wide.vec = load <48 x i8>, ptr %ptr
  %v1 = shufflevector <48 x i8> %wide.vec, <48 x i8> undef,<16 x i32> <i32 0,i32 3,i32 6,i32 9,i32 12,i32 15,i32 18,i32 21,i32 24,i32 27,i32 30,i32 33,i32 36,i32 39,i32 42,i32 45>
  %v2 = shufflevector <48 x i8> %wide.vec, <48 x i8> undef,<16 x i32> <i32 1,i32 4,i32 7,i32 10,i32 13,i32 16,i32 19,i32 22,i32 25,i32 28,i32 31,i32 34,i32 37,i32 40,i32 43,i32 46>
  %v3 = shufflevector <48 x i8> %wide.vec, <48 x i8> undef,<16 x i32> <i32 2,i32 5,i32 8,i32 11,i32 14,i32 17,i32 20,i32 23,i32 26,i32 29,i32 32,i32 35,i32 38,i32 41,i32 44,i32 47>
  %add1 = add <16 x i8> %v1, %v2
  %add2 = add <16 x i8> %v3, %add1
  ret <16 x i8> %add2
}

; VF = 8: <24 x i8> wide load. The assertions expect the load and the three
; shuffles to be left as they are.
define <8 x i8> @interleaved_load_vf8_i8_stride3(ptr %ptr){
; CHECK-LABEL: @interleaved_load_vf8_i8_stride3(
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <24 x i8>, ptr [[PTR:%.*]], align 32
; CHECK-NEXT:    [[V1:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; CHECK-NEXT:    [[V2:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
; CHECK-NEXT:    [[V3:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
; CHECK-NEXT:    [[ADD1:%.*]] = add <8 x i8> [[V1]], [[V2]]
; CHECK-NEXT:    [[ADD2:%.*]] = add <8 x i8> [[V3]], [[ADD1]]
; CHECK-NEXT:    ret <8 x i8> [[ADD2]]
;
  %wide.vec = load <24 x i8>, ptr %ptr
  %v1 = shufflevector <24 x i8> %wide.vec, <24 x i8> undef,<8 x i32> <i32 0,i32 3,i32 6,i32 9,i32 12,i32 15,i32 18,i32 21>
  %v2 = shufflevector <24 x i8> %wide.vec, <24 x i8> undef,<8 x i32> <i32 1,i32 4,i32 7,i32 10,i32 13,i32 16,i32 19,i32 22>
  %v3 = shufflevector <24 x i8> %wide.vec, <24 x i8> undef,<8 x i32> <i32 2,i32 5,i32 8,i32 11,i32 14,i32 17,i32 20,i32 23>
  %add1 = add <8 x i8> %v1, %v2
  %add2 = add <8 x i8> %v3, %add1
  ret <8 x i8> %add2
}

; VF = 64: <192 x i8> wide load with an explicit `align 1`; the assertions
; expect every narrow load to carry `align 1` as well.
define <64 x i8> @interleaved_load_vf64_i8_stride3(ptr %ptr){
; CHECK-LABEL: @interleaved_load_vf64_i8_stride3(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <16 x i8>, ptr [[PTR:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 3
; CHECK-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 4
; CHECK-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr [[TMP9]], align 1
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 5
; CHECK-NEXT:    [[TMP12:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 6
; CHECK-NEXT:    [[TMP14:%.*]] = load <16 x i8>, ptr [[TMP13]], align 1
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 7
; CHECK-NEXT:    [[TMP16:%.*]] = load <16 x i8>, ptr [[TMP15]], align 1
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 8
; CHECK-NEXT:    [[TMP18:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 9
; CHECK-NEXT:    [[TMP20:%.*]] = load <16 x i8>, ptr [[TMP19]], align 1
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 10
; CHECK-NEXT:    [[TMP22:%.*]] = load <16 x i8>, ptr [[TMP21]], align 1
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr <16 x i8>, ptr [[PTR]], i32 11
; CHECK-NEXT:    [[TMP24:%.*]] = load <16 x i8>, ptr [[TMP23]], align 1
; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> [[TMP8]], <32 x i32>
; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> [[TMP10]], <32 x i32>
; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <16 x i8> [[TMP6]], <16 x i8> [[TMP12]], <32 x i32>
; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP20]], <32 x i32>
; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> [[TMP22]], <32 x i32>
; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP18]], <16 x i8> [[TMP24]], <32 x i32>
; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <32 x i8> [[TMP25]], <32 x i8> [[TMP28]], <64 x i32>
; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <32 x i8> [[TMP26]], <32 x i8> [[TMP29]], <64 x i32>
; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <32 x i8> [[TMP27]], <32 x i8> [[TMP30]], <64 x i32>
; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <64 x i8> [[TMP31]], <64 x i8> poison, <64 x i32>
; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <64 x i8> [[TMP32]], <64 x i8> poison, <64 x i32>
; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <64 x i8> [[TMP33]], <64 x i8> poison, <64 x i32>
; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <64 x i8> [[TMP36]], <64 x i8> [[TMP34]], <64 x i32>
; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <64 x i8> [[TMP34]], <64 x i8> [[TMP35]], <64 x i32>
; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <64 x i8> [[TMP35]], <64 x i8> [[TMP36]], <64 x i32>
; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <64 x i8> [[TMP38]], <64 x i8> [[TMP37]], <64 x i32>
; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <64 x i8> [[TMP39]], <64 x i8> [[TMP38]], <64 x i32>
; CHECK-NEXT:    [[TMP42:%.*]] = shufflevector <64 x i8> [[TMP37]], <64 x i8> [[TMP39]], <64 x i32>
; CHECK-NEXT:    [[TMP43:%.*]] = shufflevector <64 x i8> [[TMP41]], <64 x i8> poison, <64 x i32>
; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <64 x i8> [[TMP40]], <64 x i8> poison, <64 x i32>
; CHECK-NEXT:    [[ADD1:%.*]] = add <64 x i8> [[TMP44]], [[TMP43]]
; CHECK-NEXT:    [[ADD2:%.*]] = add <64 x i8> [[TMP42]], [[ADD1]]
; CHECK-NEXT:    ret <64 x i8> [[ADD2]]
;
  %wide.vec = load <192 x i8>, ptr %ptr, align 1
  %v1 = shufflevector <192 x i8> %wide.vec, <192 x i8> undef, <64 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45, i32 48, i32 51, i32 54, i32 57, i32 60, i32 63, i32 66, i32 69, i32 72, i32 75, i32 78, i32 81, i32 84, i32 87, i32 90, i32 93, i32 96, i32 99, i32 102, i32 105, i32 108, i32 111, i32 114, i32 117, i32 120, i32 123, i32 126, i32 129, i32 132, i32 135, i32 138, i32 141, i32 144, i32 147, i32 150, i32 153, i32 156, i32 159, i32 162, i32 165, i32 168, i32 171, i32 174, i32 177, i32 180, i32 183, i32 186, i32 189>
  %v2 = shufflevector <192 x i8> %wide.vec, <192 x i8> undef, <64 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46, i32 49, i32 52, i32 55, i32 58, i32 61, i32 64, i32 67, i32 70, i32 73, i32 76, i32 79, i32 82, i32 85, i32 88, i32 91, i32 94, i32 97, i32 100, i32 103, i32 106, i32 109, i32 112, i32 115, i32 118, i32 121, i32 124, i32 127, i32 130, i32 133, i32 136, i32 139, i32 142, i32 145, i32 148, i32 151, i32 154, i32 157, i32 160, i32 163, i32 166, i32 169, i32 172, i32 175, i32 178, i32 181, i32 184, i32 187, i32 190>
  %v3 = shufflevector <192 x i8> %wide.vec, <192 x i8> undef, <64 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47, i32 50, i32 53, i32 56, i32 59, i32 62, i32 65, i32 68, i32 71, i32 74, i32 77, i32 80, i32 83, i32 86, i32 89, i32 92, i32 95, i32 98, i32 101, i32 104, i32 107, i32 110, i32 113, i32 116, i32 119, i32 122, i32 125, i32 128, i32 131, i32 134, i32 137, i32 140, i32 143, i32 146, i32 149, i32 152, i32 155, i32 158, i32 161, i32 164, i32 167, i32 170, i32 173, i32 176, i32 179, i32 182, i32 185, i32 188, i32 191>
  %add1 = add <64 x i8> %v1, %v2
  %add2 = add <64 x i8> %v3, %add1
  ret <64 x i8> %add2
}