; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL,BE
; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL,LE

; These tests insert the truncated low half and the truncated high half
; (lshr by half the bit width, then trunc) of one scalar into two adjacent
; vector elements. Where the checks show a fold, instcombine replaces the pair
; of inserts with one insert of the full-width scalar into a vector of
; twice-as-wide elements, followed by a bitcast back to the original type.
; Whether the fold fires depends on the element order and the data layout:
; the BE prefix checks the "E" layout run, the LE prefix checks the "e" layout
; run, and the ALL prefix is shared by both runs.

declare void @use(i16)
declare void @use_vec(<8 x i16>)

; Low half into element 0, high half into element 1 of a poison vector.
; The little-endian run folds to an i32 insert into <2 x i32> plus a bitcast;
; the big-endian run keeps both inserts.

define <4 x i16> @insert_01_poison_v4i16(i32 %x) {
; BE-LABEL: @insert_01_poison_v4i16(
; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; BE-NEXT: ret <4 x i16> [[INS1]]
;
; LE-LABEL: @insert_01_poison_v4i16(
; LE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
; LE-NEXT: [[INS1:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; LE-NEXT: ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; Mirror of the test above: low half into element 1, high half into element 0.
; The big-endian run folds to an i32 insert into <4 x i32> plus a bitcast;
; the little-endian run only reorders the two inserts by index.

define <8 x i16> @insert_10_poison_v8i16(i32 %x) {
; BE-LABEL: @insert_10_poison_v8i16(
; BE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; BE-NEXT: [[INS1:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x i16>
; BE-NEXT: ret <8 x i16> [[INS1]]
;
; LE-LABEL: @insert_10_poison_v8i16(
; LE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; LE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; LE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; LE-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[HI16]], i64 0
; LE-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 1
; LE-NEXT: ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <8 x i16> poison, i16 %lo16, i64 1
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 0
  ret <8 x i16> %ins1
}

; negative test - larger element is not aligned in the vector
; (the element pair 1,2 does not begin at an even index, unlike the 0,1 and
; 2,3 pairs used by the folding tests in this file)

define <4 x i32> @insert_12_poison_v4i32(i64 %x) {
; ALL-LABEL: @insert_12_poison_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  %lo32 = trunc i64 %x to i32
  %ins0 = insertelement <4 x i32> poison, i32 %lo32, i64 1
  %ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 2
  ret <4 x i32> %ins1
}

; negative test - larger element is not aligned in the vector
; (same misaligned 1,2 element pair as above, with the halves swapped)

define <4 x i16> @insert_21_poison_v4i16(i32 %x) {
; ALL-LABEL: @insert_21_poison_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[HI16]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 2
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 2
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; Low half into element 2, high half into element 3 of a poison vector.
; The little-endian run folds to an i64 insert at index 1 of <2 x i64>;
; the big-endian run keeps both inserts.

define <4 x i32> @insert_23_poison_v4i32(i64 %x) {
; BE-LABEL: @insert_23_poison_v4i32(
; BE-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; BE-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; BE-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 2
; BE-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3
; BE-NEXT: ret <4 x i32> [[INS1]]
;
; LE-LABEL: @insert_23_poison_v4i32(
; LE-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 1
; LE-NEXT: [[INS1:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
; LE-NEXT: ret <4 x i32> [[INS1]]
;
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  %lo32 = trunc i64 %x to i32
  %ins0 = insertelement <4 x i32> poison, i32 %lo32, i64 2
  %ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 3
  ret <4 x i32> %ins1
}

; Low half into element 3, high half into element 2 of a poison vector.
; The big-endian run folds to an i32 insert at index 1 of <2 x i32>;
; the little-endian run only reorders the two inserts by index.

define <4 x i16> @insert_32_poison_v4i16(i32 %x) {
; BE-LABEL: @insert_32_poison_v4i16(
; BE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 1
; BE-NEXT: [[INS1:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; BE-NEXT: ret <4 x i16> [[INS1]]
;
; LE-LABEL: @insert_32_poison_v4i16(
; LE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; LE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; LE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; LE-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[HI16]], i64 2
; LE-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 3
; LE-NEXT: ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 3
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 2
  ret <4 x i16> %ins1
}

; Similar to the above tests but with a non-poison base vector.
; Vector is same size as scalar, so this is just a cast.
; TODO: The big-endian case (not folded in the checks below) could be
; swapped/rotated into place.

; Both elements of %v are overwritten, so the checks show no remaining use of
; it: the little-endian run becomes a plain bitcast of %x, and the big-endian
; run inserts into poison instead of %v.

define <2 x i16> @insert_01_v2i16(i32 %x, <2 x i16> %v) {
; BE-LABEL: @insert_01_v2i16(
; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT: [[INS0:%.*]] = insertelement <2 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT: [[INS1:%.*]] = insertelement <2 x i16> [[INS0]], i16 [[HI16]], i64 1
; BE-NEXT: ret <2 x i16> [[INS1]]
;
; LE-LABEL: @insert_01_v2i16(
; LE-NEXT: [[INS1:%.*]] = bitcast i32 [[X:%.*]] to <2 x i16>
; LE-NEXT: ret <2 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <2 x i16> %v, i16 %lo16, i64 0
  %ins1 = insertelement <2 x i16> %ins0, i16 %hi16, i64 1
  ret <2 x i16> %ins1
}

; negative test - can't do this safely without knowing something about the base vector
; (%v is a function argument; neither run folds, the inserts are only reordered)

define <8 x i16> @insert_10_v8i16(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_10_v8i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[HI16]], i64 0
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 1
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 1
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 0
  ret <8 x i16> %ins1
}

; negative test - larger element is not aligned in the vector
; (element pair 1,2 does not begin at an even index)

define <4 x i32> @insert_12_v4i32(i64 %x, <4 x i32> %v) {
; ALL-LABEL: @insert_12_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  %lo32 = trunc i64 %x to i32
  %ins0 = insertelement <4 x i32> %v, i32 %lo32, i64 1
  %ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 2
  ret <4 x i32> %ins1
}

; negative test - larger element is not aligned in the vector
; (element pair 1,2 does not begin at an even index)

define <4 x i16> @insert_21_v4i16(i32 %x, <4 x i16> %v) {
; ALL-LABEL: @insert_21_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[HI16]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 2
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> %v, i16 %lo16, i64 2
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; negative test - can't do this safely without knowing something about the base vector
; (%v is a function argument; the checks are identical for both runs)

define <4 x i32> @insert_23_v4i32(i64 %x, <4 x i32> %v) {
; ALL-LABEL: @insert_23_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 2
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  %lo32 = trunc i64 %x to i32
  %ins0 = insertelement <4 x i32> %v, i32 %lo32, i64 2
  %ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 3
  ret <4 x i32> %ins1
}

; negative test - can't do this safely without knowing something about the base vector
; (%v is a function argument; neither run folds, the inserts are only reordered)

define <4 x i16> @insert_32_v4i16(i32 %x, <4 x i16> %v) {
; ALL-LABEL: @insert_32_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[HI16]], i64 2
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 3
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> %v, i16 %lo16, i64 3
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 2
  ret <4 x i16> %ins1
}

; negative test - need half-width shift
; (the shift amount here is 8, not 16, which is half of the i32 width)

define <4 x i16> @insert_01_v4i16_wrong_shift1(i32 %x) {
; ALL-LABEL: @insert_01_v4i16_wrong_shift1(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 8
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 8
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; negative test - need common scalar
; (the high half is taken from %x but the low half is taken from %y)

define <4 x i16> @insert_01_v4i16_wrong_op(i32 %x, i32 %y) {
; ALL-LABEL: @insert_01_v4i16_wrong_op(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[Y:%.*]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %y to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; TODO: extra use doesn't have to prevent the fold.

; Here the truncated high half (%hi16) has an extra use via @use.
; NOTE(review): the function name says v4i16 but the vector type is <8 x i16>.
; NOTE(review): the base vector %v is a non-poison argument, which the comment
; on @insert_76_v4i16_uses2 below gives as a reason not to fold - confirm
; whether the extra use is really the only thing blocking the fold here.

define <8 x i16> @insert_67_v4i16_uses1(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_67_v4i16_uses1(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: call void @use(i16 [[HI16]])
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 6
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[INS0]], i16 [[HI16]], i64 7
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  call void @use(i16 %hi16)
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 6
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 7
  ret <8 x i16> %ins1
}

; negative test - can't do this safely without knowing something about the base vector
; extra use would be ok
; (here the truncated low half, %lo16, is the value with the extra use via @use;
; as above, the name says v4i16 but the vector type is <8 x i16>)

define <8 x i16> @insert_76_v4i16_uses2(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_76_v4i16_uses2(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: call void @use(i16 [[LO16]])
; ALL-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[HI16]], i64 6
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 7
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  call void @use(i16 %lo16)
  %ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 7
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 6
  ret <8 x i16> %ins1
}

; TODO: extra use doesn't have to prevent the fold.

; Here the first insert (%ins0) has an extra use via @use_vec.
; NOTE(review): the function name says v4i16 but the vector type is <8 x i16>.

define <8 x i16> @insert_67_v4i16_uses3(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_67_v4i16_uses3(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 6
; ALL-NEXT: call void @use_vec(<8 x i16> [[INS0]])
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[INS0]], i16 [[HI16]], i64 7
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 6
  call void @use_vec(<8 x i16> %ins0)
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 7
  ret <8 x i16> %ins1
}

; This is equivalent to the 1st test (@insert_01_poison_v4i16), except that the
; high half is inserted (at element 1) before the low half (at element 0).
; NOTE(review): this comment was originally marked TODO, but the checks below
; already show the same result shape as the 1st test (fold in the
; little-endian run, two index-ordered inserts in the big-endian run), so the
; TODO appears to be resolved - confirm.

define <4 x i16> @insert_01_poison_v4i16_high_first(i32 %x) {
; BE-LABEL: @insert_01_poison_v4i16_high_first(
; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[HI16]], i64 1
; BE-NEXT: ret <4 x i16> [[INS0]]
;
; LE-LABEL: @insert_01_poison_v4i16_high_first(
; LE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
; LE-NEXT: [[INS0:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; LE-NEXT: ret <4 x i16> [[INS0]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins1 = insertelement <4 x i16> poison, i16 %hi16, i64 1
  %ins0 = insertelement <4 x i16> %ins1, i16 %lo16, i64 0
  ret <4 x i16> %ins0
}