; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s define <16 x i32> @no_existing_zext(<16 x i8> %a, <16 x i32> %op) { ; CHECK-LABEL: no_existing_zext: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.16b v5, #10 ; CHECK-NEXT: cmhi.16b v0, v0, v5 ; CHECK-NEXT: sshll.8h v5, v0, #0 ; CHECK-NEXT: sshll2.8h v0, v0, #0 ; CHECK-NEXT: sshll2.4s v16, v0, #0 ; CHECK-NEXT: sshll.4s v6, v5, #0 ; CHECK-NEXT: sshll.4s v7, v0, #0 ; CHECK-NEXT: sshll2.4s v5, v5, #0 ; CHECK-NEXT: and.16b v4, v4, v16 ; CHECK-NEXT: and.16b v0, v1, v6 ; CHECK-NEXT: and.16b v1, v2, v5 ; CHECK-NEXT: and.16b v2, v3, v7 ; CHECK-NEXT: mov.16b v3, v4 ; CHECK-NEXT: ret entry: %cmp = icmp ugt <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %op, <16 x i32> zeroinitializer ret <16 x i32> %sel } define <16 x i32> @second_compare_operand_not_splat(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: second_compare_operand_not_splat: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: cmgt.16b v1, v0, v1 ; CHECK-NEXT: ushll.8h v2, v0, #0 ; CHECK-NEXT: ushll2.8h v0, v0, #0 ; CHECK-NEXT: sshll.8h v3, v1, #0 ; CHECK-NEXT: sshll2.8h v1, v1, #0 ; CHECK-NEXT: ushll.4s v4, v2, #0 ; CHECK-NEXT: ushll.4s v5, v0, #0 ; CHECK-NEXT: ushll2.4s v2, v2, #0 ; CHECK-NEXT: ushll2.4s v6, v0, #0 ; CHECK-NEXT: sshll.4s v0, v3, #0 ; CHECK-NEXT: sshll.4s v7, v1, #0 ; CHECK-NEXT: sshll2.4s v16, v3, #0 ; CHECK-NEXT: sshll2.4s v1, v1, #0 ; CHECK-NEXT: and.16b v0, v4, v0 ; CHECK-NEXT: and.16b v3, v6, v1 ; CHECK-NEXT: and.16b v1, v2, v16 ; CHECK-NEXT: and.16b v2, v5, v7 ; CHECK-NEXT: ret entry: %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp sgt <16 x i8> %a, %b %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ret <16 x i32> %sel } define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select(<16 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.16b v1, #10 ; CHECK-NEXT: ushll.8h v2, v0, #0 ; CHECK-NEXT: ushll.4s v4, v2, #0 ; CHECK-NEXT: ushll2.4s v2, v2, #0 ; CHECK-NEXT: cmgt.16b v1, v0, v1 ; CHECK-NEXT: ushll2.8h v0, v0, #0 ; CHECK-NEXT: sshll.8h v3, v1, #0 ; CHECK-NEXT: sshll2.8h v1, v1, #0 ; CHECK-NEXT: ushll.4s v5, v0, #0 ; CHECK-NEXT: ushll2.4s v6, v0, #0 ; CHECK-NEXT: sshll.4s v0, v3, #0 ; CHECK-NEXT: sshll.4s v7, v1, #0 ; CHECK-NEXT: sshll2.4s v16, v3, #0 ; CHECK-NEXT: sshll2.4s v1, v1, #0 ; CHECK-NEXT: and.16b v0, v4, v0 ; CHECK-NEXT: and.16b v3, v6, v1 ; CHECK-NEXT: and.16b v1, v2, v16 ; CHECK-NEXT: and.16b v2, v5, v7 ; CHECK-NEXT: ret entry: %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp sgt <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ret <16 x i32> %sel } define <8 x i64> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i64(<8 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ushll.8h v0, v0, #0 ; CHECK-NEXT: mov w8, #10 ; =0xa ; CHECK-NEXT: dup.2d v2, x8 ; CHECK-NEXT: ushll.4s v1, v0, #0 ; CHECK-NEXT: ushll2.4s v0, v0, #0 ; CHECK-NEXT: ushll.2d v3, v1, #0 ; CHECK-NEXT: ushll2.2d v4, v0, #0 ; CHECK-NEXT: ushll2.2d v1, v1, #0 ; CHECK-NEXT: ushll.2d v5, v0, #0 ; CHECK-NEXT: cmhi.2d v0, v3, v2 ; CHECK-NEXT: cmhi.2d v7, v1, v2 ; CHECK-NEXT: cmhi.2d v6, v5, v2 ; CHECK-NEXT: cmhi.2d v2, v4, v2 ; CHECK-NEXT: and.16b v0, v3, v0 ; CHECK-NEXT: and.16b v1, v1, v7 ; CHECK-NEXT: and.16b v3, v4, v2 ; CHECK-NEXT: and.16b v2, v5, v6 ; CHECK-NEXT: ret %ext = zext <8 x i8> %a to <8 x i64> %cmp = icmp ugt <8 x i8> %a, %sel = select <8 x i1> %cmp, <8 x i64> %ext, <8 x i64> zeroinitializer ret <8 x i64> %sel } define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32(<16 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v16i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ushll.8h v2, v0, #0 ; CHECK-NEXT: ushll2.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll.4s v3, v2, #0 ; CHECK-NEXT: ushll2.4s v4, v0, #0 ; CHECK-NEXT: ushll2.4s v2, v2, #0 ; CHECK-NEXT: ushll.4s v5, v0, #0 ; CHECK-NEXT: cmhi.4s v0, v3, v1 ; CHECK-NEXT: cmhi.4s v7, v2, v1 ; CHECK-NEXT: cmhi.4s v6, v5, v1 ; CHECK-NEXT: cmhi.4s v1, v4, v1 ; CHECK-NEXT: and.16b v0, v3, v0 ; CHECK-NEXT: and.16b v3, v4, v1 ; CHECK-NEXT: and.16b v1, v2, v7 ; CHECK-NEXT: and.16b v2, v5, v6 ; CHECK-NEXT: ret %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp ugt <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ret <16 x i32> %sel } define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32(<8 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ushll.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: cmhi.4s v3, v0, v1 ; CHECK-NEXT: cmhi.4s v1, v2, v1 ; CHECK-NEXT: and.16b v1, v2, v1 ; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i8> %a to <8 x i32> %cmp = icmp ugt <8 x i8> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2(<8 x i16> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: cmhi.4s v3, v0, v1 ; CHECK-NEXT: cmhi.4s v1, v2, v1 ; CHECK-NEXT: and.16b v1, v2, v1 ; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i16> %a to <8 x i32> %cmp = icmp ugt <8 x i16> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15: ; CHECK: ; %bb.0: ; CHECK-NEXT: bic.8h v0, #128, lsl #8 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: cmhi.4s v3, v0, v1 ; CHECK-NEXT: cmhi.4s v1, v2, v1 ; CHECK-NEXT: and.16b v1, v2, v1 ; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i15> %a to <8 x i32> %cmp = icmp ugt <8 x i15> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <7 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v7i32(<7 x i16> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v7i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.8h v1, #10 ; CHECK-NEXT: ushll.4s v2, v0, #0 ; CHECK-NEXT: cmhi.8h v1, v0, v1 ; CHECK-NEXT: ushll2.4s v0, v0, #0 ; CHECK-NEXT: sshll.4s v3, v1, #0 ; CHECK-NEXT: sshll2.4s v1, v1, #0 ; CHECK-NEXT: and.16b v2, v2, v3 ; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: mov.s w1, v2[1] ; CHECK-NEXT: mov.s w2, v2[2] ; CHECK-NEXT: mov.s w3, v2[3] ; CHECK-NEXT: mov.s w5, v0[1] ; CHECK-NEXT: mov.s w6, v0[2] ; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %ext = zext <7 x i16> %a to <7 x i32> %cmp = icmp ugt <7 x i16> %a, %sel = select <7 x i1> %cmp, <7 x i32> %ext, <7 x i32> zeroinitializer ret <7 x i32> %sel } define <3 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v3i16(<3 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v3i16: ; CHECK: ; %bb.0: ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x8, lCPI9_0@PAGE ; CHECK-NEXT: movi.2d v3, #0x0000ff000000ff ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: ldr d2, [x8, lCPI9_0@PAGEOFF] ; CHECK-NEXT: mov.h v0[1], w1 ; CHECK-NEXT: mov.h v0[2], w2 ; CHECK-NEXT: ushll.4s v1, v0, #0 ; CHECK-NEXT: bic.4h v0, #255, lsl #8 ; CHECK-NEXT: cmhi.4h v0, v0, v2 ; CHECK-NEXT: and.16b v1, v1, v3 ; CHECK-NEXT: sshll.4s v0, v0, #0 ; CHECK-NEXT: and.16b v0, v1, v0 ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 %ext = zext <3 x i8> %a to <3 x i32> %cmp = icmp ugt <3 x i8> %a, %sel = select <3 x i1> %cmp, <3 x i32> %ext, <3 x i32> zeroinitializer ret <3 x i32> %sel } define <4 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v4i32(<4 x i16> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v4i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: cmhi.4s v1, v0, v1 ; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ret %ext = zext <4 x i16> %a to <4 x i32> %cmp = icmp ugt <4 x i16> %a, %sel = select <4 x i1> %cmp, <4 x i32> %ext, <4 x i32> zeroinitializer ret <4 x i32> %sel } define <2 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v2i32(<2 x i16> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v2i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NEXT: movi.2s v2, #10 ; CHECK-NEXT: and.8b v0, v0, v1 ; CHECK-NEXT: cmhi.2s v1, v0, v2 ; CHECK-NEXT: and.8b v0, v0, v1 ; CHECK-NEXT: ret %ext = zext <2 x i16> %a to <2 x i32> %cmp = icmp ugt <2 x i16> %a, %sel = select <2 x i1> %cmp, <2 x i32> %ext, <2 x i32> zeroinitializer ret <2 x i32> %sel } define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: cmeq.4s v3, v0, v1 ; CHECK-NEXT: cmeq.4s v1, v2, v1 ; CHECK-NEXT: and.16b v1, v2, v1 ; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i16> %a to <8 x i32> %cmp = icmp eq <8 x i16> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13: ; CHECK: ; %bb.0: ; CHECK-NEXT: bic.8h v0, #224, lsl #8 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: cmeq.4s v3, v0, v1 ; CHECK-NEXT: cmeq.4s v1, v2, v1 ; CHECK-NEXT: and.16b v1, v2, v1 ; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i13> %a to <8 x i32> %cmp = icmp eq <8 x i13> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <16 x i32> @same_zext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_ne_and_select_v8i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ushll.8h v2, v0, #0 ; CHECK-NEXT: ushll2.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll.4s v3, v2, #0 ; CHECK-NEXT: ushll2.4s v4, v0, #0 ; CHECK-NEXT: ushll2.4s v2, v2, #0 ; CHECK-NEXT: ushll.4s v5, v0, #0 ; CHECK-NEXT: cmeq.4s v0, v3, v1 ; CHECK-NEXT: cmeq.4s v7, v2, v1 ; CHECK-NEXT: cmeq.4s v6, v5, v1 ; CHECK-NEXT: cmeq.4s v1, v4, v1 ; CHECK-NEXT: bic.16b v0, v3, v0 ; CHECK-NEXT: bic.16b v3, v4, v1 ; CHECK-NEXT: bic.16b v1, v2, v7 ; CHECK-NEXT: bic.16b v2, v5, v6 ; CHECK-NEXT: ret %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp ne <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ret <16 x i32> %sel } ; A variation of @same_zext_used_in_cmp_unsigned_pred_and_select, with with ; multiple users of the compare. define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_other_use(<16 x i8> %a, <16 x i64> %v, ptr %ptr) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_other_use: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.16b v16, #10 ; CHECK-NEXT: ushll.8h v19, v0, #0 ; CHECK-NEXT: ldr q21, [sp] ; CHECK-NEXT: ushll.4s v24, v19, #0 ; CHECK-NEXT: ushll2.4s v19, v19, #0 ; CHECK-NEXT: cmhi.16b v16, v0, v16 ; CHECK-NEXT: ushll2.8h v0, v0, #0 ; CHECK-NEXT: sshll2.8h v17, v16, #0 ; CHECK-NEXT: sshll.8h v16, v16, #0 ; CHECK-NEXT: ushll.4s v25, v0, #0 ; CHECK-NEXT: ushll2.4s v0, v0, #0 ; CHECK-NEXT: sshll2.4s v18, v17, #0 ; CHECK-NEXT: sshll.4s v17, v17, #0 ; CHECK-NEXT: sshll2.4s v22, v16, #0 ; CHECK-NEXT: sshll.4s v16, v16, #0 ; CHECK-NEXT: sshll2.2d v20, v18, #0 ; CHECK-NEXT: sshll.2d v23, v18, #0 ; CHECK-NEXT: sshll2.2d v26, v17, #0 ; CHECK-NEXT: sshll.2d v27, v17, #0 ; CHECK-NEXT: and.16b v20, v21, v20 ; CHECK-NEXT: sshll2.2d v21, v22, #0 ; CHECK-NEXT: and.16b v7, v7, v23 ; CHECK-NEXT: sshll.2d v23, v22, #0 ; CHECK-NEXT: and.16b v6, v6, v26 ; CHECK-NEXT: sshll2.2d v26, v16, #0 ; CHECK-NEXT: and.16b v5, v5, v27 ; CHECK-NEXT: stp q7, q20, [x0, #96] ; CHECK-NEXT: sshll.2d v20, v16, #0 ; CHECK-NEXT: and.16b v21, v4, v21 ; CHECK-NEXT: and.16b v4, v0, v18 ; CHECK-NEXT: and.16b v7, v3, v23 ; CHECK-NEXT: and.16b v3, v19, v22 ; CHECK-NEXT: stp q5, q6, [x0, #64] ; CHECK-NEXT: and.16b v0, v24, v16 ; CHECK-NEXT: and.16b v6, v2, v26 ; CHECK-NEXT: and.16b v2, v25, v17 ; CHECK-NEXT: and.16b v5, v1, v20 ; CHECK-NEXT: mov.16b v1, v3 ; CHECK-NEXT: mov.16b v3, v4 ; CHECK-NEXT: stp q7, q21, [x0, #32] ; CHECK-NEXT: stp q5, q6, [x0] ; CHECK-NEXT: ret entry: %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp ugt <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer %sel.2 = select <16 x i1> %cmp, <16 x i64> %v, <16 x i64> zeroinitializer store <16 x i64> %sel.2, ptr %ptr ret <16 x i32> %sel } define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v16i32(<16 x i8> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v16i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: sshll.8h v2, v0, #0 ; CHECK-NEXT: sshll2.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: sshll.4s v3, v2, #0 ; CHECK-NEXT: sshll2.4s v4, v0, #0 ; CHECK-NEXT: sshll2.4s v2, v2, #0 ; CHECK-NEXT: sshll.4s v5, v0, #0 ; CHECK-NEXT: cmgt.4s v0, v3, v1 ; CHECK-NEXT: cmgt.4s v7, v2, v1 ; CHECK-NEXT: cmgt.4s v6, v5, v1 ; CHECK-NEXT: cmgt.4s v1, v4, v1 ; CHECK-NEXT: and.16b v0, v3, v0 ; CHECK-NEXT: and.16b v3, v4, v1 ; CHECK-NEXT: and.16b v1, v2, v7 ; CHECK-NEXT: and.16b v2, v5, v6 ; CHECK-NEXT: ret entry: %ext = sext <16 x i8> %a to <16 x i32> %cmp = icmp sgt <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ret <16 x i32> %sel } define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: sshll2.4s v2, v0, #0 ; CHECK-NEXT: sshll.4s v0, v0, #0 ; CHECK-NEXT: cmeq.4s v3, v0, v1 ; CHECK-NEXT: cmeq.4s v1, v2, v1 ; CHECK-NEXT: and.16b v1, v2, v1 ; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = sext <8 x i16> %a to <8 x i32> %cmp = icmp eq <8 x i16> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13: ; CHECK: ; %bb.0: ; CHECK-NEXT: ushll.4s v2, v0, #0 ; CHECK-NEXT: ushll2.4s v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: shl.4s v0, v0, #19 ; CHECK-NEXT: shl.4s v2, v2, #19 ; CHECK-NEXT: sshr.4s v0, v0, #19 ; CHECK-NEXT: sshr.4s v2, v2, #19 ; CHECK-NEXT: cmeq.4s v3, v2, v1 ; CHECK-NEXT: cmeq.4s v1, v0, v1 ; CHECK-NEXT: and.16b v1, v0, v1 ; CHECK-NEXT: and.16b v0, v2, v3 ; CHECK-NEXT: ret %ext = sext <8 x i13> %a to <8 x i32> %cmp = icmp eq <8 x i13> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <16 x i32> @same_sext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_ne_and_select_v8i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: sshll.8h v2, v0, #0 ; CHECK-NEXT: sshll2.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: sshll.4s v3, v2, #0 ; CHECK-NEXT: sshll2.4s v4, v0, #0 ; CHECK-NEXT: sshll2.4s v2, v2, #0 ; CHECK-NEXT: sshll.4s v5, v0, #0 ; CHECK-NEXT: cmeq.4s v0, v3, v1 ; CHECK-NEXT: cmeq.4s v7, v2, v1 ; CHECK-NEXT: cmeq.4s v6, v5, v1 ; CHECK-NEXT: cmeq.4s v1, v4, v1 ; CHECK-NEXT: bic.16b v0, v3, v0 ; CHECK-NEXT: bic.16b v3, v4, v1 ; CHECK-NEXT: bic.16b v1, v2, v7 ; CHECK-NEXT: bic.16b v2, v5, v6 ; CHECK-NEXT: ret %ext = sext <16 x i8> %a to <16 x i32> %cmp = icmp ne <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ret <16 x i32> %sel } define <8 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v8i32(<8 x i16> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v8i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: sshll2.4s v2, v0, #0 ; CHECK-NEXT: sshll.4s v0, v0, #0 ; CHECK-NEXT: cmgt.4s v3, v0, v1 ; CHECK-NEXT: cmgt.4s v1, v2, v1 ; CHECK-NEXT: and.16b v1, v2, v1 ; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret entry: %ext = sext <8 x i16> %a to <8 x i32> %cmp = icmp sgt <8 x i16> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15: ; CHECK: ; %bb.0: ; CHECK-NEXT: ushll.4s v2, v0, #0 ; CHECK-NEXT: ushll2.4s v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: shl.4s v0, v0, #17 ; CHECK-NEXT: shl.4s v2, v2, #17 ; CHECK-NEXT: sshr.4s v0, v0, #17 ; CHECK-NEXT: sshr.4s v2, v2, #17 ; CHECK-NEXT: cmge.4s v3, v2, v1 ; CHECK-NEXT: cmge.4s v1, v0, v1 ; CHECK-NEXT: and.16b v1, v0, v1 ; CHECK-NEXT: and.16b v0, v2, v3 ; CHECK-NEXT: ret %ext = sext <8 x i15> %a to <8 x i32> %cmp = icmp sge <8 x i15> %a, %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer ret <8 x i32> %sel } define <16 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select(<16 x i8> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.16b v1, #10 ; CHECK-NEXT: sshll.8h v2, v0, #0 ; CHECK-NEXT: ext.16b v4, v2, v2, #8 ; CHECK-NEXT: cmhi.16b v1, v0, v1 ; CHECK-NEXT: sshll2.8h v0, v0, #0 ; CHECK-NEXT: sshll.8h v3, v1, #0 ; CHECK-NEXT: sshll2.8h v1, v1, #0 ; CHECK-NEXT: ext.16b v5, v0, v0, #8 ; CHECK-NEXT: ext.16b v6, v3, v3, #8 ; CHECK-NEXT: ext.16b v7, v1, v1, #8 ; CHECK-NEXT: and.8b v2, v2, v3 ; CHECK-NEXT: and.8b v1, v0, v1 ; CHECK-NEXT: sshll.4s v0, v2, #0 ; CHECK-NEXT: and.8b v3, v5, v7 ; CHECK-NEXT: and.8b v4, v4, v6 ; CHECK-NEXT: sshll.4s v2, v1, #0 ; CHECK-NEXT: sshll.4s v3, v3, #0 ; CHECK-NEXT: sshll.4s v1, v4, #0 ; CHECK-NEXT: ret entry: %ext = sext <16 x i8> %a to <16 x i32> %cmp = icmp ugt <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ret <16 x i32> %sel } define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred(<16 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: cmge.16b v1, v0, #0 ; CHECK-NEXT: ushll.8h v2, v0, #0 ; CHECK-NEXT: ushll2.8h v0, v0, #0 ; CHECK-NEXT: sshll.8h v3, v1, #0 ; CHECK-NEXT: sshll2.8h v1, v1, #0 ; CHECK-NEXT: ushll.4s v4, v2, #0 ; CHECK-NEXT: ushll.4s v5, v0, #0 ; CHECK-NEXT: ushll2.4s v2, v2, #0 ; CHECK-NEXT: ushll2.4s v6, v0, #0 ; CHECK-NEXT: sshll.4s v0, v3, #0 ; CHECK-NEXT: sshll.4s v7, v1, #0 ; CHECK-NEXT: sshll2.4s v16, v3, #0 ; CHECK-NEXT: sshll2.4s v1, v1, #0 ; CHECK-NEXT: and.16b v0, v4, v0 ; CHECK-NEXT: and.16b v3, v6, v1 ; CHECK-NEXT: and.16b v1, v2, v16 ; CHECK-NEXT: and.16b v2, v5, v7 ; CHECK-NEXT: ret entry: %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp sgt <16 x i8> %a, %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ret <16 x i32> %sel } define void @extension_in_loop_v16i8_to_v16i32(ptr %src, ptr %dst) { ; CHECK-LABEL: extension_in_loop_v16i8_to_v16i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh2: ; CHECK-NEXT: adrp x8, lCPI24_0@PAGE ; CHECK-NEXT: Lloh3: ; CHECK-NEXT: adrp x9, lCPI24_1@PAGE ; CHECK-NEXT: Lloh4: ; CHECK-NEXT: adrp x10, lCPI24_2@PAGE ; CHECK-NEXT: Lloh5: ; CHECK-NEXT: ldr q0, [x8, lCPI24_0@PAGEOFF] ; CHECK-NEXT: Lloh6: ; CHECK-NEXT: adrp x8, lCPI24_3@PAGE ; CHECK-NEXT: Lloh7: ; CHECK-NEXT: ldr q1, [x9, lCPI24_1@PAGEOFF] ; CHECK-NEXT: Lloh8: ; CHECK-NEXT: ldr q2, [x10, lCPI24_2@PAGEOFF] ; CHECK-NEXT: Lloh9: ; CHECK-NEXT: ldr q3, [x8, lCPI24_3@PAGEOFF] ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB24_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q4, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: cmge.16b v5, v4, #0 ; CHECK-NEXT: tbl.16b v7, { v4 }, v0 ; CHECK-NEXT: tbl.16b v16, { v4 }, v1 ; CHECK-NEXT: tbl.16b v18, { v4 }, v2 ; CHECK-NEXT: tbl.16b v4, { v4 }, v3 ; CHECK-NEXT: sshll2.8h v6, v5, #0 ; CHECK-NEXT: sshll.8h v5, v5, #0 ; CHECK-NEXT: sshll2.4s v17, v6, #0 ; CHECK-NEXT: sshll.4s v6, v6, #0 ; CHECK-NEXT: sshll2.4s v19, v5, #0 ; CHECK-NEXT: sshll.4s v5, v5, #0 ; CHECK-NEXT: and.16b v7, v7, v17 ; CHECK-NEXT: and.16b v6, v16, v6 ; CHECK-NEXT: and.16b v16, v18, v19 ; CHECK-NEXT: and.16b v4, v4, v5 ; CHECK-NEXT: stp q6, q7, [x1, #32] ; CHECK-NEXT: stp q4, q16, [x1], #64 ; CHECK-NEXT: b.ne LBB24_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh9 ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh8 ; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh7 ; CHECK-NEXT: .loh AdrpAdrp Lloh2, Lloh6 ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh5 entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv %load = load <16 x i8>, ptr %src.gep %cmp = icmp sgt <16 x i8> %load, %ext = zext <16 x i8> %load to <16 x i32> %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer %dst.gep = getelementptr i32, ptr %dst, i64 %iv store <16 x i32> %sel, ptr %dst.gep %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop exit: ret void } define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(ptr %src, ptr %dst) { ; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh10: ; CHECK-NEXT: adrp x8, lCPI25_0@PAGE ; CHECK-NEXT: Lloh11: ; CHECK-NEXT: adrp x9, lCPI25_1@PAGE ; CHECK-NEXT: Lloh12: ; CHECK-NEXT: adrp x10, lCPI25_2@PAGE ; CHECK-NEXT: Lloh13: ; CHECK-NEXT: ldr q0, [x8, lCPI25_0@PAGEOFF] ; CHECK-NEXT: Lloh14: ; CHECK-NEXT: adrp x8, lCPI25_3@PAGE ; CHECK-NEXT: Lloh15: ; CHECK-NEXT: ldr q1, [x9, lCPI25_1@PAGEOFF] ; CHECK-NEXT: Lloh16: ; CHECK-NEXT: ldr q2, [x10, lCPI25_2@PAGEOFF] ; CHECK-NEXT: Lloh17: ; CHECK-NEXT: ldr q3, [x8, lCPI25_3@PAGEOFF] ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB25_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q4, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: cmge.16b v5, v4, #0 ; CHECK-NEXT: tbl.16b v7, { v4 }, v0 ; CHECK-NEXT: tbl.16b v16, { v4 }, v1 ; CHECK-NEXT: tbl.16b v18, { v4 }, v2 ; CHECK-NEXT: tbl.16b v4, { v4 }, v3 ; CHECK-NEXT: sshll2.8h v6, v5, #0 ; CHECK-NEXT: sshll.8h v5, v5, #0 ; CHECK-NEXT: sshll2.4s v17, v6, #0 ; CHECK-NEXT: sshll.4s v6, v6, #0 ; CHECK-NEXT: sshll2.4s v19, v5, #0 ; CHECK-NEXT: sshll.4s v5, v5, #0 ; CHECK-NEXT: and.16b v7, v7, v17 ; CHECK-NEXT: and.16b v6, v16, v6 ; CHECK-NEXT: and.16b v16, v18, v19 ; CHECK-NEXT: and.16b v4, v4, v5 ; CHECK-NEXT: stp q6, q7, [x1, #32] ; CHECK-NEXT: stp q4, q16, [x1], #64 ; CHECK-NEXT: b.ne LBB25_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17 ; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh16 ; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh15 ; CHECK-NEXT: .loh AdrpAdrp Lloh10, Lloh14 ; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh13 entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv %load = load <16 x i8>, ptr %src.gep %cmp = icmp sgt <16 x i8> %load, %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> %ext = bitcast <64 x i8> %ext.shuf to <16 x i32> %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer %dst.gep = getelementptr i32, ptr %dst, i64 %iv store <16 x i32> %sel, ptr %dst.gep %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop exit: ret void } define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(ptr %src, ptr %dst) { ; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh18: ; CHECK-NEXT: adrp x8, lCPI26_0@PAGE ; CHECK-NEXT: Lloh19: ; CHECK-NEXT: adrp x9, lCPI26_1@PAGE ; CHECK-NEXT: Lloh20: ; CHECK-NEXT: adrp x10, lCPI26_2@PAGE ; CHECK-NEXT: Lloh21: ; CHECK-NEXT: ldr q0, [x8, lCPI26_0@PAGEOFF] ; CHECK-NEXT: Lloh22: ; CHECK-NEXT: adrp x8, lCPI26_3@PAGE ; CHECK-NEXT: Lloh23: ; CHECK-NEXT: ldr q1, [x9, lCPI26_1@PAGEOFF] ; CHECK-NEXT: Lloh24: ; CHECK-NEXT: ldr q2, [x10, lCPI26_2@PAGEOFF] ; CHECK-NEXT: Lloh25: ; CHECK-NEXT: ldr q3, [x8, lCPI26_3@PAGEOFF] ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB26_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q4, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: cmge.16b v5, v4, #0 ; CHECK-NEXT: tbl.16b v7, { v4 }, v0 ; CHECK-NEXT: tbl.16b v16, { v4 }, v1 ; CHECK-NEXT: tbl.16b v18, { v4 }, v2 ; CHECK-NEXT: tbl.16b v4, { v4 }, v3 ; CHECK-NEXT: sshll2.8h v6, v5, #0 ; CHECK-NEXT: sshll.8h v5, v5, #0 ; CHECK-NEXT: sshll2.4s v17, v6, #0 ; CHECK-NEXT: sshll.4s v6, v6, #0 ; CHECK-NEXT: sshll2.4s v19, v5, #0 ; CHECK-NEXT: sshll.4s v5, v5, #0 ; CHECK-NEXT: and.16b v7, v7, v17 ; CHECK-NEXT: and.16b v6, v16, v6 ; CHECK-NEXT: and.16b v16, v18, v19 ; CHECK-NEXT: and.16b v4, v4, v5 ; CHECK-NEXT: stp q6, q7, [x1, #32] ; CHECK-NEXT: stp q4, q16, [x1], #64 ; CHECK-NEXT: b.ne LBB26_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh25 ; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh24 ; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh23 ; CHECK-NEXT: .loh AdrpAdrp Lloh18, Lloh22 ; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh21 entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv %load = load <16 x i8>, ptr %src.gep %cmp = icmp sgt <16 x i8> %load, %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> %ext = bitcast <64 x i8> %ext.shuf to <16 x i32> %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer %dst.gep = getelementptr i32, ptr %dst, i64 %iv store <16 x i32> %sel, ptr %dst.gep %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop exit: ret void }