; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI:         warning: Instruction selection used fallback path for test_bit_sink_operand

; BIT Bitwise Insert if True
;
; Every test_bit_v* function below builds the same bitwise select in IR:
;   %or = (%C & %B) | (~%C & %A)
; where ~%C is spelled as an xor of %C with an all-ones vector constant.
; The expected SelectionDAG output for each of them is a single "bit".
;
; 8-bit vectors tests

define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
; CHECK-SD-LABEL: test_bit_v1i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_v1i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    fmov x10, d2
; CHECK-GI-NEXT:    and w9, w10, w9
; CHECK-GI-NEXT:    bic w8, w8, w10
; CHECK-GI-NEXT:    orr w8, w9, w8
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    ret
  %and = and <1 x i8> %C, %B
  %neg = xor <1 x i8> %C, <i8 -1>
  %and1 = and <1 x i8> %neg, %A
  %or = or <1 x i8> %and, %and1
  ret <1 x i8> %or
}

; 16-bit vectors tests

define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
; CHECK-SD-LABEL: test_bit_v1i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_v1i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    fmov x10, d2
; CHECK-GI-NEXT:    and w9, w10, w9
; CHECK-GI-NEXT:    bic w8, w8, w10
; CHECK-GI-NEXT:    orr w8, w9, w8
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    ret
  %and = and <1 x i16> %C, %B
  %neg = xor <1 x i16> %C, <i16 -1>
  %and1 = and <1 x i16> %neg, %A
  %or = or <1 x i16> %and, %and1
  ret <1 x i16> %or
}

; 32-bit vectors tests

define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
; CHECK-SD-LABEL: test_bit_v1i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_v1i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    fmov x10, d2
; CHECK-GI-NEXT:    and w9, w10, w9
; CHECK-GI-NEXT:    bic w8, w8, w10
; CHECK-GI-NEXT:    orr w8, w9, w8
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %and = and <1 x i32> %C, %B
  %neg = xor <1 x i32> %C, <i32 -1>
  %and1 = and <1 x i32> %neg, %A
  %or = or <1 x i32> %and, %and1
  ret <1 x i32> %or
}

; 64-bit vectors tests

define <1 x i64> @test_bit_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C) {
; CHECK-SD-LABEL: test_bit_v1i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_v1i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov x8, d2
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    fmov x10, d0
; CHECK-GI-NEXT:    and x9, x8, x9
; CHECK-GI-NEXT:    bic x8, x10, x8
; CHECK-GI-NEXT:    orr x8, x9, x8
; CHECK-GI-NEXT:    fmov d0, x8
; CHECK-GI-NEXT:    ret
  %and = and <1 x i64> %C, %B
  %neg = xor <1 x i64> %C, <i64 -1>
  %and1 = and <1 x i64> %neg, %A
  %or = or <1 x i64> %and, %and1
  ret <1 x i64> %or
}

define <2 x i32> @test_bit_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
; CHECK-LABEL: test_bit_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %and = and <2 x i32> %C, %B
  %neg = xor <2 x i32> %C, <i32 -1, i32 -1>
  %and1 = and <2 x i32> %neg, %A
  %or = or <2 x i32> %and, %and1
  ret <2 x i32> %or
}

define <4 x i16> @test_bit_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
; CHECK-LABEL: test_bit_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %and = and <4 x i16> %C, %B
  %neg = xor <4 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1>
  %and1 = and <4 x i16> %neg, %A
  %or = or <4 x i16> %and, %and1
  ret <4 x i16> %or
}

define <8 x i8> @test_bit_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; CHECK-LABEL: test_bit_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %and = and <8 x i8> %C, %B
  %neg = xor <8 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and1 = and <8 x i8> %neg, %A
  %or = or <8 x i8> %and, %and1
  ret <8 x i8> %or
}

; 128-bit vectors tests

define <2 x i64> @test_bit_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) {
; CHECK-LABEL: test_bit_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %and = and <2 x i64> %C, %B
  %neg = xor <2 x i64> %C, <i64 -1, i64 -1>
  %and1 = and <2 x i64> %neg, %A
  %or = or <2 x i64> %and, %and1
  ret <2 x i64> %or
}

define <4 x i32> @test_bit_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_bit_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %and = and <4 x i32> %C, %B
  %neg = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and1 = and <4 x i32> %neg, %A
  %or = or <4 x i32> %and, %and1
  ret <4 x i32> %or
}

define <8 x i16> @test_bit_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
; CHECK-LABEL: test_bit_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %and = and <8 x i16> %C, %B
  %neg = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and1 = and <8 x i16> %neg, %A
  %or = or <8 x i16> %and, %and1
  ret <8 x i16> %or
}

define <16 x i8> @test_bit_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-LABEL: test_bit_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %and = and <16 x i8> %C, %B
  %neg = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and1 = and <16 x i8> %neg, %A
  %or = or <16 x i8> %and, %and1
  ret <16 x i8> %or
}

; The inverted mask (%0) is computed once in the entry block but is only
; consumed by the and/or select inside the do.body loop. The expected output
; has no separate vector NOT: the select appears as one "bit" in the loop body.
define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32> %mask, i32 %scratch) {
; CHECK-LABEL: test_bit_sink_operand:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #32
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    cmp w0, #0
; CHECK-NEXT:    mov w9, wzr
; CHECK-NEXT:    cinc w8, w0, lt
; CHECK-NEXT:    asr w8, w8, #1
; CHECK-NEXT:  .LBB11_1: // %do.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    bit v1.16b, v0.16b, v2.16b
; CHECK-NEXT:    add x10, sp, #16
; CHECK-NEXT:    mov x11, sp
; CHECK-NEXT:    bfi x10, x9, #2, #2
; CHECK-NEXT:    bfi x11, x9, #2, #2
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    cmp w9, #5
; CHECK-NEXT:    str q1, [sp, #16]
; CHECK-NEXT:    str w0, [x10]
; CHECK-NEXT:    ldr q1, [sp, #16]
; CHECK-NEXT:    str q0, [sp]
; CHECK-NEXT:    str w8, [x11]
; CHECK-NEXT:    ldr q0, [sp]
; CHECK-NEXT:    b.ne .LBB11_1
; CHECK-NEXT:  // %bb.2: // %do.end
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    add sp, sp, #32
; CHECK-NEXT:    ret

entry:
  ; ~mask, loop-invariant; used only by %vbsl4.i in the loop below.
  %0 = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %div = sdiv i32 %scratch, 2
  br label %do.body

do.body:
  %dst.addr.0 = phi <4 x i32> [ %dst, %entry ], [ %vecins, %do.body ]
  %src.addr.0 = phi <4 x i32> [ %src, %entry ], [ %vecins1, %do.body ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
  ; (src & mask) | (dst & ~mask)
  %vbsl3.i = and <4 x i32> %src.addr.0, %mask
  %vbsl4.i = and <4 x i32> %dst.addr.0, %0
  %vbsl5.i = or <4 x i32> %vbsl3.i, %vbsl4.i
  %vecins = insertelement <4 x i32> %vbsl5.i, i32 %scratch, i32 %i.0
  %vecins1 = insertelement <4 x i32> %src.addr.0, i32 %div, i32 %i.0
  %inc = add nuw nsw i32 %i.0, 1
  %exitcond.not = icmp eq i32 %inc, 5
  br i1 %exitcond.not, label %do.end, label %do.body

do.end:
  ret <4 x i32> %vecins
}