; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

; Checks when an add-of-splat + lshr-by-splat + trunc sequence on scalable
; vectors is selected as a single SVE2 rshrnb instruction. Functions whose
; name starts with "neg_" are the cases where rshrnb is NOT expected in the
; output.

; Add constant 32 == 1 << (6 - 1), shift by 6, i16 -> i8 elements.
define void @add_lshr_rshrnb_b_6(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrnb_b_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.b, z0.h, #6
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 32, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}

; Add constant 1 is not 1 << (6 - 1); a separate add and lsr are expected.
define void @neg_add_lshr_rshrnb_b_6(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: neg_add_lshr_rshrnb_b_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    add z0.h, z0.h, #1 // =0x1
; CHECK-NEXT:    lsr z0.h, z0.h, #6
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}

; Add constant 64 == 1 << (7 - 1), shift by 7, i16 -> i8 elements.
define void @add_lshr_rshrnb_h_7(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrnb_h_7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.b, z0.h, #7
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 64, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 7, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}

; Add constant 32 == 1 << (6 - 1), shift by 6, i32 -> i16 elements.
define void @add_lshr_rshrn_h_6(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrn_h_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.h, z0.s, #6
; CHECK-NEXT:    st1h { z0.s }, p0, [x1, x2, lsl #1]
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x i32>, ptr %ptr, align 2
  %1 = add <vscale x 4 x i32> %load, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 32, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 6, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 4 x i16> %3, ptr %4, align 1
  ret void
}

; Add constant 2 == 1 << (2 - 1), shift by 2, i32 -> i16 elements.
define void @add_lshr_rshrnb_h_2(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrnb_h_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.h, z0.s, #2
; CHECK-NEXT:    st1h { z0.s }, p0, [x1, x2, lsl #1]
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x i32>, ptr %ptr, align 2
  %1 = add <vscale x 4 x i32> %load, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 4 x i16> %3, ptr %4, align 1
  ret void
}

; The shift amount is a splat of -1 (all bits set after truncation), which is
; not a valid lshr amount; the expected output contains only the return.
define void @neg_add_lshr_rshrnb_h_0(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: neg_add_lshr_rshrnb_h_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x i32>, ptr %ptr, align 2
  %1 = add <vscale x 4 x i32> %load, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 -1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 4 x i16> %3, ptr %4, align 1
  ret void
}

; Shift amount of zero: only the add is expected, with no shift instruction.
define void @neg_zero_shift(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: neg_zero_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    add z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    st1h { z0.s }, p0, [x1, x2, lsl #1]
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x i32>, ptr %ptr, align 2
  %1 = add <vscale x 4 x i32> %load, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 4 x i16> %3, ptr %4, align 1
  ret void
}

; The wide argument occupies two registers; each half is expected to get its
; own rshrnb before the halves are combined with uzp1.
define void @wide_add_shift_add_rshrnb_b(ptr %dest, i64 %index, <vscale x 16 x i16> %arg1){
; CHECK-LABEL: wide_add_shift_add_rshrnb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    rshrnb z1.b, z1.h, #6
; CHECK-NEXT:    rshrnb z0.b, z0.h, #6
; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x0, x1]
; CHECK-NEXT:    add z0.b, z1.b, z0.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT:    ret
  %1 = add <vscale x 16 x i16> %arg1, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 32, i64 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
  %2 = lshr <vscale x 16 x i16> %1, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 6, i64 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
  %3 = getelementptr inbounds i8, ptr %dest, i64 %index
  %load = load <vscale x 16 x i8>, ptr %3, align 2
  %4 = trunc <vscale x 16 x i16> %2 to <vscale x 16 x i8>
  %5 = add <vscale x 16 x i8> %load, %4
  store <vscale x 16 x i8> %5, ptr %3, align 2
  ret void
}

; Same shape as the byte variant above, with i32 -> i16 elements.
define void @wide_add_shift_add_rshrnb_h(ptr %dest, i64 %index, <vscale x 8 x i32> %arg1){
; CHECK-LABEL: wide_add_shift_add_rshrnb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    rshrnb z1.h, z1.s, #6
; CHECK-NEXT:    rshrnb z0.h, z0.s, #6
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %1 = add <vscale x 8 x i32> %arg1, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 32, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
  %2 = lshr <vscale x 8 x i32> %1, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
  %3 = getelementptr inbounds i16, ptr %dest, i64 %index
  %load = load <vscale x 8 x i16>, ptr %3, align 2
  %4 = trunc <vscale x 8 x i32> %2 to <vscale x 8 x i16>
  %5 = add <vscale x 8 x i16> %load, %4
  store <vscale x 8 x i16> %5, ptr %3, align 2
  ret void
}

; i64 -> i32 elements with the largest shift that still yields rshrnb here:
; add constant 2147483648 == 1 << (32 - 1), shift by 32.
define void @wide_add_shift_add_rshrnb_d(ptr %dest, i64 %index, <vscale x 4 x i64> %arg1){
; CHECK-LABEL: wide_add_shift_add_rshrnb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    rshrnb z1.s, z1.d, #32
; CHECK-NEXT:    rshrnb z0.s, z0.d, #32
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    add z0.s, z1.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %1 = add <vscale x 4 x i64> %arg1, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2147483648, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %2 = lshr <vscale x 4 x i64> %1, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 32, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %3 = getelementptr inbounds i32, ptr %dest, i64 %index
  %load = load <vscale x 4 x i32>, ptr %3, align 4
  %4 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
  %5 = add <vscale x 4 x i32> %load, %4
  store <vscale x 4 x i32> %5, ptr %3, align 4
  ret void
}

; Do not emit rshrnb if the shift amount is larger than the dest eltsize in bits
define void @neg_wide_add_shift_add_rshrnb_d(ptr %dest, i64 %index, <vscale x 4 x i64> %arg1){
; CHECK-LABEL: neg_wide_add_shift_add_rshrnb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.d, #0x800000000000
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add z0.d, z0.d, z2.d
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    lsr z1.d, z1.d, #48
; CHECK-NEXT:    lsr z0.d, z0.d, #48
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    add z0.s, z1.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %1 = add <vscale x 4 x i64> %arg1, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 140737488355328, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %2 = lshr <vscale x 4 x i64> %1, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 48, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
  %3 = getelementptr inbounds i32, ptr %dest, i64 %index
  %load = load <vscale x 4 x i32>, ptr %3, align 4
  %4 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
  %5 = add <vscale x 4 x i32> %load, %4
  store <vscale x 4 x i32> %5, ptr %3, align 4
  ret void
}

; The second add operand is a function argument rather than a constant splat.
define void @neg_trunc_lsr_add_op1_not_splat(ptr %ptr, ptr %dst, i64 %index, <vscale x 8 x i16> %add_op1){
; CHECK-LABEL: neg_trunc_lsr_add_op1_not_splat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    lsr z0.h, z0.h, #6
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, %add_op1
  %2 = lshr <vscale x 8 x i16> %1, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 6, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}

; The shift amount is a function argument rather than a constant splat.
define void @neg_trunc_lsr_op1_not_splat(ptr %ptr, ptr %dst, i64 %index, <vscale x 8 x i16> %lshr_op1){
; CHECK-LABEL: neg_trunc_lsr_op1_not_splat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    add z1.h, z1.h, #32 // =0x20
; CHECK-NEXT:    lsrr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 32, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
  %2 = lshr <vscale x 8 x i16> %1, %lshr_op1
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}

; The add result %1 feeds both the lshr and a second add, so the add has to be
; kept as a separate instruction.
define void @neg_add_has_two_uses(ptr %ptr, ptr %dst, ptr %dst2, i64 %index){
; CHECK-LABEL: neg_add_has_two_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    add z0.h, z0.h, #32 // =0x20
; CHECK-NEXT:    add z1.h, z0.h, z0.h
; CHECK-NEXT:    lsr z0.h, z0.h, #6
; CHECK-NEXT:    st1h { z1.h }, p0, [x2, x3, lsl #1]
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x3]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 32, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = add <vscale x 8 x i16> %1, %1
  %4 = getelementptr inbounds i16, ptr %dst2, i64 %index
  %5 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %6 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i16> %3, ptr %4, align 1
  store <vscale x 8 x i8> %5, ptr %6, align 1
  ret void
}

; Add constant 32 == 1 << (6 - 1), shift by 6, i64 -> i32 elements.
define void @add_lshr_rshrnb_s(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrnb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.s, z0.d, #6
; CHECK-NEXT:    st1w { z0.d }, p0, [x1, x2, lsl #2]
; CHECK-NEXT:    ret
  %load = load <vscale x 2 x i64>, ptr %ptr, align 2
  %1 = add <vscale x 2 x i64> %load, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 32, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
  %2 = lshr <vscale x 2 x i64> %1, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 6, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
  %3 = trunc <vscale x 2 x i64> %2 to <vscale x 2 x i32>
  %4 = getelementptr inbounds i32, ptr %dst, i64 %index
  store <vscale x 2 x i32> %3, ptr %4, align 1
  ret void
}

; Same add/shift as above, but the truncation goes from i64 to i16 elements
; instead of to the half-width i32; a separate add and lsr are expected.
define void @neg_add_lshr_rshrnb_s(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: neg_add_lshr_rshrnb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20
; CHECK-NEXT:    lsr z0.d, z0.d, #6
; CHECK-NEXT:    st1h { z0.d }, p0, [x1, x2, lsl #1]
; CHECK-NEXT:    ret
  %load = load <vscale x 2 x i64>, ptr %ptr, align 2
  %1 = add <vscale x 2 x i64> %load, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 32, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
  %2 = lshr <vscale x 2 x i64> %1, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 6, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
  %3 = trunc <vscale x 2 x i64> %2 to <vscale x 2 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 2 x i16> %3, ptr %4, align 1
  ret void
}

; Same add/shift/trunc as add_lshr_rshrnb_b_6, but fed by a masked load and
; consumed by a masked store that share the caller-provided predicate.
define void @masked_store_rshrnb(ptr %ptr, ptr %dst, i64 %index, <vscale x 8 x i1> %mask) {                           ; preds = %vector.body, %vector.ph
; CHECK-LABEL: masked_store_rshrnb:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.b, z0.h, #6
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %wide.masked.load = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %ptr, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> poison)
  %1 = add <vscale x 8 x i16> %wide.masked.load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 32, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> %3, ptr %4, i32 1, <vscale x 8 x i1> %mask)
  ret void
}

declare void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8>, ptr, i32, <vscale x 8 x i1>)
declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)