; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s

; concat(trunc(not %x), trunc(not %y)) should lower to one narrowing uzp1
; followed by a single full-width mvn, rather than one mvn per half.
define <8 x i16> @not_not_trunc_concat(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: not_not_trunc_concat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    ret
  ; Bitwise NOT is spelled as xor with an all-ones splat.
  %notx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %trnx = trunc <4 x i32> %notx to <4 x i16>
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %trny = trunc <4 x i32> %noty to <4 x i16>
  ; Identity mask over both inputs: a plain concatenation of the two halves.
  %r = shufflevector <4 x i16> %trnx, <4 x i16> %trny, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %r
}

; Chains of concat -> truncate -> negate should flatten out to a single negate.
define <16 x i8> @not_not_trunc_concat_chain(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: not_not_trunc_concat_chain:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    ret
  ; First pair: not -> trunc to i16 -> concat -> trunc to i8.
  %nota = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %trna = trunc <4 x i32> %nota to <4 x i16>
  %notb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
  %trnb = trunc <4 x i32> %notb to <4 x i16>
  %concat_a = shufflevector <4 x i16> %trna, <4 x i16> %trnb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %trun_concat_a = trunc <8 x i16> %concat_a to <8 x i8>
  ; Second pair: same shape as the first.
  %notx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %trnx = trunc <4 x i32> %notx to <4 x i16>
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %trny = trunc <4 x i32> %noty to <4 x i16>
  %concat_b = shufflevector <4 x i16> %trnx, <4 x i16> %trny, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %trun_concat_b = trunc <8 x i16> %concat_b to <8 x i8>
  ; Final concatenation of the two i8 halves into the 16-lane result.
  %r = shufflevector <8 x i8> %trun_concat_a, <8 x i8> %trun_concat_b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %r
}

; Combine should not fire here, otherwise slightly worse code will be emitted.
define <8 x i16> @not_not_trunc_concat_multiple_uses(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: not_not_trunc_concat_multiple_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    xtn v1.4h, v1.4s
; CHECK-NEXT:    mvn v0.8b, v0.8b
; CHECK-NEXT:    mvn v1.8b, v1.8b
; CHECK-NEXT:    add v2.4h, v0.4h, v1.4h
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    mov v2.d[1], v2.d[0]
; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  ; Bitwise NOT is spelled as xor with an all-ones splat.
  %notx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %trnx = trunc <4 x i32> %notx to <4 x i16>
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %trny = trunc <4 x i32> %noty to <4 x i16>
  ; Identity mask: plain concatenation of the two truncated halves.
  %concat = shufflevector <4 x i16> %trnx, <4 x i16> %trny, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Second use of %trnx and %trny besides %concat; the expected output keeps
  ; the two separate 64-bit mvn instructions instead of merging them.
  %add = add <4 x i16> %trnx, %trny
  ; Both shuffle inputs are %add, so the identity mask duplicates it into both halves.
  %extend_add = shufflevector <4 x i16> %add, <4 x i16> %add, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %r = add <8 x i16> %concat, %extend_add
  ret <8 x i16> %r
}