// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s // REQUIRES: aarch64-registered-target || arm-registered-target #include // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]] // int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) { return vtbl1_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]] // int8x8_t test_vqtbl1_s8(int8x16_t a, uint8x8_t b) { return vqtbl1_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_s8 // CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]] // int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { return vtbl2_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_s8 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]] // int8x8_t test_vqtbl2_s8(int8x16x2_t a, uint8x8_t b) { return vqtbl2_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_s8 // CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]] // int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { return vtbl3_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_s8 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]] // int8x8_t test_vqtbl3_s8(int8x16x3_t a, uint8x8_t b) { return vqtbl3_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_s8 // CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]] // int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { return vtbl4_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_s8 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]] // int8x8_t test_vqtbl4_s8(int8x16x4_t a, uint8x8_t b) { return vqtbl4_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]] // int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) { return vqtbl1q_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_s8 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]] // int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) { return vqtbl2q_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_s8 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]] // int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) { return vqtbl3q_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_s8 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]] // int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) { return vqtbl4q_s8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] // int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) { return vtbx1_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]] // int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { return vtbx2_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) // CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] // CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] // int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { return vtbx3_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8 // CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> // CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX28_I]] // int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { return vtbx4_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX1_I]] // int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, uint8x8_t c) { return vqtbx1_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]] // int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, uint8x8_t c) { return vqtbx2_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]] // int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, uint8x8_t c) { return vqtbx3_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_s8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]] // int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, uint8x8_t c) { return vqtbx4_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX1_I]] // int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, uint8x16_t c) { return vqtbx1q_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX2_I]] // int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) { return vqtbx2q_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX3_I]] // int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) { return vqtbx3q_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_s8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX4_I]] // int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) { return vqtbx4q_s8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]] // uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) { return vtbl1_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]] // uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) { return vqtbl1_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_u8 // CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]] // uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { return vtbl2_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_u8 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]] // uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) { return vqtbl2_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_u8 // CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]] // uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { return vtbl3_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_u8 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]] // uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) { return vqtbl3_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_u8 // CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]] // uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { return vtbl4_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_u8 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]] // uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) { return vqtbl4_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]] // uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) { return vqtbl1q_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_u8 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]] // uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) { return vqtbl2q_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_u8 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]] // uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) { return vqtbl3q_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_u8 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]] // uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) { return vqtbl4q_u8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] // uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { return vtbx1_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]] // uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { return vtbx2_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) // CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] // CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] // uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { return vtbx3_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8 // CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> // CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX28_I]] // uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { return vtbx4_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX1_I]] // uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) { return vqtbx1_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]] // uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) { return vqtbx2_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]] // uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) { return vqtbx3_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_u8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]] // uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) { return vqtbx4_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX1_I]] // uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vqtbx1q_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX2_I]] // uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) { return vqtbx2q_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX3_I]] // uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) { return vqtbx3q_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_u8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX4_I]] // uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) { return vqtbx4q_u8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]] // poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { return vtbl1_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_p8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]] // poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) { return vqtbl1_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_p8 // CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]] // poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { return vtbl2_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_p8 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]] // poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) { return vqtbl2_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_p8 // CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]] // poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { return vtbl3_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_p8 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]] // poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) { return vqtbl3_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_p8 // CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]] // poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { return vtbl4_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_p8 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]]) // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]] // poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) { return vqtbl4_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_p8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]] // poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) { return vqtbl1q_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_p8 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]] // poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) { return vqtbl2q_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_p8 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]] // poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) { return vqtbl3q_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_p8 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]]) // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]] // poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) { return vqtbl4q_p8(a, b); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] // poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) { return vtbx1_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]] // poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { return vtbx2_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) // CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] // CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] // poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) { return vtbx3_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8 // CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> // CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX28_I]] // poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { return vtbx4_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX1_I]] // poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) { return vqtbx1_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX2_I]] // poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) { return vqtbx2_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX3_I]] // poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) { return vqtbx3_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_p8 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]]) // CHECK-NEXT: ret <8 x i8> [[VTBX4_I]] // poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) { return vqtbx4_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_p8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX1_I]] // poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) { return vqtbx1q_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_p8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX2_I]] // poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) { return vqtbx2q_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_p8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX3_I]] // poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) { return vqtbx3q_p8(a, b, c); } // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_p8 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16 // CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]]) // CHECK-NEXT: ret <16 x i8> [[VTBX4_I]] // poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) { return vqtbx4q_p8(a, b, c); }