; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; CHECK-GI: warning: Instruction selection used fallback path for sminv_v3i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smaxv_v3i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uminv_v3i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umaxv_v3i64 declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>) declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>) declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>) declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>) declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>) declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>) declare i16 @llvm.vector.reduce.smin.v3i16(<3 x i16>) declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>) declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) declare i32 @llvm.vector.reduce.smin.v3i32(<3 x i32>) declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>) declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) declare i64 @llvm.vector.reduce.smin.v3i64(<3 x i64>) declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>) declare i128 @llvm.vector.reduce.smin.v2i128(<2 x i128>) declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>) declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>) declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>) declare i8 
@llvm.vector.reduce.smax.v8i8(<8 x i8>) declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>) declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>) declare i16 @llvm.vector.reduce.smax.v3i16(<3 x i16>) declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) declare i32 @llvm.vector.reduce.smax.v3i32(<3 x i32>) declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>) declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) declare i64 @llvm.vector.reduce.smax.v3i64(<3 x i64>) declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) declare i128 @llvm.vector.reduce.smax.v2i128(<2 x i128>) declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>) declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>) declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>) declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>) declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>) declare i16 @llvm.vector.reduce.umin.v3i16(<3 x i16>) declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>) declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) declare i32 @llvm.vector.reduce.umin.v3i32(<3 x i32>) declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>) declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) declare i64 @llvm.vector.reduce.umin.v3i64(<3 x i64>) declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) declare i128 @llvm.vector.reduce.umin.v2i128(<2 x i128>) declare i8 
@llvm.vector.reduce.umax.v2i8(<2 x i8>) declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>) declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>) declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>) declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>) declare i16 @llvm.vector.reduce.umax.v3i16(<3 x i16>) declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>) declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>) declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32>) declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>) declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) declare i64 @llvm.vector.reduce.umax.v3i64(<3 x i64>) declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>) declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128>) declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) define i8 @smax_B(ptr nocapture readonly %arr) { ; CHECK-LABEL: smax_B: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: smaxv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <16 x i8>, ptr %arr %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %arr.load) ret i8 %r } define i16 @smax_H(ptr nocapture readonly %arr) { ; CHECK-LABEL: smax_H: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: smaxv h0, v0.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <8 x i16>, ptr %arr %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %arr.load) ret i16 %r } define i32 @smax_S(ptr nocapture readonly %arr) { ; CHECK-LABEL: smax_S: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: smaxv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = 
load <4 x i32>, ptr %arr %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %arr.load) ret i32 %r } define i8 @umax_B(ptr nocapture readonly %arr) { ; CHECK-LABEL: umax_B: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: umaxv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <16 x i8>, ptr %arr %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %arr.load) ret i8 %r } define i16 @umax_H(ptr nocapture readonly %arr) { ; CHECK-LABEL: umax_H: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: umaxv h0, v0.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <8 x i16>, ptr %arr %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %arr.load) ret i16 %r } define i32 @umax_S(ptr nocapture readonly %arr) { ; CHECK-LABEL: umax_S: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: umaxv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <4 x i32>, ptr %arr %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %arr.load) ret i32 %r } define i8 @smin_B(ptr nocapture readonly %arr) { ; CHECK-LABEL: smin_B: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sminv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <16 x i8>, ptr %arr %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %arr.load) ret i8 %r } define i16 @smin_H(ptr nocapture readonly %arr) { ; CHECK-LABEL: smin_H: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sminv h0, v0.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <8 x i16>, ptr %arr %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %arr.load) ret i16 %r } define i32 @smin_S(ptr nocapture readonly %arr) { ; CHECK-LABEL: smin_S: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sminv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <4 x i32>, ptr %arr %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %arr.load) ret i32 %r } define i8 @umin_B(ptr nocapture readonly 
%arr) { ; CHECK-LABEL: umin_B: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uminv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <16 x i8>, ptr %arr %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %arr.load) ret i8 %r } define i16 @umin_H(ptr nocapture readonly %arr) { ; CHECK-LABEL: umin_H: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uminv h0, v0.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <8 x i16>, ptr %arr %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %arr.load) ret i16 %r } define i32 @umin_S(ptr nocapture readonly %arr) { ; CHECK-LABEL: umin_S: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uminv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %arr.load = load <4 x i32>, ptr %arr %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %arr.load) ret i32 %r } define float @fmaxnm_S(ptr nocapture readonly %arr) { ; CHECK-LABEL: fmaxnm_S: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: fmaxnmv s0, v0.4s ; CHECK-NEXT: ret %arr.load = load <4 x float>, ptr %arr %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arr.load) ret float %r } define float @fminnm_S(ptr nocapture readonly %arr) { ; CHECK-LABEL: fminnm_S: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: fminnmv s0, v0.4s ; CHECK-NEXT: ret %arr.load = load <4 x float>, ptr %arr %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arr.load) ret float %r } define i16 @oversized_umax_256(ptr nocapture readonly %arr) { ; CHECK-SD-LABEL: oversized_umax_256: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldp q1, q0, [x0] ; CHECK-SD-NEXT: umax v0.8h, v1.8h, v0.8h ; CHECK-SD-NEXT: umaxv h0, v0.8h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: oversized_umax_256: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldp q0, q1, [x0] ; CHECK-GI-NEXT: umax v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: umaxv h0, v0.8h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret 
%arr.load = load <16 x i16>, ptr %arr %r = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %arr.load) ret i16 %r } define i32 @oversized_umax_512(ptr nocapture readonly %arr) { ; CHECK-SD-LABEL: oversized_umax_512: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldp q0, q1, [x0, #32] ; CHECK-SD-NEXT: ldp q2, q3, [x0] ; CHECK-SD-NEXT: umax v1.4s, v3.4s, v1.4s ; CHECK-SD-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-SD-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: umaxv s0, v0.4s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: oversized_umax_512: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldp q0, q1, [x0] ; CHECK-GI-NEXT: ldp q2, q3, [x0, #32] ; CHECK-GI-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: umax v1.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: umaxv s0, v0.4s ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %arr.load = load <16 x i32>, ptr %arr %r = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %arr.load) ret i32 %r } define i16 @oversized_umin_256(ptr nocapture readonly %arr) { ; CHECK-SD-LABEL: oversized_umin_256: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldp q1, q0, [x0] ; CHECK-SD-NEXT: umin v0.8h, v1.8h, v0.8h ; CHECK-SD-NEXT: uminv h0, v0.8h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: oversized_umin_256: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldp q0, q1, [x0] ; CHECK-GI-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: uminv h0, v0.8h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %arr.load = load <16 x i16>, ptr %arr %r = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %arr.load) ret i16 %r } define i32 @oversized_umin_512(ptr nocapture readonly %arr) { ; CHECK-SD-LABEL: oversized_umin_512: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldp q0, q1, [x0, #32] ; CHECK-SD-NEXT: ldp q2, q3, [x0] ; CHECK-SD-NEXT: umin v1.4s, v3.4s, v1.4s ; CHECK-SD-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-SD-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: uminv s0, v0.4s ; CHECK-SD-NEXT: fmov w0, s0 ; 
CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: oversized_umin_512: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldp q0, q1, [x0] ; CHECK-GI-NEXT: ldp q2, q3, [x0, #32] ; CHECK-GI-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: umin v1.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: uminv s0, v0.4s ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %arr.load = load <16 x i32>, ptr %arr %r = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %arr.load) ret i32 %r } define i16 @oversized_smax_256(ptr nocapture readonly %arr) { ; CHECK-SD-LABEL: oversized_smax_256: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldp q1, q0, [x0] ; CHECK-SD-NEXT: smax v0.8h, v1.8h, v0.8h ; CHECK-SD-NEXT: smaxv h0, v0.8h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: oversized_smax_256: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldp q0, q1, [x0] ; CHECK-GI-NEXT: smax v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: smaxv h0, v0.8h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %arr.load = load <16 x i16>, ptr %arr %r = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %arr.load) ret i16 %r } define i32 @oversized_smax_512(ptr nocapture readonly %arr) { ; CHECK-SD-LABEL: oversized_smax_512: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldp q0, q1, [x0, #32] ; CHECK-SD-NEXT: ldp q2, q3, [x0] ; CHECK-SD-NEXT: smax v1.4s, v3.4s, v1.4s ; CHECK-SD-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-SD-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: smaxv s0, v0.4s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: oversized_smax_512: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldp q0, q1, [x0] ; CHECK-GI-NEXT: ldp q2, q3, [x0, #32] ; CHECK-GI-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: smax v1.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: smaxv s0, v0.4s ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %arr.load = load <16 x i32>, ptr %arr %r = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %arr.load) ret i32 %r } define i16 
@oversized_smin_256(ptr nocapture readonly %arr) { ; CHECK-SD-LABEL: oversized_smin_256: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldp q1, q0, [x0] ; CHECK-SD-NEXT: smin v0.8h, v1.8h, v0.8h ; CHECK-SD-NEXT: sminv h0, v0.8h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: oversized_smin_256: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldp q0, q1, [x0] ; CHECK-GI-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: sminv h0, v0.8h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %arr.load = load <16 x i16>, ptr %arr %r = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %arr.load) ret i16 %r } define i32 @oversized_smin_512(ptr nocapture readonly %arr) { ; CHECK-SD-LABEL: oversized_smin_512: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldp q0, q1, [x0, #32] ; CHECK-SD-NEXT: ldp q2, q3, [x0] ; CHECK-SD-NEXT: smin v1.4s, v3.4s, v1.4s ; CHECK-SD-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-SD-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: sminv s0, v0.4s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: oversized_smin_512: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldp q0, q1, [x0] ; CHECK-GI-NEXT: ldp q2, q3, [x0, #32] ; CHECK-GI-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: smin v1.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: sminv s0, v0.4s ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %arr.load = load <16 x i32>, ptr %arr %r = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %arr.load) ret i32 %r } define i8 @sminv_v2i8(<2 x i8> %a) { ; CHECK-SD-LABEL: sminv_v2i8: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: sminp v0.2s, v0.2s, v0.2s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v2i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: sxtb w8, w8 ; CHECK-GI-NEXT: fmov w9, s1 ; 
CHECK-GI-NEXT: cmp w8, w9, sxtb ; CHECK-GI-NEXT: fcsel s0, s0, s1, lt ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> %a) ret i8 %arg1 } define i8 @sminv_v3i8(<3 x i8> %a) { ; CHECK-SD-LABEL: sminv_v3i8: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: movi v0.4h, #127 ; CHECK-SD-NEXT: mov v0.h[0], w0 ; CHECK-SD-NEXT: mov v0.h[1], w1 ; CHECK-SD-NEXT: mov v0.h[2], w2 ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: sminv h0, v0.4h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v3i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sxtb w8, w0 ; CHECK-GI-NEXT: cmp w8, w1, sxtb ; CHECK-GI-NEXT: csel w8, w0, w1, lt ; CHECK-GI-NEXT: sxtb w9, w8 ; CHECK-GI-NEXT: cmp w9, w2, sxtb ; CHECK-GI-NEXT: csel w0, w8, w2, lt ; CHECK-GI-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> %a) ret i8 %arg1 } define i8 @sminv_v4i8(<4 x i8> %a) { ; CHECK-SD-LABEL: sminv_v4i8: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: sminv h0, v0.4h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v4i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: umov w8, v0.h[0] ; CHECK-GI-NEXT: umov w9, v0.h[1] ; CHECK-GI-NEXT: umov w10, v0.h[2] ; CHECK-GI-NEXT: umov w12, v0.h[3] ; CHECK-GI-NEXT: sxtb w11, w8 ; CHECK-GI-NEXT: cmp w11, w9, sxtb ; CHECK-GI-NEXT: sxtb w11, w10 ; CHECK-GI-NEXT: csel w8, w8, w9, lt ; CHECK-GI-NEXT: cmp w11, w12, sxtb ; CHECK-GI-NEXT: sxtb w9, w8 ; CHECK-GI-NEXT: csel w10, w10, w12, lt ; CHECK-GI-NEXT: cmp w9, w10, sxtb ; CHECK-GI-NEXT: csel w0, w8, w10, lt ; CHECK-GI-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %a) ret i8 %arg1 } define i8 @sminv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: sminv_v8i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sminv 
b0, v0.8b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a) ret i8 %arg1 } define i8 @sminv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: sminv_v16i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sminv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a) ret i8 %arg1 } define i8 @sminv_v32i8(<32 x i8> %a) { ; CHECK-LABEL: sminv_v32i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b ; CHECK-NEXT: sminv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %a) ret i8 %arg1 } define i16 @sminv_v2i16(<2 x i16> %a) { ; CHECK-SD-LABEL: sminv_v2i16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: sminp v0.2s, v0.2s, v0.2s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v2i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: sxth w8, w8 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: cmp w8, w9, sxth ; CHECK-GI-NEXT: fcsel s0, s0, s1, lt ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %a) ret i16 %arg1 } define i16 @sminv_v3i16(<3 x i16> %a) { ; CHECK-SD-LABEL: sminv_v3i16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff ; CHECK-SD-NEXT: mov v0.h[3], w8 ; CHECK-SD-NEXT: sminv h0, v0.4h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v3i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: smov w8, v0.h[0] ; CHECK-GI-NEXT: umov w9, v0.h[0] ; CHECK-GI-NEXT: umov w10, v0.h[1] ; CHECK-GI-NEXT: smov 
w11, v0.h[2] ; CHECK-GI-NEXT: umov w13, v0.h[2] ; CHECK-GI-NEXT: fmov w12, s1 ; CHECK-GI-NEXT: cmp w8, w12, sxth ; CHECK-GI-NEXT: csel w8, w9, w10, lt ; CHECK-GI-NEXT: cmp w11, w8, sxth ; CHECK-GI-NEXT: csel w0, w8, w13, gt ; CHECK-GI-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a) ret i16 %arg1 } define i16 @sminv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: sminv_v4i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sminv h0, v0.4h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a) ret i16 %arg1 } define i16 @sminv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: sminv_v8i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sminv h0, v0.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a) ret i16 %arg1 } define i16 @sminv_v16i16(<16 x i16> %a) { ; CHECK-LABEL: sminv_v16i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-NEXT: sminv h0, v0.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %a) ret i16 %arg1 } define i32 @sminv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: sminv_v2i32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sminp v0.2s, v0.2s, v0.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a) ret i32 %arg1 } define i32 @sminv_v3i32(<3 x i32> %a) { ; CHECK-SD-LABEL: sminv_v3i32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff ; CHECK-SD-NEXT: mov v0.s[3], w8 ; CHECK-SD-NEXT: sminv s0, v0.4s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v3i32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: mov s2, v0.s[2] ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: cmp w8, w9 ; CHECK-GI-NEXT: fmov w9, s2 ; CHECK-GI-NEXT: fcsel s0, s0, s1, lt ; CHECK-GI-NEXT: fmov w8, s0 ; 
CHECK-GI-NEXT: cmp w8, w9 ; CHECK-GI-NEXT: fcsel s0, s0, s2, lt ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a) ret i32 %arg1 } define i32 @sminv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: sminv_v4i32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sminv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a) ret i32 %arg1 } define i32 @sminv_v8i32(<8 x i32> %a) { ; CHECK-LABEL: sminv_v8i32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sminv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %a) ret i32 %arg1 } define i64 @sminv_v2i64(<2 x i64> %a) { ; CHECK-SD-LABEL: sminv_v2i64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: cmgt d2, d1, d0 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v2i64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: fmov x9, d1 ; CHECK-GI-NEXT: cmp x8, x9 ; CHECK-GI-NEXT: fcsel d0, d0, d1, lt ; CHECK-GI-NEXT: fmov x0, d0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a) ret i64 %arg1 } define i64 @sminv_v3i64(<3 x i64> %a) { ; CHECK-LABEL: sminv_v3i64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: mov v2.d[1], x8 ; CHECK-NEXT: cmgt v1.2d, v2.2d, v0.2d ; CHECK-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: cmgt d2, d1, d0 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret 
entry: %arg1 = call i64 @llvm.vector.reduce.smin.v3i64(<3 x i64> %a) ret i64 %arg1 } define i64 @sminv_v4i64(<4 x i64> %a) { ; CHECK-SD-LABEL: sminv_v4i64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmgt v2.2d, v1.2d, v0.2d ; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: cmgt d2, d1, d0 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v4i64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: cmgt v2.2d, v1.2d, v0.2d ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: fmov x9, d1 ; CHECK-GI-NEXT: cmp x8, x9 ; CHECK-GI-NEXT: fcsel d0, d0, d1, lt ; CHECK-GI-NEXT: fmov x0, d0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %a) ret i64 %arg1 } define i128 @sminv_v2i128(<2 x i128> %a) { ; CHECK-SD-LABEL: sminv_v2i128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp x0, x2 ; CHECK-SD-NEXT: sbcs xzr, x1, x3 ; CHECK-SD-NEXT: csel x0, x0, x2, lt ; CHECK-SD-NEXT: csel x1, x1, x3, lt ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sminv_v2i128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: cmp x1, x3 ; CHECK-GI-NEXT: cset w8, lt ; CHECK-GI-NEXT: cmp x0, x2 ; CHECK-GI-NEXT: cset w9, lo ; CHECK-GI-NEXT: cmp x1, x3 ; CHECK-GI-NEXT: csel w8, w9, w8, eq ; CHECK-GI-NEXT: tst w8, #0x1 ; CHECK-GI-NEXT: csel x0, x0, x2, ne ; CHECK-GI-NEXT: csel x1, x1, x3, ne ; CHECK-GI-NEXT: ret entry: %arg1 = call i128 @llvm.vector.reduce.smin.v2i128(<2 x i128> %a) ret i128 %arg1 } define i8 @smaxv_v2i8(<2 x i8> %a) { ; CHECK-SD-LABEL: smaxv_v2i8: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: smaxp v0.2s, v0.2s, v0.2s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: smaxv_v2i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 
; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: sxtb w8, w8 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: cmp w8, w9, sxtb ; CHECK-GI-NEXT: fcsel s0, s0, s1, gt ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> %a) ret i8 %arg1 } define i8 @smaxv_v3i8(<3 x i8> %a) { ; CHECK-SD-LABEL: smaxv_v3i8: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: movi v0.4h, #128 ; CHECK-SD-NEXT: mov v0.h[0], w0 ; CHECK-SD-NEXT: mov v0.h[1], w1 ; CHECK-SD-NEXT: mov v0.h[2], w2 ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: smaxv h0, v0.4h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: smaxv_v3i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sxtb w8, w0 ; CHECK-GI-NEXT: cmp w8, w1, sxtb ; CHECK-GI-NEXT: csel w8, w0, w1, gt ; CHECK-GI-NEXT: sxtb w9, w8 ; CHECK-GI-NEXT: cmp w9, w2, sxtb ; CHECK-GI-NEXT: csel w0, w8, w2, gt ; CHECK-GI-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> %a) ret i8 %arg1 } define i8 @smaxv_v4i8(<4 x i8> %a) { ; CHECK-SD-LABEL: smaxv_v4i8: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: smaxv h0, v0.4h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: smaxv_v4i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: umov w8, v0.h[0] ; CHECK-GI-NEXT: umov w9, v0.h[1] ; CHECK-GI-NEXT: umov w10, v0.h[2] ; CHECK-GI-NEXT: umov w12, v0.h[3] ; CHECK-GI-NEXT: sxtb w11, w8 ; CHECK-GI-NEXT: cmp w11, w9, sxtb ; CHECK-GI-NEXT: sxtb w11, w10 ; CHECK-GI-NEXT: csel w8, w8, w9, gt ; CHECK-GI-NEXT: cmp w11, w12, sxtb ; CHECK-GI-NEXT: sxtb w9, w8 ; CHECK-GI-NEXT: csel w10, w10, w12, gt ; CHECK-GI-NEXT: cmp w9, w10, sxtb ; CHECK-GI-NEXT: csel w0, w8, w10, gt ; CHECK-GI-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %a) ret i8 
%arg1 } define i8 @smaxv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: smaxv_v8i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smaxv b0, v0.8b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a) ret i8 %arg1 } define i8 @smaxv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: smaxv_v16i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smaxv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a) ret i8 %arg1 } define i8 @smaxv_v32i8(<32 x i8> %a) { ; CHECK-LABEL: smaxv_v32i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: smaxv b0, v0.16b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %a) ret i8 %arg1 } define i16 @smaxv_v2i16(<2 x i16> %a) { ; CHECK-SD-LABEL: smaxv_v2i16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: smaxp v0.2s, v0.2s, v0.2s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: smaxv_v2i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: sxth w8, w8 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: cmp w8, w9, sxth ; CHECK-GI-NEXT: fcsel s0, s0, s1, gt ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> %a) ret i16 %arg1 } define i16 @smaxv_v3i16(<3 x i16> %a) { ; CHECK-SD-LABEL: smaxv_v3i16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov w8, #-32768 // =0xffff8000 ; CHECK-SD-NEXT: mov v0.h[3], w8 ; CHECK-SD-NEXT: smaxv h0, v0.4h ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: smaxv_v3i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov h1, v0.h[1] ; 
CHECK-GI-NEXT: smov w8, v0.h[0] ; CHECK-GI-NEXT: umov w9, v0.h[0] ; CHECK-GI-NEXT: umov w10, v0.h[1] ; CHECK-GI-NEXT: smov w11, v0.h[2] ; CHECK-GI-NEXT: umov w13, v0.h[2] ; CHECK-GI-NEXT: fmov w12, s1 ; CHECK-GI-NEXT: cmp w8, w12, sxth ; CHECK-GI-NEXT: csel w8, w9, w10, gt ; CHECK-GI-NEXT: cmp w11, w8, sxth ; CHECK-GI-NEXT: csel w0, w8, w13, lt ; CHECK-GI-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a) ret i16 %arg1 } define i16 @smaxv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: smaxv_v4i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smaxv h0, v0.4h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a) ret i16 %arg1 } define i16 @smaxv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: smaxv_v8i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smaxv h0, v0.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a) ret i16 %arg1 } define i16 @smaxv_v16i16(<16 x i16> %a) { ; CHECK-LABEL: smaxv_v16i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h ; CHECK-NEXT: smaxv h0, v0.8h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %a) ret i16 %arg1 } define i32 @smaxv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: smaxv_v2i32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smaxp v0.2s, v0.2s, v0.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %arg1 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a) ret i32 %arg1 } define i32 @smaxv_v3i32(<3 x i32> %a) { ; CHECK-SD-LABEL: smaxv_v3i32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #-2147483648 // =0x80000000 ; CHECK-SD-NEXT: mov v0.s[3], w8 ; CHECK-SD-NEXT: smaxv s0, v0.4s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: smaxv_v3i32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: mov s2, v0.s[2] ; CHECK-GI-NEXT: fmov w9, s1 ; 
CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: fcsel s0, s0, s1, gt
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: fcsel s0, s0, s2, gt
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smax.v3i32(<3 x i32> %a)
  ret i32 %arg1
}

; smax reduction of <4 x i32>: both selectors are expected to emit a single smaxv.
define i32 @smaxv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: smaxv_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
  ret i32 %arg1
}

; smax reduction of <8 x i32>: the two input registers are merged with a
; lane-wise smax, then reduced with smaxv.
define i32 @smaxv_v8i32(<8 x i32> %a) {
; CHECK-LABEL: smaxv_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %a)
  ret i32 %arg1
}

; smax reduction of <2 x i64>: no across-vector instruction is expected.
; SelectionDAG selects with cmgt+bif; GlobalISel uses a scalar cmp+fcsel.
define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-SD-LABEL: smaxv_v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: cmgt d2, d0, d1
; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: fcsel d0, d0, d1, gt
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a)
  ret i64 %arg1
}

; smax reduction of <3 x i64>: one expectation set serves both RUN lines (the
; file header lists this function among the GlobalISel fallbacks). The spare
; lane is filled with 0x8000000000000000 before the compare/select steps.
define i64 @smaxv_v3i64(<3 x i64> %a) {
; CHECK-LABEL: smaxv_v3i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: mov v2.d[1], x8
; CHECK-NEXT: cmgt v1.2d, v0.2d, v2.2d
; CHECK-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: cmgt d2, d0, d1
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smax.v3i64(<3 x i64> %a)
  ret i64 %arg1
}

; smax reduction of <4 x i64>: a vector cmgt+bif merges the two input
; registers; the remaining two lanes are then reduced per selector.
define i64 @smaxv_v4i64(<4 x i64> %a) {
; CHECK-SD-LABEL: smaxv_v4i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: cmgt d2, d0, d1
; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: fcsel d0, d0, d1, gt
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %a)
  ret i64 %arg1
}

; smax reduction of <2 x i128>: handled entirely in x0-x3. SelectionDAG uses
; cmp/sbcs; GlobalISel compares the x1/x3 and x0/x2 pairs separately.
define i128 @smaxv_v2i128(<2 x i128> %a) {
; CHECK-SD-LABEL: smaxv_v2i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cmp x2, x0
; CHECK-SD-NEXT: sbcs xzr, x3, x1
; CHECK-SD-NEXT: csel x0, x0, x2, lt
; CHECK-SD-NEXT: csel x1, x1, x3, lt
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v2i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w9, hi
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
; CHECK-GI-NEXT: tst w8, #0x1
; CHECK-GI-NEXT: csel x0, x0, x2, ne
; CHECK-GI-NEXT: csel x1, x1, x3, ne
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i128 @llvm.vector.reduce.smax.v2i128(<2 x i128> %a)
  ret i128 %arg1
}

; umin reduction of <2 x i8>: SelectionDAG masks each 32-bit lane to its low
; byte and uses uminp; GlobalISel compares the lanes as bytes in scalar code.
define i8 @uminv_v2i8(<2 x i8> %a) {
; CHECK-SD-LABEL: uminv_v2i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: uminp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v2i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: and w8, w8, #0xff
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: cmp w8, w9, uxtb
; CHECK-GI-NEXT: fcsel s0, s0, s1, lo
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %a)
  ret i8 %arg1
}

; umin reduction of <3 x i8>: the elements arrive in w0-w2. SelectionDAG
; inserts them into 16-bit lanes and uses uminv; GlobalISel chains two scalar
; compare/select steps.
define i8 @uminv_v3i8(<3 x i8> %a) {
; CHECK-SD-LABEL: uminv_v3i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d0, #0xff00ff00ff00ff
; CHECK-SD-NEXT: mov v0.h[0], w0
; CHECK-SD-NEXT: mov v0.h[1], w1
; CHECK-SD-NEXT: mov v0.h[2], w2
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: uminv h0, v0.4h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v3i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xff
; CHECK-GI-NEXT: cmp w8, w1, uxtb
; CHECK-GI-NEXT: csel w8, w0, w1, lo
; CHECK-GI-NEXT: and w9, w8, #0xff
; CHECK-GI-NEXT: cmp w9, w2, uxtb
; CHECK-GI-NEXT: csel w0, w8, w2, lo
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> %a)
  ret i8 %arg1
}

; umin reduction of <4 x i8>: SelectionDAG clears the high byte of each 16-bit
; lane (bic) and uses uminv; GlobalISel extracts the four lanes and reduces
; them with scalar compare/select.
define i8 @uminv_v4i8(<4 x i8> %a) {
; CHECK-SD-LABEL: uminv_v4i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: uminv h0, v0.4h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[1]
; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: and w12, w8, #0xff
; CHECK-GI-NEXT: cmp w12, w9, uxtb
; CHECK-GI-NEXT: and w12, w10, #0xff
; CHECK-GI-NEXT: csel w8, w8, w9, lo
; CHECK-GI-NEXT: cmp w12, w11, uxtb
; CHECK-GI-NEXT: csel w9, w10, w11, lo
; CHECK-GI-NEXT: and w10, w8, #0xff
; CHECK-GI-NEXT: cmp w10, w9, uxtb
; CHECK-GI-NEXT: csel w0, w8, w9, lo
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %a)
  ret i8 %arg1
}

; umin reduction of <8 x i8>: both selectors are expected to emit a single uminv.
define i8 @uminv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: uminv_v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uminv b0, v0.8b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
  ret i8 %arg1
}

; umin reduction of <16 x i8>: both selectors are expected to emit a single uminv.
define i8 @uminv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: uminv_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uminv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
  ret i8 %arg1
}

; umin reduction of <32 x i8>: the two input registers are merged with a
; lane-wise umin, then reduced with uminv.
define i8 @uminv_v32i8(<32 x i8> %a) {
; CHECK-LABEL: uminv_v32i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b
; CHECK-NEXT: uminv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %a)
  ret i8 %arg1
}

; umin reduction of <2 x i16>: SelectionDAG masks each 32-bit lane to its low
; halfword and uses uminp; GlobalISel compares the lanes as halfwords in
; scalar code.
define i16 @uminv_v2i16(<2 x i16> %a) {
; CHECK-SD-LABEL: uminv_v2i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: uminp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v2i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: and w8, w8, #0xffff
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: cmp w8, w9, uxth
; CHECK-GI-NEXT: fcsel s0, s0, s1, lo
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %a)
  ret i16 %arg1
}

; umin reduction of <3 x i16>: SelectionDAG writes all-ones into lane 3 and
; uses uminv; GlobalISel extracts the lanes and uses scalar compare/select.
define i16 @uminv_v3i16(<3 x i16> %a) {
; CHECK-SD-LABEL: uminv_v3i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: uminv h0, v0.4h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[0]
; CHECK-GI-NEXT: umov w10, v0.h[1]
; CHECK-GI-NEXT: umov w11, v0.h[2]
; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
; CHECK-GI-NEXT: cmp w8, w12, uxth
; CHECK-GI-NEXT: csel w8, w9, w10, lo
; CHECK-GI-NEXT: cmp w11, w8, uxth
; CHECK-GI-NEXT: csel w0, w8, w13, hi
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
  ret i16 %arg1
}

; umin reduction of <4 x i16>: both selectors are expected to emit a single uminv.
define i16 @uminv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: uminv_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uminv h0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
  ret i16 %arg1
}

; umin reduction of <8 x i16>: both selectors are expected to emit a single uminv.
define i16 @uminv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: uminv_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uminv h0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
  ret i16 %arg1
}

; umin reduction of <16 x i16>: the two input registers are merged with a
; lane-wise umin, then reduced with uminv.
define i16 @uminv_v16i16(<16 x i16> %a) {
; CHECK-LABEL: uminv_v16i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uminv h0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %a)
  ret i16 %arg1
}

; umin reduction of <2 x i32>: both selectors are expected to emit a pairwise uminp.
define i32 @uminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uminv_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
  ret i32 %arg1
}

; umin reduction of <3 x i32>: SelectionDAG writes all-ones into lane 3 and
; uses uminv; GlobalISel chains two scalar cmp+fcsel steps.
define i32 @uminv_v3i32(<3 x i32> %a) {
; CHECK-SD-LABEL: uminv_v3i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-SD-NEXT: mov v0.s[3], w8
; CHECK-SD-NEXT: uminv s0, v0.4s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: mov s2, v0.s[2]
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: fcsel s0, s0, s1, lo
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: fcsel s0, s0, s2, lo
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umin.v3i32(<3 x i32> %a)
  ret i32 %arg1
}

; umin reduction of <4 x i32>: both selectors are expected to emit a single uminv.
define i32 @uminv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: uminv_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uminv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
  ret i32 %arg1
}

; umin reduction of <8 x i32>: the two input registers are merged with a
; lane-wise umin, then reduced with uminv.
define i32 @uminv_v8i32(<8 x i32> %a) {
; CHECK-LABEL: uminv_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uminv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a)
  ret i32 %arg1
}

; umin reduction of <2 x i64>: no across-vector instruction is expected.
; SelectionDAG selects with cmhi+bif; GlobalISel uses a scalar cmp+fcsel.
define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-SD-LABEL: uminv_v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: cmhi d2, d1, d0
; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: fcsel d0, d0, d1, lo
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
  ret i64 %arg1
}

; umin reduction of <3 x i64>: one expectation set serves both RUN lines (the
; file header lists this function among the GlobalISel fallbacks). The spare
; lane is filled with all-ones before the compare/select steps.
define i64 @uminv_v3i64(<3 x i64> %a) {
; CHECK-LABEL: uminv_v3i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: mov v2.d[1], x8
; CHECK-NEXT: cmhi v1.2d, v2.2d, v0.2d
; CHECK-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: cmhi d2, d1, d0
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umin.v3i64(<3 x i64> %a)
  ret i64 %arg1
}

; umin reduction of <4 x i64>: a vector cmhi+bif merges the two input
; registers; the remaining two lanes are then reduced per selector.
define i64 @uminv_v4i64(<4 x i64> %a) {
; CHECK-SD-LABEL: uminv_v4i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: cmhi d2, d1, d0
; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: fcsel d0, d0, d1, lo
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %a)
  ret i64 %arg1
}

; umin reduction of <2 x i128>: handled entirely in x0-x3. SelectionDAG uses
; cmp/sbcs; GlobalISel compares the x1/x3 and x0/x2 pairs separately.
define i128 @uminv_v2i128(<2 x i128> %a) {
; CHECK-SD-LABEL: uminv_v2i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cmp x0, x2
; CHECK-SD-NEXT: sbcs xzr, x1, x3
; CHECK-SD-NEXT: csel x0, x0, x2, lo
; CHECK-SD-NEXT: csel x1, x1, x3, lo
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v2i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w8, lo
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w9, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
; CHECK-GI-NEXT: tst w8, #0x1
; CHECK-GI-NEXT: csel x0, x0, x2, ne
; CHECK-GI-NEXT: csel x1, x1, x3, ne
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i128 @llvm.vector.reduce.umin.v2i128(<2 x i128> %a)
  ret i128 %arg1
}

; umax reduction of <2 x i8>: SelectionDAG masks each 32-bit lane to its low
; byte and uses umaxp; GlobalISel compares the lanes as bytes in scalar code.
define i8 @umaxv_v2i8(<2 x i8> %a) {
; CHECK-SD-LABEL: umaxv_v2i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: umaxp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v2i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: and w8, w8, #0xff
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: cmp w8, w9, uxtb
; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> %a)
  ret i8 %arg1
}

; umax reduction of <3 x i8>: the elements arrive in w0-w2. SelectionDAG
; inserts them into a zeroed vector's 16-bit lanes and uses umaxv; GlobalISel
; chains two scalar compare/select steps.
define i8 @umaxv_v3i8(<3 x i8> %a) {
; CHECK-SD-LABEL: umaxv_v3i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-SD-NEXT: mov v0.h[0], w0
; CHECK-SD-NEXT: mov v0.h[1], w1
; CHECK-SD-NEXT: mov v0.h[2], w2
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: umaxv h0, v0.4h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v3i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xff
; CHECK-GI-NEXT: cmp w8, w1, uxtb
; CHECK-GI-NEXT: csel w8, w0, w1, hi
; CHECK-GI-NEXT: and w9, w8, #0xff
; CHECK-GI-NEXT: cmp w9, w2, uxtb
; CHECK-GI-NEXT: csel w0, w8, w2, hi
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
  ret i8 %arg1
}

; umax reduction of <4 x i8>: SelectionDAG clears the high byte of each 16-bit
; lane (bic) and uses umaxv; GlobalISel extracts the four lanes and reduces
; them with scalar compare/select.
define i8 @umaxv_v4i8(<4 x i8> %a) {
; CHECK-SD-LABEL: umaxv_v4i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: umaxv h0, v0.4h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[1]
; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: and w12, w8, #0xff
; CHECK-GI-NEXT: cmp w12, w9, uxtb
; CHECK-GI-NEXT: and w12, w10, #0xff
; CHECK-GI-NEXT: csel w8, w8, w9, hi
; CHECK-GI-NEXT: cmp w12, w11, uxtb
; CHECK-GI-NEXT: csel w9, w10, w11, hi
; CHECK-GI-NEXT: and w10, w8, #0xff
; CHECK-GI-NEXT: cmp w10, w9, uxtb
; CHECK-GI-NEXT: csel w0, w8, w9, hi
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> %a)
  ret i8 %arg1
}

; umax reduction of <8 x i8>: both selectors are expected to emit a single umaxv.
define i8 @umaxv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: umaxv_v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a)
  ret i8 %arg1
}

; umax reduction of <16 x i8>: both selectors are expected to emit a single umaxv.
define i8 @umaxv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: umaxv_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a)
  ret i8 %arg1
}

; umax reduction of <32 x i8>: the two input registers are merged with a
; lane-wise umax, then reduced with umaxv.
define i8 @umaxv_v32i8(<32 x i8> %a) {
; CHECK-LABEL: umaxv_v32i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %a)
  ret i8 %arg1
}

; umax reduction of <2 x i16>: SelectionDAG masks each 32-bit lane to its low
; halfword and uses umaxp; GlobalISel compares the lanes as halfwords in
; scalar code.
define i16 @umaxv_v2i16(<2 x i16> %a) {
; CHECK-SD-LABEL: umaxv_v2i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: umaxp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v2i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: and w8, w8, #0xffff
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: cmp w8, w9, uxth
; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> %a)
  ret i16 %arg1
}

; umax reduction of <3 x i16>: SelectionDAG zeroes lane 3 (wzr) and uses
; umaxv; GlobalISel extracts the lanes and uses scalar compare/select.
define i16 @umaxv_v3i16(<3 x i16> %a) {
; CHECK-SD-LABEL: umaxv_v3i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: mov v0.h[3], wzr
; CHECK-SD-NEXT: umaxv h0, v0.4h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[0]
; CHECK-GI-NEXT: umov w10, v0.h[1]
; CHECK-GI-NEXT: umov w11, v0.h[2]
; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
; CHECK-GI-NEXT: cmp w8, w12, uxth
; CHECK-GI-NEXT: csel w8, w9, w10, hi
; CHECK-GI-NEXT: cmp w11, w8, uxth
; CHECK-GI-NEXT: csel w0, w8, w13, lo
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
  ret i16 %arg1
}

; umax reduction of <4 x i16>: both selectors are expected to emit a single umaxv.
define i16 @umaxv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: umaxv_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a)
  ret i16 %arg1
}

; umax reduction of <8 x i16>: both selectors are expected to emit a single umaxv.
define i16 @umaxv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: umaxv_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umaxv h0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a)
  ret i16 %arg1
}

; umax reduction of <16 x i16>: the two input registers are merged with a
; lane-wise umax, then reduced with umaxv.
define i16 @umaxv_v16i16(<16 x i16> %a) {
; CHECK-LABEL: umaxv_v16i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umax v0.8h, v0.8h, v1.8h
; CHECK-NEXT: umaxv h0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %a)
  ret i16 %arg1
}

; umax reduction of <2 x i32>: both selectors are expected to emit a pairwise umaxp.
define i32 @umaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: umaxv_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a)
  ret i32 %arg1
}

; umax reduction of <3 x i32>: SelectionDAG zeroes lane 3 (wzr) and uses
; umaxv; GlobalISel chains two scalar cmp+fcsel steps.
define i32 @umaxv_v3i32(<3 x i32> %a) {
; CHECK-SD-LABEL: umaxv_v3i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov v0.s[3], wzr
; CHECK-SD-NEXT: umaxv s0, v0.4s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: mov s2, v0.s[2]
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: fcsel s0, s0, s2, hi
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
  ret i32 %arg1
}

; umax reduction of <4 x i32>: both selectors are expected to emit a single umaxv.
define i32 @umaxv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: umaxv_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
  ret i32 %arg1
}

; umax reduction of <8 x i32>: the two input registers are merged with a
; lane-wise umax, then reduced with umaxv.
define i32 @umaxv_v8i32(<8 x i32> %a) {
; CHECK-LABEL: umaxv_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %a)
  ret i32 %arg1
}

; umax reduction of <2 x i64>: no across-vector instruction is expected.
; SelectionDAG selects with cmhi+bif; GlobalISel uses a scalar cmp+fcsel.
define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-SD-LABEL: umaxv_v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: cmhi d2, d0, d1
; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: fcsel d0, d0, d1, hi
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
  ret i64 %arg1
}

; umax reduction of <3 x i64>: one expectation set serves both RUN lines (the
; file header lists this function among the GlobalISel fallbacks). The spare
; lane is zeroed (xzr) before the compare/select steps.
define i64 @umaxv_v3i64(<3 x i64> %a) {
; CHECK-LABEL: umaxv_v3i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v3.16b, v2.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: mov v3.d[1], xzr
; CHECK-NEXT: cmhi v3.2d, v0.2d, v3.2d
; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8
; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-NEXT: and v1.8b, v1.8b, v4.8b
; CHECK-NEXT: cmhi d2, d0, d1
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umax.v3i64(<3 x i64> %a)
  ret i64 %arg1
}

; umax reduction of <4 x i64>: a vector cmhi+bif merges the two input
; registers; the remaining two lanes are then reduced per selector.
define i64 @umaxv_v4i64(<4 x i64> %a) {
; CHECK-SD-LABEL: umaxv_v4i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: cmhi d2, d0, d1
; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: fcsel d0, d0, d1, hi
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %a)
  ret i64 %arg1
}

; umax reduction of <2 x i128>: handled entirely in x0-x3. SelectionDAG uses
; cmp/sbcs; GlobalISel compares the x1/x3 and x0/x2 pairs separately.
define i128 @umaxv_v2i128(<2 x i128> %a) {
; CHECK-SD-LABEL: umaxv_v2i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cmp x2, x0
; CHECK-SD-NEXT: sbcs xzr, x3, x1
; CHECK-SD-NEXT: csel x0, x0, x2, lo
; CHECK-SD-NEXT: csel x1, x1, x3, lo
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v2i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w8, hi
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w9, hi
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
; CHECK-GI-NEXT: tst w8, #0x1
; CHECK-GI-NEXT: csel x0, x0, x2, ne
; CHECK-GI-NEXT: csel x1, x1, x3, ne
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
  ret i128 %arg1
}