; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

;; Test that the SIMD bitmask instructions can be selected.
;;
;; Every function below follows the same IR shape: compare each lane of the
;; input vector with zero (icmp eq ... zeroinitializer), then bitcast the
;; resulting vector of i1 to an integer with one bit per lane. The assertion
;; lines inside each function record the exact instruction sequence expected
;; from llc with the simd128 feature enabled.

target triple = "wasm32-unknown-unknown"

;; 16 x i8 -> i16. The expected output is a lane-wise i8x16.eq against a zero
;; constant followed directly by a single i8x16.bitmask.
define i16 @bitmask_v16i8(<16 x i8> %v) {
; CHECK-LABEL: bitmask_v16i8:
; CHECK:         .functype bitmask_v16i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <16 x i8> %v, zeroinitializer
  %bitmask = bitcast <16 x i1> %cmp to i16
  ret i16 %bitmask
}

;; 8 x i16 -> i8. The expected output is i16x8.eq followed by i16x8.bitmask.
define i8 @bitmask_v8i16(<8 x i16> %v) {
; CHECK-LABEL: bitmask_v8i16:
; CHECK:         .functype bitmask_v8i16 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i16x8.eq
; CHECK-NEXT:    i16x8.bitmask
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <8 x i16> %v, zeroinitializer
  %bitmask = bitcast <8 x i1> %cmp to i8
  ret i8 %bitmask
}

;; 4 x i32 -> i4, zero-extended to i8 in the IR. The expected output is
;; i32x4.eq followed by i32x4.bitmask, with no separate instruction for the
;; zero-extension.
define i8 @bitmask_v4i32(<4 x i32> %v) {
; CHECK-LABEL: bitmask_v4i32:
; CHECK:         .functype bitmask_v4i32 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0
; CHECK-NEXT:    i32x4.eq
; CHECK-NEXT:    i32x4.bitmask
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <4 x i32> %v, zeroinitializer
  %bitmask = bitcast <4 x i1> %cmp to i4
  %ext = zext i4 %bitmask to i8
  ret i8 %ext
}

;; 2 x i64 -> i2, zero-extended to i8 in the IR. The expected output is
;; i64x2.eq followed by i64x2.bitmask, with no separate instruction for the
;; zero-extension.
define i8 @bitmask_v2i64(<2 x i64> %v) {
; CHECK-LABEL: bitmask_v2i64:
; CHECK:         .functype bitmask_v2i64 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0
; CHECK-NEXT:    i64x2.eq
; CHECK-NEXT:    i64x2.bitmask
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <2 x i64> %v, zeroinitializer
  %bitmask = bitcast <2 x i1> %cmp to i2
  %ext = zext i2 %bitmask to i8
  ret i8 %ext
}

;; Test unusual vectors: element counts that do not fill a 128-bit vector
;; exactly (1, 7 and 8 x i8) and one that is twice as wide (32 x i8).

;; 1 x i8 -> i1. No bitmask instruction is expected here: the recorded output
;; extracts lane 0 and tests it with i32.eqz.
define i1 @bitmask_v1i8(<1 x i8> %v) {
; CHECK-LABEL: bitmask_v1i8:
; CHECK:         .functype bitmask_v1i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 0
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <1 x i8> %v, zeroinitializer
  %bitmask = bitcast <1 x i1> %cmp to i1
  ret i1 %bitmask
}

;; 7 x i8 -> i7. Per the recorded .functype, the argument arrives as seven
;; separate i32 parameters. The expected output rebuilds a vector with
;; i8x16.splat / i8x16.replace_lane, compares it with zero, and then assembles
;; the result one lane at a time (extract, and, shl, or) before masking the
;; final value with 127. No bitmask instruction appears in this sequence.
define i7 @bitmask_v7i8(<7 x i8> %v) {
; CHECK-LABEL: bitmask_v7i8:
; CHECK:         .functype bitmask_v7i8 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT:    .local v128
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    global.get __stack_pointer
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.sub
; CHECK-NEXT:    drop
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.splat
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.replace_lane 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i8x16.replace_lane 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    i8x16.replace_lane 3
; CHECK-NEXT:    local.get 4
; CHECK-NEXT:    i8x16.replace_lane 4
; CHECK-NEXT:    local.get 5
; CHECK-NEXT:    i8x16.replace_lane 5
; CHECK-NEXT:    local.get 6
; CHECK-NEXT:    i8x16.replace_lane 6
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    local.tee 7
; CHECK-NEXT:    i16x8.extract_lane_u 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    local.tee 7
; CHECK-NEXT:    i16x8.extract_lane_u 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 2
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 3
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 3
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 4
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 5
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 5
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 6
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.const 127
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <7 x i8> %v, zeroinitializer
  %bitmask = bitcast <7 x i1> %cmp to i7
  ret i7 %bitmask
}

;; 8 x i8 -> i8. The expected output compares as i8x16, widens the low eight
;; lanes with i16x8.extend_low_i8x16_s, and then uses i16x8.bitmask.
define i8 @bitmask_v8i8(<8 x i8> %v) {
; CHECK-LABEL: bitmask_v8i8:
; CHECK:         .functype bitmask_v8i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i16x8.bitmask
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <8 x i8> %v, zeroinitializer
  %bitmask = bitcast <8 x i1> %cmp to i8
  ret i8 %bitmask
}

;; 32 x i8 -> i32. Per the recorded .functype, the argument arrives as two
;; v128 parameters. The expected output compares each half with zero and
;; assembles the result one lane at a time: the first half fills bits 0-15
;; (masked with 65535), the second half is shifted into bits 16-31, and the
;; pieces are combined with i32.or. No bitmask instruction appears in this
;; sequence.
define i32 @bitmask_v32i8(<32 x i8> %v) {
; CHECK-LABEL: bitmask_v32i8:
; CHECK:         .functype bitmask_v32i8 (v128, v128) -> (i32)
; CHECK-NEXT:    .local v128
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    global.get __stack_pointer
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.sub
; CHECK-NEXT:    drop
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    local.tee 0
; CHECK-NEXT:    i8x16.extract_lane_u 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 2
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 3
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 3
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 4
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 5
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 5
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 6
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 7
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 7
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 8
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 9
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 9
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 10
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 10
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 11
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 11
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 12
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 13
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 13
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 14
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 14
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 15
; CHECK-NEXT:    i32.const 15
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.const 65535
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    local.tee 0
; CHECK-NEXT:    i8x16.extract_lane_u 15
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 14
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 30
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 13
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 29
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 12
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 28
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 11
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 27
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 10
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 26
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 9
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 25
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 8
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 7
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 23
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 6
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 22
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 5
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 21
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 4
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 20
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 3
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 19
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 2
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 18
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 17
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <32 x i8> %v, zeroinitializer
  %bitmask = bitcast <32 x i1> %cmp to i32
  ret i32 %bitmask
}