105 lines
4.6 KiB
LLVM
105 lines
4.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
|
|
|
|
; This test has an instruction that gets sunk into the loop and is an
|
|
; active.lane.mask operand (%exitcount.ptrcnt.to.int = ptrtoint). We
|
|
; need to make sure it is loop invariant.
|
|
|
|
; @a(%b, %c): when %b is above %c (unsigned pointer compare), runs a
; vectorised loop over (%b - %c) bytes starting at %c, 16 lanes per
; iteration. Lane k of an iteration stores the low 8 bits of the address
; %c + (index | k); the store is predicated by llvm.get.active.lane.mask
; called with (%index, %b - %c). The function returns undef.
;
; The autogenerated assertions that follow expect the element count to be
; computed once in the preheader (subs r4, r0, r1) and handed to dlstp.8,
; with letp closing the loop, i.e. the count is not recomputed inside the
; loop body.
define i32 @a(ptr readnone %b, ptr %c) {
|
|
; CHECK-LABEL: a:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: .save {r4, lr}
|
|
; CHECK-NEXT: push {r4, lr}
|
|
; CHECK-NEXT: cmp r0, r1
|
|
; CHECK-NEXT: it ls
|
|
; CHECK-NEXT: popls {r4, pc}
|
|
; CHECK-NEXT: .LBB0_1: @ %while.body.preheader
|
|
; CHECK-NEXT: subs r4, r0, r1
|
|
; CHECK-NEXT: movs r2, #0
|
|
; CHECK-NEXT: mov r3, r1
|
|
; CHECK-NEXT: dlstp.8 lr, r4
|
|
; CHECK-NEXT: .LBB0_2: @ %vector.body
|
|
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-NEXT: adds r0, r1, r2
|
|
; CHECK-NEXT: vidup.u8 q0, r0, #1
|
|
; CHECK-NEXT: adds r2, #16
|
|
; CHECK-NEXT: vstrb.8 q0, [r3], #16
|
|
; CHECK-NEXT: letp lr, .LBB0_2
|
|
; CHECK-NEXT: @ %bb.3: @ %while.end
|
|
; CHECK-NEXT: pop {r4, pc}
|
|
entry:
|
|
; Skip the loop entirely unless %b is above %c (unsigned).
%cmp3 = icmp ugt ptr %b, %c
|
|
br i1 %cmp3, label %while.body.preheader, label %while.end
|
|
|
|
while.body.preheader: ; preds = %entry
|
|
; Byte count %b - %c, formed as a GEP of %b by -%c and converted back to i32.
%c5 = ptrtoint ptr %c to i32
|
|
%0 = sub i32 0, %c5
|
|
%uglygep = getelementptr i8, ptr %b, i32 %0
|
|
; This ptrtoint is the lane-mask operand named in the file header comment.
%exitcount.ptrcnt.to.int = ptrtoint ptr %uglygep to i32
|
|
; %n.vec = byte count rounded up to a multiple of 16; used as the loop bound.
%n.rnd.up = add i32 %exitcount.ptrcnt.to.int, 15
|
|
%n.vec = and i32 %n.rnd.up, -16
|
|
br label %vector.body
|
|
|
|
vector.body: ; preds = %vector.body, %while.body.preheader
|
|
; %index starts at 0 and advances by 16 per iteration (see %index.next).
%index = phi i32 [ 0, %while.body.preheader ], [ %index.next, %vector.body ]
|
|
; Sixteen scalar addresses: %c + %index, then %c + (%index | k) for k = 1..15.
%next.gep = getelementptr i8, ptr %c, i32 %index
|
|
%1 = or disjoint i32 %index, 1
|
|
%next.gep7 = getelementptr i8, ptr %c, i32 %1
|
|
%2 = or disjoint i32 %index, 2
|
|
%next.gep8 = getelementptr i8, ptr %c, i32 %2
|
|
%3 = or disjoint i32 %index, 3
|
|
%next.gep9 = getelementptr i8, ptr %c, i32 %3
|
|
%4 = or disjoint i32 %index, 4
|
|
%next.gep10 = getelementptr i8, ptr %c, i32 %4
|
|
%5 = or disjoint i32 %index, 5
|
|
%next.gep11 = getelementptr i8, ptr %c, i32 %5
|
|
%6 = or disjoint i32 %index, 6
|
|
%next.gep12 = getelementptr i8, ptr %c, i32 %6
|
|
%7 = or disjoint i32 %index, 7
|
|
%next.gep13 = getelementptr i8, ptr %c, i32 %7
|
|
%8 = or disjoint i32 %index, 8
|
|
%next.gep14 = getelementptr i8, ptr %c, i32 %8
|
|
%9 = or disjoint i32 %index, 9
|
|
%next.gep15 = getelementptr i8, ptr %c, i32 %9
|
|
%10 = or disjoint i32 %index, 10
|
|
%next.gep16 = getelementptr i8, ptr %c, i32 %10
|
|
%11 = or disjoint i32 %index, 11
|
|
%next.gep17 = getelementptr i8, ptr %c, i32 %11
|
|
%12 = or disjoint i32 %index, 12
|
|
%next.gep18 = getelementptr i8, ptr %c, i32 %12
|
|
%13 = or disjoint i32 %index, 13
|
|
%next.gep19 = getelementptr i8, ptr %c, i32 %13
|
|
%14 = or disjoint i32 %index, 14
|
|
%next.gep20 = getelementptr i8, ptr %c, i32 %14
|
|
%15 = or disjoint i32 %index, 15
|
|
%next.gep21 = getelementptr i8, ptr %c, i32 %15
|
|
; Pack the sixteen addresses into one <16 x ptr>, lane k holding address k.
%16 = insertelement <16 x ptr> poison, ptr %next.gep, i32 0
|
|
%17 = insertelement <16 x ptr> %16, ptr %next.gep7, i32 1
|
|
%18 = insertelement <16 x ptr> %17, ptr %next.gep8, i32 2
|
|
%19 = insertelement <16 x ptr> %18, ptr %next.gep9, i32 3
|
|
%20 = insertelement <16 x ptr> %19, ptr %next.gep10, i32 4
|
|
%21 = insertelement <16 x ptr> %20, ptr %next.gep11, i32 5
|
|
%22 = insertelement <16 x ptr> %21, ptr %next.gep12, i32 6
|
|
%23 = insertelement <16 x ptr> %22, ptr %next.gep13, i32 7
|
|
%24 = insertelement <16 x ptr> %23, ptr %next.gep14, i32 8
|
|
%25 = insertelement <16 x ptr> %24, ptr %next.gep15, i32 9
|
|
%26 = insertelement <16 x ptr> %25, ptr %next.gep16, i32 10
|
|
%27 = insertelement <16 x ptr> %26, ptr %next.gep17, i32 11
|
|
%28 = insertelement <16 x ptr> %27, ptr %next.gep18, i32 12
|
|
%29 = insertelement <16 x ptr> %28, ptr %next.gep19, i32 13
|
|
%30 = insertelement <16 x ptr> %29, ptr %next.gep20, i32 14
|
|
%31 = insertelement <16 x ptr> %30, ptr %next.gep21, i32 15
|
|
; Predicate for this iteration; its second operand is defined in the preheader.
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %exitcount.ptrcnt.to.int)
|
|
; Keep the low byte of each address and store the 16 bytes at %c + %index
; under the mask.
%32 = ptrtoint <16 x ptr> %31 to <16 x i32>
|
|
%33 = trunc <16 x i32> %32 to <16 x i8>
|
|
call void @llvm.masked.store.v16i8.p0(<16 x i8> %33, ptr %next.gep, i32 1, <16 x i1> %active.lane.mask)
|
|
; Exit once the rounded-up bound %n.vec is reached.
%index.next = add i32 %index, 16
|
|
%34 = icmp eq i32 %index.next, %n.vec
|
|
br i1 %34, label %while.end, label %vector.body
|
|
|
|
while.end: ; preds = %vector.body, %entry
|
|
; The result is never given a defined value.
ret i32 undef
|
|
}
|
|
|
|
; Intrinsics called from @a's vector.body: the first produces the <16 x i1>
; lane mask, the second is the byte-vector store that takes that mask as its
; last operand.
declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
|
|
declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
|