; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aa-pipeline=basic-aa -passes='loop-mssa(licm)' -S | FileCheck %s

@X = global i32 0 ; [#uses=1]

declare void @foo()

declare i32 @llvm.bitreverse.i32(i32)

; This testcase tests for a problem where LICM hoists
; potentially trapping instructions when they are not guaranteed to execute.
; The sdiv below divides by %A (loaded from @X) and sits in IfUnEqual, which
; is only reached when %c is false; the expected output keeps it there.
define i32 @test1(i1 %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       Loop:
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOPTAIL:%.*]], label [[IFUNEQUAL:%.*]]
; CHECK:       IfUnEqual:
; CHECK-NEXT:    [[B1:%.*]] = sdiv i32 4, [[A]]
; CHECK-NEXT:    br label [[LOOPTAIL]]
; CHECK:       LoopTail:
; CHECK-NEXT:    [[B:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[B1]], [[IFUNEQUAL]] ]
; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[OUT:%.*]]
; CHECK:       Out:
; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOPTAIL]] ]
; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, ptr @X ; [#uses=2]
  br label %Loop
Loop: ; preds = %LoopTail, %0
  call void @foo( )
  br i1 %c, label %LoopTail, label %IfUnEqual

IfUnEqual: ; preds = %Loop
  %B1 = sdiv i32 4, %A ; [#uses=1]
  br label %LoopTail

LoopTail: ; preds = %IfUnEqual, %Loop
  %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ] ; [#uses=1]
  br i1 %c, label %Loop, label %Out
Out: ; preds = %LoopTail
  %C = sub i32 %A, %B ; [#uses=1]
  ret i32 %C
}


declare void @foo2(i32) nounwind


;; It is ok and desirable to hoist this potentially trapping instruction.
define i32 @test2(i1 %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
; CHECK-NEXT:    [[B:%.*]] = sdiv i32 4, [[A]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       Loop:
; CHECK-NEXT:    br label [[LOOP2:%.*]]
; CHECK:       loop2:
; CHECK-NEXT:    call void @foo2(i32 [[B]])
; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
; CHECK:       Out:
; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP2]] ]
; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, ptr @X
  br label %Loop
Loop:
  ;; Should have hoisted this div!
  %B = sdiv i32 4, %A
  br label %loop2

loop2:
  call void @foo2( i32 %B )
  br i1 %c, label %Loop, label %Out
Out:
  %C = sub i32 %A, %B
  ret i32 %C
}


; Don't bother constant folding the add, just hoist it.
; The expected output keeps `add i32 4, 2` as an instruction and places it
; ahead of the loop.
define i32 @test3(i1 %c) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
; CHECK-NEXT:    [[B:%.*]] = add i32 4, 2
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       Loop:
; CHECK-NEXT:    call void @foo2(i32 [[B]])
; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
; CHECK:       Out:
; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP]] ]
; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, ptr @X ; [#uses=2]
  br label %Loop
Loop:
  %B = add i32 4, 2 ; [#uses=2]
  call void @foo2( i32 %B )
  br i1 %c, label %Loop, label %Out
Out: ; preds = %Loop
  %C = sub i32 %A, %B ; [#uses=1]
  ret i32 %C
}

; The sdiv follows a call to @foo_may_call_exit inside the loop body; the
; expected output leaves it in for.body, after that call, rather than hoisting.
; NOTE(review): presumably because the callee may not return (going by its
; name), so the division is not guaranteed to execute -- confirm.
define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[N_01:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    call void @foo_may_call_exit(i32 0)
; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[ADD]] = add nsw i32 [[N_01]], [[DIV]]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 10000
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    [[N_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[N_0_LCSSA]]
;
entry:
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  call void @foo_may_call_exit(i32 0)
  %div = sdiv i32 %x, %y
  %add = add nsw i32 %n.01, %div
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 10000
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
  %n.0.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %n.0.lcssa
}

declare void @foo_may_call_exit(i32)

; PR14854
; Both aggregate operations in the loop header operate only on the argument
; %e. The expected output moves the extractvalue into the entry block and
; re-materializes the insertvalue in the exit block (ifend).
define { ptr, i32 } @test5(i32 %i, { ptr, i32 } %e) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[OUT:%.*]] = extractvalue { ptr, i32 } [[E:%.*]], 1
; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
; CHECK:       tailrecurse:
; CHECK-NEXT:    [[I_TR:%.*]] = phi i32 [ [[I:%.*]], [[ENTRY:%.*]] ], [ [[CMP2:%.*]], [[THEN:%.*]] ]
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[OUT]], [[I_TR]]
; CHECK-NEXT:    br i1 [[CMP1]], label [[THEN]], label [[IFEND:%.*]]
; CHECK:       then:
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    [[CMP2]] = add i32 [[I_TR]], 1
; CHECK-NEXT:    br label [[TAILRECURSE]]
; CHECK:       ifend:
; CHECK-NEXT:    [[D_LE:%.*]] = insertvalue { ptr, i32 } [[E]], ptr null, 0
; CHECK-NEXT:    ret { ptr, i32 } [[D_LE]]
;
entry:
  br label %tailrecurse

tailrecurse: ; preds = %then, %entry
  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
  %out = extractvalue { ptr, i32 } %e, 1
  %d = insertvalue { ptr, i32 } %e, ptr null, 0
  %cmp1 = icmp sgt i32 %out, %i.tr
  br i1 %cmp1, label %then, label %ifend

then: ; preds = %tailrecurse
  call void @foo()
  %cmp2 = add i32 %i.tr, 1
  br label %tailrecurse

ifend: ; preds = %tailrecurse
  ret { ptr, i32 } %d
}

; Unlike the sdiv in @test4, the fneg here is expected to be hoisted into the
; entry block even though it follows the call to @foo_may_call_exit.
define void @test6(float %f) #2 {
; CHECK-LABEL: @test6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[NEG:%.*]] = fneg float [[F:%.*]]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    call void @foo_may_call_exit(i32 0)
; CHECK-NEXT:    call void @use(float [[NEG]])
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 10000
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  call void @foo_may_call_exit(i32 0)
  %neg = fneg float %f
  call void @use(float %neg)
  %inc = add nsw i32 %i, 1
  %cmp = icmp slt i32 %inc, 10000
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
  ret void
}

declare void @use(float)

; The bitreverse intrinsic call lives in the conditionally executed `body`
; block and takes only the function argument; the expected output hoists it
; into the entry block.
define i32 @hoist_bitreverse(i32 %0) {
; CHECK-LABEL: @hoist_bitreverse(
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP0:%.*]])
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       header:
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[TMP1:%.*]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP6:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1024
; CHECK-NEXT:    br i1 [[TMP4]], label [[BODY:%.*]], label [[RETURN:%.*]]
; CHECK:       body:
; CHECK-NEXT:    [[TMP5]] = add i32 [[SUM]], [[TMP2]]
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[TMP6]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    br label [[HEADER]]
; CHECK:       return:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[HEADER]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
  br label %header

header:
  %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
  %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
  %3 = icmp slt i32 %2, 1024
  br i1 %3, label %body, label %return

body:
  %4 = call i32 @llvm.bitreverse.i32(i32 %0)
  %5 = add i32 %sum, %4
  br label %latch

latch:
  %6 = add nsw i32 %2, 1
  br label %header

return:
  ret i32 %sum
}

; Can neither sink nor hoist
; (the volatile load of @X is expected to remain inside the loop).
define i32 @test_volatile(i1 %c) {
; CHECK-LABEL: @test_volatile(
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       Loop:
; CHECK-NEXT:    [[A:%.*]] = load volatile i32, ptr @X, align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
; CHECK:       Out:
; CHECK-NEXT:    [[A_LCSSA:%.*]] = phi i32 [ [[A]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[A_LCSSA]]
;
  br label %Loop

Loop:
  %A = load volatile i32, ptr @X
  br i1 %c, label %Loop, label %Out

Out:
  ret i32 %A
}


declare ptr @llvm.invariant.start.p0(i64, ptr nocapture) nounwind readonly
declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind
declare void @escaping.invariant.start(ptr) nounwind
; invariant.start dominates the load, and in this scope, the
; load is invariant. So, we can hoist the `addrld` load out of the loop.
define i32 @test_fence(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %gep = getelementptr inbounds i8, ptr %addr, i64 8
  store atomic i32 5, ptr %gep unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %gep unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}



; Same as test above, but the load is no longer invariant (presence of
; invariant.end). We cannot hoist the addrld out of loop.
define i32 @test_fence1(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
; CHECK-NEXT:    call void @llvm.invariant.end.p0(ptr [[INVST]], i64 4, ptr [[GEP]])
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %gep = getelementptr inbounds i8, ptr %addr, i64 8
  store atomic i32 5, ptr %gep unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
  call void @llvm.invariant.end.p0(ptr %invst, i64 4, ptr %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %gep unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; same as test above, but instead of invariant.end, we have the result of
; invariant.start escaping through a call. We cannot hoist the load.
define i32 @test_fence2(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
; CHECK-NEXT:    call void @escaping.invariant.start(ptr [[INVST]])
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %gep = getelementptr inbounds i8, ptr %addr, i64 8
  store atomic i32 5, ptr %gep unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
  call void @escaping.invariant.start(ptr %invst)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %gep unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; Consider the load operand addr.i bitcasted before being passed to
; invariant.start
; NOTE(review): the IR below contains no bitcast; %addr.i (a GEP over i32
; elements) is passed to invariant.start directly. The wording above
; presumably predates opaque pointers -- confirm. The expected output hoists
; the `addrld` load into the entry block.
define i32 @test_fence3(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_I:%.*]] = getelementptr inbounds i32, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[ADDR_I]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[ADDR_I]])
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[ADDR_I]] unordered, align 8
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %addr.i = getelementptr inbounds i32, ptr %addr, i64 8
  store atomic i32 5, ptr %addr.i unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %addr.i)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; We should not hoist the addrld out of the loop.
; In this variant the atomic store, the release fence and the invariant.start
; call are themselves inside the loop body, ahead of the load.
define i32 @test_fence4(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_I:%.*]] = getelementptr inbounds i32, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    store atomic i32 5, ptr [[ADDR_I]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[ADDR_I]])
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[ADDR_I]] unordered, align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %addr.i = getelementptr inbounds i32, ptr %addr, i64 8
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  store atomic i32 5, ptr %addr.i unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %addr.i)
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; We can't hoist the invariant load out of the loop because
; the marker is given a variable size (-1).
define i32 @test_fence5(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 -1, ptr [[GEP]])
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %gep = getelementptr inbounds i8, ptr %addr, i64 8
  store atomic i32 5, ptr %gep unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 -1, ptr %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %gep unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

declare void @g(i1)

@a = external global i8

; FIXME: Support hoisting invariant loads of globals.
; The expected output currently keeps the load of @a inside block `f`, even
; though invariant.start on @a is called in the entry block.
define void @test_fence6() {
; CHECK-LABEL: @test_fence6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr @a)
; CHECK-NEXT:    br label [[F:%.*]]
; CHECK:       f:
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr @a, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], 0
; CHECK-NEXT:    [[T:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT:    tail call void @g(i1 [[T]])
; CHECK-NEXT:    br label [[F]]
;
entry:
  %i = call ptr @llvm.invariant.start.p0(i64 1, ptr @a)
  br label %f

f:
  %0 = load i8, ptr @a
  %1 = and i8 %0, 0
  %t = icmp eq i8 %1, 0
  tail call void @g(i1 %t)
  br label %f
}