; RUN: opt < %s -S -passes='loop(loop-flatten),verify' -verify-loop-info -verify-dom-info -verify-scev | FileCheck %s

; The "p:32:32" component declares 32-bit pointers, so the i32 GEP indices
; used in test1 are exactly pointer-width.
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; We should be able to flatten the loops and turn the two geps into one.
;
; The IR is a guarded, bottom-tested form of:
;   for (i = 0; i < N; ++i)
;     for (j = 0; j < N; ++j)
;       A[i * N + j] = 0;
; The FileCheck directives expect a trip count of N * N to be computed in the
; outer preheader, a single GEP indexed directly by %i to replace the
; %gep/%arrayidx pair, the inner back-edge to become an unconditional branch,
; and the outer exit compare to test against the new trip count.
; CHECK-LABEL: test1
define void @test1(i32 %N, ptr %A) {
entry:
  ; Guard: skip the whole nest when %N is zero.
  %cmp3 = icmp ult i32 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
for.outer.preheader:
  br label %for.inner.preheader

; CHECK-LABEL: for.inner.preheader:
; CHECK: %flatten.arrayidx = getelementptr i32, ptr %A, i32 %i
for.inner.preheader:
  ; Outer induction variable; this block doubles as the outer loop header.
  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; CHECK-LABEL: for.inner:
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
for.inner:
  ; Inner induction variable.
  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Address computation split in two steps: &A[i * N], then + j.
  %mul = mul i32 %i, %N
  %gep = getelementptr inbounds i32, ptr %A, i32 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i32 %j, 1
  %cmp2 = icmp ult i32 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
for.outer:
  ; Outer latch: step %i and loop while it is below %N.
  %inc2 = add i32 %i, 1
  %cmp1 = icmp ult i32 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; We can flatten, but the flattened gep has to be inserted after the load it
; depends on.
; CHECK-LABEL: test2
define void @test2(i32 %N, ptr %A) {
entry:
  ; Guard: skip the whole nest when %N is zero.
  %cmp3 = icmp ult i32 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
for.outer.preheader:
  br label %for.inner.preheader

; The base pointer %ptr is only defined inside for.inner, so no GEP on it may
; appear in this block.
; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr i32, ptr %ptr, i32 %i
for.inner.preheader:
  ; Outer induction variable; this block doubles as the outer loop header.
  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; The combined GEP is expected here instead, alongside the load of %ptr.
; CHECK-LABEL: for.inner:
; CHECK: %flatten.arrayidx = getelementptr i32, ptr %ptr, i32 %i
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
for.inner:
  ; Inner induction variable.
  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Base pointer is re-read from memory on every iteration (volatile load).
  %ptr = load volatile ptr, ptr %A, align 4
  ; Address computation split in two steps: &ptr[i * N], then + j.
  %mul = mul i32 %i, %N
  %gep = getelementptr inbounds i32, ptr %ptr, i32 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i32 %j, 1
  %cmp2 = icmp ult i32 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
for.outer:
  ; Outer latch: step %i and loop while it is below %N.
  %inc2 = add i32 %i, 1
  %cmp1 = icmp ult i32 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; We can't flatten if the gep offset is smaller than the pointer size.
; CHECK-LABEL: test3
; Same loop nest shape as an N x N zero-fill of %A, but every induction
; variable, bound and GEP index is i16. The directives below require that no
; GEP indexed directly by %i is created and that the inner loop keeps its
; conditional back-edge, i.e. the nest is left unflattened.
define void @test3(i16 %N, ptr %A) {
entry:
  ; Guard: skip the whole nest when %N is zero.
  %cmp3 = icmp ult i16 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.inner.preheader

; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr i32, ptr %A, i16 %i
for.inner.preheader:
  ; Outer induction variable; this block doubles as the outer loop header.
  %i = phi i16 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; CHECK-LABEL: for.inner:
; CHECK-NOT: getelementptr i32, ptr %A, i16 %i
; CHECK: br i1 %cmp2, label %for.inner, label %for.outer
for.inner:
  ; Inner induction variable.
  %j = phi i16 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Address computation split in two steps, both with i16 indices.
  %mul = mul i16 %i, %N
  %gep = getelementptr inbounds i32, ptr %A, i16 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i16 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i16 %j, 1
  %cmp2 = icmp ult i16 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

for.outer:
  ; Outer latch: step %i and loop while it is below %N.
  %inc2 = add i16 %i, 1
  %cmp1 = icmp ult i16 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}