; RUN: opt < %s -S -passes='loop(loop-flatten),verify' -verify-loop-info -verify-dom-info -verify-scev | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; We should be able to flatten the loops and turn the two geps into one.
; CHECK-LABEL: test1
; Input: a two-level loop nest in which both induction variables (%i outer,
; %j inner) start at 0 and are compared (unsigned) against %N. Every inner
; iteration stores 0 through a pointer built from two chained geps:
; %A + %i * %N, then + %j. The FileCheck directives interleaved with the
; blocks below describe the expected output after flattening.
define void @test1(i32 %N, ptr %A) {
entry:
  ; Guard: the loop nest is skipped entirely when %N is 0.
  %cmp3 = icmp ult i32 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
; (Expected: the combined trip count N * N is computed in this block.)
for.outer.preheader:
  br label %for.inner.preheader

; CHECK-LABEL: for.inner.preheader:
; CHECK: %flatten.arrayidx = getelementptr i32, ptr %A, i32 %i
; (Expected: a single gep off %A indexed by %i appears in this block.)
for.inner.preheader:
  ; Header of the outer loop: %i is 0 on entry and %inc2 on the back edge
  ; coming from for.outer.
  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; CHECK-LABEL: for.inner:
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
; (Expected: the store uses the new gep and the inner back edge is gone, so
; this block branches unconditionally to for.outer.)
for.inner:
  ; Inner loop: %j is 0 on entry and %inc1 on the self back edge.
  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Row offset i * N; it does not depend on %j.
  %mul = mul i32 %i, %N
  ; Two-step address computation: first the row (%gep), then the column
  ; within the row (%arrayidx).
  %gep = getelementptr inbounds i32, ptr %A, i32 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
  store i32 0, ptr %arrayidx, align 4
  ; The inner increment carries nuw; the outer increment (%inc2) does not.
  %inc1 = add nuw i32 %j, 1
  %cmp2 = icmp ult i32 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
; (Expected: the outer exit compare now tests against the combined trip
; count instead of %N.)
for.outer:
  ; Outer latch: advance %i and loop back while %inc2 <u %N.
  %inc2 = add i32 %i, 1
  %cmp1 = icmp ult i32 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; We can flatten, but the flattened gep has to be inserted after the load it
; depends on.
; CHECK-LABEL: test2
; Same loop nest as @test1, except that the base pointer of the geps is not
; the argument %A itself but %ptr, a value loaded (volatile) from %A inside
; the inner loop body. The FileCheck directives below require the flattened
; gep to be absent from for.inner.preheader and present in for.inner, where
; %ptr is defined.
define void @test2(i32 %N, ptr %A) {
entry:
  ; Guard: the loop nest is skipped entirely when %N is 0.
  %cmp3 = icmp ult i32 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
; (Expected: the combined trip count N * N is computed in this block.)
for.outer.preheader:
  br label %for.inner.preheader

; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr i32, ptr %ptr, i32 %i
; (Expected: no flattened gep in this block; %ptr is only defined later, in
; for.inner.)
for.inner.preheader:
  ; Header of the outer loop: %i is 0 on entry and %inc2 on the back edge
  ; coming from for.outer.
  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; CHECK-LABEL: for.inner:
; CHECK: %flatten.arrayidx = getelementptr i32, ptr %ptr, i32 %i
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
; (Expected: the flattened gep is emitted in this block, the store uses it,
; and the block branches unconditionally to for.outer.)
for.inner:
  ; Inner loop: %j is 0 on entry and %inc1 on the self back edge.
  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Base pointer for this iteration, produced by a volatile load; both geps
  ; below depend on it.
  %ptr = load volatile ptr, ptr %A, align 4
  ; Row offset i * N; it does not depend on %j.
  %mul = mul i32 %i, %N
  ; Two-step address computation off %ptr: row first, then column.
  %gep = getelementptr inbounds i32, ptr %ptr, i32 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
  store i32 0, ptr %arrayidx, align 4
  ; The inner increment carries nuw; the outer increment (%inc2) does not.
  %inc1 = add nuw i32 %j, 1
  %cmp2 = icmp ult i32 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
; (Expected: the outer exit compare now tests against the combined trip
; count instead of %N.)
for.outer:
  ; Outer latch: advance %i and loop back while %inc2 <u %N.
  %inc2 = add i32 %i, 1
  %cmp1 = icmp ult i32 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; We can't flatten if the gep offset type (i16 here) is narrower than the
; pointer size (32 bits, per the p:32:32 entry in the datalayout above).
; CHECK-LABEL: test3
; Negative test. The loop nest has the same shape as @test1, but %N, both
; induction variables and both gep indices are i16, while the datalayout at
; the top of the file declares 32-bit pointers. The FileCheck directives
; below require that no flattened gep is created in either block and that the
; inner loop's conditional back edge is still present.
define void @test3(i16 %N, ptr %A) {
entry:
  ; Guard: the loop nest is skipped entirely when %N is 0.
  %cmp3 = icmp ult i16 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.inner.preheader

; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr i32, ptr %A, i16 %i
; (Expected: no flattened gep in this block.)
for.inner.preheader:
  ; Header of the outer loop: %i is 0 on entry and %inc2 on the back edge
  ; coming from for.outer.
  %i = phi i16 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; CHECK-LABEL: for.inner:
; CHECK-NOT: getelementptr i32, ptr %A, i16 %i
; CHECK: br i1 %cmp2, label %for.inner, label %for.outer
; (Expected: no flattened gep here either, and the inner loop keeps its
; conditional branch, i.e. the nest is left unflattened.)
for.inner:
  ; Inner loop: %j is 0 on entry and %inc1 on the self back edge.
  %j = phi i16 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Row offset i * N, computed in i16.
  %mul = mul i16 %i, %N
  ; Two-step address computation using i16 indices on a 32-bit pointer.
  %gep = getelementptr inbounds i32, ptr %A, i16 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i16 %j
  store i32 0, ptr %arrayidx, align 4
  ; The inner increment carries nuw; the outer increment (%inc2) does not.
  %inc1 = add nuw i16 %j, 1
  %cmp2 = icmp ult i16 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

for.outer:
  ; Outer latch: advance %i and loop back while %inc2 <u %N.
  %inc2 = add i16 %i, 1
  %cmp1 = icmp ult i16 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}