// RUN: mlir-opt %s -scf-for-loop-peeling -canonicalize -split-input-file -verify-diagnostics | FileCheck %s // RUN: mlir-opt %s -scf-for-loop-peeling=skip-partial=false -canonicalize -split-input-file -verify-diagnostics | FileCheck %s -check-prefix=CHECK-NO-SKIP // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (-s0 + s1) mod s2)> // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)> // CHECK: func @fully_dynamic_bounds( // CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index // CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]] // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]] // CHECK-SAME: step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { // CHECK: %[[CAST:.*]] = arith.index_cast %[[STEP]] : index to i32 // CHECK: %[[ADD:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]] // CHECK-SAME: step %[[STEP]] iter_args(%[[ACC2:.*]] = %[[LOOP]]) -> (i32) { // CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]] // CHECK: %[[CAST2:.*]] = arith.index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = arith.addi %[[ACC2]], %[[CAST2]] // CHECK: scf.yield %[[ADD2]] // CHECK: } // CHECK: return %[[RESULT]] #map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> func.func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 { %c0 = arith.constant 0 : i32 %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0) -> i32 { %s = affine.min #map(%ub, %iv)[%step] %casted = arith.index_cast %s : index to i32 %0 = arith.addi %arg, %casted : i32 scf.yield %0 : i32 } return %r : i32 } // ----- // CHECK: func @fully_static_bounds( // CHECK-DAG: %[[C0_I32:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[C1_I32:.*]] = arith.constant 1 : i32 // CHECK-DAG: %[[C4_I32:.*]] = arith.constant 4 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { // CHECK: %[[ADD:.*]] = arith.addi %[[ACC]], %[[C4_I32]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[RESULT:.*]] = arith.addi %[[LOOP]], %[[C1_I32]] : i32 // CHECK: return %[[RESULT]] #map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> func.func @fully_static_bounds() -> i32 { %c0_i32 = arith.constant 0 : i32 %lb = arith.constant 0 : index %step = arith.constant 4 : index %ub = arith.constant 17 : index %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0_i32) -> i32 { %s = affine.min #map(%ub, %iv)[%step] %casted = arith.index_cast %s : index to i32 %0 = arith.addi %arg, %casted : i32 scf.yield %0 : i32 } return %r : i32 } // ----- // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)> // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)> // CHECK: func @dynamic_upper_bound( // CHECK-SAME: %[[UB:.*]]: index // CHECK-DAG: %[[C0_I32:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[C4_I32:.*]] = arith.constant 4 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]] // CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) { // CHECK: %[[ADD:.*]] = arith.addi %[[ACC]], %[[C4_I32]] : i32 // CHECK: scf.yield %[[ADD]] // CHECK: } // CHECK: %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]] // CHECK-SAME: step %[[C4]] iter_args(%[[ACC2:.*]] = %[[LOOP]]) -> (i32) { // CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]] // CHECK: %[[CAST2:.*]] = arith.index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = arith.addi %[[ACC2]], %[[CAST2]] // CHECK: scf.yield %[[ADD2]] // CHECK: } // CHECK: return %[[RESULT]] #map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> func.func @dynamic_upper_bound(%ub : index) -> i32 { %c0_i32 = arith.constant 0 : i32 %lb = arith.constant 0 : index %step = arith.constant 4 : index %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0_i32) -> i32 { %s = affine.min #map(%ub, %iv)[%step] %casted = arith.index_cast %s : index to i32 %0 = arith.addi %arg, %casted : i32 scf.yield %0 : i32 } return %r : i32 } // ----- // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)> // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)> // CHECK: func @no_loop_results( // CHECK-SAME: %[[UB:.*]]: index, %[[MEMREF:.*]]: memref // CHECK-DAG: %[[C4_I32:.*]] = arith.constant 4 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index // CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]] // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] { // CHECK: %[[LOAD:.*]] = memref.load %[[MEMREF]][] // CHECK: %[[ADD:.*]] = arith.addi %[[LOAD]], %[[C4_I32]] : i32 // CHECK: memref.store %[[ADD]], %[[MEMREF]] // CHECK: } // CHECK: scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]] step %[[C4]] { // CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]] // CHECK: %[[LOAD2:.*]] = memref.load %[[MEMREF]][] // CHECK: %[[CAST2:.*]] = arith.index_cast %[[REM]] // CHECK: %[[ADD2:.*]] = arith.addi %[[LOAD2]], %[[CAST2]] // CHECK: memref.store %[[ADD2]], %[[MEMREF]] // CHECK: } // CHECK: return #map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> func.func @no_loop_results(%ub : index, %d : memref) { %c0_i32 = arith.constant 0 : i32 %lb = arith.constant 0 : index %step = arith.constant 4 : index scf.for %iv = %lb to %ub step %step { %s = affine.min #map(%ub, %iv)[%step] %r = memref.load %d[] : memref %casted = arith.index_cast %s : index to i32 %0 = arith.addi %r, %casted : i32 memref.store %0, %d[] : memref } return } // ----- // Test rewriting of affine.min/max ops. Make sure that more general cases than // the ones above are successfully rewritten. Also make sure that the pattern // does not rewrite ops that should not be rewritten. // CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)> // CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s1 - 1, s0)> // CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (-d0 + s1, s2, s0)> // CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0] -> (-s0)> // CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)> // CHECK-DAG: #[[MAP6:.*]] = affine_map<(d0)[s0] -> (-d0 + s0 + 1)> // CHECK-DAG: #[[MAP7:.*]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)> // CHECK-DAG: #[[MAP8:.*]] = affine_map<(d0)[s0] -> (d0 - s0)> // CHECK: func @test_affine_op_rewrite( // CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index, // CHECK-SAME: %[[MEMREF:.*]]: memref, %[[SOME_VAL:.*]]: index // CHECK: scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] { // (affine.min folded away) // CHECK: memref.store %[[STEP]] // (affine.min folded away) // CHECK: memref.store %[[STEP]] // CHECK: %[[RES2:.*]] = affine.apply #[[MAP1]]()[%[[STEP]]] // CHECK: memref.store %[[RES2]] // CHECK: %[[RES3:.*]] = affine.min #[[MAP2]](%[[IV]])[%[[STEP]], %[[UB]]] // CHECK: memref.store %[[RES3]] // CHECK: %[[RES4:.*]] = affine.min #[[MAP3]](%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]] // CHECK: memref.store %[[RES4]] // CHECK: %[[RES5:.*]] = affine.apply #[[MAP4]]()[%[[STEP]]] // CHECK: memref.store %[[RES5]] // CHECK: } // CHECK: scf.for %[[IV2:.*]] = {{.*}} to %[[UB]] step %[[STEP]] { // CHECK: %[[RES_IF_0:.*]] = affine.apply #[[MAP5]](%[[IV2]])[%[[UB]]] // CHECK: memref.store %[[RES_IF_0]] // CHECK: %[[RES_IF_1:.*]] = affine.apply #[[MAP6]](%[[IV2]])[%[[UB]]] // CHECK: memref.store %[[RES_IF_1]] // CHECK: %[[RES_IF_2:.*]] = affine.apply #[[MAP6]](%[[IV2]])[%[[UB]]] // CHECK: memref.store %[[RES_IF_2]] // CHECK: %[[RES_IF_3:.*]] = affine.apply #[[MAP7]](%[[IV2]])[%[[UB]]] // CHECK: memref.store %[[RES_IF_3]] // CHECK: %[[RES_IF_4:.*]] = affine.min #[[MAP3]](%[[IV2]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]] // CHECK: memref.store %[[RES_IF_4]] // CHECK: %[[RES_IF_5:.*]] = affine.apply #[[MAP8]](%[[IV2]])[%[[UB]]] // CHECK: memref.store %[[RES_IF_5]] #map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> #map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)> #map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)> #map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)> #map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)> #map5 = affine_map<(d0, d1)[s0] -> (-s0, -d0 + d1)> func.func @test_affine_op_rewrite(%lb : index, %ub: index, %step: index, %d : memref, %some_val: index) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %c5 = arith.constant 5 : index scf.for %iv = %lb to %ub step %step { // Most common case: Rewrite min(%ub - %iv, %step) to %step. %m0 = affine.min #map0(%ub, %iv)[%step] memref.store %m0, %d[%c0] : memref // Increase %ub - %iv a little bit, pattern should still apply. %m1 = affine.min #map1(%ub, %iv)[%step] memref.store %m1, %d[%c1] : memref // Rewrite min(%ub - %iv + 1, %step + 1) to %step + 1. %m2 = affine.min #map2(%ub, %iv)[%step] memref.store %m2, %d[%c2] : memref // min(%ub - %iv - 1, %step) cannot be simplified because %ub - %iv - 1 // can be smaller than %step. (Can be simplified in if-statement.) %m3 = affine.min #map3(%ub, %iv)[%step] memref.store %m3, %d[%c3] : memref // min(%ub - %iv, %step, %some_val) cannot be simplified because the range // of %some_val is unknown. %m4 = affine.min #map4(%ub, %iv, %some_val)[%step] memref.store %m4, %d[%c4] : memref // Rewrite max(-%ub + %iv, -%step) to -%ub + %iv (and -%step in the scf.if). %m5 = affine.max #map5(%ub, %iv)[%step] memref.store %m5, %d[%c5] : memref } return } // ----- // CHECK: func @nested_loops // CHECK: scf.for {{.*}} { // CHECK: scf.for {{.*}} { // CHECK: } // CHECK: scf.for {{.*}} { // CHECK: } // CHECK: } // CHECK: scf.for {{.*}} { // CHECK: scf.for {{.*}} { // CHECK: } // CHECK-NOT: scf.for // CHECK: } // CHECK-NO-SKIP: func @nested_loops // CHECK-NO-SKIP: scf.for {{.*}} { // CHECK-NO-SKIP: scf.for {{.*}} { // CHECK-NO-SKIP: } // CHECK-NO-SKIP: scf.for {{.*}} { // CHECK-NO-SKIP: } // CHECK-NO-SKIP: } // CHECK-NO-SKIP: scf.for {{.*}} { // CHECK-NO-SKIP: scf.for {{.*}} { // CHECK-NO-SKIP: } // CHECK-NO-SKIP: scf.for {{.*}} { // CHECK-NO-SKIP: } // CHECK-NO-SKIP: } #map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)> func.func @nested_loops(%lb0: index, %lb1 : index, %ub0: index, %ub1: index, %step: index) -> i32 { %c0 = arith.constant 0 : i32 %r0 = scf.for %iv0 = %lb0 to %ub0 step %step iter_args(%arg0 = %c0) -> i32 { %r1 = scf.for %iv1 = %lb1 to %ub1 step %step iter_args(%arg1 = %arg0) -> i32 { %s = affine.min #map(%ub1, %iv1)[%step] %casted = arith.index_cast %s : index to i32 %0 = arith.addi %arg1, %casted : i32 scf.yield %0 : i32 } %1 = arith.addi %arg0, %r1 : i32 scf.yield %1 : i32 } return %r0 : i32 } // ----- // CHECK-LABEL: func @regression func.func @regression(%arg0: memref, %arg1: index) { %c0 = arith.constant 0 : index %0 = affine.apply affine_map<()[s0] -> (s0 * s0)>()[%arg1] scf.for %arg2 = %c0 to %0 step %arg1 { %1 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + s0 * s0)>(%arg2)[%arg1] %2 = arith.index_cast %0 : index to i64 memref.store %2, %arg0[] : memref } return } // ----- // Regression test: Make sure that we do not crash. // CHECK-LABEL: func @zero_step( // CHECK: scf.for // CHECK: scf.for func.func @zero_step(%arg0: memref) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %foldto0 = arith.subi %c1, %c1 : index scf.for %arg2 = %c0 to %c1 step %foldto0 { %2 = arith.index_cast %arg2 : index to i64 memref.store %2, %arg0[] : memref } return }