// RUN: mlir-opt %s -scf-for-loop-canonicalization -split-input-file | FileCheck %s

// %i takes the values {0, 2}, so `4 - %i` is always >= 2 and the affine.min
// is expected to fold to the constant 2.
// CHECK-LABEL: func @scf_for_canonicalize_min
//       CHECK:   %[[C2:.*]] = arith.constant 2 : i64
//       CHECK:   scf.for
//       CHECK:     memref.store %[[C2]], %{{.*}}[] : memref<i64>
func.func @scf_for_canonicalize_min(%A : memref<i64>) {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index

  scf.for %i = %c0 to %c4 step %c2 {
    %1 = affine.min affine_map<(d0, d1)[] -> (2, d1 - d0)> (%i, %c4)
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// Mirror image of the min case: `-(4 - %i)` is always <= -2 for %i in {0, 2},
// so the affine.max is expected to fold to the constant -2.
// CHECK-LABEL: func @scf_for_canonicalize_max
//       CHECK:   %[[Cneg2:.*]] = arith.constant -2 : i64
//       CHECK:   scf.for
//       CHECK:     memref.store %[[Cneg2]], %{{.*}}[] : memref<i64>
func.func @scf_for_canonicalize_max(%A : memref<i64>) {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index

  scf.for %i = %c0 to %c4 step %c2 {
    %1 = affine.max affine_map<(d0, d1)[] -> (-2, -(d1 - d0))> (%i, %c4)
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// The map operand is %c3 rather than the loop upper bound %c4:
// `-(3 - %i)` takes the values {-3, -1}, so the max is not a constant and the
// affine.max must be kept.
// CHECK-LABEL: func @scf_for_max_not_canonicalizable
//       CHECK:   scf.for
//       CHECK:     affine.max
//       CHECK:     arith.index_cast
func.func @scf_for_max_not_canonicalizable(%A : memref<i64>) {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index

  scf.for %i = %c0 to %c4 step %c2 {
    %1 = affine.max affine_map<(d0, d1)[] -> (-2, -(d1 - d0))> (%i, %c3)
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// Two nested loops: %i in {0, 2} and %j in {0, 3}, so `4 + 6 - %i - %j` is
// always >= 5 and the affine.min is expected to fold to the constant 5.
// CHECK-LABEL: func @scf_for_loop_nest_canonicalize_min
//       CHECK:   %[[C5:.*]] = arith.constant 5 : i64
//       CHECK:   scf.for
//       CHECK:     scf.for
//       CHECK:       memref.store %[[C5]], %{{.*}}[] : memref<i64>
func.func @scf_for_loop_nest_canonicalize_min(%A : memref<i64>) {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index
  %c6 = arith.constant 6 : index

  scf.for %i = %c0 to %c4 step %c2 {
    scf.for %j = %c0 to %c6 step %c3 {
      %1 = affine.min affine_map<(d0, d1, d2, d3)[] -> (5, d1 + d3 - d0 - d2)> (%i, %c4, %j, %c6)
      %2 = arith.index_cast %1: index to i64
      memref.store %2, %A[]: memref<i64>
    }
  }
  return
}

// -----

// CHECK-LABEL: func @scf_for_not_canonicalizable_1
//       CHECK:   scf.for
//       CHECK:     affine.min
//       CHECK:     arith.index_cast
func.func @scf_for_not_canonicalizable_1(%A : memref<i64>) {
  // This should not canonicalize because: 4 - %i may take the value 1 < 2.
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index

  scf.for %i = %c1 to %c4 step %c2 {
    %1 = affine.min affine_map<(d0)[s0] -> (2, s0 - d0)> (%i)[%c4]
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// CHECK-LABEL: func @scf_for_canonicalize_partly
//       CHECK:   scf.for
//       CHECK:     affine.apply
//       CHECK:     arith.index_cast
func.func @scf_for_canonicalize_partly(%A : memref<i64>) {
  // This should canonicalize only partly: 256 - %i <= 256.
  // The min therefore reduces to the single expression `256 - %i`, which is
  // expected to show up as an affine.apply.
  %c1 = arith.constant 1 : index
  %c16 = arith.constant 16 : index
  %c256 = arith.constant 256 : index

  scf.for %i = %c1 to %c256 step %c16 {
    %1 = affine.min affine_map<(d0) -> (256, 256 - d0)> (%i)
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// CHECK-LABEL: func @scf_for_not_canonicalizable_2
//       CHECK:   scf.for
//       CHECK:     affine.min
//       CHECK:     arith.index_cast
func.func @scf_for_not_canonicalizable_2(%A : memref<i64>, %step : index) {
  // This example should simplify but affine_map is currently missing
  // semi-affine canonicalizations: `((s0 * 42 - 1) floordiv s0) * s0`
  // should evaluate to 41 * s0.
  // Note that this may require positivity assumptions on `s0`.
  // Revisit when support is added.
  %c0 = arith.constant 0 : index

  %ub = affine.apply affine_map<(d0) -> (42 * d0)> (%step)
  scf.for %i = %c0 to %ub step %step {
    %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)> (%step, %ub, %i)
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// CHECK-LABEL: func @scf_for_not_canonicalizable_3
//       CHECK:   scf.for
//       CHECK:     affine.min
//       CHECK:     arith.index_cast
func.func @scf_for_not_canonicalizable_3(%A : memref<i64>, %step : index) {
  // This example should simplify but affine_map is currently missing
  // semi-affine canonicalizations: `-(((s0 * s0 - 1) floordiv s0) * s0)`
  // should evaluate to (s0 - 1) * s0.
  // Note that this may require positivity assumptions on `s0`.
  // Revisit when support is added.
  %c0 = arith.constant 0 : index

  %ub2 = affine.apply affine_map<(d0)[s0] -> (s0 * d0)> (%step)[%step]
  scf.for %i = %c0 to %ub2 step %step {
    %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d2 - d1)> (%step, %i, %ub2)
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// CHECK-LABEL: func @scf_for_invalid_loop
//       CHECK:   scf.for
//       CHECK:     affine.min
//       CHECK:     arith.index_cast
func.func @scf_for_invalid_loop(%A : memref<i64>, %step : index) {
  // This is an invalid loop. It should not be touched by the canonicalization
  // pattern.
  // (The lower bound 256 is greater than the upper bound 1.)
  %c1 = arith.constant 1 : index
  %c7 = arith.constant 7 : index
  %c256 = arith.constant 256 : index

  scf.for %i = %c256 to %c1 step %c1 {
    %1 = affine.min affine_map<(d0)[s0] -> (s0 + d0, 0)> (%i)[%c7]
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// Same bounds and map as the scf.for min test, expressed with scf.parallel:
// the affine.min is expected to fold to the constant 2.
// CHECK-LABEL: func @scf_parallel_canonicalize_min_1
//       CHECK:   %[[C2:.*]] = arith.constant 2 : i64
//       CHECK:   scf.parallel
//  CHECK-NEXT:     memref.store %[[C2]], %{{.*}}[] : memref<i64>
func.func @scf_parallel_canonicalize_min_1(%A : memref<i64>) {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index

  scf.parallel (%i) = (%c0) to (%c4) step (%c2) {
    %1 = affine.min affine_map<(d0, d1)[] -> (2, d1 - d0)> (%i, %c4)
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// %i takes the values {1, 3, 5}, so `7 - %i` is always >= 2 and the
// affine.min is expected to fold to the constant 2.
// CHECK-LABEL: func @scf_parallel_canonicalize_min_2
//       CHECK:   %[[C2:.*]] = arith.constant 2 : i64
//       CHECK:   scf.parallel
//  CHECK-NEXT:     memref.store %[[C2]], %{{.*}}[] : memref<i64>
func.func @scf_parallel_canonicalize_min_2(%A : memref<i64>) {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c7 = arith.constant 7 : index

  scf.parallel (%i) = (%c1) to (%c7) step (%c2) {
    %1 = affine.min affine_map<(d0)[s0] -> (2, s0 - d0)> (%i)[%c7]
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// The loop yields its tensor iter_arg unchanged, so the tensor.dim inside the
// loop is expected to query the function argument instead of the iter_arg.
// CHECK-LABEL: func @tensor_dim_of_iter_arg(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?xf32>
//       CHECK:   scf.for
//       CHECK:     tensor.dim %[[t]]
func.func @tensor_dim_of_iter_arg(%t : tensor<?x?xf32>) -> index {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %0, %1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%arg0 = %t, %arg1 = %c0)
      -> (tensor<?x?xf32>, index) {
    %dim = tensor.dim %arg0, %c0 : tensor<?x?xf32>
    scf.yield %arg0, %dim : tensor<?x?xf32>, index
  }
  return %1 : index
}

// -----

// The yielded tensor is produced by a chain of tensor.insert_slice ops whose
// destination is the iter_arg; the tensor.dim is still expected to query the
// function argument.
// CHECK-LABEL: func @tensor_dim_of_iter_arg_insertslice(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?xf32>,
//       CHECK:   scf.for
//       CHECK:     tensor.dim %[[t]]
func.func @tensor_dim_of_iter_arg_insertslice(%t : tensor<?x?xf32>,
                                              %t2 : tensor<10x10xf32>) -> index {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %0, %1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%arg0 = %t, %arg1 = %c0)
      -> (tensor<?x?xf32>, index) {
    %dim = tensor.dim %arg0, %c0 : tensor<?x?xf32>
    %2 = tensor.insert_slice %t2 into %arg0[0, 0] [10, 10] [1, 1]
        : tensor<10x10xf32> into tensor<?x?xf32>
    %3 = tensor.insert_slice %t2 into %2[1, 1] [10, 10] [1, 1]
        : tensor<10x10xf32> into tensor<?x?xf32>
    scf.yield %3, %dim : tensor<?x?xf32>, index
  }
  return %1 : index
}

// -----

// Nested loops: the inner iter_arg is initialized from the outer iter_arg,
// which is initialized from %t. The tensor.dim in the inner loop is expected
// to query the function argument.
// CHECK-LABEL: func @tensor_dim_of_iter_arg_nested_for(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?xf32>,
//       CHECK:   scf.for
//       CHECK:     scf.for
//       CHECK:       tensor.dim %[[t]]
func.func @tensor_dim_of_iter_arg_nested_for(%t : tensor<?x?xf32>,
                                             %t2 : tensor<10x10xf32>) -> index {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %0, %1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%arg0 = %t, %arg1 = %c0)
      -> (tensor<?x?xf32>, index) {
    %2, %3 = scf.for %j = %c0 to %c10 step %c1 iter_args(%arg2 = %arg0, %arg3 = %arg1)
        -> (tensor<?x?xf32>, index) {
      %dim = tensor.dim %arg2, %c0 : tensor<?x?xf32>
      %4 = tensor.insert_slice %t2 into %arg2[0, 0] [10, 10] [1, 1]
          : tensor<10x10xf32> into tensor<?x?xf32>
      scf.yield %4, %dim : tensor<?x?xf32>, index
    }
    scf.yield %2, %3 : tensor<?x?xf32>, index
  }
  return %1 : index
}

// -----

// A test case that should not canonicalize because the loop is not shape
// conserving.
// CHECK-LABEL: func @tensor_dim_of_iter_arg_no_canonicalize(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?xf32>,
//       CHECK:   scf.for {{.*}} iter_args(%[[arg0:.*]] = %[[t]]
//       CHECK:     tensor.dim %[[arg0]]
func.func @tensor_dim_of_iter_arg_no_canonicalize(%t : tensor<?x?xf32>,
                                                  %t2 : tensor<?x?xf32>) -> index {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %0, %1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%arg0 = %t, %arg1 = %c0)
      -> (tensor<?x?xf32>, index) {
    %dim = tensor.dim %arg0, %c0 : tensor<?x?xf32>
    // The loop yields the unrelated tensor %t2 instead of the iter_arg, so
    // the tensor.dim must keep referring to the iter_arg.
    scf.yield %t2, %dim : tensor<?x?xf32>, index
  }
  return %1 : index
}

// -----

// The loop yields its iter_arg unchanged, so the tensor.dim of the loop
// result is expected to query the function argument directly.
// CHECK-LABEL: func @tensor_dim_of_loop_result(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?xf32>
//       CHECK:   tensor.dim %[[t]]
func.func @tensor_dim_of_loop_result(%t : tensor<?x?xf32>) -> index {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %0 = scf.for %i = %c0 to %c10 step %c1 iter_args(%arg0 = %t)
      -> (tensor<?x?xf32>) {
    scf.yield %arg0 : tensor<?x?xf32>
  }
  %dim = tensor.dim %0, %c0 : tensor<?x?xf32>
  return %dim : index
}

// -----

// The second loop result is fed by a yield of %u rather than of its own
// iter_arg %arg1, so the tensor.dim is expected to stay on the loop result.
// CHECK-LABEL: func @tensor_dim_of_loop_result_no_canonicalize(
//       CHECK:   %[[loop:.*]]:2 = scf.for
//       CHECK:   tensor.dim %[[loop]]#1
func.func @tensor_dim_of_loop_result_no_canonicalize(%t : tensor<?x?xf32>,
                                                     %u : tensor<?x?xf32>) -> index {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %0, %1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%arg0 = %t, %arg1 = %u)
      -> (tensor<?x?xf32>, tensor<?x?xf32>) {
    scf.yield %arg0, %u : tensor<?x?xf32>, tensor<?x?xf32>
  }
  %dim = tensor.dim %1, %c0 : tensor<?x?xf32>
  return %dim : index
}

// -----

// The step equals the upper bound, so %i only takes the value 0 and
// `min(4, 4 - %i)` is expected to fold to the constant 4.
// CHECK-LABEL: func @one_trip_scf_for_canonicalize_min
//       CHECK:   %[[C4:.*]] = arith.constant 4 : i64
//       CHECK:   scf.for
//       CHECK:     memref.store %[[C4]], %{{.*}}[] : memref<i64>
func.func @one_trip_scf_for_canonicalize_min(%A : memref<i64>) {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index

  scf.for %i = %c0 to %c4 step %c4 {
    %1 = affine.min affine_map<(d0, d1)[] -> (4, d1 - d0)> (%i, %c4)
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// This is a regression test to ensure that no assertions are failing.
// The affine.min multiplies the induction variable by a symbol-dependent
// expression and is expected to be left unchanged.

//       CHECK: #[[$map:.+]] = affine_map<(d0)[s0] -> (-(d0 * (5 ceildiv s0)) + 5, 3)>
// CHECK-LABEL: func @regression_multiplication_with_sym
func.func @regression_multiplication_with_sym(%A : memref<i64>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index
  // CHECK: %[[dummy:.*]] = "test.dummy"
  %ub = "test.dummy"() : () -> (index)
  // CHECK: scf.for %[[iv:.*]] =
  scf.for %i = %c0 to %ub step %c1 {
    // CHECK: affine.min #[[$map]](%[[iv]])[%[[dummy]]]
    %1 = affine.min affine_map<(d0)[s0] -> (-(d0 * (5 ceildiv s0)) + 5, 3)>(%i)[%ub]
    %2 = arith.index_cast %1: index to i64
    memref.store %2, %A[]: memref<i64>
  }
  return
}

// -----

// Make sure min is transformed into zero.
// #map6 applied to (15, 0, 14) gives `15 floordiv 64` = 0; #map29 applied to
// (5, 0, 11) then yields the candidates (702, 5, 0), whose minimum is 0.

// CHECK-LABEL: func.func @func1()
//       CHECK:   %[[ZERO:.+]] = arith.constant 0 : index
//       CHECK:   call @foo(%[[ZERO]]) : (index) -> ()

#map6 = affine_map<(d0, d1, d2) -> (d0 floordiv 64)>
#map29 = affine_map<(d0, d1, d2) -> (d2 * 64 - 2, 5, (d1 mod 4) floordiv 8)>
module {
  func.func private @foo(%0 : index) -> ()

  func.func @func1() {
    %true = arith.constant true
    %c0 = arith.constant 0 : index
    %c5 = arith.constant 5 : index
    %c11 = arith.constant 11 : index
    %c14 = arith.constant 14 : index
    %c15 = arith.constant 15 : index
    %alloc_249 = memref.alloc() : memref<7xf32>
    %135 = affine.apply #map6(%c15, %c0, %c14)
    %163 = affine.min #map29(%c5, %135, %c11)
    func.call @foo(%163) : (index) -> ()
    return
  }
}