// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-erase-unused-operands-and-results | FileCheck %s
// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-erase-unnecessary-inputs | FileCheck %s --check-prefix=CHECK-INPUT

// The region only reads %arg1, so the second input (%1) is erased while the
// first input and the out operand are kept.

// CHECK-LABEL: func @remove_deadargs_generic_basic
//  CHECK-SAME: (%[[ARG0:.*]]: tensor<?xf32>) -> tensor<?xf32> {
//       CHECK: %[[GENERIC_OP:.*]] = linalg.generic
//  CHECK-SAME: ins(%[[ARG0]] : tensor<?xf32>)
//  CHECK-SAME: outs({{.*}} : tensor<?xf32>) {
#map0 = affine_map<(d0) -> (d0)>
func.func @remove_deadargs_generic_basic(%arg0: tensor<?xf32>) -> (tensor<?xf32>) {
  %c0 = arith.constant 0 : index
  %cst = arith.constant 7.0 : f32
  %0 = tensor.dim %arg0, %c0 : tensor<?xf32>
  %1 = tensor.empty(%0) : tensor<?xf32>
  %2 = tensor.empty(%0) : tensor<?xf32>
  %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]}
      ins(%arg0, %1 : tensor<?xf32>, tensor<?xf32>)
      outs(%2 : tensor<?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %4 = arith.addf %arg1, %cst : f32
    linalg.yield %4 : f32
  } -> tensor<?xf32>
  return %3 : tensor<?xf32>
}

// -----

// The region reads neither input (it only combines two constants), so the
// rewritten op is expected to have no `ins` clause at all.

// CHECK-LABEL: func @remove_deadargs_generic_mixedaccess
//       CHECK: %[[GENERIC_OP:.*]] = linalg.generic
//   CHECK-NOT: ins
//  CHECK-SAME: outs({{.*}} : tensor<?x?xf32>) {
#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>
func.func @remove_deadargs_generic_mixedaccess(%arg0: tensor<?x?xf32>) -> (tensor<?x?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cst1 = arith.constant 7.0 : f32
  %cst2 = arith.constant 6.0 : f32
  %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
  // %3 is accessed through the transposed map, hence the swapped extents.
  %3 = tensor.empty(%1, %0) : tensor<?x?xf32>
  %4 = tensor.empty(%0, %1) : tensor<?x?xf32>
  %5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
      ins(%2, %3 : tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%4 : tensor<?x?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %6 = arith.divf %cst1, %cst2 : f32
    linalg.yield %6 : f32
  } -> tensor<?x?xf32>
  return %5 : tensor<?x?xf32>
}

// -----

// Test case: Most basic case. Adding a vector to itself.

#map = affine_map<(d0) -> (d0)>

// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @basic
func.func @basic(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP]], #[[$MAP]]]
  // CHECK: attrs = {someattr}
  // CHECK: ^bb0(%[[BBARG:.*]]: f32, %{{.*}}: f32):
  // CHECK: arith.addf %[[BBARG]], %[[BBARG]]
  %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]}
      ins(%arg0, %arg0 : tensor<?xf32>, tensor<?xf32>)
      outs(%arg0 : tensor<?xf32>) attrs = {someattr} {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %1 = arith.addf %arg1, %arg2 : f32
    linalg.yield %1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

// Test case: Different indexing maps mean that args are not redundant, despite
// being the same Value.

#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK-LABEL: @distinct_affine_maps
func.func @distinct_affine_maps(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]]
  %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
      ins(%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%arg0 : tensor<?x?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %1 = arith.addf %arg1, %arg2 : f32
    linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// -----

// Test case: Check rewriting mechanics for mixed redundant and
// non-redundant args.

#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>

// The first and third inputs share both the value and the indexing map, so
// they are merged; the second input uses a different map and is kept.

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK-LABEL: @mixed_redundant_non_redundant
func.func @mixed_redundant_non_redundant(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]]
  // CHECK: ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32):
  // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]])
  %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
      ins(%arg0, %arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%arg0 : tensor<?x?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
    %1 = "test.elementwise_mappable"(%arg1, %arg2, %arg3) : (f32, f32, f32) -> f32
    linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// -----

// Test case: Check rewriting mechanics for multiple different redundant args.

#map = affine_map<(d0) -> (d0)>

// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @multiple_different_redundant_args
func.func @multiple_different_redundant_args(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP]], #[[$MAP]], #[[$MAP]]]
  // CHECK: ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32):
  // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]], %[[BBARG1]])
  %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]}
      ins(%arg0, %arg1, %arg0, %arg1 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
      outs(%arg0 : tensor<?xf32>) {
  ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32):
    %1 = "test.elementwise_mappable"(%arg2, %arg3, %arg4, %arg5) : (f32, f32, f32, f32) -> f32
    linalg.yield %1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

// Drop dead result.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
// Only results #0 and #2 are returned; results #1 and #3 have no users and
// their out block arguments are not read, so both are dropped.
func.func @drop_dead_results(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %arg0, %arg0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) :
      %1 = arith.addf %b0, %b0: f32
      linalg.yield %1, %1, %1, %1 : f32, f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
//      CHECK: func @drop_dead_results(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[ARG0]] :
//      CHECK:   return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Current argmax lowering to `linalg.generic`. Cannot drop the
// first return even though it isn't used since it has an internal
// use.

#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> ()>
// Result #0 (the running maximum) is unused by the caller, but its out block
// argument %b1 feeds the comparisons that compute result #1, so both outs stay.
func.func @argmax_lowering(%arg0 : tensor<?xf32>) -> tensor<i32> {
  %init0 = tensor.empty() : tensor<f32>
  %init1 = tensor.empty() : tensor<i32>
  %0:2 = linalg.generic {
      indexing_maps = [#map0, #map1, #map1],
      iterator_types = ["reduction"]}
      ins(%arg0 : tensor<?xf32>)
      outs(%init0, %init1 : tensor<f32>, tensor<i32>) {
    ^bb0(%b0: f32, %b1: f32, %b2: i32):
      %8 = linalg.index 0 : index
      %9 = arith.index_cast %8 : index to i32
      %10 = arith.cmpf oge, %b0, %b1 : f32
      %11 = arith.select %10, %b0, %b1 : f32
      %12 = arith.cmpf oeq, %b0, %b1 : f32
      %13 = arith.minsi %9, %b2 : i32
      %14 = arith.select %10, %9, %b2 : i32
      %15 = arith.select %12, %13, %14 : i32
      linalg.yield %11, %15 : f32, i32
    } -> (tensor<f32>, tensor<i32>)
  return %0#1 : tensor<i32>
}
//      CHECK: func @argmax_lowering(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?xf32>
//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty() : tensor<f32>
//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<i32>
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       outs(%[[INIT0]], %[[INIT1]] :
//      CHECK:   return %[[GENERIC]]#1

// -----

// Do not remove operand needed for loop dim.
// The region never reads %b0, but %arg0 is the only operand whose indexing map
// mentions d0.
func.func @loop_dim_operand(%arg0 : tensor<?xf32>) -> tensor<i32> {
  %cst = arith.constant 0 : i32
  %init = tensor.empty() : tensor<i32>
  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
  %0 = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
      iterator_types = ["reduction"]}
      ins(%arg0 : tensor<?xf32>) outs(%fill : tensor<i32>) {
    ^bb0(%b0: f32, %b1: i32):
      %1 = linalg.index 0 : index
      %2 = arith.index_cast %1 : index to i32
      %3 = arith.addi %b1, %2 : i32
      linalg.yield %3 : i32
    } -> tensor<i32>
  return %0 : tensor<i32>
}
//      CHECK: func @loop_dim_operand(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?xf32>
//      CHECK:   linalg.generic
// CHECK-SAME:       ins(%[[ARG0]] :

// -----

// Do not remove outs operand needed for loop bound computation.

// Result #0 is unused, but %init1 is the only operand whose indexing map
// mentions d0, so it has to stay as an out operand.
func.func @loop_dim_outs_operand(%arg0 : index) -> tensor<i32> {
  %cst = arith.constant 0 : i32
  %init1 = tensor.empty(%arg0) : tensor<?xi32>
  %init = tensor.empty() : tensor<i32>
  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
  %0:2 = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
      iterator_types = ["parallel"]}
      outs(%init1, %fill : tensor<?xi32>, tensor<i32>) {
    ^bb0(%b0: i32, %b1: i32):
      %1 = linalg.index 0 : index
      %2 = arith.index_cast %1 : index to i32
      %3 = arith.addi %b1, %2 : i32
      linalg.yield %2, %3 : i32, i32
    } -> (tensor<?xi32>, tensor<i32>)
  return %0#1 : tensor<i32>
}
//      CHECK: func @loop_dim_outs_operand(
// CHECK-SAME:     %[[ARG0:.+]]: index
//      CHECK:   %[[INIT:.+]] = tensor.empty(%[[ARG0]])
//      CHECK:   linalg.generic
// CHECK-SAME:       outs(%[[INIT]]

// -----

// Inputs that repeat the same (value, indexing map) pair are merged: the two
// %arg0/#map0 inputs and the two %arg3/#map1 inputs each collapse to a single
// input, and the region is rewritten to reuse the surviving block argument.
#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>
#map2 = affine_map<(d0, d1) -> (d0)>
#map3 = affine_map<(d0, d1) -> (d1)>
func.func @multiple_redundant_args(%arg0 : tensor<?x?xi32>, %arg1 : tensor<?xi32>,
    %arg2 : tensor<?xi32>, %arg3 : tensor<?x?xi32>, %arg4 : tensor<?xi32>) -> tensor<?xi32> {
  %0 = linalg.generic {
      indexing_maps = [#map3, #map0, #map0, #map2, #map1, #map1, #map2],
      iterator_types = ["parallel", "reduction"]}
      ins(%arg4, %arg0, %arg0, %arg1, %arg3, %arg3
          : tensor<?xi32>, tensor<?x?xi32>, tensor<?x?xi32>, tensor<?xi32>, tensor<?x?xi32>, tensor<?x?xi32>)
      outs(%arg2 : tensor<?xi32>) {
    ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32, %b4 : i32, %b5 : i32, %b6 : i32):
      %1 = arith.addi %b0, %b1 : i32
      %2 = arith.addi %1, %b2 : i32
      %3 = arith.addi %2, %b3 : i32
      %4 = arith.addi %3, %b4 : i32
      %5 = arith.addi %4, %b5 : i32
      %6 = arith.addi %5, %b6 : i32
      linalg.yield %6 : i32
    } -> tensor<?xi32>
  return %0 : tensor<?xi32>
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d1)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0)>
//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
//      CHECK: func @multiple_redundant_args(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xi32>
// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xi32>
// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xi32>
// CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xi32>
// CHECK-SAME:     %[[ARG4:[a-zA-Z0-9_]+]]: tensor<?xi32>)
//      CHECK:   %[[RETURN:.+]] = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
// CHECK-SAME:       iterator_types = ["parallel", "reduction"]
// CHECK-SAME:       ins(%[[ARG4]], %[[ARG0]], %[[ARG1]], %[[ARG3]] :
// CHECK-SAME:       outs(%[[ARG2]] :
//      CHECK:   ^{{.+}}(%[[B0:[a-zA-Z0-9]+]]: i32
// CHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: i32)
//      CHECK:     %[[T0:.+]] = arith.addi %[[B0]], %[[B1]]
//      CHECK:     %[[T1:.+]] = arith.addi %[[T0]], %[[B1]]
//      CHECK:     %[[T2:.+]] = arith.addi %[[T1]], %[[B2]]
//      CHECK:     %[[T3:.+]] = arith.addi %[[T2]], %[[B3]]
//      CHECK:     %[[T4:.+]] = arith.addi %[[T3]], %[[B3]]
//      CHECK:     %[[T5:.+]] = arith.addi %[[T4]], %[[B4]]
//      CHECK:     linalg.yield %[[T5]]
//      CHECK:  return %[[RETURN]]

// -----

// Drop redundant results.
// Both outs use the same value and map and yield the same %1, so a single
// result is kept and returned twice.

#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @drop_redundant_results(
    %arg0 : tensor<?x?xf32>) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %0:2 = linalg.generic {
      indexing_maps = [#map, #map, #map],
      iterator_types = ["parallel", "parallel"]}
      ins(%arg0 : tensor<?x?xf32>)
      outs(%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
      %1 = arith.addf %b0, %b0 : f32
      linalg.yield %1, %1 : f32, f32
    } -> (tensor<?x?xf32>, tensor<?x?xf32>)
  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
}
//      CHECK: func @drop_redundant_results
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?xf32>
//      CHECK:   %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME:       outs(%[[ARG0]] :
//      CHECK:   return %[[GENERIC]]

// -----

// Drop dead result with different tensors.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
// Results #2 and #3 are unused and merely yield their own out block arguments
// (%b3, %b4), so the two %init0 outs are dropped.
func.func @drop_dead_results_with_different_tensors(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) :
      linalg.yield %b0, %b0, %b3, %b4 : f32, f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}

//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
//      CHECK: func @drop_dead_results_with_different_tensors(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[ARG0]] :
//      CHECK:   return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Drop dead result with unused cycles.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
// %b3 and %b4 are each read only by the value yielded back to their own
// result (%2 and %3). Those results are unused, so both cycles are dropped.
func.func @drop_dead_results_with_unused_cycles(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) :
      %1 = arith.addf %b0, %b0: f32
      %2 = arith.addf %b0, %b3: f32
      %3 = arith.addf %b0, %b4: f32
      linalg.yield %1, %1, %2, %3 : f32, f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}

//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
//      CHECK: func @drop_dead_results_with_unused_cycles(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[ARG0]] :
//      CHECK:   return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Drop only the results not used by others.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
// Result #0 yields %b2, the block argument of the second out, so that out is
// kept although result #1 itself is unused. The third out only yields its own
// block argument and is dropped, as is the input, which the region never reads.
func.func @drop_only_the_results_not_used_by_others(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:3 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) :
      linalg.yield %b2, %b1, %b3 : f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0 : tensor<?x?x?xf32>
}

//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
//      CHECK: func @drop_only_the_results_not_used_by_others(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[INIT:.+]] = tensor.empty
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[INIT]] :
//      CHECK:   return %[[GENERIC]]#0

// -----

// Drop only the cycles not used by others.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
// Result #0 is computed from %b1 and %b2, so the first two outs are kept.
// %b3 is read only by %2, which is yielded to the unused result #2, so that
// cycle (and the unread input) is dropped.
func.func @drop_only_the_cycles_not_used_by_others(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:3 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) :
      %1 = arith.addf %b1, %b2: f32
      %2 = arith.addf %b1, %b3 : f32
      linalg.yield %1, %b1, %2 : f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0 : tensor<?x?x?xf32>
}

//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
//      CHECK: func @drop_only_the_cycles_not_used_by_others(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[INIT:.+]] = tensor.empty
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[INIT]] :
//      CHECK:   return %[[GENERIC]]#0

// -----

// Exercised by the second RUN line only. Input %b is also the out operand and
// uses the same indexing map, so the input is removed and the region reads the
// out block argument instead.

// CHECK-INPUT-LABEL: func @remove_unnecessary_input(
//  CHECK-INPUT-SAME:     %[[a:.*]]: tensor<?xf32>, %[[b:.*]]: tensor<?xf32>
#map = affine_map<(d0) -> (d0)>
func.func @remove_unnecessary_input(%a: tensor<?xf32>, %b: tensor<?xf32>)
    -> tensor<?xf32>
{
  //      CHECK-INPUT: %[[result:.*]] = linalg.generic {indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel"]}
  // CHECK-INPUT-SAME:     ins(%[[a]] : tensor<?xf32>) outs(%[[b]] : tensor<?xf32>) {
  //      CHECK-INPUT:   ^bb0(%[[in:.*]]: f32, %[[out:.*]]: f32):
  //      CHECK-INPUT:     %[[add:.*]] = arith.addf %[[in]], %[[out]]
  //      CHECK-INPUT:     linalg.yield %[[add]]
  //      CHECK-INPUT:   } -> tensor<?xf32>
  //      CHECK-INPUT:   return %[[result]]
  %0 = linalg.generic
      {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]}
      ins(%a, %b : tensor<?xf32>, tensor<?xf32>) outs(%b : tensor<?xf32>) {
    ^bb0(%in: f32, %in_2: f32, %out: f32):
      %16 = arith.addf %in, %in_2 : f32
      linalg.yield %16 : f32
    } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}