// RUN: mlir-opt %s -test-tensor-copy-insertion=allow-return-allocs-from-loops -allow-unregistered-dialect -split-input-file | FileCheck %s
// RUN: mlir-opt %s -test-tensor-copy-insertion="allow-return-allocs-from-loops bufferize-function-boundaries" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC

// Loop-carried tensors are copied before the loop. The yield forwards the
// iter_args in the same order, so no copies are needed inside the loop body.
// CHECK-LABEL: func @scf_for(
// CHECK-SAME: %[[A:.*]]: tensor<?xf32>, %[[B:.*]]: tensor<?xf32>
func.func @scf_for(%A : tensor<?xf32>, %B : tensor<?xf32>,
                   %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) : tensor<?xf32>
  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) : tensor<?xf32>
  // CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A_copy]], %[[iter2:.*]] = %[[B_copy]])
  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
      -> (tensor<?xf32>, tensor<?xf32>) {
    // CHECK: scf.yield %[[iter1]], %[[iter2]]
    scf.yield %tA, %tB : tensor<?xf32>, tensor<?xf32>
  }

  return %r0#0, %r0#1 : tensor<?xf32>, tensor<?xf32>
}

// -----

// The yielded values are swapped, so additional copies are inserted at the
// yield.
// CHECK-LABEL: func @scf_for_swapping_yields(
// CHECK-SAME: %[[A:.*]]: tensor<?xf32>, %[[B:.*]]: tensor<?xf32>
func.func @scf_for_swapping_yields(%A : tensor<?xf32>, %B : tensor<?xf32>,
                                   %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) : tensor<?xf32>
  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) : tensor<?xf32>
  // CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A_copy]], %[[iter2:.*]] = %[[B_copy]])
  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
      -> (tensor<?xf32>, tensor<?xf32>) {
    // Yield tensors in different order.
    // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor() copy(%[[iter2]]) : tensor<?xf32>
    // CHECK-DAG: %[[yield2:.*]] = bufferization.alloc_tensor() copy(%[[iter1]]) : tensor<?xf32>
    // CHECK: scf.yield %[[yield1]], %[[yield2]]
    scf.yield %tB, %tA : tensor<?xf32>, tensor<?xf32>
  }

  return %r0#0, %r0#1 : tensor<?xf32>, tensor<?xf32>
}

// -----

// The condition forwards the tensors in the same order, so only the loop
// init values are copied.
// CHECK-LABEL: func @scf_while(
// CHECK-SAME: %[[A:.*]]: tensor<5xi1>, %[[B:.*]]: tensor<5xi1>
func.func @scf_while(%A: tensor<5xi1>, %B: tensor<5xi1>, %idx: index)
  -> (tensor<5xi1>, tensor<5xi1>)
{
  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) : tensor<5xi1>
  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) : tensor<5xi1>
  // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[A_copy]], %[[w1:.*]] = %[[B_copy]]) {{.*}} {
  %r0, %r1 = scf.while (%w0 = %A, %w1 = %B)
      : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) {
    // CHECK: %[[condition:.*]] = tensor.extract %[[w0]]
    %condition = tensor.extract %w0[%idx] : tensor<5xi1>
    // Yield tensors in the same order.
    // CHECK: scf.condition(%[[condition]]) %[[w0]], %[[w1]]
    scf.condition(%condition) %w0, %w1 : tensor<5xi1>, tensor<5xi1>
  } do {
  ^bb0(%b0: tensor<5xi1>, %b1: tensor<5xi1>):
    // CHECK: } do {
    // CHECK: ^bb0(%[[b0:.*]]: tensor<5xi1>, %[[b1:.*]]: tensor<5xi1>):
    // CHECK: scf.yield %[[b0]], %[[b1]]
    // CHECK: }
    scf.yield %b0, %b1 : tensor<5xi1>, tensor<5xi1>
  }

  return %r0, %r1 : tensor<5xi1>, tensor<5xi1>
}

// -----

// The condition yields the tensors in swapped order, so copies are inserted
// before scf.condition.
// CHECK-LABEL: func @scf_while_non_equiv_condition_and_body(
// CHECK-SAME: %[[A:.*]]: tensor<5xi1>, %[[B:.*]]: tensor<5xi1>
func.func @scf_while_non_equiv_condition_and_body(%A: tensor<5xi1>, %B: tensor<5xi1>, %idx: index)
  -> (tensor<5xi1>, tensor<5xi1>)
{
  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) : tensor<5xi1>
  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) : tensor<5xi1>
  // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[A_copy]], %[[w1:.*]] = %[[B_copy]]) {{.*}} {
  %r0, %r1 = scf.while (%w0 = %A, %w1 = %B)
      : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) {
    // CHECK: %[[condition:.*]] = tensor.extract %[[w0]]
    %condition = tensor.extract %w0[%idx] : tensor<5xi1>
    // Yield tensors in different order.
    // CHECK-DAG: %[[yield0:.*]] = bufferization.alloc_tensor() copy(%[[w1]]) : tensor<5xi1>
    // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor() copy(%[[w0]]) : tensor<5xi1>
    // CHECK: scf.condition(%[[condition]]) %[[yield0]], %[[yield1]]
    scf.condition(%condition) %w1, %w0 : tensor<5xi1>, tensor<5xi1>
  } do {
  ^bb0(%b0: tensor<5xi1>, %b1: tensor<5xi1>):
    // CHECK: } do {
    // CHECK: ^bb0(%[[b0:.*]]: tensor<5xi1>, %[[b1:.*]]: tensor<5xi1>):
    // CHECK: scf.yield %[[b1]], %[[b0]]
    // CHECK: }
    scf.yield %b1, %b0 : tensor<5xi1>, tensor<5xi1>
  }

  return %r0, %r1 : tensor<5xi1>, tensor<5xi1>
}

// -----

// A copy of the shared_outs operand %out is inserted, except when bufferizing
// with function boundaries.
// CHECK-LABEL: func @scf_forall_out_of_place(
// CHECK-SAME: %[[arg0:.*]]: tensor<100xf32>, %[[arg1:.*]]: tensor<100xf32>
// CHECK-FUNC-LABEL: func @scf_forall_out_of_place(
func.func @scf_forall_out_of_place(%in: tensor<100xf32>, %out: tensor<100xf32>) {
  %c1 = arith.constant 1 : index
  %num_threads = arith.constant 100 : index

  // CHECK-FUNC-NOT: alloc_tensor
  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[arg1]]) : tensor<100xf32>
  // CHECK: scf.forall {{.*}} shared_outs(%[[o:.*]] = %[[alloc]])
  %result = scf.forall (%thread_idx) in (%num_threads) shared_outs(%o = %out) -> tensor<100xf32> {
    // CHECK: tensor.extract_slice
    // CHECK: scf.forall.in_parallel
    // CHECK: tensor.parallel_insert_slice %{{.*}} into %[[o]]
    %1 = tensor.extract_slice %in[%thread_idx][1][1] : tensor<100xf32> to tensor<1xf32>
    scf.forall.in_parallel {
      tensor.parallel_insert_slice %1 into %o[%thread_idx][1][1] : tensor<1xf32> into tensor<100xf32>
    }
    // CHECK: } {mapping = [#gpu.thread<x>]}
  } {mapping = [#gpu.thread<x>]}
  return
}