// RUN: mlir-opt -verify-diagnostics -buffer-deallocation -split-input-file %s | FileCheck %s

// This file checks the behaviour of BufferDeallocation pass for moving and
// inserting missing DeallocOps in their correct positions. Furthermore,
// copies and their corresponding AllocOps are inserted.

// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: bb2 contains an AllocOp which is
// passed to bb3. In the latter block, there should be a deallocation.
// Since bb1 does not contain an adequate alloc and the alloc in bb2 is not
// moved to bb0, we need to insert allocs and copies.

// CHECK-LABEL: func @condBranch
func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
// CHECK: %[[ALLOC1:.*]] = memref.alloc
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC2]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency to block argument %0 in bb2. Since the dynamic type is passed
// to bb3 via the block argument %2, it is currently required to allocate a
// temporary buffer for %2 that gets copies of %arg1 and %1 with their
// appropriate shape dimensions. The copy buffer deallocation will be applied
// to %2 in block bb3.

// CHECK-LABEL: func @condBranchDynamicType
func.func @condBranchDynamicType(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  cf.br ^bb3(%arg1 : memref<?xf32>)
^bb2(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  cf.br ^bb3(%1 : memref<?xf32>)
^bb3(%2: memref<?xf32>):
  test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}

// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: cf.br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC3]],
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: return

// -----

// Test Case: Same as above, but with an unranked memref type.
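// BufferDeallocation expected behavior: as in @condBranchDynamicType, a clone
// of %arg1 is passed to bb3 from bb1, the ranked alloc in bb2 is cloned and
// freed before the branch, and the block argument of bb3 is deallocated after
// test.copy.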
// CHECK-LABEL: func @condBranchUnrankedType
func.func @condBranchUnrankedType(
  %arg0: i1,
  %arg1: memref<*xf32>,
  %arg2: memref<*xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  cf.br ^bb3(%arg1 : memref<*xf32>)
^bb2(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  %2 = memref.cast %1 : memref<?xf32> to memref<*xf32>
  test.buffer_based in(%arg1: memref<*xf32>) out(%2: memref<*xf32>)
  cf.br ^bb3(%2 : memref<*xf32>)
^bb3(%3: memref<*xf32>):
  test.copy(%3, %arg2) : (memref<*xf32>, memref<*xf32>)
  return
}

// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: cf.br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC3]],
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: return

// -----

// Test Case:
//      bb0
//     /   \
//   bb1   bb2 <- Initial position of AllocOp
//    |   /   \
//    |  bb3  bb4
//    |   \   /
//     \   bb5
//      \   /
//       bb6
//        |
//       bb7
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency to block argument %0 in bb2. Since the dynamic type is passed to
// bb5 via the block argument %2 and to bb6 via block argument %3, it is
// currently required to allocate temporary buffers for %2 and %3 that get
// copies of %1 and %arg1 with their appropriate shape dimensions. The copy
// buffer deallocations will be applied to %2 in block bb5 and to %3 in block
// bb6. Furthermore, there should be no copy inserted for %4.

// CHECK-LABEL: func @condBranchDynamicTypeNested
func.func @condBranchDynamicTypeNested(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  cf.br ^bb6(%arg1 : memref<?xf32>)
^bb2(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  cf.cond_br %arg0, ^bb3, ^bb4
^bb3:
  cf.br ^bb5(%1 : memref<?xf32>)
^bb4:
  cf.br ^bb5(%1 : memref<?xf32>)
^bb5(%2: memref<?xf32>):
  cf.br ^bb6(%2 : memref<?xf32>)
^bb6(%3: memref<?xf32>):
  cf.br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>):
  test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}

// CHECK-NEXT: cf.cond_br{{.*}}
// CHECK-NEXT: ^bb1
// CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.br ^bb6(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK: cf.cond_br
// CHECK: ^bb3:
// CHECK-NEXT: cf.br ^bb5(%[[ALLOC1]]{{.*}})
// CHECK: ^bb4:
// CHECK-NEXT: cf.br ^bb5(%[[ALLOC1]]{{.*}})
// CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC2]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: cf.br ^bb6(%[[ALLOC3]]{{.*}})
// CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}})
// CHECK-NEXT: cf.br ^bb7(%[[ALLOC4]]{{.*}})
// CHECK-NEXT: ^bb7(%[[ALLOC5:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC5]],
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: return

// -----

// Test Case: Existing AllocOp with no users.
// BufferDeallocation expected behavior: It should insert a DeallocOp right
// before ReturnOp.
// CHECK-LABEL: func @emptyUsesValue
func.func @emptyUsesValue(%arg0: memref<4xf32>) {
  %0 = memref.alloc() : memref<4xf32>
  return
}
// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0
//   /   \
//  |    bb1 <- Initial position of AllocOp
//   \   /
//    bb2
// BufferDeallocation expected behavior: It should insert a DeallocOp at the
// exit block after CopyOp since %1 is an alias for %0 and %arg1. Furthermore,
// we have to insert a copy and an alloc at the beginning of the function.

// CHECK-LABEL: func @criticalEdge
func.func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0 <- Initial position of AllocOp
//   /   \
//  |    bb1
//   \   /
//    bb2
// BufferDeallocation expected behavior: It only inserts a DeallocOp at the
// exit block after CopyOp since %1 is an alias for %0 and %arg1.

// CHECK-LABEL: func @invCriticalEdge
func.func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: dealloc
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0 <- Initial position of the first AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: It only inserts two missing
// DeallocOps in the exit block. %5 is an alias for %0. Therefore, the
// DeallocOp for %0 should occur after the last BufferBasedOp. The DeallocOp
// for %7 should happen after CopyOp.
// CHECK-LABEL: func @ifElse
func.func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  %7 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
  test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[FIRST_ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[SECOND_ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case: No users for buffer in if-else CFG
//    bb0 <- Initial position of AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3
// BufferDeallocation expected behavior: It only inserts a missing DeallocOp
// in the exit block since %5 and %6 are the latest aliases of %0.

// CHECK-LABEL: func @ifElseNoUsers
func.func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[FIRST_ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case:
//      bb0 <- Initial position of the first AllocOp
//     /   \
//   bb1   bb2
//    |   /   \
//    |  bb3  bb4
//    \   \   /
//     \   /
//      bb5 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: Two missing DeallocOps should be
// inserted in the exit block.
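// As the CHECK lines below verify, the DeallocOp for %0 is inserted after the
// last BufferBasedOp and the one for %9 after CopyOp.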
// CHECK-LABEL: func @ifElseNested
func.func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
  %9 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
  test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[FIRST_ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[SECOND_ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case: Dead operations in a single block.
// BufferDeallocation expected behavior: It only inserts the two missing
// DeallocOps after the last BufferBasedOp.

// CHECK-LABEL: func @redundantOperations
func.func @redundantOperations(%arg0: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  %1 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>)
  return
}

// CHECK: (%[[ARG0:.*]]: {{.*}})
// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}}out(%[[FIRST_ALLOC]]
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[FIRST_ALLOC]]{{.*}}out(%[[SECOND_ALLOC]]
// CHECK: dealloc
// CHECK-NEXT: dealloc
// CHECK-NEXT: return

// -----

// Test Case:
//                                     bb0
//                                    /   \
// Initial pos of the 1st AllocOp -> bb1  bb2 <- Initial pos of the 2nd AllocOp
//                                    \   /
//                                     bb3
// BufferDeallocation expected behavior: We need to introduce a copy for each
// buffer since the buffers are passed to bb3. Both missing DeallocOps are
// inserted in the respective blocks of the allocs. The copies are freed in
// the exit block.
// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
func.func @moving_alloc_and_inserting_missing_dealloc(
  %cond: i1,
  %arg0: memref<2xf32>,
  %arg1: memref<2xf32>) {
  cf.cond_br %cond, ^bb1, ^bb2
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^exit(%0 : memref<2xf32>)
^bb2:
  %1 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
  cf.br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
  test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cf.cond_br{{.*}}
// CHECK-NEXT: ^bb1
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC1]]
// CHECK-NEXT: ^bb2
// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC2]]
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC3]]
// CHECK-NEXT: ^bb3(%[[ALLOC4:.*]]:{{.*}})
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: return

// -----

// Test Case: Invalid position of the DeallocOp. There is a user after
// deallocation.
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: The existing DeallocOp should be
// moved to the exit block.

// CHECK-LABEL: func @moving_invalid_dealloc_op_complex
func.func @moving_invalid_dealloc_op_complex(
  %cond: i1,
  %arg0: memref<2xf32>,
  %arg1: memref<2xf32>) {
  %1 = memref.alloc() : memref<2xf32>
  cf.cond_br %cond, ^bb1, ^bb2
^bb1:
  cf.br ^exit(%arg0 : memref<2xf32>)
^bb2:
  test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
  memref.dealloc %1 : memref<2xf32>
  cf.br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
  test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: cf.cond_br
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return

// -----

// Test Case: Inserting missing DeallocOp in a single block.

// CHECK-LABEL: func @inserting_missing_dealloc_simple
func.func @inserting_missing_dealloc_simple(
  %arg0 : memref<2xf32>,
  %arg1: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]

// -----

// Test Case: Moving invalid DeallocOp (there is a user after deallocation) in
// a single block.

// CHECK-LABEL: func @moving_invalid_dealloc_op
func.func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  memref.dealloc %0 : memref<2xf32>
  test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]

// -----

// Test Case: Nested regions - This test defines a BufferBasedOp inside the
// region of a RegionBufferBasedOp.
// BufferDeallocation expected behavior: The AllocOp for the BufferBasedOp
// should remain inside the region of the RegionBufferBasedOp, and the missing
// DeallocOp for it should be inserted in the same region. The missing
// DeallocOp for the outer buffer should be inserted after CopyOp.
// CHECK-LABEL: func @nested_regions_and_cond_branch
func.func @nested_regions_and_cond_branch(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloc() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cf.cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
// CHECK: %[[ALLOC0:.*]] = bufferization.clone %[[ARG1]]
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
// CHECK: %[[ALLOC2:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC2]]
// CHECK: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: %{{.*}} = math.exp
// CHECK: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc

// -----

// Test Case: buffer deallocation escaping
// BufferDeallocation expected behavior: It must not dealloc %arg1 and %x
// since they are operands of the return operation and should escape
// deallocation. It should dealloc %y after CopyOp.

// CHECK-LABEL: func @memref_in_function_results
func.func @memref_in_function_results(
  %arg0: memref<5xf32>,
  %arg1: memref<10xf32>,
  %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) {
  %x = memref.alloc() : memref<15xf32>
  %y = memref.alloc() : memref<5xf32>
  test.buffer_based in(%arg0: memref<5xf32>) out(%y: memref<5xf32>)
  test.copy(%y, %arg2) : (memref<5xf32>, memref<5xf32>)
  return %arg1, %x : memref<10xf32>, memref<15xf32>
}

// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>)
// CHECK: %[[X:.*]] = memref.alloc()
// CHECK: %[[Y:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK: memref.dealloc %[[Y]]
// CHECK: return %[[ARG1]], %[[X]]

// -----

// Test Case: nested region control flow
// The alloc %1 flows through both if branches until it is finally returned.
// Hence, it does not require a specific dealloc operation. However, %3
// requires a dealloc.

// CHECK-LABEL: func @nested_region_control_flow
func.func @nested_region_control_flow(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: %[[ALLOC2:.*]] = memref.alloc(%arg0, %arg1)
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: scf.yield %[[ALLOC0]]
// CHECK: return %[[ALLOC1]]

// -----

// Test Case: nested region control flow with a nested buffer allocation in a
// divergent branch.
// Buffer deallocation places a copy for both %1 and %3, since they are
// returned in the end.
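// As verified below, each branch yields a clone: the clone of %3 lets the
// original be freed inside the divergent branch, and %1 is freed after the
// scf.if once both branches yield clones.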
// CHECK-LABEL: func @nested_region_control_flow_div
func.func @nested_region_control_flow_div(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %3 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc(%arg0, %arg1)
// CHECK-NEXT: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return %[[ALLOC1]]

// -----

// Test Case: nested region control flow within a region interface.
// No copies are required in this case since the allocation finally escapes
// the method.

// CHECK-LABEL: func @inner_region_control_flow
func.func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
  %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %1 = test.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>) then {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  } else {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  } join {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  }
  return %1 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = test.region_if
// CHECK-NEXT: ^bb0(%[[ALLOC2:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC2]]
// CHECK: ^bb0(%[[ALLOC3:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC3]]
// CHECK: ^bb0(%[[ALLOC4:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC4]]
// CHECK: return %[[ALLOC1]]

// -----

// CHECK-LABEL: func @subview
func.func @subview(%arg0 : index, %arg1 : index, %arg2 : memref<?x?xf32>) {
  %0 = memref.alloc() : memref<64x4xf32, strided<[4, 1], offset: 0>>
  %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
    memref<64x4xf32, strided<[4, 1], offset: 0>> to
    memref<?x?xf32, strided<[?, ?], offset: ?>>
  test.copy(%1, %arg2) :
    (memref<?x?xf32, strided<[?, ?], offset: ?>>, memref<?x?xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: memref.subview
// CHECK-NEXT: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case: In the presence of AllocaOps, only the AllocOps have to be
// freed. Therefore, the allocas are left untouched.

// CHECK-LABEL: func @condBranchAlloca
func.func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloca() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK: cf.br ^bb3(%[[ALLOCA:.*]])
// CHECK-NEXT: ^bb3
// CHECK-NEXT: test.copy
// CHECK-NEXT: return

// -----

// Test Case: In the presence of AllocaOps, only the AllocOps have to be
// freed. Therefore, the allocas are left untouched. In this case, only the
// alloc %0 gets a dealloc.
// CHECK-LABEL: func @ifElseAlloca
func.func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  %7 = memref.alloca() : memref<2xf32>
  test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
  test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: return

// -----

// CHECK-LABEL: func @ifElseNestedAlloca
func.func @ifElseNestedAlloca(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  %0 = memref.alloca() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
  %9 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
  test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return

// -----

// CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca
func.func @nestedRegionsAndCondBranchAlloca(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloca() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cf.cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
// CHECK: ^[[BB1]]:
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOCA]]
// CHECK: %{{.*}} = math.exp
// CHECK: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc

// -----
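// Test Case: Alloca variant of @nestedRegionControlFlow. The alloca in the
// divergent branch is left untouched, and the alloc escapes via the return,
// so no dealloc is inserted.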
// CHECK-LABEL: func @nestedRegionControlFlowAlloca
func.func @nestedRegionControlFlowAlloca(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloca(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: %[[ALLOCA:.*]] = memref.alloca(%arg0, %arg1)
// CHECK-NEXT: scf.yield %[[ALLOC0]]
// CHECK: return %[[ALLOC1]]

// -----

// Test Case: structured control-flow loop using a nested alloc.
// The iteration argument %iterBuf has to be freed before yielding %3 to avoid
// memory leaks.

// CHECK-LABEL: func @loop_alloc
func.func @loop_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = memref.alloc() : memref<2xf32>
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: arith.cmpi
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc()
// CHECK: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: }
// CHECK: test.copy(%[[ALLOC2]], %arg4)
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]

// -----

// Test Case: structured control-flow loop with a nested if operation.
// The loop yields buffers that have been defined outside of the loop and the
// backedges only use the iteration arguments (or one of its aliases).
// Therefore, we do not have to (and are not allowed to) free any buffers
// that are passed via the backedges.

// CHECK-LABEL: func @loop_nested_if_no_alloc
func.func @loop_nested_if_no_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      scf.yield %0 : memref<2xf32>
    } else {
      scf.yield %iterBuf : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
// CHECK: %[[ALLOC2:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: scf.yield %[[IALLOC]]
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: test.copy(%[[ALLOC1]], %arg4)
// CHECK: memref.dealloc %[[ALLOC0]]

// -----

// Test Case: structured control-flow loop with a nested if operation using
// a deeply nested buffer allocation.
// Since the innermost allocation happens in a divergent branch, we have to
// introduce additional copies for the nested if operation. Since the loop's
// yield operation "returns" %3, it will return a newly allocated buffer.
// Therefore, we have to free the iteration argument %iterBuf before
// "returning" %3.
// CHECK-LABEL: func @loop_nested_if_alloc
func.func @loop_nested_if_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>) -> memref<2xf32> {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      %4 = memref.alloc() : memref<2xf32>
      scf.yield %4 : memref<2xf32>
    } else {
      scf.yield %0 : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  return %1 : memref<2xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = scf.if
// CHECK: %[[ALLOC4:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC5:.*]] = bufferization.clone %[[ALLOC4]]
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: scf.yield %[[ALLOC5]]
// CHECK: %[[ALLOC6:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK-NEXT: scf.yield %[[ALLOC6]]
// CHECK: %[[ALLOC7:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]
// CHECK: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return %[[ALLOC2]]

// -----

// Test Case: several nested structured control-flow loops with a deeply
// nested buffer allocation inside an if operation.
// Same behavior as in loop_nested_if_alloc: we have to insert deallocations
// before each yield in all loops recursively.

// CHECK-LABEL: func @loop_nested_alloc
func.func @loop_nested_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = scf.for %i2 = %lb to %ub step %step
      iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> {
      %3 = scf.for %i3 = %lb to %ub step %step
        iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> {
        %4 = memref.alloc() : memref<2xf32>
        %5 = arith.cmpi eq, %i, %ub : index
        %6 = scf.if %5 -> (memref<2xf32>) {
          %7 = memref.alloc() : memref<2xf32>
          scf.yield %7 : memref<2xf32>
        } else {
          scf.yield %iterBuf3 : memref<2xf32>
        }
        scf.yield %6 : memref<2xf32>
      }
      scf.yield %3 : memref<2xf32>
    }
    scf.yield %2 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC0:.*]] = %[[ALLOC1]])
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[IALLOC0]]
// CHECK-NEXT: memref.dealloc %[[IALLOC0]]
// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC1:.*]] = %[[ALLOC2]])
// CHECK-NEXT: %[[ALLOC5:.*]] = bufferization.clone %[[IALLOC1]]
// CHECK-NEXT: memref.dealloc %[[IALLOC1]]
// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC2:.*]] = %[[ALLOC5]])
// CHECK: %[[ALLOC8:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC8]]
// CHECK: %[[ALLOC9:.*]] = scf.if
// CHECK: %[[ALLOC11:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC12:.*]] = bufferization.clone %[[ALLOC11]]
// CHECK-NEXT: memref.dealloc %[[ALLOC11]]
// CHECK-NEXT: scf.yield %[[ALLOC12]]
// CHECK: %[[ALLOC13:.*]] = bufferization.clone %[[IALLOC2]]
// CHECK-NEXT: scf.yield %[[ALLOC13]]
// CHECK: memref.dealloc %[[IALLOC2]]
// CHECK-NEXT: %[[ALLOC10:.*]] = bufferization.clone %[[ALLOC9]]
// CHECK-NEXT: memref.dealloc %[[ALLOC9]]
// CHECK-NEXT: scf.yield %[[ALLOC10]]
// CHECK: %[[ALLOC7:.*]] = bufferization.clone %[[ALLOC6]]
// CHECK-NEXT: memref.dealloc %[[ALLOC6]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]
// CHECK: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC4]]
// CHECK: test.copy(%[[VAL_7]], %arg4)
// CHECK-NEXT: memref.dealloc %[[VAL_7]]

// -----

// CHECK-LABEL: func @affine_loop
func.func @affine_loop() {
  %buffer = memref.alloc() : memref<1024xf32>
  %sum_init_0 = arith.constant 0.0 : f32
  %res = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_init_0) -> f32 {
    %t = affine.load %buffer[%i] : memref<1024xf32>
    %sum_next = arith.addf %sum_iter, %t : f32
    affine.yield %sum_next : f32
  }
  // CHECK: %[[M:.*]] = memref.alloc
  // CHECK: affine.for
  // CHECK: }
  // CHECK-NEXT: memref.dealloc %[[M]]
  return
}

// -----

// Test Case: explicit control-flow loop with a dynamically allocated buffer.
// The BufferDeallocation transformation should fail on this explicit
// control-flow loop since such loops are not supported.

// expected-error@+1 {{Only structured control-flow loops are supported}}
func.func @loop_dynalloc(
  %arg0 : i32,
  %arg1 : i32,
  %arg2: memref<?xf32>,
  %arg3: memref<?xf32>) {
  %const0 = arith.constant 0 : i32
  cf.br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)

^loopHeader(%i : i32, %buff : memref<?xf32>):
  %lessThan = arith.cmpi slt, %i, %arg1 : i32
  cf.cond_br %lessThan,
    ^loopBody(%i, %buff : i32, memref<?xf32>),
    ^exit(%buff : memref<?xf32>)

^loopBody(%val : i32, %buff2: memref<?xf32>):
  %const1 = arith.constant 1 : i32
  %inc = arith.addi %val, %const1 : i32
  %size = arith.index_cast %inc : i32 to index
  %alloc1 = memref.alloc(%size) : memref<?xf32>
  cf.br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)

^exit(%buff3 : memref<?xf32>):
  test.copy(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>)
  return
}

// -----

// Test Case: explicit control-flow "do" loop with a buffer allocated inside
// the loop body. The BufferDeallocation transformation should fail on this
// explicit control-flow loop since such loops are not supported.

// expected-error@+1 {{Only structured control-flow loops are supported}}
func.func @do_loop_alloc(
  %arg0 : i32,
  %arg1 : i32,
  %arg2: memref<2xf32>,
  %arg3: memref<2xf32>) {
  %const0 = arith.constant 0 : i32
  cf.br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)

^loopBody(%val : i32, %buff2: memref<2xf32>):
  %const1 = arith.constant 1 : i32
  %inc = arith.addi %val, %const1 : i32
  %alloc1 = memref.alloc() : memref<2xf32>
  cf.br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)

^loopHeader(%i : i32, %buff : memref<2xf32>):
  %lessThan = arith.cmpi slt, %i, %arg1 : i32
  cf.cond_br %lessThan,
    ^loopBody(%i, %buff : i32, memref<2xf32>),
    ^exit(%buff : memref<2xf32>)

^exit(%buff3 : memref<2xf32>):
  test.copy(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>)
  return
}

// -----
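// Test Case: buffer allocations inside shape.assuming regions.
// BufferDeallocation expected behavior: the unused alloc in the first region
// is freed in place; the yielded alloc in the second region is cloned, the
// original is freed inside the region, and the clone is freed after its use
// in test.copy.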
// CHECK-LABEL: func @assumingOp(
func.func @assumingOp(
  %arg0: !shape.witness,
  %arg2: memref<2xf32>,
  %arg3: memref<2xf32>) {
  // Confirm the alloc will be dealloc'ed in the block.
  %1 = shape.assuming %arg0 -> memref<2xf32> {
    %0 = memref.alloc() : memref<2xf32>
    shape.assuming_yield %arg2 : memref<2xf32>
  }
  // Confirm the alloc will be returned and dealloc'ed after its use.
  %3 = shape.assuming %arg0 -> memref<2xf32> {
    %2 = memref.alloc() : memref<2xf32>
    shape.assuming_yield %2 : memref<2xf32>
  }
  test.copy(%3, %arg3) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-SAME: %[[ARG0:.*]]: !shape.witness,
// CHECK-SAME: %[[ARG1:.*]]: {{.*}},
// CHECK-SAME: %[[ARG2:.*]]: {{.*}}
// CHECK: %[[UNUSED_RESULT:.*]] = shape.assuming %[[ARG0]]
// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: shape.assuming_yield %[[ARG1]]
// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[ARG0]]
// CHECK-NEXT: %[[TMP_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = bufferization.clone %[[TMP_ALLOC]]
// CHECK-NEXT: memref.dealloc %[[TMP_ALLOC]]
// CHECK-NEXT: shape.assuming_yield %[[RETURNING_ALLOC]]
// CHECK: test.copy(%[[ASSUMING_RESULT:.*]], %[[ARG2]])
// CHECK-NEXT: memref.dealloc %[[ASSUMING_RESULT]]

// -----

// Test Case: The op "test.bar" does not implement the RegionBranchOpInterface.
// This is not allowed in buffer deallocation.

func.func @noRegionBranchOpInterface() {
  // expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
  %0 = "test.bar"() ({
    // expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
    %1 = "test.bar"() ({
      "test.yield"() : () -> ()
    }) : () -> (i32)
    "test.yield"() : () -> ()
  }) : () -> (i32)
  "test.terminator"() : () -> ()
}

// -----

// CHECK-LABEL: func @dealloc_existing_clones
// CHECK: (%[[ARG0:.*]]: memref<?x?xf64>, %[[ARG1:.*]]: memref<?x?xf64>)
// CHECK: %[[RES0:.*]] = bufferization.clone %[[ARG0]]
// CHECK: %[[RES1:.*]] = bufferization.clone %[[ARG1]]
// CHECK-NOT: memref.dealloc %[[RES0]]
// CHECK: memref.dealloc %[[RES1]]
// CHECK: return %[[RES0]]
func.func @dealloc_existing_clones(%arg0: memref<?x?xf64>, %arg1: memref<?x?xf64>) -> memref<?x?xf64> {
  %0 = bufferization.clone %arg0 : memref<?x?xf64> to memref<?x?xf64>
  %1 = bufferization.clone %arg1 : memref<?x?xf64> to memref<?x?xf64>
  return %0 : memref<?x?xf64>
}

// -----

// CHECK-LABEL: func @while_two_arg
func.func @while_two_arg(%arg0: index) {
  %a = memref.alloc(%arg0) : memref<?xf32>
// CHECK: %[[WHILE:.*]]:2 = scf.while (%[[ARG1:.*]] = %[[ALLOC:.*]], %[[ARG2:.*]] = %[[CLONE:.*]])
  scf.while (%arg1 = %a, %arg2 = %a)
    : (memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>) {
// CHECK-NEXT: make_condition
    %0 = "test.make_condition"() : () -> i1
// CHECK-NEXT: bufferization.clone %[[ARG2]]
// CHECK-NEXT: memref.dealloc %[[ARG2]]
    scf.condition(%0) %arg1, %arg2 : memref<?xf32>, memref<?xf32>
  } do {
  ^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>):
// CHECK: %[[ALLOC2:.*]] = memref.alloc
    %b = memref.alloc(%arg0) : memref<?xf32>
// CHECK: memref.dealloc %[[ARG2]]
// CHECK: %[[CLONE2:.*]] = bufferization.clone %[[ALLOC2]]
// CHECK: memref.dealloc %[[ALLOC2]]
    scf.yield %arg1, %b : memref<?xf32>, memref<?xf32>
  }
// CHECK: }
// CHECK-NEXT: memref.dealloc %[[WHILE]]#1
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return
  return
}

// -----
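// Test Case: scf.while with three iteration arguments fed by the same alloc.
// BufferDeallocation expected behavior: a clone is created for each iteration
// argument before the loop (see the FIXME below on their order), and all
// three loop results are deallocated after the loop.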
func.func @while_three_arg(%arg0: index) {
// CHECK: %[[ALLOC:.*]] = memref.alloc
  %a = memref.alloc(%arg0) : memref<?xf32>
// CHECK-NEXT: %[[CLONE1:.*]] = bufferization.clone %[[ALLOC]]
// CHECK-NEXT: %[[CLONE2:.*]] = bufferization.clone %[[ALLOC]]
// CHECK-NEXT: %[[CLONE3:.*]] = bufferization.clone %[[ALLOC]]
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: %[[WHILE:.*]]:3 = scf.while
// FIXME: This is non-deterministic
// CHECK-SAME-DAG: [[CLONE1]]
// CHECK-SAME-DAG: [[CLONE2]]
// CHECK-SAME-DAG: [[CLONE3]]
  scf.while (%arg1 = %a, %arg2 = %a, %arg3 = %a)
    : (memref<?xf32>, memref<?xf32>, memref<?xf32>)
   -> (memref<?xf32>, memref<?xf32>, memref<?xf32>) {
    %0 = "test.make_condition"() : () -> i1
    scf.condition(%0) %arg1, %arg2, %arg3
      : memref<?xf32>, memref<?xf32>, memref<?xf32>
  } do {
  ^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>, %arg3: memref<?xf32>):
    %b = memref.alloc(%arg0) : memref<?xf32>
    %q = memref.alloc(%arg0) : memref<?xf32>
    scf.yield %q, %b, %arg2 : memref<?xf32>, memref<?xf32>, memref<?xf32>
  }
// CHECK-DAG: memref.dealloc %[[WHILE]]#0
// CHECK-DAG: memref.dealloc %[[WHILE]]#1
// CHECK-DAG: memref.dealloc %[[WHILE]]#2
// CHECK-NEXT: return
  return
}

// -----

func.func @select_aliases(%arg0: index, %arg1: memref<?xi8>, %arg2: i1) {
  // CHECK: memref.alloc
  // CHECK: memref.alloc
  // CHECK: arith.select
  // CHECK: test.copy
  // CHECK: memref.dealloc
  // CHECK: memref.dealloc
  %0 = memref.alloc(%arg0) : memref<?xi8>
  %1 = memref.alloc(%arg0) : memref<?xi8>
  %2 = arith.select %arg2, %0, %1 : memref<?xi8>
  test.copy(%2, %arg1) : (memref<?xi8>, memref<?xi8>)
  return
}

// -----

func.func @f(%arg0: memref<f64>) -> memref<f64> {
  return %arg0 : memref<f64>
}

// CHECK-LABEL: func @function_call
// CHECK: memref.alloc
// CHECK: memref.alloc
// CHECK: call
// CHECK: test.copy
// CHECK: memref.dealloc
// CHECK: memref.dealloc
func.func @function_call() {
  %alloc = memref.alloc() : memref<f64>
  %alloc2 = memref.alloc() : memref<f64>
  %ret = call @f(%alloc) : (memref<f64>) -> memref<f64>
  test.copy(%ret, %alloc2) : (memref<f64>, memref<f64>)
  return
}

// -----

// Memref allocated in `then` region and passed back to the parent if op.
#set = affine_set<() : (0 >= 0)>
// CHECK-LABEL: func @test_affine_if_1
// CHECK-SAME: %[[ARG0:.*]]: memref<10xf32>) -> memref<10xf32> {
func.func @test_affine_if_1(%arg0: memref<10xf32>) -> memref<10xf32> {
  %0 = affine.if #set() -> memref<10xf32> {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  } else {
    affine.yield %arg0 : memref<10xf32>
  }
  return %0 : memref<10xf32>
}
// CHECK-NEXT: %[[IF:.*]] = affine.if
// CHECK-NEXT: %[[MEMREF:.*]] = memref.alloc() : memref<10xf32>
// CHECK-NEXT: %[[CLONED:.*]] = bufferization.clone %[[MEMREF]] : memref<10xf32> to memref<10xf32>
// CHECK-NEXT: memref.dealloc %[[MEMREF]] : memref<10xf32>
// CHECK-NEXT: affine.yield %[[CLONED]] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: %[[ARG0_CLONE:.*]] = bufferization.clone %[[ARG0]] : memref<10xf32> to memref<10xf32>
// CHECK-NEXT: affine.yield %[[ARG0_CLONE]] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %[[IF]] : memref<10xf32>

// -----

// Memref allocated before parent IfOp and used in `then` region.
// Expected result: deallocation should happen after affine.if op.
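// The clone of %alloc0 in the `then` region lets the original buffer be
// freed unconditionally once the affine.if has completed.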
#set = affine_set<() : (0 >= 0)>
// CHECK-LABEL: func @test_affine_if_2() -> memref<10xf32> {
func.func @test_affine_if_2() -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    affine.yield %alloc0 : memref<10xf32>
  } else {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  }
  return %0 : memref<10xf32>
}
// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() : memref<10xf32>
// CHECK-NEXT: %[[IF_RES:.*]] = affine.if {{.*}} -> memref<10xf32> {
// CHECK-NEXT: %[[ALLOC_CLONE:.*]] = bufferization.clone %[[ALLOC]] : memref<10xf32> to memref<10xf32>
// CHECK-NEXT: affine.yield %[[ALLOC_CLONE]] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc() : memref<10xf32>
// CHECK-NEXT: %[[ALLOC2_CLONE:.*]] = bufferization.clone %[[ALLOC2]] : memref<10xf32> to memref<10xf32>
// CHECK-NEXT: memref.dealloc %[[ALLOC2]] : memref<10xf32>
// CHECK-NEXT: affine.yield %[[ALLOC2_CLONE]] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: memref.dealloc %[[ALLOC]] : memref<10xf32>
// CHECK-NEXT: return %[[IF_RES]] : memref<10xf32>

// -----

// Memref allocated before parent IfOp and used in `else` region.
// Expected result: deallocation should happen after affine.if op.
#set = affine_set<() : (0 >= 0)>
// CHECK-LABEL: func @test_affine_if_3() -> memref<10xf32> {
func.func @test_affine_if_3() -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  } else {
    affine.yield %alloc0 : memref<10xf32>
  }
  return %0 : memref<10xf32>
}
// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() : memref<10xf32>
// CHECK-NEXT: %[[IFRES:.*]] = affine.if {{.*}} -> memref<10xf32> {
// CHECK-NEXT: memref.alloc
// CHECK-NEXT: bufferization.clone
// CHECK-NEXT: memref.dealloc
// CHECK-NEXT: affine.yield
// CHECK-NEXT: } else {
// CHECK-NEXT: bufferization.clone
// CHECK-NEXT: affine.yield
// CHECK-NEXT: }
// CHECK-NEXT: memref.dealloc %[[ALLOC]] : memref<10xf32>
// CHECK-NEXT: return %[[IFRES]] : memref<10xf32>

// -----

// Memref allocated before parent IfOp and not used later.
// Expected result: deallocation should happen before affine.if op.
#set = affine_set<() : (0 >= 0)>
// CHECK-LABEL: func @test_affine_if_4({{.*}}: memref<10xf32>) -> memref<10xf32> {
func.func @test_affine_if_4(%arg0 : memref<10xf32>) -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    affine.yield %arg0 : memref<10xf32>
  } else {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  }
  return %0 : memref<10xf32>
}
// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() : memref<10xf32>
// CHECK-NEXT: memref.dealloc %[[ALLOC]] : memref<10xf32>
// CHECK-NEXT: affine.if

// -----

// Ensure we free the realloc, not the alloc.

// CHECK-LABEL: func @auto_dealloc()
func.func @auto_dealloc() {
  %c10 = arith.constant 10 : index
  %c100 = arith.constant 100 : index
  %alloc = memref.alloc(%c10) : memref<?xi32>
  %realloc = memref.realloc %alloc(%c100) : memref<?xi32> to memref<?xi32>
  return
}
// CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index
// CHECK-DAG: %[[C100:.*]] = arith.constant 100 : index
// CHECK-NEXT: %[[A:.*]] = memref.alloc(%[[C10]]) : memref<?xi32>
// CHECK-NEXT: %[[R:.*]] = memref.realloc %alloc(%[[C100]]) : memref<?xi32> to memref<?xi32>
// CHECK-NEXT: memref.dealloc %[[R]] : memref<?xi32>
// CHECK-NEXT: return