@@ -88,6 +88,150 @@ def BufferDeallocation : Pass<"buffer-deallocation", "func::FuncOp"> {
88
88
let constructor = "mlir::bufferization::createBufferDeallocationPass()";
89
89
}
90
90
91
+ def OwnershipBasedBufferDeallocation : Pass<
92
+ "ownership-based-buffer-deallocation", "func::FuncOp"> {
93
+ let summary = "Adds all required dealloc operations for all allocations in "
94
+ "the input program";
95
+ let description = [{
96
+ This pass implements an algorithm to automatically introduce all required
97
+ deallocation operations for all buffers in the input program. This ensures
98
+ that the resulting program does not have any memory leaks.
99
+
100
+ The Buffer Deallocation pass operates on the level of operations
101
+ implementing the FunctionOpInterface. Such operations can take MemRefs as
102
+ arguments, but also return them. To ensure compatibility among all functions
103
+ (including external ones), some rules have to be enforced. They are just
104
+ assumed to hold for all external functions. Functions for which the
105
+ definition is available ideally also already adhere to the ABI.
106
+ Otherwise, all MemRef write operations in the input IR must dominate all
107
+ MemRef read operations in the input IR. Then, the pass may modify the input
108
+ IR by inserting `bufferization.clone` operations such that the output IR
109
+ adheres to the function boundary ABI:
110
+ * When a MemRef is passed as a function argument, ownership is never
111
+ acquired. It is always the caller's responsibility to deallocate such
112
+ MemRefs.
113
+ * Returning a MemRef from a function always passes ownership to the caller,
114
+ i.e., it is also the caller's responsibility to deallocate MemRefs
115
+ returned from a called function.
116
+ * A function must not return a MemRef with the same allocated base buffer as
117
+ one of its arguments (in this case a copy has to be created). Note that in
118
+ this context two subviews of the same buffer that don't overlap are also
119
+ considered aliases.
120
+
121
+ It is recommended to bufferize all operations first such that no tensor
122
+ values remain in the IR once this pass is applied. That way all allocated
123
+ MemRefs will be properly deallocated without any additional manual work.
124
+ Otherwise, the pass that bufferizes the remaining tensors is responsible for
125
+ adding the corresponding deallocation operations. Note that this pass does not
126
+ consider any values of tensor type and assumes that MemRef values defined by
127
+ `bufferization.to_memref` do not return ownership and do not have to be
128
+ deallocated. `bufferization.to_tensor` operations are handled similarly to
129
+ `bufferization.clone` operations with the exception that the result value is
130
+ not handled because it's a tensor (not a MemRef).
131
+
132
+ Input
133
+
134
+ ```mlir
135
+ #map0 = affine_map<(d0) -> (d0)>
136
+ module {
137
+ func.func @condBranch(%arg0: i1,
138
+ %arg1: memref<2xf32>,
139
+ %arg2: memref<2xf32>) {
140
+ cf.cond_br %arg0, ^bb1, ^bb2
141
+ ^bb1:
142
+ cf.br ^bb3(%arg1 : memref<2xf32>)
143
+ ^bb2:
144
+ %0 = memref.alloc() : memref<2xf32>
145
+ linalg.generic {
146
+ args_in = 1 : i64,
147
+ args_out = 1 : i64,
148
+ indexing_maps = [#map0, #map0],
149
+ iterator_types = ["parallel"]}
150
+ outs(%arg1, %0 : memref<2xf32>, memref<2xf32>) {
151
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
152
+ %tmp1 = math.exp %gen1_arg0 : f32
153
+ linalg.yield %tmp1 : f32
154
+ }
155
+ cf.br ^bb3(%0 : memref<2xf32>)
156
+ ^bb3(%1: memref<2xf32>):
157
+ "memref.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
158
+ return
159
+ }
160
+ }
161
+ ```
162
+
163
+ Output
164
+
165
+ ```mlir
166
+ #map = affine_map<(d0) -> (d0)>
167
+ module {
168
+ func.func @condBranch(%arg0: i1,
169
+ %arg1: memref<2xf32>,
170
+ %arg2: memref<2xf32>) {
171
+ %false = arith.constant false
172
+ %true = arith.constant true
173
+ cf.cond_br %arg0, ^bb1, ^bb2
174
+ ^bb1: // pred: ^bb0
175
+ cf.br ^bb3(%arg1, %false : memref<2xf32>, i1)
176
+ ^bb2: // pred: ^bb0
177
+ %alloc = memref.alloc() : memref<2xf32>
178
+ linalg.generic {
179
+ indexing_maps = [#map, #map],
180
+ iterator_types = ["parallel"]}
181
+ outs(%arg1, %alloc : memref<2xf32>, memref<2xf32>)
182
+ attrs = {args_in = 1 : i64, args_out = 1 : i64} {
183
+ ^bb0(%out: f32, %out_0: f32):
184
+ %2 = math.exp %out : f32
185
+ linalg.yield %2, %out_0 : f32, f32
186
+ }
187
+ cf.br ^bb3(%alloc, %true : memref<2xf32>, i1)
188
+ ^bb3(%0: memref<2xf32>, %1: i1): // 2 preds: ^bb1, ^bb2
189
+ memref.copy %0, %arg2 : memref<2xf32> to memref<2xf32>
190
+ %base_buffer, %offset, %sizes, %strides =
191
+ memref.extract_strided_metadata %0 :
192
+ memref<2xf32> -> memref<f32>, index, index, index
193
+ bufferization.dealloc (%base_buffer : memref<f32>) if (%1)
194
+ return
195
+ }
196
+ }
197
+ ```
198
+
199
+ The `private-function-dynamic-ownership` pass option allows the pass to add
200
+ additional arguments to private functions to dynamically give ownership of
201
+ MemRefs to callees. This can enable earlier deallocations and allows the
202
+ pass to bypass the function boundary ABI, thus potentially leading to
203
+ fewer MemRef clones being inserted. For example, the private function
204
+ ```mlir
205
+ func.func private @passthrough(%memref: memref<2xi32>) -> memref<2xi32> {
206
+ return %memref : memref<2xi32>
207
+ }
208
+ ```
209
+ would be converted to
210
+ ```mlir
211
+ func.func private @passthrough(%memref: memref<2xi32>,
212
+ %ownership: i1) -> (memref<2xi32>, i1) {
213
+ return %memref, %ownership : memref<2xi32>, i1
214
+ }
215
+ ```
216
+ and thus allows the returned MemRef to alias with the MemRef passed as
217
+ argument (which would otherwise be forbidden according to the function
218
+ boundary ABI).
219
+ }];
220
+ let options = [
221
+ Option<"privateFuncDynamicOwnership", "private-function-dynamic-ownership",
222
+ "bool", /*default=*/"false",
223
+ "Allows to add additional arguments to private functions to "
224
+ "dynamically pass ownership of memrefs to callees. This can enable "
225
+ "earlier deallocations.">,
226
+ ];
227
+ let constructor = "mlir::bufferization::createOwnershipBasedBufferDeallocationPass()";
228
+
229
+ let dependentDialects = [
230
+ "mlir::bufferization::BufferizationDialect", "mlir::arith::ArithDialect",
231
+ "mlir::memref::MemRefDialect", "mlir::scf::SCFDialect"
232
+ ];
233
+ }
234
+
91
235
def BufferDeallocationSimplification :
92
236
Pass<"buffer-deallocation-simplification", "func::FuncOp"> {
93
237
let summary = "Optimizes `bufferization.dealloc` operation for more "
0 commit comments