// RUN: mlir-opt -transform-interpreter %s --split-input-file --verify-diagnostics | FileCheck %s

// Simple test: check that we extract the address computation of a load into
// a dedicated subview.
// The resulting load will load from the subview and have only indices
// set to zero.
// CHECK-LABEL: @test_load(
// CHECK-SAME: %[[BASE:[^:]*]]: memref{{[^,]*}},
// CHECK-SAME: %[[DYN_OFFSET:.*]]: index)
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET]], 0, 8] [1, 1, 1] [1, 1, 1] : memref<2x16x16xf32> to memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
// CHECK: %[[LOADED_VAL:.*]] = memref.load %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] : memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
// CHECK: return %[[LOADED_VAL]] : f32

// expected-remark @below {{transformed}}
func.func @test_load(%base : memref<2x16x16xf32>, %offset : index) -> f32 {
  %c0 = arith.constant 0 : index
  %c8 = arith.constant 8 : index
  %loaded_val = memref.load %base[%offset, %c0, %c8] : memref<2x16x16xf32>
  return %loaded_val : f32
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %0, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// Same as previous @test_load but with the nontemporal flag.
// CHECK-LABEL: @test_load_nontemporal(
// CHECK-SAME: %[[BASE:[^:]*]]: memref{{[^,]*}},
// CHECK-SAME: %[[DYN_OFFSET:.*]]: index)
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET]], 0, 8] [1, 1, 1] [1, 1, 1] : memref<2x16x16xf32> to memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
// CHECK: %[[LOADED_VAL:.*]] = memref.load %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] {nontemporal = true} : memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
// CHECK: return %[[LOADED_VAL]] : f32
func.func @test_load_nontemporal(%base : memref<2x16x16xf32>, %offset : index) -> f32 {
  %c0 = arith.constant 0 : index
  %c8 = arith.constant 8 : index
  %loaded_val = memref.load %base[%offset, %c0, %c8] {nontemporal = true} : memref<2x16x16xf32>
  return %loaded_val : f32
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Simple test: check that we extract the address computation of a store into
// a dedicated subview.
// The resulting store will use the address from the subview and have only
// indices set to zero.
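//
// Illustrative sketch of the expected rewrite (not exercised by FileCheck;
// SSA names are made up):
//   memref.store %cf0, %base[%offset, %c0, %c8] : memref<2x16x16xf32>
// becomes:
//   %subview = memref.subview %base[%offset, 0, 8] [1, 1, 1] [1, 1, 1]
//       : memref<2x16x16xf32> to memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
//   memref.store %cf0, %subview[%c0, %c0, %c0]
//       : memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>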
// CHECK-LABEL: @test_store(
// CHECK-SAME: %[[BASE:[^:]*]]: memref{{[^,]*}},
// CHECK-SAME: %[[DYN_OFFSET:.*]]: index)
// CHECK-DAG: %[[CF0:.*]] = arith.constant 0.0{{0*e\+00}} : f32
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET]], 0, 8] [1, 1, 1] [1, 1, 1] : memref<2x16x16xf32> to memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
// CHECK: memref.store %[[CF0]], %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] : memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
// CHECK: return
func.func @test_store(%base : memref<2x16x16xf32>, %offset : index) -> () {
  %cf0 = arith.constant 0.0 : f32
  %c0 = arith.constant 0 : index
  %c8 = arith.constant 8 : index
  memref.store %cf0, %base[%offset, %c0, %c8] : memref<2x16x16xf32>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Same as @test_store but check that the nontemporal flag is preserved.
// CHECK-LABEL: @test_store_nontemporal(
// CHECK-SAME: %[[BASE:[^:]*]]: memref{{[^,]*}},
// CHECK-SAME: %[[DYN_OFFSET:.*]]: index)
// CHECK-DAG: %[[CF0:.*]] = arith.constant 0.0{{0*e\+00}} : f32
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET]], 0, 8] [1, 1, 1] [1, 1, 1] : memref<2x16x16xf32> to memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
// CHECK: memref.store %[[CF0]], %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] {nontemporal = true} : memref<1x1x1xf32, strided<[256, 16, 1], offset: ?>>
// CHECK: return
func.func @test_store_nontemporal(%base : memref<2x16x16xf32>, %offset : index) -> () {
  %cf0 = arith.constant 0.0 : f32
  %c0 = arith.constant 0 : index
  %c8 = arith.constant 8 : index
  memref.store %cf0, %base[%offset, %c0, %c8] {nontemporal = true} : memref<2x16x16xf32>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// For this test, we made the source memref fully dynamic.
// The gist of the check remains the same as the simple test:
// the address computation is extracted into its own subview.
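//
// Illustrative sketch of the rewrite inside the innermost loop (not
// exercised by FileCheck; SSA names are made up):
//   %loaded_val = memref.load %base[%iv0, %iv1, %iv2]
// becomes:
//   %subview = memref.subview %base[%iv0, %iv1, %iv2] [1, 1, 1] [1, 1, 1]
//   %loaded_val = memref.load %subview[%c0, %c0, %c0]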
// CHECK-LABEL: @testWithLoop(
// CHECK-SAME: %[[BASE:[^:]*]]: memref
// CHECK: %[[SUM_ALL:.*]] = arith.constant 0.0{{0*e\+00}} : f32
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[C1:.*]] = arith.constant 1 : index
// CHECK: %[[C2:.*]] = arith.constant 2 : index
// CHECK: %[[UPPER_BOUND0:.*]] = memref.dim %[[BASE]], %[[C0]] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: %[[UPPER_BOUND1:.*]] = memref.dim %[[BASE]], %[[C1]] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: %[[UPPER_BOUND2:.*]] = memref.dim %[[BASE]], %[[C2]] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: %[[SUM_RES2:.*]] = scf.for %[[IV2:.*]] = %[[C0]] to %[[UPPER_BOUND2]] step %[[C1]] iter_args(%[[SUM_ITER2:.*]] = %[[SUM_ALL]]) -> (f32) {
// CHECK: %[[SUM_RES1:.*]] = scf.for %[[IV1:.*]] = %[[C0]] to %[[UPPER_BOUND1]] step %[[C1]] iter_args(%[[SUM_ITER1:.*]] = %[[SUM_ITER2]]) -> (f32) {
// CHECK: %[[SUM_RES0:.*]] = scf.for %[[IV0:.*]] = %[[C0]] to %[[UPPER_BOUND0]] step %[[C1]] iter_args(%[[SUM_ITER0:.*]] = %[[SUM_ITER1]]) -> (f32) {
// CHECK: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[IV0]], %[[IV1]], %[[IV2]]] [1, 1, 1] [1, 1, 1] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<1x1x1xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: %[[LOADED_VAL:.*]] = memref.load %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] : memref<1x1x1xf32, strided<[?, ?, ?], offset: ?>>
// CHECK: %[[RES:.*]] = arith.addf %[[LOADED_VAL]], %[[SUM_ITER2]] : f32
// CHECK: scf.yield %[[RES]] : f32
// CHECK: }
// CHECK: scf.yield %[[SUM_RES0]] : f32
// CHECK: }
// CHECK: scf.yield %[[SUM_RES1]] : f32
// CHECK: }
// CHECK: return %[[SUM_RES2]] : f32
func.func @testWithLoop(%base : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>) -> f32 {
  %sum_all = arith.constant 0.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %upper_bound0 = memref.dim %base, %c0 : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
  %upper_bound1 = memref.dim %base, %c1 : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
  %upper_bound2 = memref.dim %base, %c2 : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
  %sum_res2 = scf.for %iv2 = %c0 to %upper_bound2 step %c1 iter_args(%sum_iter2 = %sum_all) -> (f32) {
    %sum_res1 = scf.for %iv1 = %c0 to %upper_bound1 step %c1 iter_args(%sum_iter1 = %sum_iter2) -> (f32) {
      %sum_res0 = scf.for %iv0 = %c0 to %upper_bound0 step %c1 iter_args(%sum_iter0 = %sum_iter1) -> (f32) {
        %loaded_val = memref.load %base[%iv0, %iv1, %iv2] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
        %res = arith.addf %loaded_val, %sum_iter2 : f32
        scf.yield %res : f32
      }
      scf.yield %sum_res0 : f32
    }
    scf.yield %sum_res1 : f32
  }
  return %sum_res2 : f32
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Simple test: check that we extract the address computation of an ldmatrix
// into a dedicated subview.
// The resulting ldmatrix will load from the subview and have only indices
// set to zero.
// Also the sizes of the view are adjusted to `original size - offset`.
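//
// Illustrative sketch of the size adjustment for the 4x32x32 source below
// (not exercised by FileCheck; SSA names are made up):
//   %size0 = affine.apply affine_map<()[s0] -> (-s0 + 4)>()[%offset0]
// i.e. `4 - %offset0`, so the subview still spans from the offset to the
// end of each dimension.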
// CHECK-DAG: #[[$FOUR_MINUS_OFF_MAP:.*]] = affine_map<()[s0] -> (-s0 + 4)>
// CHECK-DAG: #[[$THIRTY_TWO_MINUS_OFF_MAP:.*]] = affine_map<()[s0] -> (-s0 + 32)>
// CHECK-LABEL: @test_ldmatrix(
// CHECK-SAME: %[[BASE:[^:]*]]: memref<{{[^,]*}}, 3>,
// CHECK-SAME: %[[DYN_OFFSET0:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET1:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET2:[^:]*]]: index)
// CHECK-DAG: %[[DYN_SIZE0:.*]] = affine.apply #[[$FOUR_MINUS_OFF_MAP]]()[%[[DYN_OFFSET0]]]
// CHECK-DAG: %[[DYN_SIZE1:.*]] = affine.apply #[[$THIRTY_TWO_MINUS_OFF_MAP]]()[%[[DYN_OFFSET1]]]
// CHECK-DAG: %[[DYN_SIZE2:.*]] = affine.apply #[[$THIRTY_TWO_MINUS_OFF_MAP]]()[%[[DYN_OFFSET2]]]
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET0]], %[[DYN_OFFSET1]], %[[DYN_OFFSET2]]] [%[[DYN_SIZE0]], %[[DYN_SIZE1]], %[[DYN_SIZE2]]] [1, 1, 1] : memref<4x32x32xf16, 3> to memref<?x?x?xf16, strided<[1024, 32, 1], offset: ?>, 3>
// CHECK: %[[LOADED_VAL:.*]] = nvgpu.ldmatrix %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] {numTiles = 4 : i32, transpose = false} : memref<?x?x?xf16, strided<[1024, 32, 1], offset: ?>, 3> -> vector<4x2xf16>
// CHECK: return %[[LOADED_VAL]] : vector<4x2xf16>
func.func @test_ldmatrix(%base : memref<4x32x32xf16, 3>,
    %offset0 : index, %offset1: index, %offset2: index)
    -> vector<4x2xf16> {
  %loaded_val = nvgpu.ldmatrix %base[%offset0, %offset1, %offset2] {numTiles = 4 : i32, transpose = false} : memref<4x32x32xf16, 3> -> vector<4x2xf16>
  return %loaded_val : vector<4x2xf16>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Same as test_ldmatrix but with a fully dynamic memref.
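//
// Here `original size - offset` cannot fold to constants anymore, so the
// expected rewrite (sketched; SSA names are made up) first recovers the
// dynamic sizes with:
//   %buffer, %off, %sizes:3, %strides:3 = memref.extract_strided_metadata %base
// and then applies an `s0 - s1` affine map to each (size, offset) pair.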
// CHECK-DAG: #[[$A_MINUS_B_MAP:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
// CHECK-LABEL: @test_ldmatrix(
// CHECK-SAME: %[[BASE:[^:]*]]: memref<{{[^,]*}}, 3>,
// CHECK-SAME: %[[DYN_OFFSET0:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET1:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET2:[^:]*]]: index)
// CHECK-DAG: {{.*}}, {{.*}}, %[[DYN_SIZES:.*]]:3, {{.*}} = memref.extract_strided_metadata %[[BASE]]
// CHECK-DAG: %[[DYN_SIZE0:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#0, %[[DYN_OFFSET0]]]
// CHECK-DAG: %[[DYN_SIZE1:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#1, %[[DYN_OFFSET1]]]
// CHECK-DAG: %[[DYN_SIZE2:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#2, %[[DYN_OFFSET2]]]
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET0]], %[[DYN_OFFSET1]], %[[DYN_OFFSET2]]] [%[[DYN_SIZE0]], %[[DYN_SIZE1]], %[[DYN_SIZE2]]] [1, 1, 1] : memref<?x?x?xf16, 3> to memref<?x?x?xf16, strided<[?, ?, 1], offset: ?>, 3>
// CHECK: %[[LOADED_VAL:.*]] = nvgpu.ldmatrix %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] {numTiles = 4 : i32, transpose = false} : memref<?x?x?xf16, strided<[?, ?, 1], offset: ?>, 3> -> vector<4x2xf16>
// CHECK: return %[[LOADED_VAL]] : vector<4x2xf16>
func.func @test_ldmatrix(%base : memref<?x?x?xf16, 3>,
    %offset0 : index, %offset1: index, %offset2: index)
    -> vector<4x2xf16> {
  %loaded_val = nvgpu.ldmatrix %base[%offset0, %offset1, %offset2] {numTiles = 4 : i32, transpose = false} : memref<?x?x?xf16, 3> -> vector<4x2xf16>
  return %loaded_val : vector<4x2xf16>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Simple test for vector.transfer_read with a fully dynamic memref.
// We also set a permutation map to make sure it is properly preserved.
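//
// Illustrative sketch of the expected rewrite (not exercised by FileCheck;
// SSA names are made up):
//   %v = vector.transfer_read %base[%offset0, %offset1, %offset2], %cf0 {...}
// becomes a read at the origin of a subview rooted at the original indices:
//   %subview = memref.subview %base[%offset0, %offset1, %offset2]
//       [%size0, %size1, %size2] [1, 1, 1]
//   %v = vector.transfer_read %subview[%c0, %c0, %c0], %cf0 {...}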
// CHECK-DAG: #[[$A_MINUS_B_MAP:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
// CHECK-DAG: #[[$PERMUTATION_MAP:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
// CHECK-LABEL: @test_transfer_read_op(
// CHECK-SAME: %[[BASE:[^:]*]]: memref<{{[^,]*}}>,
// CHECK-SAME: %[[DYN_OFFSET0:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET1:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET2:[^:]*]]: index)
// CHECK-DAG: {{.*}}, {{.*}}, %[[DYN_SIZES:.*]]:3, {{.*}} = memref.extract_strided_metadata %[[BASE]]
// CHECK-DAG: %[[DYN_SIZE0:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#0, %[[DYN_OFFSET0]]]
// CHECK-DAG: %[[DYN_SIZE1:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#1, %[[DYN_OFFSET1]]]
// CHECK-DAG: %[[DYN_SIZE2:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#2, %[[DYN_OFFSET2]]]
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[CF0:.*]] = arith.constant 0.0{{0*e\+00}} : f16
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET0]], %[[DYN_OFFSET1]], %[[DYN_OFFSET2]]] [%[[DYN_SIZE0]], %[[DYN_SIZE1]], %[[DYN_SIZE2]]] [1, 1, 1] : memref<?x?x?xf16> to memref<?x?x?xf16, strided<[?, ?, 1], offset: ?>>
// CHECK: %[[LOADED_VAL:.*]] = vector.transfer_read %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]], %[[CF0]] {permutation_map = #[[$PERMUTATION_MAP]]} : memref<?x?x?xf16, strided<[?, ?, 1], offset: ?>>, vector<4x2xf16>
// CHECK: return %[[LOADED_VAL]] : vector<4x2xf16>
func.func @test_transfer_read_op(%base : memref<?x?x?xf16>,
    %offset0 : index, %offset1: index, %offset2: index)
    -> vector<4x2xf16> {
  %cf0 = arith.constant 0.0 : f16
  %loaded_val = vector.transfer_read %base[%offset0, %offset1, %offset2], %cf0 { permutation_map = affine_map<(d0,d1,d2) -> (d2,d0)> } : memref<?x?x?xf16>, vector<4x2xf16>
  return %loaded_val : vector<4x2xf16>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Same as test_transfer_read_op but with tensors.
// Right now this rewrite is not supported but we still shouldn't choke on it.
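//
// (The rewrite materializes a memref.subview, which has no tensor
// counterpart in this pattern, so the transfer_read below is expected to
// stay untouched.)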
// CHECK: #[[$PERMUTATION_MAP:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
// CHECK-LABEL: @test_transfer_read_op_with_tensor(
// CHECK-SAME: %[[BASE:[^:]*]]: tensor<{{[^,]*}}>,
// CHECK-SAME: %[[DYN_OFFSET0:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET1:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET2:[^:]*]]: index)
// CHECK: %[[CF0:.*]] = arith.constant 0.0{{0*e\+00}} : f16
// CHECK: %[[LOADED_VAL:.*]] = vector.transfer_read %[[BASE]][%[[DYN_OFFSET0]], %[[DYN_OFFSET1]], %[[DYN_OFFSET2]]], %[[CF0]] {permutation_map = #[[$PERMUTATION_MAP]]} : tensor<?x?x?xf16>, vector<4x2xf16>
// CHECK: return %[[LOADED_VAL]] : vector<4x2xf16>
func.func @test_transfer_read_op_with_tensor(%base : tensor<?x?x?xf16>,
    %offset0 : index, %offset1: index, %offset2: index)
    -> vector<4x2xf16> {
  %cf0 = arith.constant 0.0 : f16
  %loaded_val = vector.transfer_read %base[%offset0, %offset1, %offset2], %cf0 { permutation_map = affine_map<(d0,d1,d2) -> (d2,d0)> } : tensor<?x?x?xf16>, vector<4x2xf16>
  return %loaded_val : vector<4x2xf16>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Simple test for vector.transfer_write with a fully dynamic memref.
// We also set a permutation map to make sure it is properly preserved.
// CHECK-DAG: #[[$A_MINUS_B_MAP:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
// CHECK-DAG: #[[$PERMUTATION_MAP:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
// CHECK-LABEL: @test_transfer_write_op(
// CHECK-SAME: %[[BASE:[^:]*]]: memref<{{[^,]*}}>,
// CHECK-SAME: %[[DYN_OFFSET0:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET1:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET2:[^:]*]]: index)
// CHECK-DAG: {{.*}}, {{.*}}, %[[DYN_SIZES:.*]]:3, {{.*}} = memref.extract_strided_metadata %[[BASE]]
// CHECK-DAG: %[[DYN_SIZE0:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#0, %[[DYN_OFFSET0]]]
// CHECK-DAG: %[[DYN_SIZE1:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#1, %[[DYN_OFFSET1]]]
// CHECK-DAG: %[[DYN_SIZE2:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#2, %[[DYN_OFFSET2]]]
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VCF0:.*]] = arith.constant dense<0.0{{0*e\+00}}> : vector<4x2xf16>
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET0]], %[[DYN_OFFSET1]], %[[DYN_OFFSET2]]] [%[[DYN_SIZE0]], %[[DYN_SIZE1]], %[[DYN_SIZE2]]] [1, 1, 1] : memref<?x?x?xf16> to memref<?x?x?xf16, strided<[?, ?, 1], offset: ?>>
// CHECK: vector.transfer_write %[[VCF0]], %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] {permutation_map = #[[$PERMUTATION_MAP]]} : vector<4x2xf16>, memref<?x?x?xf16, strided<[?, ?, 1], offset: ?>>
// CHECK: return
func.func @test_transfer_write_op(%base : memref<?x?x?xf16>,
    %offset0 : index, %offset1: index, %offset2: index) {
  %vcf0 = arith.constant dense<0.000000e+00> : vector<4x2xf16>
  vector.transfer_write %vcf0, %base[%offset0, %offset1, %offset2] { permutation_map = affine_map<(d0,d1,d2) -> (d2,d0)> } : vector<4x2xf16>, memref<?x?x?xf16>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Check that the strides of the original memref are kept.
// Moreover, even with non-1 strides, the subview should still issue
// [1, ...] strides, since these are multiplication factors applied to the
// original strides.
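//
// In other words (illustrative; SSA names are made up): even though %base
// has dynamic strides `strided<[?, ?, ?]>`, the produced subview still uses
// [1, 1, 1] as its stride operands:
//   %subview = memref.subview %base[%off0, %off1, %off2] [%sz0, %sz1, %sz2] [1, 1, 1]
// because the subview strides multiply the source strides rather than
// replace them.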
// CHECK-DAG: #[[$A_MINUS_B_MAP:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
// CHECK-DAG: #[[$PERMUTATION_MAP:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
// CHECK-LABEL: @test_transfer_write_op_with_strides(
// CHECK-SAME: %[[BASE:[^:]*]]: memref<{{[^>]*}}>>,
// CHECK-SAME: %[[DYN_OFFSET0:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET1:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET2:[^:]*]]: index)
// CHECK-DAG: {{.*}}, {{.*}}, %[[DYN_SIZES:.*]]:3, {{.*}} = memref.extract_strided_metadata %[[BASE]]
// CHECK-DAG: %[[DYN_SIZE0:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#0, %[[DYN_OFFSET0]]]
// CHECK-DAG: %[[DYN_SIZE1:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#1, %[[DYN_OFFSET1]]]
// CHECK-DAG: %[[DYN_SIZE2:.*]] = affine.apply #[[$A_MINUS_B_MAP]]()[%[[DYN_SIZES]]#2, %[[DYN_OFFSET2]]]
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VCF0:.*]] = arith.constant dense<0.0{{0*e\+00}}> : vector<4x2xf16>
// CHECK-DAG: %[[SUBVIEW:.*]] = memref.subview %[[BASE]][%[[DYN_OFFSET0]], %[[DYN_OFFSET1]], %[[DYN_OFFSET2]]] [%[[DYN_SIZE0]], %[[DYN_SIZE1]], %[[DYN_SIZE2]]] [1, 1, 1] : memref<?x?x?xf16, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf16, strided<[?, ?, ?], offset: ?>>
// CHECK: vector.transfer_write %[[VCF0]], %[[SUBVIEW]][%[[C0]], %[[C0]], %[[C0]]] {permutation_map = #[[$PERMUTATION_MAP]]} : vector<4x2xf16>, memref<?x?x?xf16, strided<[?, ?, ?], offset: ?>>
// CHECK: return
func.func @test_transfer_write_op_with_strides(%base : memref<?x?x?xf16, strided<[?, ?, ?], offset: ?>>,
    %offset0 : index, %offset1: index, %offset2: index) {
  %vcf0 = arith.constant dense<0.000000e+00> : vector<4x2xf16>
  vector.transfer_write %vcf0, %base[%offset0, %offset1, %offset2] { permutation_map = affine_map<(d0,d1,d2) -> (d2,d0)> } : vector<4x2xf16>, memref<?x?x?xf16, strided<[?, ?, ?], offset: ?>>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}

// -----

// Same as test_transfer_write_op but with tensors.
// Right now this rewrite is not supported but we still shouldn't choke on it.
// CHECK: #[[$PERMUTATION_MAP:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
// CHECK-LABEL: @test_transfer_write_op_with_tensor(
// CHECK-SAME: %[[BASE:[^:]*]]: tensor<{{[^,]*}}>,
// CHECK-SAME: %[[DYN_OFFSET0:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET1:[^:]*]]: index,
// CHECK-SAME: %[[DYN_OFFSET2:[^:]*]]: index)
// CHECK-DAG: %[[VCF0:.*]] = arith.constant dense<0.0{{0*e\+00}}> : vector<4x2xf16>
// CHECK: %[[RES:.*]] = vector.transfer_write %[[VCF0]], %[[BASE]][%[[DYN_OFFSET0]], %[[DYN_OFFSET1]], %[[DYN_OFFSET2]]] {permutation_map = #[[$PERMUTATION_MAP]]} : vector<4x2xf16>, tensor<?x?x?xf16>
// CHECK: return %[[RES]] : tensor<?x?x?xf16>
func.func @test_transfer_write_op_with_tensor(%base : tensor<?x?x?xf16>,
    %offset0 : index, %offset1: index, %offset2: index) -> tensor<?x?x?xf16> {
  %vcf0 = arith.constant dense<0.000000e+00> : vector<4x2xf16>
  %res = vector.transfer_write %vcf0, %base[%offset0, %offset1, %offset2] { permutation_map = affine_map<(d0,d1,d2) -> (d2,d0)> } : vector<4x2xf16>, tensor<?x?x?xf16>
  return %res : tensor<?x?x?xf16>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %0 {
      transform.apply_patterns.memref.extract_address_computations
    } : !transform.any_op
    transform.yield
  }
}