From 3138ca83c677c3d515e00df92446bb97f7ff2c62 Mon Sep 17 00:00:00 2001 From: Arun Thangamani Date: Tue, 26 Aug 2025 05:29:05 -0700 Subject: [PATCH 1/4] adding memory side-effects to amx.tile_zero --- mlir/include/mlir/Dialect/AMX/AMX.td | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td index 6bbde43e2d011..91fe1bfb5cd35 100644 --- a/mlir/include/mlir/Dialect/AMX/AMX.td +++ b/mlir/include/mlir/Dialect/AMX/AMX.td @@ -142,8 +142,9 @@ class AMX_Op traits = []> : // Tile reset. // -def TileZeroOp : AMX_Op<"tile_zero", [Pure, - AMXIntrinsicOpInterface +def TileZeroOp : AMX_Op<"tile_zero", [ + AMXIntrinsicOpInterface, + MemoryEffects<[MemWrite]> ]> { let summary = "tile zero operation"; let description = [{ From c1d42593852f56143a8a10ad34bd70b5e2557ce5 Mon Sep 17 00:00:00 2001 From: Arun Thangamani Date: Fri, 29 Aug 2025 01:00:18 -0700 Subject: [PATCH 2/4] updating the description + adding a test-case --- mlir/include/mlir/Dialect/AMX/AMX.td | 10 +++++--- mlir/test/Dialect/AMX/memory-effects.mlir | 30 +++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 mlir/test/Dialect/AMX/memory-effects.mlir diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td index 91fe1bfb5cd35..a8f8a7f3d5b7f 100644 --- a/mlir/include/mlir/Dialect/AMX/AMX.td +++ b/mlir/include/mlir/Dialect/AMX/AMX.td @@ -151,6 +151,8 @@ def TileZeroOp : AMX_Op<"tile_zero", [ Zeroes the destination tile, with the shape defined by the 2-dim vector type of the result. This is eventually lowered into the "tilezero" instruction with the corresponding tile configuration. + It includes memory effects and CSE doesn't eliminate multiple + "tilezero" instructions. Example: @@ -180,15 +182,17 @@ def TileZeroOp : AMX_Op<"tile_zero", [ // Tile memory operations. 
// -def TileLoadOp : AMX_Op<"tile_load", [Pure, - AMXIntrinsicOpInterface +def TileLoadOp : AMX_Op<"tile_load", [ + AMXIntrinsicOpInterface, + MemoryEffects<[MemWrite]> ]> { let summary = "tile load operation"; let description = [{ Loads a tile from memory defined by a base and indices, with the shape defined by the 2-dim vector type of the result. This is eventually lowered into the "tileloadd" instruction with the - corresponding tile configuration. + corresponding tile configuration. It includes memory effects and + CSE doesn't eliminate multiple "tileload" instructions. Example: diff --git a/mlir/test/Dialect/AMX/memory-effects.mlir b/mlir/test/Dialect/AMX/memory-effects.mlir new file mode 100644 index 0000000000000..0b9cdc477ad26 --- /dev/null +++ b/mlir/test/Dialect/AMX/memory-effects.mlir @@ -0,0 +1,30 @@ +// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s + +// CHECK-LABEL: mem_effect( +// CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal" +// CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal" +// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal" +// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal" +// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal" +// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal" +func.func @mem_effect(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %c16 = arith.constant 16 : index + %alloca = memref.alloca() : memref<16x32xf32> + %0 = amx.tile_zero : !amx.tile<16x16xf32> + %1 = amx.tile_zero : !amx.tile<16x16xf32> + %2:2 = scf.for %arg2 = %c0 to %c2 step %c1 iter_args(%arg3 = %0, %arg4 = %1) -> (!amx.tile<16x16xf32>, !amx.tile<16x16xf32>) { + %3 = amx.tile_load %arg0[%arg2, %c0, %c0] : memref<2x32x32xbf16> into !amx.tile<16x32xbf16> + %4 = amx.tile_load %arg0[%arg2, %c16, %c0] : memref<2x32x32xbf16> into 
!amx.tile<16x32xbf16> + %5 = amx.tile_load %arg1[%arg2, %c0, %c0] : memref<2x16x32xbf16> into !amx.tile<16x32xbf16> + %6 = amx.tile_load %arg1[%arg2, %c0, %c0] : memref<2x16x32xbf16> into !amx.tile<16x32xbf16> + %7 = amx.tile_mulf %3, %5, %arg3 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32> + %8 = amx.tile_mulf %4, %6, %arg4 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32> + scf.yield %7, %8 : !amx.tile<16x16xf32>, !amx.tile<16x16xf32> + } + amx.tile_store %alloca[%c0, %c0], %2#0 : memref<16x32xf32>, !amx.tile<16x16xf32> + amx.tile_store %alloca[%c0, %c16], %2#1 : memref<16x32xf32>, !amx.tile<16x16xf32> + return %alloca : memref<16x32xf32> +} From c2d068dca5be1660a2c297e0343666d721f72acf Mon Sep 17 00:00:00 2001 From: Arun Thangamani Date: Fri, 29 Aug 2025 02:26:05 -0700 Subject: [PATCH 3/4] updating comments + name correction + description updates --- mlir/include/mlir/Dialect/AMX/AMX.td | 8 ++++---- .../{AMX/memory-effects.mlir => side-effects.mlir} | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) rename mlir/test/Dialect/{AMX/memory-effects.mlir => side-effects.mlir} (87%) diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td index a8f8a7f3d5b7f..1236fede4d88b 100644 --- a/mlir/include/mlir/Dialect/AMX/AMX.td +++ b/mlir/include/mlir/Dialect/AMX/AMX.td @@ -151,8 +151,8 @@ def TileZeroOp : AMX_Op<"tile_zero", [ Zeroes the destination tile, with the shape defined by the 2-dim vector type of the result. This is eventually lowered into the "tilezero" instruction with the corresponding tile configuration. - It includes memory effects and CSE doesn't eliminate multiple - "tilezero" instructions. + With memory-effects, each "tilezero" operation serves as a compilation + hint to use a separate tile register. 
Example: @@ -191,8 +191,8 @@ def TileLoadOp : AMX_Op<"tile_load", [ Loads a tile from memory defined by a base and indices, with the shape defined by the 2-dim vector type of the result. This is eventually lowered into the "tileloadd" instruction with the - corresponding tile configuration. It includes memory effects and - CSE doesn't eliminate multiple "tileload" instructions. + corresponding tile configuration. With memory-effects, each "tileload" + operation serves as a compilation hint to use a separate tile register. Example: diff --git a/mlir/test/Dialect/AMX/memory-effects.mlir b/mlir/test/Dialect/side-effects.mlir similarity index 87% rename from mlir/test/Dialect/AMX/memory-effects.mlir rename to mlir/test/Dialect/side-effects.mlir index 0b9cdc477ad26..d3904e50f970a 100644 --- a/mlir/test/Dialect/AMX/memory-effects.mlir +++ b/mlir/test/Dialect/side-effects.mlir @@ -1,13 +1,15 @@ // RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s -// CHECK-LABEL: mem_effect( +// With inclusion of memory side-effects, it is expected CSE not to fold multiple +// "tileload" and "tilezero". 
+// CHECK-LABEL: do_not_fold_tiles( // CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal" // CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal" // CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal" // CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal" // CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal" // CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal" -func.func @mem_effect(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> { +func.func @do_not_fold_tiles(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> { %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index %c2 = arith.constant 2 : index From fbf578e3158b5608a462b070967459f658146fff Mon Sep 17 00:00:00 2001 From: Arun Thangamani Date: Fri, 29 Aug 2025 03:15:37 -0700 Subject: [PATCH 4/4] moved tests to AMX folder --- mlir/test/Dialect/{ => AMX}/side-effects.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename mlir/test/Dialect/{ => AMX}/side-effects.mlir (98%) diff --git a/mlir/test/Dialect/side-effects.mlir b/mlir/test/Dialect/AMX/side-effects.mlir similarity index 98% rename from mlir/test/Dialect/side-effects.mlir rename to mlir/test/Dialect/AMX/side-effects.mlir index d3904e50f970a..22c76d98c6996 100644 --- a/mlir/test/Dialect/side-effects.mlir +++ b/mlir/test/Dialect/AMX/side-effects.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s +// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | FileCheck %s // With inclusion of memory side-effects, it is expected CSE not to fold multiple // "tileload" and "tilezero".