Conversation

chencha3
Contributor

As described by the title.

@chencha3 chencha3 marked this pull request as ready for review August 26, 2025 15:09
@chencha3 chencha3 requested a review from charithaintc August 26, 2025 15:09
@chencha3 chencha3 requested a review from Jianhui-Li August 26, 2025 15:09
@llvmbot
Member

llvmbot commented Aug 26, 2025

@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-gpu

Author: Chao Chen (chencha3)

Changes

As described by the title.


Full diff: https://github.com/llvm/llvm-project/pull/154637.diff

6 Files Affected:

  • (modified) mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td (+2-2)
  • (modified) mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h (+16)
  • (modified) mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp (+7-5)
  • (modified) mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp (+82-5)
  • (modified) mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp (+46)
  • (modified) mlir/test/Dialect/XeGPU/xegpu-blocking.mlir (+23)
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
index 3a88dae041dd1..ddf6b4ac85a90 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
@@ -67,8 +67,8 @@ def XeGPUBlocking: Pass<"xegpu-blocking"> {
     to a hardware instruction.
   }];
   let dependentDialects = [
-      "memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect"
-  ];
+      "memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect",
+      "index::IndexDialect"];
 }
 
 #endif // MLIR_DIALECT_XEGPU_TRANSFORMS_PASSES_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
index db8608c6d20b8..a40dc74edb200 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -10,6 +10,7 @@
 #define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
 
 #include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/OpDefinition.h"
 namespace mlir {
 
 class VectorType;
@@ -18,6 +19,7 @@ class OpResult;
 class OpBuilder;
 class ValueRange;
 class TypeConverter;
+class OpFoldResult;
 
 namespace xegpu {
 class LayoutAttr;
@@ -128,6 +130,20 @@ void doSCFStructuralTypeConversionWithTensorType(Operation *op,
 /// if no GPU module parent or XeVM target attribute exists.
 std::optional<std::string> getChipStr(Operation *op);
 
+/// Generates element-wise addition ops of two arrays with automatic alignment.
+/// When the input arrays have different sizes, the shorter array is
+/// right-aligned with the longer array, and the unmatched leading elements from
+/// the longer array are preserved unchanged. This is commonly used for offset
+/// computation where higher-dimensional offsets need to be added to
+/// lower-dimensional adjustments.
+///
+/// Example:
+///   lhs = [l1, l2, l3], rhs = [r1, r2]
+///   Result: [l1, l2+r1, l3+r2]
+SmallVector<OpFoldResult> addWithRightAligned(OpBuilder &builder, Location loc,
+                                              ArrayRef<OpFoldResult> lhs,
+                                              ArrayRef<OpFoldResult> rhs);
+
 } // namespace xegpu
 
 } // namespace mlir
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index d82c541f31359..b11f5fe87559b 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -8,6 +8,7 @@
 
 #include "mlir/Dialect/XeGPU/Transforms/Passes.h"
 
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
 #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
@@ -155,10 +156,10 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
 std::optional<SmallVector<int64_t>>
 XeGPUBlockingPass::getTileShape(Operation *op) const {
   if (isa<xegpu::CreateNdDescOp, xegpu::UpdateNdOffsetOp, xegpu::CreateDescOp,
-          xegpu::UpdateOffsetOp>(op))
+          xegpu::UpdateOffsetOp, xegpu::LoadMatrixOp>(op))
     return getTileShape(op->getOpResult(0));
   if (isa<xegpu::PrefetchNdOp, xegpu::LoadNdOp, xegpu::PrefetchOp,
-          xegpu::LoadGatherOp>(op))
+          xegpu::LoadGatherOp, xegpu::StoreMatrixOp>(op))
     return getTileShape(op->getOpOperand(0));
   if (isa<xegpu::StoreNdOp, xegpu::StoreScatterOp>(op))
     return getTileShape(op->getOpOperand(1));
@@ -202,17 +203,18 @@ XeGPUBlockingPass::getTileShape(Operation *op) const {
 
 bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
   // skip the op if any of its operands or results has workgroup level layouts
-  bool hasWgLayoutOperands =
+  bool hasSgLayoutOperands =
       llvm::any_of(op->getOpOperands(), [](OpOperand &opr) {
         xegpu::LayoutAttr layout = xegpu::getLayoutAttr(opr);
         return layout && layout.isWgLayout();
       });
-  bool hasWgLayoutResults =
+  bool hasSgLayoutResults =
       llvm::any_of(op->getOpResults(), [](OpResult result) {
         xegpu::LayoutAttr layout = xegpu::getLayoutAttr(result);
         return layout && layout.isWgLayout();
       });
-  if (hasWgLayoutOperands || hasWgLayoutResults) {
+
+  if (hasSgLayoutOperands || hasSgLayoutResults) {
     LDBG() << "skip unrolling for op with workgroup level layout: " << *op;
     return false;
   }
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
index c793b71639e86..219e4e6f44618 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
@@ -682,13 +682,90 @@ struct UnrollUpdateOffsetOp : public UnrollPattern<xegpu::UpdateOffsetOp> {
   }
 };
 
+struct UnrollLoadMatrixOp : public UnrollPattern<xegpu::LoadMatrixOp> {
+  using UnrollPattern<xegpu::LoadMatrixOp>::UnrollPattern;
+  LogicalResult matchAndRewrite(xegpu::LoadMatrixOp op,
+                                PatternRewriter &rewriter) const override {
+    std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
+    if (!targetShape)
+      return failure();
+
+    Location loc = op.getLoc();
+    VectorType valueTy = op.getType();
+    Type elemTy = valueTy.getElementType();
+    ArrayRef<int64_t> shape = valueTy.getShape();
+    auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
+
+    VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
+
+    SmallVector<OpFoldResult> mixedOffsets = op.getMixedOffsets();
+    SmallVector<SmallVector<OpFoldResult>> offsetsList;
+    for (SmallVector<int64_t> offsets :
+         StaticTileOffsetRange(shape, *targetShape)) {
+      auto adds = xegpu::addWithRightAligned(
+          rewriter, loc, mixedOffsets,
+          getAsIndexOpFoldResult(op.getContext(), offsets));
+      offsetsList.push_back(adds);
+    }
+
+    SmallVector<Value> newOps;
+    for (SmallVector<OpFoldResult> offsets : offsetsList) {
+      auto newOp = rewriter.create<xegpu::LoadMatrixOp>(
+          op.getLoc(), newValueTy, op.getMemDesc(), offsets,
+          layout.dropInstData());
+      newOps.push_back(newOp);
+    }
+    Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
+    rewriter.replaceOp(op, castOp);
+    return success();
+  }
+};
+
+struct UnrollStoreMatrixOp : public UnrollPattern<xegpu::StoreMatrixOp> {
+  using UnrollPattern<xegpu::StoreMatrixOp>::UnrollPattern;
+  LogicalResult matchAndRewrite(xegpu::StoreMatrixOp op,
+                                PatternRewriter &rewriter) const override {
+    std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
+    if (!targetShape)
+      return failure();
+
+    Location loc = op.getLoc();
+    VectorType valueTy = op.getData().getType();
+    ArrayRef<int64_t> shape = valueTy.getShape();
+    auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
+
+    SmallVector<Type> convertedValTypes =
+        getUnrolledTypes(valueTy, *targetShape);
+    SmallVector<Value> convertedValues =
+        pack(op.getData(), convertedValTypes, *targetShape, loc, rewriter);
+
+    SmallVector<OpFoldResult> mixedOffsets = op.getMixedOffsets();
+    SmallVector<SmallVector<OpFoldResult>> offsetsList;
+    for (SmallVector<int64_t> offsets :
+         StaticTileOffsetRange(shape, *targetShape)) {
+      auto adds = xegpu::addWithRightAligned(
+          rewriter, loc, mixedOffsets,
+          getAsIndexOpFoldResult(op.getContext(), offsets));
+      offsetsList.push_back(adds);
+    }
+
+    for (auto [v, offsets] : llvm::zip_equal(convertedValues, offsetsList))
+      rewriter.create<xegpu::StoreMatrixOp>(loc, v, op.getMemDesc(), offsets,
+                                            layout.dropInstData());
+
+    rewriter.eraseOp(op);
+    return success();
+  }
+};
+
 } // namespace
 
 void mlir::xegpu::populateXeGPUUnrollPatterns(
     RewritePatternSet &patterns, const xegpu::UnrollOptions &options) {
-  patterns.add<UnrollCreateNdOp, UnrollUpdateNdOffsetOp, UnrollPrefetchNdOp,
-               UnrollLoadNdOp, UnrollStoreNdOp, UnrollDpasOp,
-               UnrollCreateDescOp, UnrollLoadGatherOp, UnrollStoreScatterOp,
-               UnrollPrefetchOp, UnrollUpdateOffsetOp>(patterns.getContext(),
-                                                       options);
+  patterns
+      .add<UnrollCreateNdOp, UnrollUpdateNdOffsetOp, UnrollPrefetchNdOp,
+           UnrollLoadNdOp, UnrollStoreNdOp, UnrollDpasOp, UnrollCreateDescOp,
+           UnrollLoadGatherOp, UnrollStoreScatterOp, UnrollPrefetchOp,
+           UnrollUpdateOffsetOp, UnrollLoadMatrixOp, UnrollStoreMatrixOp>(
+          patterns.getContext(), options);
 }
diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
index 19eedbac0f76b..088e8a8c497d9 100644
--- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
+++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/Index/IR/IndexOps.h"
 #include "mlir/Dialect/LLVMIR/XeVMDialect.h"
 #include "mlir/Dialect/SCF/Transforms/Patterns.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
@@ -133,6 +134,14 @@ xegpu::LayoutAttr xegpu::getLayoutAttr(const Value value) {
     if (auto loadNd = dyn_cast<xegpu::LoadNdOp>(defOp))
       return getLayoutAttr(loadNd.getTensorDesc());
 
+    // for LoadMatrixOp, the layout is attached to the property of the op
+    if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(defOp))
+      return dyn_cast_if_present<xegpu::LayoutAttr>(loadOp.getLayoutAttr());
+
+    // for StoreMatrixOp, the layout is attached to the property of the op
+    if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(defOp))
+      return dyn_cast_if_present<xegpu::LayoutAttr>(storeOp.getLayoutAttr());
+
     std::string layoutName = getLayoutName(result);
     if (defOp->hasAttr(layoutName))
       return defOp->getAttrOfType<xegpu::LayoutAttr>(layoutName);
@@ -152,6 +161,13 @@ xegpu::LayoutAttr xegpu::getLayoutAttr(const Value value) {
 
 xegpu::LayoutAttr xegpu::getLayoutAttr(const OpOperand &opr) {
   Operation *op = opr.getOwner();
+
+  if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(op))
+    return dyn_cast_if_present<xegpu::LayoutAttr>(loadOp.getLayoutAttr());
+
+  if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(op))
+    return dyn_cast_if_present<xegpu::LayoutAttr>(storeOp.getLayoutAttr());
+
   std::string layoutName = xegpu::getLayoutName(opr);
   if (op->hasAttr(layoutName))
     return op->getAttrOfType<xegpu::LayoutAttr>(layoutName);
@@ -179,6 +195,8 @@ xegpu::setLayoutAttr<mlir::OpOperand>(const mlir::OpOperand &operand,
 void xegpu::setLayoutAttrs(Operation *op,
                            function_ref<LayoutAttr(Value)> getLayoutImpl) {
   op->walk([&](Operation *nestOp) {
+    if (isa<xegpu::LoadMatrixOp, xegpu::StoreMatrixOp>(nestOp))
+      return;
     for (OpOperand &opr : nestOp->getOpOperands()) {
       auto layout = getLayoutImpl(opr.get());
       setLayoutAttr(opr, layout);
@@ -424,3 +442,31 @@ std::optional<std::string> xegpu::getChipStr(Operation *op) {
 
   return std::nullopt;
 }
+
+/// Generates element-wise addition ops of two arrays with automatic alignment.
+/// When the input arrays have different sizes, the shorter array is
+/// right-aligned with the longer array, and the unmatched leading elements from
+/// the longer array are preserved unchanged. This is commonly used for offset
+/// computation where higher-dimensional offsets need to be added to
+/// lower-dimensional adjustments.
+///
+/// Example:
+///   lhs = [l1, l2, l3], rhs = [r1, r2]
+///   Result: [l1, l2+r1, l3+r2]
+SmallVector<OpFoldResult>
+xegpu::addWithRightAligned(OpBuilder &builder, Location loc,
+                           ArrayRef<OpFoldResult> lhs,
+                           ArrayRef<OpFoldResult> rhs) {
+  // ensure a is the longer (or equal-length) array
+  ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
+  ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
+  SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
+  a = a.slice(a.size() - b.size());
+  for (auto [l, r] : llvm::zip(a, b)) {
+    auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
+    auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
+    results.push_back(builder.createOrFold<index::AddOp>(loc, lval, rval));
+  }
+  return results;
+}
diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
index d986e5bd1cfb4..9d63c2ddd4895 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
@@ -561,3 +561,26 @@ gpu.module @test_kernel {
     gpu.return %e : vector<8x32x2xf16>
   }
 }
+
+// -----
+gpu.module @test_kernel {
+  //CHECK-LABEL: unroll_load_matrix
+  gpu.func @unroll_load_matrix(%arg0: memref<4096xi8, 3>) -> vector<32x32xf32> {
+    %0 = xegpu.create_mem_desc %arg0 : memref<4096xi8, 3> -> !xegpu.mem_desc<32x32xf32>
+    //CHECK-COUNT-8: xegpu.load_matrix {{.*}} : !xegpu.mem_desc<32x32xf32>, index, index -> vector<8x16xf32>
+    //CHECK-COUNT-8: vector.insert_strided_slice {{.*}} : vector<8x16xf32> into vector<32x32xf32>
+    %1 = xegpu.load_matrix %0[0, 0] <{layout = #xegpu.layout<inst_data = [8, 16]>}>: !xegpu.mem_desc<32x32xf32> -> vector<32x32xf32>
+    gpu.return %1: vector<32x32xf32>
+  }
+}
+
+// -----
+gpu.module @test_kernel {
+  // CHECK-LABEL: unroll_store_matrix
+  gpu.func @unroll_store_matrix(%value: vector<32x32xf32>, %arg0 : memref<32768xi8, 3>) {
+    %mdesc = xegpu.create_mem_desc %arg0 : memref<32768xi8, 3> -> !xegpu.mem_desc<64x128xf32>
+    // CHECK-COUNT-8:  xegpu.store_matrix {{.*}} : vector<8x16xf32>, !xegpu.mem_desc<64x128xf32>, index, index
+    xegpu.store_matrix %value, %mdesc[0, 0] {layout = #xegpu.layout<inst_data = [8, 16]>} : vector<32x32xf32>, !xegpu.mem_desc<64x128xf32>
+    gpu.return
+  }
+}

    SmallVector<SmallVector<OpFoldResult>> offsetsList;
    for (SmallVector<int64_t> offsets :
         StaticTileOffsetRange(shape, *targetShape)) {
      auto adds = xegpu::addWithRightAligned(
Contributor

I don't think we need to use addWithRightAligned here. The op's offsets should always have the same number of elements as the distributed offsets (derived from shape/targetShape).

Contributor Author

This is motivated by the old style (offset in createNd), in which creating a low-rank tensor desc (2D) from a high-rank tensor desc (e.g., 4D) was allowed. In that case, the local offset for the tensor desc has a lower rank and needs to be added to the original high-rank offsets to produce the final ones. The helper was added to stay compatible with those test cases. I am also not sure whether this support will be removed; it can be refactored after we completely switch to the new style.
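For concreteness, here is a small illustration of the right-aligned case described above, using the addWithRightAligned helper added in this patch (the concrete offset values and variable names are made up for illustration only):

// Hypothetical example: combining 4-D descriptor offsets with a 2-D local
// tile offset. addWithRightAligned right-aligns the shorter array, so only
// the two innermost offsets are adjusted and the leading ones pass through.
SmallVector<OpFoldResult> descOffsets = op.getMixedOffsets();   // e.g. [%i, %j, %k, %l]
SmallVector<OpFoldResult> tileOffsets =
    getAsIndexOpFoldResult(op.getContext(), ArrayRef<int64_t>{8, 16});
SmallVector<OpFoldResult> finalOffsets =
    xegpu::addWithRightAligned(rewriter, loc, descOffsets, tileOffsets);
// finalOffsets is [%i, %j, %k + 8, %l + 16]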

Contributor

I don't think this applies to load_matrix/store_matrix: we should restrict the shapes to always be 2D, for both the matrix_desc and the vector.

Contributor

AFAIK, it's already guaranteed to be 2D by matrix ops verifiers.
It should be fine as is but if you can use a different helper/approach for better readability, that'd be nice too.

Contributor Author

addWithRightAligned was supposed to handle both cases. But I think your suggestions are right. So I introduced addElementwise here.
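The addElementwise helper mentioned here is not part of the diff shown on this page; as a rough sketch of what a strictly element-wise variant could look like (the name and exact signature are assumptions based on the comment, mirroring the addWithRightAligned implementation from this patch):

// Sketch of an element-wise add over two equally sized offset arrays.
SmallVector<OpFoldResult> addElementwise(OpBuilder &builder, Location loc,
                                         ArrayRef<OpFoldResult> lhs,
                                         ArrayRef<OpFoldResult> rhs) {
  assert(lhs.size() == rhs.size() && "expecting lhs and rhs of the same size");
  SmallVector<OpFoldResult> results;
  for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
    Value lval = getValueOrCreateConstantIndexOp(builder, loc, l);
    Value rval = getValueOrCreateConstantIndexOp(builder, loc, r);
    results.push_back(builder.createOrFold<index::AddOp>(loc, lval, rval));
  }
  return results;
}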

    Location loc = op.getLoc();
    VectorType valueTy = op.getType();
    std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
    if (!targetShape || targetShape->size() != (size_t)valueTy.getRank())
Contributor

nit: static_cast

You might also need to check that the original shape is divisible by the target shape.
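One possible way to express that guard, sketched against the snippet quoted above (this is an assumption about the eventual fix, not code from this PR):

// Sketch: reject cases where the value shape is not evenly divisible by the
// target (instruction) shape, so the unrolled tiles cover it exactly.
ArrayRef<int64_t> shape = valueTy.getShape();
if (!targetShape || targetShape->size() != static_cast<size_t>(valueTy.getRank()))
  return failure();
if (llvm::any_of(llvm::zip_equal(shape, *targetShape),
                 [](auto p) { return std::get<0>(p) % std::get<1>(p) != 0; }))
  return failure();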

"memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect"
];
"memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect",
"index::IndexDialect"];
Contributor

nit: does it use index dialect at all in the end?

Contributor Author

Yes, it is used in addElementwise, so the pass needs to load it.

Contributor

@Jianhui-Li Jianhui-Li left a comment

LGTM

@chencha3 chencha3 merged commit 6026ca3 into llvm:main Sep 3, 2025
9 checks passed