
Commit 0af3239

[Inductor] Migrate from oneDNN Inner Product to oneDNN MatMul for mkldnn._linear_pointwise and mkldnn._linear_pointwise.binary
ghstack-source-id: c376010
Pull Request resolved: #147360
1 parent: 19ed227
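
oneDNN's inner product primitive consumes the weight in the (out_features, in_features) layout that nn.Linear stores, while oneDNN matmul multiplies src (M, K) by weights (K, N), i.e. it wants the transposed (in_features, out_features) layout. The weight handed to these kernels therefore changes layout, and every place that read the output-feature count from size(0) now reads it from size(1). A minimal sketch of the equivalence the migration relies on (shapes are illustrative):

import torch
import torch.nn.functional as F

x = torch.randn(32, 64)       # (batch, in_features)
w = torch.randn(128, 64)      # nn.Linear layout: (out_features, in_features)
w_t = w.t().contiguous()      # matmul layout: (in_features, out_features)

# inner-product-style linear and matmul on the transposed weight agree:
assert torch.allclose(F.linear(x, w), x @ w_t, atol=1e-5)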

File tree

6 files changed: +43 -36 lines changed


aten/src/ATen/native/mkldnn/Linear.cpp

Lines changed: 19 additions & 17 deletions

@@ -206,14 +206,14 @@ Tensor mkldnn_linear_pointwise(
       dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)});
 
   std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1);
-  output_size.push_back(weight_t.size(0));
+  output_size.push_back(weight_t.size(1));
   auto output = at::empty(output_size, input.options());
   if (output.sym_numel() == 0) {
     return output;
   }
   if (dim != 2) {
     std::vector<int64_t> output_size_reshaped = {input_reshaped.size(0),
-                                                 weight_t.size(0)};
+                                                 weight_t.size(1)};
     output = output.reshape(output_size_reshaped);
   }
@@ -228,7 +228,7 @@ Tensor mkldnn_linear_pointwise(
 
   std::optional<ideep::tensor> mkldnn_bias{std::nullopt};
   if (bias.defined()) {
-    mkldnn_bias = itensor_from_tensor(bias);
+    mkldnn_bias = itensor_from_tensor(bias.reshape({1, weight_t.size(1)}));
   }
   const ideep::tensor w = itensor_from_tensor(weight_t);
 
@@ -241,20 +241,22 @@ Tensor mkldnn_linear_pointwise(
   }
 
   if (mkldnn_bias.has_value()) {
-    ideep::inner_product_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>(
+    ideep::matmul_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>(
         mkldnn_input,
         w,
         mkldnn_bias.value(),
         mkldnn_output,
-        op_attr,
-        aprop_kind);
+        1.0f,
+        1.0f,
+        op_attr);
   } else {
-    ideep::inner_product_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>(
+    ideep::matmul_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>(
         mkldnn_input,
         w,
         mkldnn_output,
-        op_attr,
-        aprop_kind);
+        1.0f,
+        1.0f,
+        op_attr);
   }
 
   if (dim != 2) {
@@ -300,7 +302,7 @@ Tensor mkldnn_linear_pointwise_binary(
       dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)});
 
   std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1);
-  output_size.push_back(weight_t.size(0));
+  output_size.push_back(weight_t.size(1));
   auto output = at::empty(output_size, input.options());
   if (output.sym_numel() == 0) {
     return output;
@@ -310,7 +312,7 @@ Tensor mkldnn_linear_pointwise_binary(
 
   if (dim != 2) {
     std::vector<int64_t> output_size_reshaped = {
-        input_reshaped.size(0), weight_t.size(0)};
+        input_reshaped.size(0), weight_t.size(1)};
     output = output.reshape(output_size_reshaped);
     other_reshaped = other_reshaped.reshape(output_size_reshaped);
     TORCH_CHECK(
@@ -329,25 +331,25 @@ Tensor mkldnn_linear_pointwise_binary(
 
   std::optional<ideep::tensor> mkldnn_bias{std::nullopt};
   if (bias.defined()) {
-    mkldnn_bias = itensor_from_tensor(bias);
+    mkldnn_bias = itensor_from_tensor(bias.reshape({1, weight_t.size(1)}));
  }
   const ideep::tensor w = itensor_from_tensor(weight_t);
 
   auto other_desc = mkldnn_other.get_desc();
   auto op_attr = ideep::attr_t::fuse_binary(it_binary->second, other_desc);
 
   if (mkldnn_bias.has_value()) {
-    ideep::inner_product_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>(
+    ideep::matmul_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>(
         mkldnn_input,
         mkldnn_other,
         w,
         mkldnn_bias.value(),
         mkldnn_output,
-        op_attr,
-        aprop_kind);
+        1.0f,
+        op_attr);
   } else {
-    ideep::inner_product_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>(
-        mkldnn_input, mkldnn_other, w, mkldnn_output, op_attr, aprop_kind);
+    ideep::matmul_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>(
+        mkldnn_input, mkldnn_other, w, mkldnn_output, 1.0f, op_attr);
   }
 
   if (dim != 2) {
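
Two details of the new compute calls are worth noting: the bias is reshaped to a (1, out_features) row so it broadcasts over the M rows of the matmul destination (inner product took a 1-D per-channel bias), and the matmul overloads here take two 1.0f coefficients plus op_attr in place of the inner product's op_attr and aprop_kind. A hedged sketch of the bias shape math, with illustrative sizes:

import torch

x = torch.randn(32, 64)      # (M, in_features), after flattening leading dims
w_t = torch.randn(64, 128)   # (in_features, out_features)
b = torch.randn(128)         # 1-D bias as the op receives it

y = x @ w_t + b.reshape(1, 128)   # bias broadcast as a (1, oc) row
assert y.shape == (32, 128)
assert torch.allclose(y, torch.nn.functional.linear(x, w_t.t(), b), atol=1e-5)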

aten/src/ATen/native/mkldnn/MKLDNNConversions.cpp

Lines changed: 5 additions & 6 deletions

@@ -260,21 +260,20 @@ static Tensor mkldnn_reorder_linear_weight(
     const Tensor& self,
     std::optional<int64_t> batch_size_opt) {
   mkldnn_check_low_precision(self.scalar_type(), "mkldnn_reorder_linear_weight");
-  auto out_features = self.size(0);
-  auto in_features = self.size(1);
+  auto in_features = self.size(0);
+  auto out_features = self.size(1);
   auto self_ = self.contiguous();
   auto w = itensor_from_tensor(self_);
   ideep::dims input_size;
   auto dtype = w.get_data_type();
   if (batch_size_opt.has_value()) {
     input_size = {batch_size_opt.value(), in_features};
   }
-  auto packed_desc = ideep::inner_product_forward::expected_weights_desc(
-      {out_features, in_features},
+  auto packed_desc = ideep::matmul_forward::expected_weights_desc(
+      {in_features, out_features},
       input_size,
       /* weight dtype */ dtype,
-      /* src dtype */ dtype,
-      ideep::prop_kind::forward_inference);
+      /* src dtype */ dtype);
   ideep::tensor result;
   result.init(packed_desc);
   result.feed_from(w);
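
The prepack path mirrors the kernel change: the feature counts are read from the transposed layout, and the packed descriptor is queried from the matmul primitive (which no longer takes a prop_kind argument here). A small sketch of the dims involved, with illustrative sizes; the real query goes through ideep:

import torch

w_t = torch.randn(64, 128)   # prepack input: (in_features, out_features)
in_features, out_features = w_t.size(0), w_t.size(1)
batch_size = 32              # optional hint used only for the src shape

weight_dims = [in_features, out_features]   # was [out_features, in_features]
src_dims = [batch_size, in_features]        # input_size for expected_weights_desc
assert weight_dims == [64, 128] and src_dims == [32, 64]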

torch/_inductor/fx_passes/mkldnn_fusion.py

Lines changed: 14 additions & 8 deletions

@@ -925,9 +925,7 @@ def is_linear_add_bias(match):
     linear_node = add_node.args[0]
     packed_weight_node = linear_node.args[1]
     assert packed_weight_node.target == mkldnn._reorder_linear_weight
-    transpose_weight_node = packed_weight_node.args[0]
-    assert transpose_weight_node.target == aten.permute.default
-    weight_meta = transpose_weight_node.args[0].meta.get("val")
+    weight_meta = packed_weight_node.args[0].meta.get("val")
     bias_node = add_node.args[1]
     if isinstance(bias_node, int):
         # we only folding bias if it is a constant
@@ -1300,9 +1298,6 @@ def linear(match, *args, **kwargs):
     )
     weight = args[1] if linear_node.target == aten.mm.default else args[2]
     with graph.inserting_before(linear_node):
-        transpose_weight_node = graph.create_node(
-            "call_function", aten.permute.default, (weight, (1, 0))
-        )
         weight_dtype = weight.meta.get("val").dtype
         is_lp_weight = weight_dtype in (
             torch.bfloat16,
@@ -1313,9 +1308,20 @@ def linear(match, *args, **kwargs):
         assert (
             is_lp_weight or mkldnn._is_mkldnn_acl_supported()
         ), f"only bf16/fp16 weight prepacking supports dynamic shape inputs but got {weight_dtype}"
+        weight_node = (
+            weight
+            if (
+                is_lp_weight
+                or mkldnn._is_mkldnn_acl_supported()
+                or V.aot_compilation
+            )
+            else graph.create_node(
+                "call_function", aten.permute.default, (weight, (1, 0))
+            )
+        )
         # For bfloat16 dynamic shape path, using input size hint to pack weight for a better performance.
         packed_weight_inputs = (
-            transpose_weight_node,
+            weight_node,
             batch_size.node.shape_env.size_hint(batch_size.node.expr)
             if has_free_symbols(batch_size)
             else batch_size,
@@ -1347,7 +1353,7 @@ def linear(match, *args, **kwargs):
             packed_linear_inputs += (bias, "none", [], "")
             packed_linear_op = mkldnn._linear_pointwise.default
         else:
-            packed_linear_inputs += (transpose_weight_node, bias, batch_size)
+            packed_linear_inputs += (weight_node, bias, batch_size)
             packed_linear_op = torch.ops.mkl._mkl_linear
         packed_linear_node = graph.create_node(
             "call_function", packed_linear_op, packed_linear_inputs

torch/_inductor/mkldnn_ir.py

Lines changed: 2 additions & 2 deletions

@@ -833,7 +833,7 @@ def create(cls, x, w, B, attr, scalars, algorithm):
         w = cls.require_contiguous(cls.realize_input(w))
 
         *m, _ic = x.get_size()
-        oc, _ic = w.get_size()
+        _ic, oc = w.get_size()
         output_size = list(m) + [oc]
         inputs = [x, w]
         constant_args = [attr, scalars if scalars else [-1], algorithm]
@@ -887,7 +887,7 @@ def create(cls, x, y, w, B, attr):
         w = cls.require_contiguous(cls.realize_input(w))
 
         *m, _ic = x.get_size()
-        oc, _ic = w.get_size()
+        _ic, oc = w.get_size()
         output_size = list(m) + [oc]
         inputs = [x, y, w]
         constant_args = [attr]
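
The IR nodes compute the output shape the same way as before; only the unpacking order of the weight's size changes. A standalone restatement, runnable as-is:

# Output-shape computation with the new (in_features, out_features) weight layout:
def linear_output_size(x_size, w_size):
    *m, _ic = x_size    # e.g. [batch, seq, in_features]
    _ic, oc = w_size    # weight is (in_features, out_features); was (oc, ic)
    return list(m) + [oc]

assert linear_output_size([8, 16, 64], [64, 128]) == [8, 16, 128]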

torch/_inductor/mkldnn_lowerings.py

Lines changed: 2 additions & 2 deletions

@@ -258,7 +258,7 @@ def epilogue_creator(buf):
 
         kwargs = dict(
             has_bias=b is not None,
-            trans_w=True,
+            trans_w=False,
             epilogue_creator=None if attr == "none" else epilogue_creator,
         )
         if b is not None:
@@ -321,7 +321,7 @@ def epilogue_creator(buf):
 
         kwargs = dict(
             has_bias=b is not None,
-            trans_w=True,
+            trans_w=False,
             epilogue_creator=epilogue_creator,
         )
         kwargs["input_indices"] = [0, 2, 1] if b is None else [3, 0, 2, 1]

torch/_meta_registrations.py

Lines changed: 1 addition & 1 deletion

@@ -2477,7 +2477,7 @@ def meta_mkldnn_convolution_default(
 def meta_linear_pointwise_default(
     input_tensor, weight, bias, attr, scalars, algorithm
 ):
-    return input_tensor.new_empty((*input_tensor.shape[:-1], weight.shape[0]))
+    return input_tensor.new_empty((*input_tensor.shape[:-1], weight.shape[1]))
 
 if torch._C.has_mkl:
     _meta_lib_dont_use_me_use_register_meta_for_mkl = torch.library.Library(
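
The meta kernel has to agree with the real kernel's output shape, so it now takes the output features from dim 1 of the weight. A quick check on meta tensors (illustrative sizes):

import torch

def meta_linear_pointwise_sketch(input_tensor, weight):
    # weight arrives as (in_features, out_features) after this change
    return input_tensor.new_empty((*input_tensor.shape[:-1], weight.shape[1]))

x = torch.empty(2, 3, 64, device="meta")
w = torch.empty(64, 128, device="meta")
assert meta_linear_pointwise_sketch(x, w).shape == (2, 3, 128)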
