diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 5c338d614b49d..42f675676a569 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -369,6 +369,11 @@ Changelog
   A deprecation cycle was introduced.
   :pr:`21576` by :user:`Paul-Emile Dugnat <pedugnat>`.
 
+- |API| The `"wminkowski"` metric of :class:`sklearn.metrics.DistanceMetric` is deprecated
+  and will be removed in version 1.3. Instead the existing `"minkowski"` metric now takes
+  in an optional `w` parameter for weights. This deprecation aims at remaining consistent
+  with SciPy 1.8 convention. :pr:`21873` by :user:`Yar Khine Phyo <yarkhinephyo>`
+
 - |Fix| :func:`metrics.silhouette_score` now supports integer input for precomputed
   distances. :pr:`22108` by `Thomas Fan`_.
 
diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py
index 9f0d8b86f0873..bf5744a509b9d 100644
--- a/sklearn/cluster/tests/test_hierarchical.py
+++ b/sklearn/cluster/tests/test_hierarchical.py
@@ -410,6 +410,8 @@ def test_vector_scikit_single_vs_scipy_single(seed):
     assess_same_labelling(cut, cut_scipy)
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("metric_param_grid", METRICS_DEFAULT_PARAMS)
 def test_mst_linkage_core_memory_mapped(metric_param_grid):
     """The MST-LINKAGE-CORE algorithm must work on mem-mapped dataset.
diff --git a/sklearn/metrics/_dist_metrics.pyx b/sklearn/metrics/_dist_metrics.pyx
index f7d22c1badfa2..946d8d7735601 100644
--- a/sklearn/metrics/_dist_metrics.pyx
+++ b/sklearn/metrics/_dist_metrics.pyx
@@ -30,6 +30,7 @@ cdef DTYPE_t INF = np.inf
 from ..utils._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t, DTYPECODE
 from ..utils._typedefs import DTYPE, ITYPE
 from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
+from ..utils import check_array
 
 ######################################################################
 # newObj function
@@ -119,9 +120,12 @@ cdef class DistanceMetric:
     "mahalanobis"   MahalanobisDistance   V or VI   ``sqrt((x - y)' V^-1 (x - y))``
     ==============  ====================  ========  ===============================
 
-    Note that "minkowski" with a non-None `w` parameter actually calls
-    `WMinkowskiDistance` with `w=w ** (1/p)` in order to be consistent with the
-    parametrization of scipy 1.8 and later.
+    .. deprecated:: 1.1
+        `WMinkowskiDistance` is deprecated in version 1.1 and will be removed in version 1.3.
+        Use `MinkowskiDistance` instead. Note that in `MinkowskiDistance`, the weights are
+        applied to the absolute differences already raised to the p power. This is different from
+        `WMinkowskiDistance` where weights are applied to the absolute differences before raising
+        to the p power. The deprecation aims to remain consistent with SciPy 1.8 convention.
 
     **Metrics intended for two-dimensional vector spaces:**  Note that the haversine
     distance metric requires data in the form of [latitude, longitude] and both
@@ -257,25 +261,14 @@ cdef class DistanceMetric:
         if metric is MinkowskiDistance:
             p = kwargs.pop('p', 2)
             w = kwargs.pop('w', None)
-            if w is not None:
-                # Be consistent with scipy 1.8 conventions: in scipy 1.8,
-                # 'wminkowski' was removed in favor of passing a
-                # weight vector directly to 'minkowski', however
-                # the new weights apply to the absolute differences raised to
-                # the p power instead of the absolute difference as in
-                # previous versions of scipy.
-                # WMinkowskiDistance in sklearn implements the weighting
-                # scheme of the old 'wminkowski' in scipy < 1.8, hence the
-                # following adaptation:
-                return WMinkowskiDistance(p, w ** (1/p), **kwargs)
-            if p == 1:
+            if p == 1 and w is None:
                 return ManhattanDistance(**kwargs)
-            elif p == 2:
+            elif p == 2 and w is None:
                 return EuclideanDistance(**kwargs)
-            elif np.isinf(p):
+            elif np.isinf(p) and w is None:
                 return ChebyshevDistance(**kwargs)
             else:
-                return MinkowskiDistance(p, **kwargs)
+                return MinkowskiDistance(p, w, **kwargs)
         else:
             return metric(**kwargs)
 
@@ -554,27 +547,64 @@ cdef class MinkowskiDistance(DistanceMetric):
     r"""Minkowski Distance
 
     .. math::
-       D(x, y) = [\sum_i |x_i - y_i|^p] ^ (1/p)
+        D(x, y) = {||u-v||}_p
+
+    when w is None.
+
+    Here is the more general expanded expression for the weighted case:
+
+    .. math::
+        D(x, y) = [\sum_i w_i *|x_i - y_i|^p] ^ (1/p)
+
+    Parameters
+    ----------
+    p : int
+        The order of the p-norm of the difference (see above).
+    w : (N,) array-like (optional)
+        The weight vector.
 
     Minkowski Distance requires p >= 1 and finite. For p = infinity,
     use ChebyshevDistance.
     Note that for p=1, ManhattanDistance is more efficient, and for
     p=2, EuclideanDistance is more efficient.
     """
-    def __init__(self, p):
+    def __init__(self, p, w=None):
         if p < 1:
             raise ValueError("p must be greater than 1")
         elif np.isinf(p):
             raise ValueError("MinkowskiDistance requires finite p. "
                              "For p=inf, use ChebyshevDistance.")
+
         self.p = p
+        if w is not None:
+            w_array = check_array(
+                w, ensure_2d=False, dtype=DTYPE, input_name="w"
+            )
+            if (w_array < 0).any():
+                raise ValueError("w cannot contain negative weights")
+            self.vec = ReadonlyArrayWrapper(w_array)
+            self.size = self.vec.shape[0]
+        else:
+            self.vec = ReadonlyArrayWrapper(np.asarray([], dtype=DTYPE))
+            self.size = 0
+
+    def _validate_data(self, X):
+        if self.size > 0 and X.shape[1] != self.size:
+            raise ValueError("MinkowskiDistance: the size of w must match "
+                             f"the number of features ({X.shape[1]}). "
+                             f"Currently len(w)={self.size}.")
 
     cdef inline DTYPE_t rdist(self, const DTYPE_t* x1, const DTYPE_t* x2,
                               ITYPE_t size) nogil except -1:
         cdef DTYPE_t d=0
         cdef np.intp_t j
-        for j in range(size):
-            d += pow(fabs(x1[j] - x2[j]), self.p)
+        cdef bint has_w = self.size > 0
+        if has_w:
+            for j in range(size):
+                d += self.vec[j] * pow(fabs(x1[j] - x2[j]), self.p)
+        else:
+            for j in range(size):
+                d += pow(fabs(x1[j] - x2[j]), self.p)
         return d
 
     cdef inline DTYPE_t dist(self, const DTYPE_t* x1, const DTYPE_t* x2,
@@ -595,6 +625,7 @@ cdef class MinkowskiDistance(DistanceMetric):
 
 
 #------------------------------------------------------------
+# TODO: Remove in 1.3 - WMinkowskiDistance class
 # W-Minkowski Distance
 cdef class WMinkowskiDistance(DistanceMetric):
     r"""Weighted Minkowski Distance
@@ -613,6 +644,16 @@ cdef class WMinkowskiDistance(DistanceMetric):
 
     """
     def __init__(self, p, w):
+        from warnings import warn
+        warn("WMinkowskiDistance is deprecated in version 1.1 and will be "
+            "removed in version 1.3. Use MinkowskiDistance instead. Note "
+            "that in MinkowskiDistance, the weights are applied to the "
+            "absolute differences raised to the p power. This is different "
+            "from WMinkowskiDistance where weights are applied to the "
+            "absolute differences before raising to the p power. "
+            "The deprecation aims to remain consistent with SciPy 1.8 "
+            "convention.", FutureWarning)
+
         if p < 1:
             raise ValueError("p must be greater than 1")
         elif np.isinf(p):
diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py
index 1de7471c01812..5dd10d83bfe68 100644
--- a/sklearn/metrics/tests/test_dist_metrics.py
+++ b/sklearn/metrics/tests/test_dist_metrics.py
@@ -7,6 +7,7 @@
 
 import pytest
 
+import scipy.sparse as sp
 from scipy.spatial.distance import cdist
 from sklearn.metrics import DistanceMetric
 from sklearn.utils import check_random_state
@@ -89,6 +90,8 @@ def check_cdist(metric, kwargs, X1, X2):
     assert_array_almost_equal(D_sklearn, D_scipy_cdist)
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("metric_param_grid", METRICS_DEFAULT_PARAMS)
 @pytest.mark.parametrize("X1, X2", [(X1, X2), (X1_mmap, X2_mmap)])
 def test_cdist(metric_param_grid, X1, X2):
@@ -120,6 +123,8 @@ def check_cdist_bool(metric, D_true):
     assert_array_almost_equal(D12, D_true)
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("metric_param_grid", METRICS_DEFAULT_PARAMS)
 @pytest.mark.parametrize("X1, X2", [(X1, X2), (X1_mmap, X2_mmap)])
 def test_pdist(metric_param_grid, X1, X2):
@@ -170,6 +175,8 @@ def check_pdist_bool(metric, D_true):
     assert_array_almost_equal(D12, D_true)
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("writable_kwargs", [True, False])
 @pytest.mark.parametrize("metric_param_grid", METRICS_DEFAULT_PARAMS)
 def test_pickle(writable_kwargs, metric_param_grid):
@@ -185,6 +192,8 @@ def test_pickle(writable_kwargs, metric_param_grid):
         check_pickle(metric, kwargs)
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("metric", BOOL_METRICS)
 @pytest.mark.parametrize("X1_bool", [X1_bool, X1_bool_mmap])
 def test_pickle_bool_metrics(metric, X1_bool):
@@ -262,6 +271,8 @@ def custom_metric(x, y):
     assert_array_almost_equal(pyfunc.pairwise(X), eucl.pairwise(X) ** 2)
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 def test_readonly_kwargs():
     # Non-regression test for:
     # https://github.com/scikit-learn/scikit-learn/issues/21685
@@ -277,3 +288,55 @@ def test_readonly_kwargs():
     DistanceMetric.get_metric("seuclidean", V=weights)
     DistanceMetric.get_metric("wminkowski", p=1, w=weights)
     DistanceMetric.get_metric("mahalanobis", VI=VI)
+
+
+@pytest.mark.parametrize(
+    "w, err_type, err_msg",
+    [
+        (np.array([1, 1.5, -13]), ValueError, "w cannot contain negative weights"),
+        (np.array([1, 1.5, np.nan]), ValueError, "w contains NaN"),
+        (
+            sp.csr_matrix([1, 1.5, 1]),
+            TypeError,
+            "A sparse matrix was passed, but dense data is required",
+        ),
+        (np.array(["a", "b", "c"]), ValueError, "could not convert string to float"),
+        (np.array([]), ValueError, "a minimum of 1 is required"),
+    ],
+)
+def test_minkowski_metric_validate_weights_values(w, err_type, err_msg):
+    with pytest.raises(err_type, match=err_msg):
+        DistanceMetric.get_metric("minkowski", p=3, w=w)
+
+
+def test_minkowski_metric_validate_weights_size():
+    w2 = rng.random_sample(d + 1)
+    dm = DistanceMetric.get_metric("minkowski", p=3, w=w2)
+    msg = (
+        "MinkowskiDistance: the size of w must match "
+        f"the number of features \\({X1.shape[1]}\\). "
+        f"Currently len\\(w\\)={w2.shape[0]}."
+    )
+    with pytest.raises(ValueError, match=msg):
+        dm.pairwise(X1, X2)
+
+
+# TODO: Remove in 1.3 when wminkowski is removed
+def test_wminkowski_deprecated():
+    w = rng.random_sample(d)
+    msg = "WMinkowskiDistance is deprecated in version 1.1"
+    with pytest.warns(FutureWarning, match=msg):
+        DistanceMetric.get_metric("wminkowski", p=3, w=w)
+
+
+# TODO: Remove in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
+@pytest.mark.parametrize("p", [1, 1.5, 3])
+def test_wminkowski_minkowski_equivalence(p):
+    w = rng.random_sample(d)
+    # Weights are rescaled for consistency w.r.t scipy 1.8 refactoring of 'minkowski'
+    dm_wmks = DistanceMetric.get_metric("wminkowski", p=p, w=(w) ** (1 / p))
+    dm_mks = DistanceMetric.get_metric("minkowski", p=p, w=w)
+    D_wmks = dm_wmks.pairwise(X1, X2)
+    D_mks = dm_mks.pairwise(X1, X2)
+    assert_array_almost_equal(D_wmks, D_mks)
diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py
index b7e90e63f2af1..35f7e19416f0c 100644
--- a/sklearn/metrics/tests/test_pairwise.py
+++ b/sklearn/metrics/tests/test_pairwise.py
@@ -252,6 +252,8 @@ def callable_rbf_kernel(x, y, **kwds):
     return K
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize(
     "func, metric, kwds",
     [
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 2a4d500610051..c528c3ab900f8 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -1275,6 +1275,8 @@ def test_neighbors_badargs():
         nbrs.radius_neighbors_graph(X, mode="blah")
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 def test_neighbors_metrics(n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5):
     # Test computing the neighbors for various metrics
     # create a symmetric matrix
@@ -1381,6 +1383,8 @@ def custom_metric(x1, x2):
     assert_array_almost_equal(dist1, dist2)
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 def test_valid_brute_metric_for_auto_algorithm():
     X = rng.rand(12, 12)
     Xcsr = csr_matrix(X)
diff --git a/sklearn/neighbors/tests/test_neighbors_tree.py b/sklearn/neighbors/tests/test_neighbors_tree.py
index e043ffb730708..85d578c271faa 100644
--- a/sklearn/neighbors/tests/test_neighbors_tree.py
+++ b/sklearn/neighbors/tests/test_neighbors_tree.py
@@ -233,6 +233,8 @@ def test_gaussian_kde(Cls, n_samples=1000):
         assert_array_almost_equal(dens_tree, dens_gkde, decimal=3)
 
 
+# TODO: Remove filterwarnings in 1.3 when wminkowski is removed
+@pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize(
     "Cls, metric",
     itertools.chain(