Skip to content

ENH support float32 in SpectralEmbedding for LOBPCG and PyAMG solvers #21534

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 43 commits into from
Nov 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
d8119b4
Update _spectral_embedding.py
lobpcg Nov 2, 2021
badf039
Update test_spectral_embedding.py
lobpcg Nov 2, 2021
171d283
Update test_spectral_embedding.py
lobpcg Nov 2, 2021
2d9cbff
Update test_spectral_embedding.py
lobpcg Nov 2, 2021
92724a1
black
lobpcg Nov 2, 2021
3d6b1f5
mistake fixed
lobpcg Nov 2, 2021
3def8f8
added `amgloaded` distinction
lobpcg Nov 2, 2021
3d797c6
lint
lobpcg Nov 2, 2021
971f521
black --line-length 79 plus pytest.warns -> pytest.raises
lobpcg Nov 2, 2021
e7cdd6a
pass + black
lobpcg Nov 2, 2021
b8aa308
Update v1.1.rst
lobpcg Nov 2, 2021
d5e4902
Update v1.1.rst
lobpcg Nov 2, 2021
c3bbd12
@pytest.mark.parametrize("eigen_solver", ("arpack", "lobpcg", "amg"))…
lobpcg Nov 2, 2021
5b9a9c0
Merge branch 'patch-1' of https://github.com/lobpcg/scikit-learn into…
lobpcg Nov 2, 2021
21ce949
np.float32 ->'float32' etc plus amg tests
lobpcg Nov 3, 2021
b4852d6
Update sklearn/manifold/tests/test_spectral_embedding.py
lobpcg Nov 3, 2021
9dd1228
Update _spectral_embedding.py
lobpcg Nov 3, 2021
a75ee55
Update test_spectral_embedding.py
lobpcg Nov 3, 2021
fb09a3a
Update test_spectral_embedding.py
lobpcg Nov 3, 2021
ce059f8
Update _spectral_embedding.py
lobpcg Nov 3, 2021
37bbe2e
Update _spectral_embedding.py
lobpcg Nov 3, 2021
b9f2f0a
Update test_spectral_embedding.py
lobpcg Nov 3, 2021
82fd371
Update sklearn/manifold/_spectral_embedding.py
lobpcg Nov 3, 2021
aab94da
Update _spectral_embedding.py
lobpcg Nov 3, 2021
20a3403
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
9cb7bb0
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
f670050
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
282bbee
Update _spectral_embedding.py
lobpcg Nov 4, 2021
b126e80
Apply suggestions from code review
lobpcg Nov 4, 2021
2a1fa15
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
58ee9bb
Update sklearn/manifold/tests/test_spectral_embedding.py
lobpcg Nov 4, 2021
6a52166
Update doc/whats_new/v1.1.rst
lobpcg Nov 4, 2021
43d2146
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
af18adc
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
d59f194
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
44e111a
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
af860f4
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
e3a7687
Update v1.1.rst
lobpcg Nov 4, 2021
b25f4b0
Update test_spectral_embedding.py
lobpcg Nov 4, 2021
c5b82bd
Update sklearn/manifold/tests/test_spectral_embedding.py
lobpcg Nov 4, 2021
2e0d24a
Update test_spectral_embedding.py
lobpcg Nov 5, 2021
174548d
Update test_spectral_embedding.py
lobpcg Nov 5, 2021
bf70d66
Update test_spectral_embedding.py
lobpcg Nov 5, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/whats_new/v1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,14 @@ Changelog
backward compatibility, but this alias will be removed in 1.3.
:pr:`21177` by :user:`Julien Jerphanion <jjerphan>`.

:mod:`sklearn.manifold`
.......................

- |Enhancement| :func:`manifold.spectral_embedding` and
:class:`manifold.SpectralEmbedding` now support `np.float32` input and
preserve this dtype.
:pr:`21534` by :user:`Andrew Knyazev <lobpcg>`.

:mod:`sklearn.model_selection`
..............................

Expand Down
12 changes: 8 additions & 4 deletions sklearn/manifold/_spectral_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,9 @@ def spectral_embedding(
# problem.
if not sparse.issparse(laplacian):
warnings.warn("AMG works better for sparse matrices")
# lobpcg needs double precision floats
laplacian = check_array(laplacian, dtype=np.float64, accept_sparse=True)
laplacian = check_array(
laplacian, dtype=[np.float64, np.float32], accept_sparse=True
)
laplacian = _set_diag(laplacian, 1, norm_laplacian)

# The Laplacian matrix is always singular, having at least one zero
Expand All @@ -337,6 +338,7 @@ def spectral_embedding(
# Create initial approximation X to eigenvectors
X = random_state.rand(laplacian.shape[0], n_components + 1)
X[:, 0] = dd.ravel()
X = X.astype(laplacian.dtype)
_, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.0e-5, largest=False)
embedding = diffusion_map.T
if norm_laplacian:
Expand All @@ -346,8 +348,9 @@ def spectral_embedding(
raise ValueError

if eigen_solver == "lobpcg":
# lobpcg needs double precision floats
laplacian = check_array(laplacian, dtype=np.float64, accept_sparse=True)
laplacian = check_array(
laplacian, dtype=[np.float64, np.float32], accept_sparse=True
)
if n_nodes < 5 * n_components + 1:
# see note above under arpack why lobpcg has problems with small
# number of nodes
Expand All @@ -366,6 +369,7 @@ def spectral_embedding(
# approximation X to eigenvectors
X = random_state.rand(laplacian.shape[0], n_components + 1)
X[:, 0] = dd.ravel()
X = X.astype(laplacian.dtype)
Comment on lines 370 to +372
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish it were possible to specify dtype when using numpy PRNG!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the same. But we need to discuss the SLEP about the random state

_, diffusion_map = lobpcg(
laplacian, X, tol=1e-5, largest=False, maxiter=2000
)
Expand Down
131 changes: 107 additions & 24 deletions sklearn/manifold/tests/test_spectral_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal

# PyAMG is an optional dependency: probe for it once at import time so that
# tests relying on the "amg" eigen solver can be skipped when it is missing.
try:
    from pyamg import smoothed_aggregation_solver  # noqa
except ImportError:
    pyamg_available = False
else:
    pyamg_available = True

# Reusable marker, e.g. for individual `pytest.param` entries that need PyAMG.
skip_if_no_pyamg = pytest.mark.skipif(
    not pyamg_available,
    reason="PyAMG is required for the tests in this function.",
)

# non centered, sparse centers to check the
centers = np.array(
Expand Down Expand Up @@ -85,7 +94,16 @@ def test_sparse_graph_connected_component():
assert_array_equal(component_1, component_2)


def test_spectral_embedding_two_components(seed=36):
@pytest.mark.parametrize(
"eigen_solver",
[
"arpack",
"lobpcg",
pytest.param("amg", marks=skip_if_no_pyamg),
],
)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_spectral_embedding_two_components(eigen_solver, dtype, seed=36):
# Test spectral embedding with two components
random_state = np.random.RandomState(seed)
n_sample = 100
Expand Down Expand Up @@ -117,31 +135,46 @@ def test_spectral_embedding_two_components(seed=36):
true_label[0:n_sample] = 1

se_precomp = SpectralEmbedding(
n_components=1, affinity="precomputed", random_state=np.random.RandomState(seed)
n_components=1,
affinity="precomputed",
random_state=np.random.RandomState(seed),
eigen_solver=eigen_solver,
)
embedded_coordinate = se_precomp.fit_transform(affinity)
# Some numpy versions are touchy with types
embedded_coordinate = se_precomp.fit_transform(affinity.astype(np.float32))
# thresholding on the first components using 0.
label_ = np.array(embedded_coordinate.ravel() < 0, dtype="float")
assert normalized_mutual_info_score(true_label, label_) == pytest.approx(1.0)
for dtype in [np.float32, np.float64]:
embedded_coordinate = se_precomp.fit_transform(affinity.astype(dtype))
# thresholding on the first components using 0.
label_ = np.array(embedded_coordinate.ravel() < 0, dtype=np.int64)
assert normalized_mutual_info_score(true_label, label_) == pytest.approx(1.0)


@pytest.mark.parametrize("X", [S, sparse.csr_matrix(S)], ids=["dense", "sparse"])
def test_spectral_embedding_precomputed_affinity(X, seed=36):
@pytest.mark.parametrize(
"eigen_solver",
[
"arpack",
"lobpcg",
pytest.param("amg", marks=skip_if_no_pyamg),
],
)
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_spectral_embedding_precomputed_affinity(X, eigen_solver, dtype, seed=36):
# Test spectral embedding with precomputed kernel
gamma = 1.0
se_precomp = SpectralEmbedding(
n_components=2, affinity="precomputed", random_state=np.random.RandomState(seed)
n_components=2,
affinity="precomputed",
random_state=np.random.RandomState(seed),
eigen_solver=eigen_solver,
)
se_rbf = SpectralEmbedding(
n_components=2,
affinity="rbf",
gamma=gamma,
random_state=np.random.RandomState(seed),
eigen_solver=eigen_solver,
)
embed_precomp = se_precomp.fit_transform(rbf_kernel(X, gamma=gamma))
embed_rbf = se_rbf.fit_transform(X)
embed_precomp = se_precomp.fit_transform(rbf_kernel(X.astype(dtype), gamma=gamma))
embed_rbf = se_rbf.fit_transform(X.astype(dtype))
assert_array_almost_equal(se_precomp.affinity_matrix_, se_rbf.affinity_matrix_)
_assert_equal_with_sign_flipping(embed_precomp, embed_rbf, 0.05)

Expand Down Expand Up @@ -205,10 +238,11 @@ def test_spectral_embedding_callable_affinity(X, seed=36):
@pytest.mark.filterwarnings(
"ignore:scipy.linalg.pinv2 is deprecated:DeprecationWarning:pyamg.*"
)
def test_spectral_embedding_amg_solver(seed=36):
# Test spectral embedding with amg solver
pytest.importorskip("pyamg")

@pytest.mark.skipif(
not pyamg_available, reason="PyAMG is required for the tests in this function."
)
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_spectral_embedding_amg_solver(dtype, seed=36):
se_amg = SpectralEmbedding(
n_components=2,
affinity="nearest_neighbors",
Expand All @@ -223,8 +257,8 @@ def test_spectral_embedding_amg_solver(seed=36):
n_neighbors=5,
random_state=np.random.RandomState(seed),
)
embed_amg = se_amg.fit_transform(S)
embed_arpack = se_arpack.fit_transform(S)
embed_amg = se_amg.fit_transform(S.astype(dtype))
embed_arpack = se_arpack.fit_transform(S.astype(dtype))
_assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)

# same with special case in which amg is not actually used
Expand All @@ -239,8 +273,8 @@ def test_spectral_embedding_amg_solver(seed=36):
).toarray()
se_amg.affinity = "precomputed"
se_arpack.affinity = "precomputed"
embed_amg = se_amg.fit_transform(affinity)
embed_arpack = se_arpack.fit_transform(affinity)
embed_amg = se_amg.fit_transform(affinity.astype(dtype))
embed_arpack = se_arpack.fit_transform(affinity.astype(dtype))
_assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)


Expand All @@ -258,12 +292,15 @@ def test_spectral_embedding_amg_solver(seed=36):
@pytest.mark.filterwarnings(
"ignore:scipy.linalg.pinv2 is deprecated:DeprecationWarning:pyamg.*"
)
def test_spectral_embedding_amg_solver_failure():
@pytest.mark.skipif(
not pyamg_available, reason="PyAMG is required for the tests in this function."
)
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_spectral_embedding_amg_solver_failure(dtype, seed=36):
# Non-regression test for amg solver failure (issue #13393 on github)
pytest.importorskip("pyamg")
seed = 36
num_nodes = 100
X = sparse.rand(num_nodes, num_nodes, density=0.1, random_state=seed)
X = X.astype(dtype)
upper = sparse.triu(X) - sparse.diags(X.diagonal())
sym_matrix = upper + upper.T
embedding = spectral_embedding(
Expand Down Expand Up @@ -314,7 +351,9 @@ def test_spectral_embedding_unknown_eigensolver(seed=36):
def test_spectral_embedding_unknown_affinity(seed=36):
# Test that SpectralEmbedding fails with an unknown affinity type
se = SpectralEmbedding(
n_components=1, affinity="<unknown>", random_state=np.random.RandomState(seed)
n_components=1,
affinity="<unknown>",
random_state=np.random.RandomState(seed),
)
with pytest.raises(ValueError):
se.fit(S)
Expand Down Expand Up @@ -399,6 +438,50 @@ def test_spectral_embedding_first_eigen_vector():
assert np.std(embedding[:, 1]) > 1e-3


@pytest.mark.parametrize(
    "eigen_solver",
    [
        "arpack",
        "lobpcg",
        pytest.param("amg", marks=skip_if_no_pyamg),
    ],
)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_spectral_embedding_preserves_dtype(eigen_solver, dtype):
    """Check that `SpectralEmbedding` preserves the dtype of its input.

    The transformed data and the fitted attributes `embedding_` and
    `affinity_matrix_` must all have the same dtype as the input.

    Ideally, this would be covered by the common test
    `check_transformer_preserve_dtypes`. However, that test only runs
    with transformers implementing `transform`, while `SpectralEmbedding`
    implements only `fit_transform`.
    """
    embedder = SpectralEmbedding(
        n_components=2,
        affinity="rbf",
        eigen_solver=eigen_solver,
        random_state=0,
    )
    embedded = embedder.fit_transform(S.astype(dtype))

    assert embedded.dtype == dtype
    assert embedder.embedding_.dtype == dtype
    assert embedder.affinity_matrix_.dtype == dtype


@pytest.mark.skipif(
    pyamg_available,
    reason="PyAMG is installed and we should not test for an error.",
)
def test_error_pyamg_not_available():
    """Check that requesting the "amg" solver without PyAMG raises.

    Only runs when PyAMG could not be imported; fitting with
    `eigen_solver="amg"` is then expected to raise a `ValueError`.
    """
    embedder = SpectralEmbedding(n_components=2, affinity="rbf", eigen_solver="amg")
    expected_msg = "The eigen_solver was set to 'amg', but pyamg is not available."
    with pytest.raises(ValueError, match=expected_msg):
        embedder.fit_transform(S)


# TODO: Remove in 1.1
@pytest.mark.parametrize("affinity", ["precomputed", "precomputed_nearest_neighbors"])
def test_spectral_embedding_pairwise_deprecated(affinity):
Expand Down