Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
"sklearn.linear_model._sag_fast",
"sklearn.linear_model._sgd_fast",
"sklearn.manifold._barnes_hut_tsne",
"sklearn.manifold._utils",
"sklearn.metrics.cluster._expected_mutual_info_fast",
"sklearn.metrics._pairwise_distances_reduction._datasets_pair",
"sklearn.metrics._pairwise_distances_reduction._middle_term_computer",
Expand Down
14 changes: 7 additions & 7 deletions sklearn/manifold/_utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ from libc cimport math
import numpy as np
cimport numpy as cnp

cnp.import_array()


cdef extern from "numpy/npy_math.h":
Expand All @@ -12,8 +11,9 @@ cdef extern from "numpy/npy_math.h":
cdef float EPSILON_DBL = 1e-8
cdef float PERPLEXITY_TOLERANCE = 1e-5

cpdef cnp.ndarray[cnp.float32_t, ndim=2] _binary_search_perplexity(
cnp.ndarray[cnp.float32_t, ndim=2] sqdistances,
# TODO: have this function support float32 and float64 and preserve inputs' dtypes.
def _binary_search_perplexity(
const cnp.float32_t[:, :] sqdistances,
float desired_perplexity,
int verbose):
"""Binary search for sigmas of conditional Gaussians.
Expand All @@ -23,7 +23,7 @@ cpdef cnp.ndarray[cnp.float32_t, ndim=2] _binary_search_perplexity(

Parameters
----------
sqdistances : array-like, shape (n_samples, n_neighbors)
sqdistances : ndarray of shape (n_samples, n_neighbors), dtype=np.float32
Distances between training samples and their k nearest neighbors.
When using the exact method, this is a square (n_samples, n_samples)
distance matrix. The TSNE default metric is "euclidean" which is
Expand All @@ -37,7 +37,7 @@ cpdef cnp.ndarray[cnp.float32_t, ndim=2] _binary_search_perplexity(

Returns
-------
P : array, shape (n_samples, n_samples)
P : ndarray of shape (n_samples, n_samples), dtype=np.float64
Probabilities of conditional Gaussian distributions p_i|j.
"""
# Maximum number of binary search steps
Expand All @@ -63,7 +63,7 @@ cpdef cnp.ndarray[cnp.float32_t, ndim=2] _binary_search_perplexity(

# This array is later used as a 32bit array. It has multiple intermediate
# floating point additions that benefit from the extra precision
cdef cnp.ndarray[cnp.float64_t, ndim=2] P = np.zeros(
cdef cnp.float64_t[:, :] P = np.zeros(
(n_samples, n_neighbors), dtype=np.float64)

for i in range(n_samples):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to change the return line to:

return P.astype(np.float32)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the current behavior. Currently it returns a float64 array, because callers go through the `def` part of the function, which does not enforce the declared return type.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather keep the current behavior

Expand Down Expand Up @@ -118,4 +118,4 @@ cpdef cnp.ndarray[cnp.float32_t, ndim=2] _binary_search_perplexity(
if verbose:
print("[t-SNE] Mean sigma: %f"
% np.mean(math.sqrt(n_samples / beta_sum)))
return P
return np.asarray(P)