doc/whats_new/v0.22.rst (1 addition & 1 deletion)
@@ -157,7 +157,7 @@ Changelog

 - |Efficiency| The 'liblinear' logistic regression solver is now faster and
   requires less memory.
-  :pr:`14108`, :pr:`14170` by :user:`Alex Henrie <alexhenrie>`.
+  :pr:`14108`, :pr:`14170`, :pr:`14296` by :user:`Alex Henrie <alexhenrie>`.

 - |Fix| :class:`linear_model.Ridge` with `solver='sag'` now accepts F-ordered
   and non-contiguous arrays and makes a conversion instead of failing.
sklearn/linear_model/logistic.py (1 addition & 1 deletion)
@@ -1507,7 +1507,7 @@ def fit(self, X, y, sample_weight=None):
             raise ValueError("Tolerance for stopping criteria must be "
                              "positive; got (tol=%r)" % self.tol)
 
-        if solver in ['lbfgs', 'liblinear']:
+        if solver == 'lbfgs':
             _dtype = np.float64
         else:
             _dtype = [np.float64, np.float32]
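Illustration (a minimal sketch, not part of the diff): with 'liblinear' removed
from the float64-only list, a float32 design matrix is no longer upcast to a
float64 copy before fitting, which is where the memory saving comes from. The
reported coefficients remain float64 for this solver (see out32_type in the
test changes below).

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    X = np.array([[1., 0.], [0., 1.], [1., 1.], [0., 0.]], dtype=np.float32)
    y = np.array([0, 1, 1, 0])

    clf = LogisticRegression(solver='liblinear').fit(X, y)
    # X is used in single precision internally; coef_ is still float64
    print(clf.coef_.dtype)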
sklearn/linear_model/tests/test_logistic.py (32 additions & 9 deletions)
@@ -1295,34 +1295,48 @@ def test_saga_vs_liblinear():


 @pytest.mark.parametrize('multi_class', ['ovr', 'multinomial'])
-@pytest.mark.parametrize('solver', ['newton-cg', 'saga'])
-def test_dtype_match(solver, multi_class):
+@pytest.mark.parametrize('solver', ['newton-cg', 'liblinear', 'saga'])
+@pytest.mark.parametrize('fit_intercept', [False, True])
+def test_dtype_match(solver, multi_class, fit_intercept):
     # Test that np.float32 input data is not cast to np.float64 when possible
     # and that the output is approximately the same no matter the input format.
 
+    if solver == 'liblinear' and multi_class == 'multinomial':
+        pytest.skip('liblinear does not support multinomial logistic')

Member:
Please use pytest.skip(some informative message) instead of return.

Contributor Author:
Done.
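A minimal sketch of the suggested pattern (illustrative only; the test name and
fixed values are hypothetical, not part of this diff): pytest.skip() marks the
test as skipped and surfaces the reason in the report, whereas a bare return
would be counted as a silent pass.

    import pytest

    def test_unsupported_combination():
        solver, multi_class = 'liblinear', 'multinomial'
        if solver == 'liblinear' and multi_class == 'multinomial':
            # Reported as SKIPPED (with this message) in the pytest summary
            pytest.skip('liblinear does not support multinomial logistic')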

+    out32_type = np.float64 if solver == 'liblinear' else np.float32
+
     X_32 = np.array(X).astype(np.float32)
     y_32 = np.array(Y1).astype(np.float32)
     X_64 = np.array(X).astype(np.float64)
     y_64 = np.array(Y1).astype(np.float64)
     X_sparse_32 = sp.csr_matrix(X, dtype=np.float32)
+    X_sparse_64 = sp.csr_matrix(X, dtype=np.float64)
     solver_tol = 5e-4
 
     lr_templ = LogisticRegression(
         solver=solver, multi_class=multi_class,
-        random_state=42, tol=solver_tol, fit_intercept=True)
-    # Check type consistency
+        random_state=42, tol=solver_tol, fit_intercept=fit_intercept)
+
+    # Check 32-bit type consistency
     lr_32 = clone(lr_templ)
     lr_32.fit(X_32, y_32)
-    assert lr_32.coef_.dtype == X_32.dtype
+    assert lr_32.coef_.dtype == out32_type
 
-    # check consistency with sparsity
+    # Check 32-bit type consistency with sparsity
     lr_32_sparse = clone(lr_templ)
     lr_32_sparse.fit(X_sparse_32, y_32)
-    assert lr_32_sparse.coef_.dtype == X_sparse_32.dtype
+    assert lr_32_sparse.coef_.dtype == out32_type
 
-    # Check accuracy consistency
+    # Check 64-bit type consistency
     lr_64 = clone(lr_templ)
     lr_64.fit(X_64, y_64)
-    assert lr_64.coef_.dtype == X_64.dtype
+    assert lr_64.coef_.dtype == np.float64
+
+    # Check 64-bit type consistency with sparsity
+    lr_64_sparse = clone(lr_templ)
+    lr_64_sparse.fit(X_sparse_64, y_64)
+    assert lr_64_sparse.coef_.dtype == np.float64
 
     # solver_tol bounds the norm of the loss gradient
     # dw ~= inv(H)*grad ==> |dw| ~= |inv(H)| * solver_tol, where H - hessian
@@ -1339,8 +1353,17 @@ def test_dtype_match(solver, multi_class):
     # FIXME
     atol = 1e-2
 
+    # Check accuracy consistency
     assert_allclose(lr_32.coef_, lr_64.coef_.astype(np.float32), atol=atol)
 
+    if solver == 'saga' and fit_intercept:
+        # FIXME: SAGA on sparse data fits the intercept inaccurately with the
+        # default tol and max_iter parameters.
+        atol = 1e-1
+
+    assert_allclose(lr_32.coef_, lr_32_sparse.coef_, atol=atol)
+    assert_allclose(lr_64.coef_, lr_64_sparse.coef_, atol=atol)
 
 
 def test_warm_start_converge_LR():
     # Test to see that the logistic regression converges on warm start,
sklearn/svm/liblinear.pxd (2 additions & 2 deletions)
@@ -31,8 +31,8 @@ cdef extern from "linear.h":
 cdef extern from "liblinear_helper.c":
     void copy_w(void *, model *, int)
     parameter *set_parameter(int, double, double, int, char *, char *, int, int, double)
-    problem *set_problem (char *, char *, int, int, int, double, char *)
-    problem *csr_set_problem (char *, char *, char *, char *, int, int, int, double, char *)
+    problem *set_problem (char *, int, int, int, int, double, char *, char *)
+    problem *csr_set_problem (char *, int, char *, char *, int, int, int, double, char *, char *)
 
     model *set_model(parameter *, char *, np.npy_intp *, char *, double)
 
sklearn/svm/liblinear.pyx (7 additions & 6 deletions)
@@ -25,16 +25,17 @@ def train_wrap(X, np.ndarray[np.float64_t, ndim=1, mode='c'] Y,

     if is_sparse:
         problem = csr_set_problem(
-                (<np.ndarray[np.float64_t, ndim=1, mode='c']>X.data).data,
+                (<np.ndarray>X.data).data, X.dtype == np.float64,
                 (<np.ndarray[np.int32_t, ndim=1, mode='c']>X.indices).data,
                 (<np.ndarray[np.int32_t, ndim=1, mode='c']>X.indptr).data,
-                Y.data, (<np.int32_t>X.shape[0]), (<np.int32_t>X.shape[1]),
-                (<np.int32_t>X.nnz), bias, sample_weight.data)
+                (<np.int32_t>X.shape[0]), (<np.int32_t>X.shape[1]),
+                (<np.int32_t>X.nnz), bias, sample_weight.data, Y.data)
     else:
         problem = set_problem(
-                (<np.ndarray[np.float64_t, ndim=2, mode='c']>X).data,
-                Y.data, (<np.int32_t>X.shape[0]), (<np.int32_t>X.shape[1]),
-                (<np.int32_t>np.count_nonzero(X)), bias, sample_weight.data)
+                (<np.ndarray>X).data, X.dtype == np.float64,
+                (<np.int32_t>X.shape[0]), (<np.int32_t>X.shape[1]),
+                (<np.int32_t>np.count_nonzero(X)), bias, sample_weight.data,
+                Y.data)
 
     cdef np.ndarray[np.int32_t, ndim=1, mode='c'] \
         class_weight_label = np.arange(class_weight.shape[0], dtype=np.intc)
sklearn/svm/src/liblinear/liblinear_helper.c (36 additions & 20 deletions)
@@ -15,9 +15,11 @@
  *
  * If bias is > 0, we append an item at the end.
  */
-static struct feature_node **dense_to_sparse(double *x, int n_samples,
-        int n_features, int n_nonzero, double bias)
+static struct feature_node **dense_to_sparse(char *x, int double_precision,
+        int n_samples, int n_features, int n_nonzero, double bias)
 {
+    float *x32 = (float *)x;
+    double *x64 = (double *)x;
     struct feature_node **sparse;
     int i, j; /* number of nonzero elements in row i */
     struct feature_node *T; /* pointer to the top of the stack */
@@ -38,12 +40,21 @@ static struct feature_node **dense_to_sparse(double *x, int n_samples,
         sparse[i] = T;
 
         for (j=1; j<=n_features; ++j) {
-            if (*x != 0) {
-                T->value = *x;
-                T->index = j;
-                ++ T;
+            if (double_precision) {
Member:
Is the optimiser likely to compile this out of the loop?

Contributor Author:
Yes, but even if it doesn't, the CPU's branch predictor will reduce the cost of the if statement to zero.
+                if (*x64 != 0) {
+                    T->value = *x64;
+                    T->index = j;
+                    ++ T;
+                }
+                ++ x64; /* go to next element */
+            } else {
+                if (*x32 != 0) {
+                    T->value = *x32;
+                    T->index = j;
+                    ++ T;
+                }
+                ++ x32; /* go to next element */
             }
-            ++ x; /* go to next element */
         }
 
         /* set bias element */
@@ -63,11 +74,14 @@


 /*
- * Convert scipy.sparse.csr to libsvm's sparse data structure
+ * Convert scipy.sparse.csr to liblinear's sparse data structure
  */
-static struct feature_node **csr_to_sparse(double *values, int *indices,
-        int *indptr, int n_samples, int n_features, int n_nonzero, double bias)
+static struct feature_node **csr_to_sparse(char *x, int double_precision,
+        int *indices, int *indptr, int n_samples, int n_features, int n_nonzero,
+        double bias)
 {
+    float *x32 = (float *)x;
+    double *x64 = (double *)x;
     struct feature_node **sparse;
     int i, j=0, k=0, n;
     struct feature_node *T;
@@ -89,8 +103,8 @@ static struct feature_node **csr_to_sparse(double *values, int *indices,
         n = indptr[i+1] - indptr[i]; /* count elements in row i */
 
         for (j=0; j<n; ++j) {
-            T->value = values[k];
-            T->index = indices[k] + 1; /* libsvm uses 1-based indexing */
+            T->value = double_precision ? x64[k] : x32[k];
+            T->index = indices[k] + 1; /* liblinear uses 1-based indexing */
             ++T;
             ++k;
         }
@@ -110,8 +124,9 @@ static struct feature_node **csr_to_sparse(double *values, int *indices,
     return sparse;
 }
 
-struct problem * set_problem(char *X, char *Y, int n_samples, int n_features,
-        int n_nonzero, double bias, char* sample_weight)
+struct problem * set_problem(char *X, int double_precision_X, int n_samples,
+        int n_features, int n_nonzero, double bias, char* sample_weight,
+        char *Y)
 {
     struct problem *problem;
     /* not performant but simple */
@@ -127,7 +142,8 @@ struct problem * set_problem(char *X, char *Y, int n_samples, int n_features,

     problem->y = (double *) Y;
     problem->sample_weight = (double *) sample_weight;
-    problem->x = dense_to_sparse((double *) X, n_samples, n_features, n_nonzero, bias);
+    problem->x = dense_to_sparse(X, double_precision_X, n_samples, n_features,
+            n_nonzero, bias);
     problem->bias = bias;
     problem->sample_weight = sample_weight;
     if (problem->x == NULL) {
@@ -138,10 +154,10 @@ struct problem * set_problem(char *X, char *Y, int n_samples, int n_features,
     return problem;
 }
 
-struct problem * csr_set_problem (char *values, char *indices, char *indptr,
-        char *Y, int n_samples, int n_features, int n_nonzero, double bias,
-        char *sample_weight) {
-
+struct problem * csr_set_problem (char *X, int double_precision_X,
+        char *indices, char *indptr, int n_samples, int n_features,
+        int n_nonzero, double bias, char *sample_weight, char *Y)
+{
     struct problem *problem;
     problem = malloc (sizeof (struct problem));
     if (problem == NULL) return NULL;
@@ -155,7 +171,7 @@ struct problem * csr_set_problem (char *values, char *indices, char *indptr,
     }
 
     problem->y = (double *) Y;
-    problem->x = csr_to_sparse((double *) values, (int *) indices,
+    problem->x = csr_to_sparse(X, double_precision_X, (int *) indices,
             (int *) indptr, n_samples, n_features, n_nonzero, bias);
     problem->bias = bias;
     problem->sample_weight = sample_weight;