-
-
Notifications
You must be signed in to change notification settings - Fork 26.2k
PERF Support converting 32-bit matrices directly to liblinear format … #14296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,9 +15,11 @@ | |
* | ||
* If bias is > 0, we append an item at the end. | ||
*/ | ||
static struct feature_node **dense_to_sparse(double *x, int n_samples, | ||
int n_features, int n_nonzero, double bias) | ||
static struct feature_node **dense_to_sparse(char *x, int double_precision, | ||
int n_samples, int n_features, int n_nonzero, double bias) | ||
{ | ||
float *x32 = (float *)x; | ||
double *x64 = (double *)x; | ||
struct feature_node **sparse; | ||
int i, j; /* number of nonzero elements in row i */ | ||
struct feature_node *T; /* pointer to the top of the stack */ | ||
|
@@ -38,12 +40,21 @@ static struct feature_node **dense_to_sparse(double *x, int n_samples, | |
sparse[i] = T; | ||
|
||
for (j=1; j<=n_features; ++j) { | ||
if (*x != 0) { | ||
T->value = *x; | ||
T->index = j; | ||
++ T; | ||
if (double_precision) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is the optimiser likely to compile this out of the loop? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but even if it doesn't, the CPU's branch predictor will reduce the cost of the if statement to zero. |
||
if (*x64 != 0) { | ||
T->value = *x64; | ||
T->index = j; | ||
++ T; | ||
} | ||
++ x64; /* go to next element */ | ||
} else { | ||
if (*x32 != 0) { | ||
T->value = *x32; | ||
T->index = j; | ||
++ T; | ||
} | ||
++ x32; /* go to next element */ | ||
} | ||
++ x; /* go to next element */ | ||
} | ||
|
||
/* set bias element */ | ||
|
@@ -63,11 +74,14 @@ static struct feature_node **dense_to_sparse(double *x, int n_samples, | |
|
||
|
||
/* | ||
* Convert scipy.sparse.csr to libsvm's sparse data structure | ||
* Convert scipy.sparse.csr to liblinear's sparse data structure | ||
*/ | ||
static struct feature_node **csr_to_sparse(double *values, int *indices, | ||
int *indptr, int n_samples, int n_features, int n_nonzero, double bias) | ||
static struct feature_node **csr_to_sparse(char *x, int double_precision, | ||
int *indices, int *indptr, int n_samples, int n_features, int n_nonzero, | ||
double bias) | ||
{ | ||
float *x32 = (float *)x; | ||
double *x64 = (double *)x; | ||
struct feature_node **sparse; | ||
int i, j=0, k=0, n; | ||
struct feature_node *T; | ||
|
@@ -89,8 +103,8 @@ static struct feature_node **csr_to_sparse(double *values, int *indices, | |
n = indptr[i+1] - indptr[i]; /* count elements in row i */ | ||
|
||
for (j=0; j<n; ++j) { | ||
T->value = values[k]; | ||
T->index = indices[k] + 1; /* libsvm uses 1-based indexing */ | ||
T->value = double_precision ? x64[k] : x32[k]; | ||
T->index = indices[k] + 1; /* liblinear uses 1-based indexing */ | ||
++T; | ||
++k; | ||
} | ||
|
@@ -110,8 +124,9 @@ static struct feature_node **csr_to_sparse(double *values, int *indices, | |
return sparse; | ||
} | ||
|
||
struct problem * set_problem(char *X, char *Y, int n_samples, int n_features, | ||
int n_nonzero, double bias, char* sample_weight) | ||
struct problem * set_problem(char *X, int double_precision_X, int n_samples, | ||
int n_features, int n_nonzero, double bias, char* sample_weight, | ||
char *Y) | ||
{ | ||
struct problem *problem; | ||
/* not performant but simple */ | ||
|
@@ -127,7 +142,8 @@ struct problem * set_problem(char *X, char *Y, int n_samples, int n_features, | |
|
||
problem->y = (double *) Y; | ||
problem->sample_weight = (double *) sample_weight; | ||
problem->x = dense_to_sparse((double *) X, n_samples, n_features, n_nonzero, bias); | ||
problem->x = dense_to_sparse(X, double_precision_X, n_samples, n_features, | ||
n_nonzero, bias); | ||
problem->bias = bias; | ||
problem->sample_weight = sample_weight; | ||
if (problem->x == NULL) { | ||
|
@@ -138,10 +154,10 @@ struct problem * set_problem(char *X, char *Y, int n_samples, int n_features, | |
return problem; | ||
} | ||
|
||
struct problem * csr_set_problem (char *values, char *indices, char *indptr, | ||
char *Y, int n_samples, int n_features, int n_nonzero, double bias, | ||
char *sample_weight) { | ||
|
||
struct problem * csr_set_problem (char *X, int double_precision_X, | ||
char *indices, char *indptr, int n_samples, int n_features, | ||
int n_nonzero, double bias, char *sample_weight, char *Y) | ||
{ | ||
struct problem *problem; | ||
problem = malloc (sizeof (struct problem)); | ||
if (problem == NULL) return NULL; | ||
|
@@ -155,7 +171,7 @@ struct problem * csr_set_problem (char *values, char *indices, char *indptr, | |
} | ||
|
||
problem->y = (double *) Y; | ||
problem->x = csr_to_sparse((double *) values, (int *) indices, | ||
problem->x = csr_to_sparse(X, double_precision_X, (int *) indices, | ||
(int *) indptr, n_samples, n_features, n_nonzero, bias); | ||
problem->bias = bias; | ||
problem->sample_weight = sample_weight; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use
pytest.skip(some informative message)
instead of return. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.