sklearn/calibration.py (8 additions, 0 deletions)
@@ -233,6 +233,14 @@ class that has the highest probability, and can thus be different
        check_is_fitted(self)
        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }


class _CalibratedClassifier:
    """Probability calibration with isotonic regression or sigmoid.
sklearn/cluster/_kmeans.py (16 additions, 0 deletions)
@@ -1205,6 +1205,14 @@ def score(self, X, y=None, sample_weight=None):
        return -_labels_inertia(X, sample_weight, x_squared_norms,
                                self.cluster_centers_)[1]

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }


def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
                     old_center_buffer, compute_squared_diff,
@@ -1859,3 +1867,11 @@ def predict(self, X, sample_weight=None):

        X = self._check_test_data(X)
        return self._labels_inertia_minibatch(X, sample_weight)[0]

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }
sklearn/ensemble/_iforest.py (8 additions, 0 deletions)
@@ -469,6 +469,14 @@ def _compute_score_samples(self, X, subsample_features):
        )
        return scores

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }


def _average_path_length(n_samples_leaf):
    """
sklearn/linear_model/_logistic.py (8 additions, 0 deletions)
@@ -2084,3 +2084,11 @@ def score(self, X, y, sample_weight=None):
        scoring = get_scorer(scoring)

        return scoring(self, X, y, sample_weight=sample_weight)

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }
sklearn/linear_model/_ransac.py (8 additions, 0 deletions)
@@ -493,3 +493,11 @@ def score(self, X, y):
        check_is_fitted(self)

        return self.estimator_.score(X, y)

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }
sklearn/linear_model/_ridge.py (8 additions, 0 deletions)
@@ -1905,3 +1905,11 @@ def fit(self, X, y, sample_weight=None):
    @property
    def classes_(self):
        return self._label_binarizer.classes_

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }
rth (Member, Author) commented on Mar 5, 2020:

RidgeClassifierCV has now also started to fail; a fairly high tolerance is required for it to pass.

Reviewer (Member):

Completely unrelated to this PR, but since you've been doing this (which is helpful, thanks): do you think it would be useful for GitHub to support comments from the PR author to reviewers, as something distinct from regular review comments?

rth (Member, Author):

> do you think it would be useful for GitHub to support comments from the PR author to reviewers [...]?

Maybe, but I can't say the current way it works is an issue for me.

sklearn/linear_model/_stochastic_gradient.py (16 additions, 0 deletions)
@@ -1073,6 +1073,14 @@ def predict_log_proba(self):
    def _predict_log_proba(self, X):
        return np.log(self.predict_proba(X))

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }


class BaseSGDRegressor(RegressorMixin, BaseSGD):

@@ -1537,3 +1545,11 @@ def __init__(self, loss="squared_loss", penalty="l2", alpha=0.0001,
                         validation_fraction=validation_fraction,
                         n_iter_no_change=n_iter_no_change, warm_start=warm_start,
                         average=average)

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }
sklearn/neighbors/_kde.py (8 additions, 0 deletions)
@@ -268,3 +268,11 @@ def sample(self, n_samples=1, random_state=None):
        correction = (gammainc(0.5 * dim, 0.5 * s_sq) ** (1. / dim)
                      * self.bandwidth / np.sqrt(s_sq))
        return data[i] + X * correction[:, np.newaxis]

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'sample_weight must have positive values',
            }
        }
sklearn/svm/_classes.py (51 additions, 1 deletion)
@@ -237,6 +237,14 @@ def fit(self, X, y, sample_weight=None):

        return self

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }


class LinearSVR(RegressorMixin, LinearModel):
    """Linear Support Vector Regression.
@@ -408,6 +416,14 @@ def fit(self, X, y, sample_weight=None):

        return self

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }


class SVC(BaseSVC):
    """C-Support Vector Classification.
@@ -625,6 +641,14 @@ def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='scale',
                         break_ties=break_ties,
                         random_state=random_state)

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }
rth (Member, Author):

Of course the amount of repetition could be reduced by defining this tag in BaseSVC, but the expectation is that these estimators will be fixed one by one, and it is easier to understand what still needs fixing this way.
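For illustration only, the consolidated alternative mentioned above might look roughly like this; a sketch, assuming BaseSVC in sklearn/svm/_base.py is the shared base class of the libsvm-based classifiers (this is not part of the PR):

    class BaseSVC(ClassifierMixin, BaseLibSVM, metaclass=ABCMeta):
        # ... existing abstract base class for libsvm-based classifiers ...

        def _more_tags(self):
            # Declaring the tag once here would propagate it to every
            # subclass, at the cost of hiding which concrete estimators
            # still need their sample_weight handling fixed.
            return {
                '_xfail_test': {
                    'check_sample_weights_invariance(kind=zeros)':
                        'zero sample_weight is not equivalent to removing samples',
                }
            }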



class NuSVC(BaseSVC):
    """Nu-Support Vector Classification.
@@ -833,7 +857,9 @@ def _more_tags(self):
             '_xfail_test': {
                 'check_methods_subset_invariance':
                     'fails for the decision_function method',
-                'check_class_weight_classifiers': 'class_weight is ignored.'
+                'check_class_weight_classifiers': 'class_weight is ignored.',
+                'check_sample_weights_invariance(kind=zeros)':
+                    'zero sample_weight is not equivalent to removing samples',
             }
         }

@@ -986,6 +1012,14 @@ def probA_(self):
    def probB_(self):
        return self._probB

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }


class NuSVR(RegressorMixin, BaseLibSVM):
    """Nu Support Vector Regression.
@@ -1111,6 +1145,14 @@ def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3,
                         probability=False, cache_size=cache_size, class_weight=None,
                         verbose=verbose, max_iter=max_iter, random_state=None)

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }


class OneClassSVM(OutlierMixin, BaseLibSVM):
    """Unsupervised Outlier Detection.
@@ -1319,3 +1361,11 @@ def probA_(self):
    @property
    def probB_(self):
        return self._probB

    def _more_tags(self):
        return {
            '_xfail_test': {
                'check_sample_weights_invariance(kind=zeros)':
                    'zero sample_weight is not equivalent to removing samples',
            }
        }
sklearn/utils/estimator_checks.py (42 additions, 13 deletions)
@@ -81,7 +81,8 @@ def _yield_checks(name, estimator):
     yield check_sample_weights_not_an_array
     yield check_sample_weights_list
     yield check_sample_weights_shape
-    yield check_sample_weights_invariance
+    yield partial(check_sample_weights_invariance, kind='ones')
+    yield partial(check_sample_weights_invariance, kind='zeros')
     yield check_estimators_fit_returns_self
     yield partial(check_estimators_fit_returns_self, readonly_memmap=True)

@@ -443,6 +444,7 @@ def check_estimator(Estimator, generate_only=False):
     if isinstance(Estimator, type):
         # got a class
         checks_generator = _generate_class_checks(Estimator)
+        estimator = _construct_instance(Estimator)
     else:
         # got an instance
         estimator = Estimator
@@ -452,12 +454,19 @@
     if generate_only:
         return checks_generator

+    xfail_checks = _safe_tags(estimator, '_xfail_test')
+
     for estimator, check in checks_generator:
+        check_name = _set_check_estimator_ids(check)
+        if xfail_checks and check_name in xfail_checks:
+            # skip tests marked as a known failure and raise a warning
+            msg = xfail_checks[check_name]
+            warnings.warn(f'Skipping {check_name}: {msg}', SkipTestWarning)
+            continue
rth (Member, Author):

This change follows #16502 and is analogous to what was added in test_common.py::test_estimators. Without it, test_check_estimator_clones started to fail with this PR, since that test runs check_estimator on MiniBatchKMeans, which now has one xfail test. Skipping tests marked as xfail with a warning inside check_estimator, as done here, resolves the issue.
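As a minimal sketch of the resulting behavior (a hypothetical session; the estimator and tag names follow this PR):

    import warnings
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.utils.estimator_checks import check_estimator

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        check_estimator(MiniBatchKMeans())

    # The xfail-tagged check is skipped with a warning instead of being run:
    # SkipTestWarning: Skipping check_sample_weights_invariance(kind=zeros):
    # zero sample_weight is not equivalent to removing samples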

Reviewer (Member), on lines +462 to +465:

Can we apply the xfail decorator as a function instead of manually replicating its behavior? I.e.

    check = pytest.mark.xfail(check)

or something like that? Because we have the try / except logic just below (and we might want to update the comment about pandas).

rth (Member, Author) replied on Mar 5, 2020:

We can't use pytest in this file, since it is supposed to work without pytest installed. There is indeed some slight redundancy with _mark_xfail_checks, but I think trying to factor it out would be more trouble than it's worth; it's just six extra lines in the end. I have updated the comment about pandas.

Reviewer (Member):

I'd say the introduction of the tag is a good reason to start requiring pytest for using check_estimator now, but agreed, that's another story.

> It's just six extra lines in the end

Sure, though I find our whole test suite really hard to maintain in general.

rth (Member, Author):

> Sure, though I find our whole test suite really hard to maintain in general.

Agreed. I'm just saying that slightly more verbosity and two repetitions are easier to maintain than coming up with function wrappers. The problem is not so much lines of code as complexity.
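For context, the pytest-side counterpart that _mark_xfail_checks provides in test_common.py could be sketched roughly as follows; the helper name comes from the discussion above, but the body and wrapper name here are assumptions, not the actual implementation:

    import pytest
    from sklearn.utils.estimator_checks import (
        _safe_tags, _set_check_estimator_ids)

    def mark_xfail_if_tagged(estimator, check):
        # Hypothetical wrapper: if the estimator declares this check in its
        # '_xfail_test' tag, wrap the (estimator, check) pair in pytest.param
        # with an xfail mark; otherwise pass it through unchanged.
        xfail_checks = _safe_tags(estimator, '_xfail_test') or {}
        check_name = _set_check_estimator_ids(check)
        if check_name in xfail_checks:
            reason = xfail_checks[check_name]
            return pytest.param(estimator, check,
                                marks=pytest.mark.xfail(reason=reason))
        return estimator, check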

     try:
         check(estimator)
     except SkipTest as exception:
         # the only SkipTest thrown currently results from not
         # being able to import pandas.
         # raise warning for tests that are skipped
         warnings.warn(str(exception), SkipTestWarning)


@@ -816,7 +825,7 @@ def check_sample_weights_shape(name, estimator_orig):


 @ignore_warnings(category=FutureWarning)
-def check_sample_weights_invariance(name, estimator_orig):
+def check_sample_weights_invariance(name, estimator_orig, kind="ones"):
Reviewer (Member):

Should this check be tested?
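One way such a meta-test could look (a hypothetical sketch, not part of this PR; the estimator below is deliberately buggy and its name is illustrative):

    import numpy as np
    import pytest
    from sklearn.base import BaseEstimator, ClassifierMixin
    from sklearn.utils.estimator_checks import check_sample_weights_invariance

    class IgnoresSampleWeight(ClassifierMixin, BaseEstimator):
        def fit(self, X, y, sample_weight=None):
            # Bug under test: sample_weight is accepted but never used.
            X, y = np.asarray(X), np.asarray(y)
            self.classes_ = np.unique(y)
            self.mean_ = X.mean(axis=0)
            return self

        def predict(self, X):
            # Predictions depend on the overall training mean, so fitting on
            # extra zero-weighted padding samples changes the result.
            X = np.asarray(X)
            return np.where(X[:, 0] > self.mean_[0],
                            self.classes_[-1], self.classes_[0])

    # kind='zeros' should catch the bug and raise an AssertionError.
    with pytest.raises(AssertionError):
        check_sample_weights_invariance(
            'IgnoresSampleWeight', IgnoresSampleWeight(), kind='zeros')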

     # check that the estimators yield same results for
     # unit weights and no weights
     if (has_fit_parameter(estimator_orig, "sample_weight") and
@@ -832,25 +841,45 @@ def check_sample_weights_invariance(name, estimator_orig):
         X = np.array([[1, 3], [1, 3], [1, 3], [1, 3],
                       [2, 1], [2, 1], [2, 1], [2, 1],
                       [3, 3], [3, 3], [3, 3], [3, 3],
-                      [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.dtype('float'))
+                      [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.float64)
         y = np.array([1, 1, 1, 1, 2, 2, 2, 2,
-                      1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype('int'))
+                      1, 1, 1, 1, 2, 2, 2, 2], dtype=np.int)

+        if kind == 'ones':
+            X2 = X
+            y2 = y
+            sw2 = np.ones(shape=len(y))
+            err_msg = (f"For {name} sample_weight=None is not equivalent to "
+                       f"sample_weight=ones")
+        elif kind == 'zeros':
+            # Construct a dataset that is very different to (X, y) if weights
+            # are disregarded, but identical to (X, y) given weights.
+            X2 = np.vstack([X, X + 1])
+            y2 = np.hstack([y, 3 - y])
+            sw2 = np.ones(shape=len(y) * 2)
+            sw2[len(y):] = 0
+            X2, y2, sw2 = shuffle(X2, y2, sw2, random_state=0)
+
+            err_msg = (f"For {name} sample_weight is not equivalent "
+                       f"to removing samples")
+        else:
+            raise ValueError

         y = _enforce_estimator_tags_y(estimator1, y)
+        y2 = _enforce_estimator_tags_y(estimator2, y2)

-        estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y)))
-        estimator2.fit(X, y=y, sample_weight=None)
+        estimator1.fit(X, y=y, sample_weight=None)
+        estimator2.fit(X2, y=y2, sample_weight=sw2)

-        for method in ["predict", "transform"]:
+        for method in ["predict", "predict_proba",
+                       "decision_function", "transform"]:
             if hasattr(estimator_orig, method):
                 X_pred1 = getattr(estimator1, method)(X)
                 X_pred2 = getattr(estimator2, method)(X)
                 if sparse.issparse(X_pred1):
                     X_pred1 = X_pred1.toarray()
                     X_pred2 = X_pred2.toarray()
-                assert_allclose(X_pred1, X_pred2,
-                                err_msg="For %s sample_weight=None is not"
-                                        " equivalent to sample_weight=ones"
-                                % name)
+                assert_allclose(X_pred1, X_pred2, err_msg=err_msg)


@ignore_warnings(category=(FutureWarning, UserWarning))
Expand Down