MRG Drop legacy python / remove six dependencies #12639

Merged: 32 commits, Jan 3, 2019
Commits
af354b4  Start removing python 2.7 (amueller, Nov 20, 2018)
b254876  remove xrange (amueller, Nov 20, 2018)
f01b60f  removing xrange and more six stuff (amueller, Nov 20, 2018)
d0dd1d1  remove string_types and six imports (amueller, Nov 20, 2018)
4d1e71e  six no more (amueller, Nov 20, 2018)
cb844cd  six iteritems (amueller, Nov 21, 2018)
067a1e3  six metaclass (amueller, Nov 21, 2018)
44ec241  metaclass with two base classes (amueller, Nov 21, 2018)
185e0bf  multi-line metaclasses (amueller, Nov 21, 2018)
6791cbf  more metaclass and b fun (amueller, Nov 21, 2018)
adfef75  getting rid of six (and python2) (amueller, Nov 21, 2018)
aa3f485  another moves (amueller, Nov 21, 2018)
6a5815b  build on 32bit python3.5 (amueller, Nov 21, 2018)
906576e  remove b in pyx (amueller, Nov 21, 2018)
4031580  minor six fixes (amueller, Nov 21, 2018)
8aa75f1  typo (amueller, Nov 21, 2018)
3d0e648  Merge branch 'master' into drop_legacy_python (amueller, Dec 14, 2018)
28c9fd7  unused imports, minor cleanups (amueller, Dec 14, 2018)
03bf639  remove six import from openml test (amueller, Dec 14, 2018)
14c3219  remove six from bicluster example (amueller, Dec 14, 2018)
c450e95  revert externals (amueller, Dec 14, 2018)
559db2b  fix some encoding stuff (amueller, Dec 14, 2018)
fb6a96a  fix more bytes issues/typos (amueller, Dec 14, 2018)
823e188  Merge branch 'master' into drop_legacy_python (amueller, Dec 17, 2018)
9d146f2  Merge branch 'master' into drop_legacy_python (amueller, Dec 28, 2018)
e414e69  undo encode() changes (amueller, Dec 28, 2018)
8c849be  fix cython b (amueller, Dec 28, 2018)
eb8a3ce  fix remark by roman (amueller, Dec 28, 2018)
ac02f51  Apply suggestions from code review (rth, Dec 28, 2018)
3cce1c6  pep8 (amueller, Dec 28, 2018)
b849e4e  string formatting fun (amueller, Dec 28, 2018)
3df7678  more string formatting fun (amueller, Dec 28, 2018)
8 changes: 4 additions & 4 deletions benchmarks/bench_plot_fastkmeans.py
@@ -104,15 +104,15 @@ def compute_bench_2(chunks):
    results = compute_bench(samples_range, features_range)
    results_2 = compute_bench_2(chunks)

-   max_time = max([max(i) for i in [t for (label, t) in six.iteritems(results)
+   max_time = max([max(i) for i in [t for (label, t) in results.items()
                    if "speed" in label]])
    max_inertia = max([max(i) for i in [
-       t for (label, t) in six.iteritems(results)
+       t for (label, t) in results.items()
        if "speed" not in label]])

    fig = plt.figure('scikit-learn K-Means benchmark results')
    for c, (label, timings) in zip('brcy',
-                                  sorted(six.iteritems(results))):
+                                  sorted(results.items())):
        if 'speed' in label:
            ax = fig.add_subplot(2, 2, 1, projection='3d')
            ax.set_zlim3d(0.0, max_time * 1.1)
@@ -129,7 +129,7 @@ def compute_bench_2(chunks):

    i = 0
    for c, (label, timings) in zip('br',
-                                  sorted(six.iteritems(results_2))):
+                                  sorted(results_2.items())):
        i += 1
        ax = fig.add_subplot(2, 2, i + 2)
        y = np.asarray(timings)
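This is the core `six.iteritems` migration seen throughout the PR: on Python 2, `iteritems()` avoided copying the pairs into a list; on Python 3, `dict.items()` already returns a lazy view, so the wrapper can simply go. A minimal sketch of the pattern, using a made-up `results` dict::

    # Hypothetical data, for illustration only.
    results = {'kmeans speed': [0.5, 1.2], 'kmeans quality': [0.9, 0.8]}

    # Python 2 era (via six): for label, t in six.iteritems(results): ...
    # Python 3: items() is a lazy view; sorted() gives a deterministic order.
    for label, timings in sorted(results.items()):
        print(label, max(timings))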
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_omp_lars.py
@@ -109,7 +109,7 @@ def compute_bench(samples_range, features_range):

    import matplotlib.pyplot as plt
    fig = plt.figure('scikit-learn OMP vs. LARS benchmark results')
-   for i, (label, timings) in enumerate(sorted(six.iteritems(results))):
+   for i, (label, timings) in enumerate(sorted(results.items())):
        ax = fig.add_subplot(1, 2, i+1)
        vmax = max(1 - timings.min(), -1 + timings.max())
        plt.matshow(timings, fignum=False, vmin=1 - vmax, vmax=1 + vmax)
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_svd.py
@@ -66,7 +66,7 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50):
    label = 'scikit-learn singular value decomposition benchmark results'
    fig = plt.figure(label)
    ax = fig.gca(projection='3d')
-   for c, (label, timings) in zip('rbg', sorted(six.iteritems(results))):
+   for c, (label, timings) in zip('rbg', sorted(results.items())):
        X, Y = np.meshgrid(samples_range, features_range)
        Z = np.asarray(timings).reshape(samples_range.shape[0],
                                        features_range.shape[0])
3 changes: 1 addition & 2 deletions benchmarks/bench_random_projections.py
@@ -19,7 +19,6 @@
import scipy.sparse as sp

from sklearn import clone
-from sklearn.externals.six.moves import xrange
from sklearn.random_projection import (SparseRandomProjection,
                                       GaussianRandomProjection,
                                       johnson_lindenstrauss_min_dim)
@@ -212,7 +211,7 @@ def print_row(clf_type, time_fit, time_transform):
for name in selected_transformers:
    print("Perform benchmarks for %s..." % name)

-   for iteration in xrange(opts.n_times):
+   for iteration in range(opts.n_times):
        print("\titer %s..." % iteration, end="")
        time_to_fit, time_to_transform = bench_scikit_transformer(X_dense,
                                                                  transformers[name])
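The `xrange` renames are equally mechanical: Python 3's builtin `range` is a lazy sequence with the semantics of Python 2's `xrange`. A quick sketch::

    import sys

    r = range(10 ** 8)        # O(1) memory: no list of 10**8 ints is built
    print(sys.getsizeof(r))   # a few dozen bytes
    print(len(r), r[-1])      # ranges support len() and indexing
    print(99999999 in r)      # and O(1) membership tests for ints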
45 changes: 21 additions & 24 deletions benchmarks/bench_sample_without_replacement.py
@@ -15,7 +15,6 @@
import numpy as np
import random

-from sklearn.externals.six.moves import xrange
from sklearn.utils.random import sample_without_replacement


@@ -90,49 +89,47 @@ def bench_sample(sampling, n_population, n_samples):
    # Set Python core input
    sampling_algorithm["python-core-sample"] = \
        lambda n_population, n_sample: \
-       random.sample(xrange(n_population), n_sample)
+       random.sample(range(n_population), n_sample)

    ###########################################################################
    # Set custom automatic method selection
    sampling_algorithm["custom-auto"] = \
        lambda n_population, n_samples, random_state=None: \
-       sample_without_replacement(n_population,
-                                  n_samples,
-                                  method="auto",
-                                  random_state=random_state)
+       sample_without_replacement(n_population, n_samples, method="auto",
+                                  random_state=random_state)

    ###########################################################################
    # Set custom tracking based method
    sampling_algorithm["custom-tracking-selection"] = \
        lambda n_population, n_samples, random_state=None: \
        sample_without_replacement(n_population,
                                   n_samples,
                                   method="tracking_selection",
                                   random_state=random_state)

    ###########################################################################
    # Set custom reservoir based method
    sampling_algorithm["custom-reservoir-sampling"] = \
        lambda n_population, n_samples, random_state=None: \
        sample_without_replacement(n_population,
                                   n_samples,
                                   method="reservoir_sampling",
                                   random_state=random_state)

    ###########################################################################
    # Set custom reservoir based method
    sampling_algorithm["custom-pool"] = \
        lambda n_population, n_samples, random_state=None: \
        sample_without_replacement(n_population,
                                   n_samples,
                                   method="pool",
                                   random_state=random_state)

    ###########################################################################
    # Numpy permutation based
    sampling_algorithm["numpy-permutation"] = \
        lambda n_population, n_sample: \
        np.random.permutation(n_population)[:n_sample]

    ###########################################################################
    # Remove unspecified algorithm
@@ -156,11 +153,11 @@ def bench_sample(sampling, n_population, n_samples):
print("Perform benchmarks for %s..." % name, end="")
time[name] = np.zeros(shape=(opts.n_steps, opts.n_times))

for step in xrange(opts.n_steps):
for it in xrange(opts.n_times):
for step in range(opts.n_steps):
for it in range(opts.n_times):
time[name][step, it] = bench_sample(sampling_algorithm[name],
opts.n_population,
n_samples[step])
opts.n_population,
n_samples[step])

print("done")

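Note that `random.sample(range(n_population), n_sample)` stays efficient: `random.sample` only needs a sequence supporting `len()` and indexing, which a Python 3 `range` provides without materializing the population. A sketch::

    import random

    random.seed(0)
    picks = random.sample(range(1000000), 5)  # no million-element list built
    print(picks)  # five distinct ints from [0, 1000000)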
9 changes: 4 additions & 5 deletions doc/conf.py
@@ -15,7 +15,6 @@
from __future__ import print_function
import sys
import os
-from sklearn.externals.six import u

# If extensions (or modules to document with autodoc) are in another
# directory, add these directories to sys.path here. If the directory
@@ -74,8 +73,8 @@
master_doc = 'index'

# General information about the project.
-project = u('scikit-learn')
-copyright = u('2007 - 2018, scikit-learn developers (BSD License)')
+project = 'scikit-learn'
+copyright = '2007 - 2018, scikit-learn developers (BSD License)'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -214,8 +213,8 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass
# [howto/manual]).
-latex_documents = [('index', 'user_guide.tex', u('scikit-learn user guide'),
-                    u('scikit-learn developers'), 'manual'), ]
+latex_documents = [('index', 'user_guide.tex', 'scikit-learn user guide',
+                    'scikit-learn developers', 'manual'), ]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
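Dropping `six.u` is safe because every `str` literal in Python 3 is already Unicode; the wrapper existed only so the same literal produced `unicode` on Python 2. A sketch::

    # Python 2 era: project = u('scikit-learn')  ->  u'scikit-learn'
    project = 'scikit-learn'  # a Python 3 literal is Unicode text already
    assert isinstance(project, str)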
4 changes: 1 addition & 3 deletions doc/developers/contributing.rst
@@ -908,9 +908,7 @@ in the examples.
Python versions supported
-------------------------

-All scikit-learn code should work unchanged in Python 3.5 or
-newer.
-
+Since scikit-learn 0.21, only Python 3.5 and newer is supported.

.. _code_review:

4 changes: 2 additions & 2 deletions doc/developers/performance.rst
@@ -227,13 +227,13 @@ Now restart IPython and let us use this new toy::
    178                                           # values justified in the paper
    179       48          144      3.0      0.0   alpha = 1
    180       48          113      2.4      0.0   beta = 0.1
-   181      638         1880      2.9      0.1   for n_iter in xrange(1, max_iter + 1):
+   181      638         1880      2.9      0.1   for n_iter in range(1, max_iter + 1):
    182      638       195133    305.9     10.2       grad = np.dot(WtW, H) - WtV
    183      638       495761    777.1     25.9       proj_gradient = norm(grad[np.logical_or(grad < 0, H > 0)])
    184      638         2449      3.8      0.1       if proj_gradient < tol:
    185       48          130      2.7      0.0           break
    186
-   187     1474         4474      3.0      0.2   for inner_iter in xrange(1, 20):
+   187     1474         4474      3.0      0.2   for inner_iter in range(1, 20):
    188     1474        83833     56.9      4.4       Hn = H - alpha * grad
    189                                               # Hn = np.where(Hn > 0, Hn, 0)
    190     1474       194239    131.8     10.1       Hn = _pos(Hn)
8 changes: 4 additions & 4 deletions examples/applications/plot_out_of_core_classification.py
@@ -40,8 +40,8 @@
import matplotlib.pyplot as plt
from matplotlib import rcParams

-from sklearn.externals.six.moves import html_parser
-from sklearn.externals.six.moves.urllib.request import urlretrieve
+from html.parser import HTMLParser
+from urllib.request import urlretrieve
from sklearn.datasets import get_data_home
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import SGDClassifier
@@ -60,11 +60,11 @@ def _not_in_sphinx():
#


-class ReutersParser(html_parser.HTMLParser):
+class ReutersParser(HTMLParser):
    """Utility class to parse a SGML file and yield documents one at a time."""

    def __init__(self, encoding='latin-1'):
-       html_parser.HTMLParser.__init__(self)
+       HTMLParser.__init__(self)
        self._reset()
        self.encoding = encoding
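`six.moves` was a table of aliases papering over the Python 3 stdlib reorganization; with Python 2 gone, the real locations are imported directly (`six.moves.html_parser` becomes `html.parser`, `six.moves.urllib.request` becomes `urllib.request`). A self-contained sketch of the parser pattern, with a hypothetical `DemoParser`::

    from html.parser import HTMLParser


    class DemoParser(HTMLParser):
        """Tiny HTMLParser subclass mirroring the ReutersParser setup."""

        def __init__(self):
            HTMLParser.__init__(self)  # as in the example; super() works too
            self.tags = []

        def handle_starttag(self, tag, attrs):
            self.tags.append(tag)


    parser = DemoParser()
    parser.feed('<html><body><p>hi</p></body></html>')
    print(parser.tags)  # ['html', 'body', 'p']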
3 changes: 1 addition & 2 deletions examples/applications/svm_gui.py
@@ -39,7 +39,6 @@

from sklearn import svm
from sklearn.datasets import dump_svmlight_file
-from sklearn.externals.six.moves import xrange

y_min, y_max = -50, 50
x_min, x_max = -50, 50
@@ -187,7 +186,7 @@ def update_example(self, model, idx):

    def update(self, event, model):
        if event == "examples_loaded":
-           for i in xrange(len(model.data)):
+           for i in range(len(model.data)):
                self.update_example(model, i)

        if event == "example_added":
5 changes: 2 additions & 3 deletions examples/applications/wikipedia_principal_eigenvector.py
@@ -47,8 +47,7 @@
from joblib import Memory

from sklearn.decomposition import randomized_svd
-from sklearn.externals.six.moves.urllib.request import urlopen
-from sklearn.externals.six import iteritems
+from urllib.request import urlopen


print(__doc__)
@@ -173,7 +172,7 @@ def get_adjacency_matrix(redirects_filename, page_links_filename, limit=None):
# stop after 5M links to make it possible to work in RAM
X, redirects, index_map = get_adjacency_matrix(
    redirects_filename, page_links_filename, limit=5000000)
-names = dict((i, name) for name, i in iteritems(index_map))
+names = dict((i, name) for name, i in index_map.items())

print("Computing the principal singular vectors using randomized_svd")
t0 = time()
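The generator expression survives the migration verbatim; under Python 3 a dict comprehension is the more idiomatic spelling for inverting the mapping. A sketch with a toy `index_map` (hypothetical values)::

    index_map = {'Alan_Turing': 0, 'Ada_Lovelace': 1}

    names = dict((i, name) for name, i in index_map.items())  # as in the diff
    names2 = {i: name for name, i in index_map.items()}       # comprehension
    assert names == names2 == {0: 'Alan_Turing', 1: 'Ada_Lovelace'}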
3 changes: 1 addition & 2 deletions examples/bicluster/plot_bicluster_newsgroups.py
@@ -32,7 +32,6 @@

from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.cluster import MiniBatchKMeans
-from sklearn.externals.six import iteritems
from sklearn.datasets.twenty_newsgroups import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.cluster import v_measure_score
@@ -116,7 +115,7 @@ def most_common(d):

    Like Counter.most_common in Python >=2.7.
    """
-   return sorted(iteritems(d), key=operator.itemgetter(1), reverse=True)
+   return sorted(d.items(), key=operator.itemgetter(1), reverse=True)


bicluster_ncuts = list(bicluster_ncut(i)
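With Python 2.6 long out of the picture, this helper duplicates `collections.Counter.most_common`, which sorts by count descending the same way. A sketch of the equivalence::

    import operator
    from collections import Counter

    d = {'word': 3, 'vector': 5, 'cluster': 1}

    by_count = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
    print(by_count)  # [('vector', 5), ('word', 3), ('cluster', 1)]
    assert Counter(d).most_common() == by_count  # stdlib equivalent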
2 changes: 0 additions & 2 deletions examples/ensemble/plot_adaboost_multiclass.py
@@ -29,8 +29,6 @@
#
# License: BSD 3 clause

-from sklearn.externals.six.moves import zip
-
import matplotlib.pyplot as plt

from sklearn.datasets import make_gaussian_quantiles
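`six.moves.zip` aliased `itertools.izip` on Python 2 so that `zip` was lazy everywhere; the Python 3 builtin is already an iterator, so the import vanishes with no replacement. A sketch::

    pairs = zip(range(3), 'abc')  # lazy iterator, like six.moves.zip
    print(next(pairs))            # (0, 'a')
    print(list(pairs))            # [(1, 'b'), (2, 'c')]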
6 changes: 3 additions & 3 deletions sklearn/base.py
@@ -10,7 +10,7 @@

import numpy as np
from scipy import sparse
-from .externals import six
+
from . import __version__


@@ -58,7 +58,7 @@ def clone(estimator, safe=True):
                        % (repr(estimator), type(estimator)))
    klass = estimator.__class__
    new_object_params = estimator.get_params(deep=False)
-   for name, param in six.iteritems(new_object_params):
+   for name, param in new_object_params.items():
        new_object_params[name] = clone(param, safe=False)
    new_object = klass(**new_object_params)
    params_set = new_object.get_params(deep=False)
@@ -97,7 +97,7 @@ def _pprint(params, offset=0, printer=repr):
    params_list = list()
    this_line_length = offset
    line_sep = ',\n' + (1 + offset // 2) * ' '
-   for i, (k, v) in enumerate(sorted(six.iteritems(params))):
+   for i, (k, v) in enumerate(sorted(params.items())):
        if type(v) is float:
            # use str for representing floating point numbers
            # this way we get consistent representation across
4 changes: 1 addition & 3 deletions sklearn/cluster/bicluster.py
@@ -14,7 +14,6 @@

from . import KMeans, MiniBatchKMeans
from ..base import BaseEstimator, BiclusterMixin
-from ..externals import six
from ..utils import check_random_state

from ..utils.extmath import (make_nonnegative, randomized_svd,
@@ -85,8 +84,7 @@ def _log_normalize(X):
    return L - row_avg - col_avg + avg


-class BaseSpectral(six.with_metaclass(ABCMeta, BaseEstimator,
-                                      BiclusterMixin)):
+class BaseSpectral(BaseEstimator, BiclusterMixin, metaclass=ABCMeta):
    """Base class for spectral biclustering."""

    @abstractmethod
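`six.with_metaclass(ABCMeta, ...)` synthesized a temporary base class so one spelling could set a metaclass on both Python 2 (`__metaclass__`) and Python 3; the Python 3 `class` statement takes `metaclass=` directly, even with multiple bases. A sketch with hypothetical stand-in bases::

    from abc import ABCMeta, abstractmethod


    class EstimatorBase:  # stand-ins for BaseEstimator / BiclusterMixin
        pass


    class MixinBase:
        pass


    class BaseSpectralSketch(EstimatorBase, MixinBase, metaclass=ABCMeta):
        @abstractmethod
        def _fit(self, X):
            """Concrete subclasses must implement _fit."""


    try:
        BaseSpectralSketch()  # abstract: instantiation raises TypeError
    except TypeError as exc:
        print(exc)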
3 changes: 1 addition & 2 deletions sklearn/cluster/birch.py
@@ -11,7 +11,6 @@

from ..metrics.pairwise import euclidean_distances
from ..base import TransformerMixin, ClusterMixin, BaseEstimator
-from ..externals.six.moves import xrange
from ..utils import check_array
from ..utils.extmath import row_norms, safe_sparse_dot
from ..utils.validation import check_is_fitted
@@ -29,7 +28,7 @@ def _iterate_sparse_X(X):
    X_data = X.data
    X_indptr = X.indptr

-   for i in xrange(n_samples):
+   for i in range(n_samples):
        row = np.zeros(X.shape[1])
        startptr, endptr = X_indptr[i], X_indptr[i + 1]
        nonzero_indices = X_indices[startptr:endptr]
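`_iterate_sparse_X` above walks a CSR matrix row by row: `indptr[i]:indptr[i + 1]` delimits row `i`'s entries in `indices` and `data`. A sketch of the same pattern on a toy matrix::

    import numpy as np
    import scipy.sparse as sp

    X = sp.csr_matrix(np.array([[0., 2., 0.],
                                [1., 0., 3.]]))
    for i in range(X.shape[0]):
        row = np.zeros(X.shape[1])
        startptr, endptr = X.indptr[i], X.indptr[i + 1]
        row[X.indices[startptr:endptr]] = X.data[startptr:endptr]
        print(row)  # [0. 2. 0.] then [1. 0. 3.]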