
Commit 70a185a

MAINT Enable and run black on examples (#20502)
1 parent 31c66a9 commit 70a185a

262 files changed (+7852 / -5520 lines)

Some content is hidden: large commits hide part of the diff by default, so only a subset of the 262 changed files is shown below.
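The diffs below are purely mechanical: black rewrites the examples to its style (double quotes, exploded call arguments with trailing commas, a fixed maximum line length) without changing behavior. The following is a rough sketch of how this kind of reformatting could be reproduced locally, assuming black is installed; the flags and the examples/ path are illustrative rather than taken from this PR.

# Rough sketch, assuming black is installed; flags and path are illustrative.
import subprocess

# Preview what black would change without modifying any file.
subprocess.run(["black", "--check", "--diff", "examples/"], check=False)

# Apply the reformatting in place.
subprocess.run(["black", "examples/"], check=False)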

examples/applications/plot_digits_denoising.py (+17 / -12)

@@ -87,9 +87,9 @@ def plot_digits(X, title):
 # Let's first have a look to see the difference between noise-free and noisy
 # images. We will check the test set in this regard.
 plot_digits(X_test, "Uncorrupted test images")
-plot_digits(X_test_noisy,
-            f"Noisy test images\n"
-            f"MSE: {np.mean((X_test - X_test_noisy) ** 2):.2f}")
+plot_digits(
+    X_test_noisy, f"Noisy test images\nMSE: {np.mean((X_test - X_test_noisy) ** 2):.2f}"
+)
 
 # %%
 # Learn the `PCA` basis
@@ -100,8 +100,9 @@ def plot_digits(X, title):
 from sklearn.decomposition import PCA, KernelPCA
 
 pca = PCA(n_components=32)
-kernel_pca = KernelPCA(n_components=400, kernel="rbf", gamma=1e-3,
-                       fit_inverse_transform=True, alpha=5e-3)
+kernel_pca = KernelPCA(
+    n_components=400, kernel="rbf", gamma=1e-3, fit_inverse_transform=True, alpha=5e-3
+)
 
 pca.fit(X_train_noisy)
 _ = kernel_pca.fit(X_train_noisy)
@@ -118,17 +119,21 @@ def plot_digits(X, title):
 # kernel to learn the PCA basis and a kernel ridge to learn the mapping
 # function.
 X_reconstructed_kernel_pca = kernel_pca.inverse_transform(
-    kernel_pca.transform(X_test_noisy))
+    kernel_pca.transform(X_test_noisy)
+)
 X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test_noisy))
 
 # %%
 plot_digits(X_test, "Uncorrupted test images")
-plot_digits(X_reconstructed_pca,
-            f"PCA reconstruction\n"
-            f"MSE: {np.mean((X_test - X_reconstructed_pca) ** 2):.2f}")
-plot_digits(X_reconstructed_kernel_pca,
-            f"Kernel PCA reconstruction\n"
-            f"MSE: {np.mean((X_test - X_reconstructed_kernel_pca) ** 2):.2f}")
+plot_digits(
+    X_reconstructed_pca,
+    f"PCA reconstruction\nMSE: {np.mean((X_test - X_reconstructed_pca) ** 2):.2f}",
+)
+plot_digits(
+    X_reconstructed_kernel_pca,
+    "Kernel PCA reconstruction\n"
+    f"MSE: {np.mean((X_test - X_reconstructed_kernel_pca) ** 2):.2f}",
+)
 
 # %%
 # PCA has a lower MSE than kernel PCA. However, the qualitative analysis might
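The calls reformatted above are the heart of this example's denoising recipe: fit KernelPCA with fit_inverse_transform=True on noisy images, then send test images through transform followed by inverse_transform. Below is a minimal self-contained sketch of that pattern; the synthetic array, its shape, and the noise level are stand-ins for the digits data and are assumptions, not values from the example.

# Minimal sketch of the KernelPCA denoising pattern; synthetic data is a stand-in.
import numpy as np
from sklearn.decomposition import KernelPCA

rng = np.random.RandomState(0)
X_clean = rng.uniform(size=(200, 64))  # stand-in for digit images
X_noisy = X_clean + rng.normal(scale=0.25, size=X_clean.shape)

# fit_inverse_transform=True is what makes inverse_transform available later.
kernel_pca = KernelPCA(
    n_components=32, kernel="rbf", gamma=1e-3, fit_inverse_transform=True, alpha=5e-3
)
kernel_pca.fit(X_noisy)
X_denoised = kernel_pca.inverse_transform(kernel_pca.transform(X_noisy))
print(f"MSE: {np.mean((X_clean - X_denoised) ** 2):.4f}")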

examples/applications/plot_face_recognition.py (+21 / -17)

@@ -43,7 +43,7 @@
 print(__doc__)
 
 # Display progress logs on stdout
-logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
 
 
 # #############################################################################
@@ -75,19 +75,20 @@
 
 # split into a training and testing set
 X_train, X_test, y_train, y_test = train_test_split(
-    X, y, test_size=0.25, random_state=42)
+    X, y, test_size=0.25, random_state=42
+)
 
 
 # #############################################################################
 # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
 # dataset): unsupervised feature extraction / dimensionality reduction
 n_components = 150
 
-print("Extracting the top %d eigenfaces from %d faces"
-      % (n_components, X_train.shape[0]))
+print(
+    "Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])
+)
 t0 = time()
-pca = PCA(n_components=n_components, svd_solver='randomized',
-          whiten=True).fit(X_train)
+pca = PCA(n_components=n_components, svd_solver="randomized", whiten=True).fit(X_train)
 print("done in %0.3fs" % (time() - t0))
 
 eigenfaces = pca.components_.reshape((n_components, h, w))
@@ -104,11 +105,11 @@
 
 print("Fitting the classifier to the training set")
 t0 = time()
-param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
-              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
-clf = GridSearchCV(
-    SVC(kernel='rbf', class_weight='balanced'), param_grid
-)
+param_grid = {
+    "C": [1e3, 5e3, 1e4, 5e4, 1e5],
+    "gamma": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
+}
+clf = GridSearchCV(SVC(kernel="rbf", class_weight="balanced"), param_grid)
 clf = clf.fit(X_train_pca, y_train)
 print("done in %0.3fs" % (time() - t0))
 print("Best estimator found by grid search:")
@@ -130,10 +131,11 @@
 # #############################################################################
 # Qualitative evaluation of the predictions using matplotlib
 
+
 def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
     """Helper function to plot a gallery of portraits"""
     plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
-    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
+    plt.subplots_adjust(bottom=0, left=0.01, right=0.99, top=0.90, hspace=0.35)
     for i in range(n_row * n_col):
         plt.subplot(n_row, n_col, i + 1)
         plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
@@ -144,14 +146,16 @@ def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
 
 # plot the result of the prediction on a portion of the test set
 
+
 def title(y_pred, y_test, target_names, i):
-    pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
-    true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
-    return 'predicted: %s\ntrue: %s' % (pred_name, true_name)
+    pred_name = target_names[y_pred[i]].rsplit(" ", 1)[-1]
+    true_name = target_names[y_test[i]].rsplit(" ", 1)[-1]
+    return "predicted: %s\ntrue: %s" % (pred_name, true_name)
 
 
-prediction_titles = [title(y_pred, y_test, target_names, i)
-                     for i in range(y_pred.shape[0])]
+prediction_titles = [
+    title(y_pred, y_test, target_names, i) for i in range(y_pred.shape[0])
+]
 
 plot_gallery(X_test, prediction_titles, h, w)
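The lines touched in this file belong to the classic eigenfaces recipe: project face images onto a whitened, randomized-SVD PCA basis, then grid-search an RBF SVC on the projected features. The condensed sketch below reuses the same estimator settings as the diff; the random arrays merely stand in for the LFW faces so that nothing has to be downloaded, and their shapes and label count are assumptions.

# Condensed eigenfaces + SVM sketch; random arrays stand in for the LFW faces.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

rng = np.random.RandomState(42)
X = rng.uniform(size=(400, 1850))  # stand-in for flattened face images
y = rng.randint(0, 5, size=400)    # stand-in for person labels

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

pca = PCA(n_components=150, svd_solver="randomized", whiten=True).fit(X_train)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

param_grid = {
    "C": [1e3, 5e3, 1e4, 5e4, 1e5],
    "gamma": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
clf = GridSearchCV(SVC(kernel="rbf", class_weight="balanced"), param_grid)
clf.fit(X_train_pca, y_train)
print("Best estimator found by grid search:", clf.best_estimator_)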

examples/applications/plot_model_complexity_influence.py (+89 / -72)

@@ -72,23 +72,21 @@
 
 def generate_data(case):
     """Generate regression/classification data."""
-    if case == 'regression':
+    if case == "regression":
         X, y = datasets.load_diabetes(return_X_y=True)
-    elif case == 'classification':
-        X, y = datasets.fetch_20newsgroups_vectorized(subset='all',
-                                                      return_X_y=True)
+    elif case == "classification":
+        X, y = datasets.fetch_20newsgroups_vectorized(subset="all", return_X_y=True)
     X, y = shuffle(X, y)
     offset = int(X.shape[0] * 0.8)
     X_train, y_train = X[:offset], y[:offset]
     X_test, y_test = X[offset:], y[offset:]
 
-    data = {'X_train': X_train, 'X_test': X_test, 'y_train': y_train,
-            'y_test': y_test}
+    data = {"X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test}
     return data
 
 
-regression_data = generate_data('regression')
-classification_data = generate_data('classification')
+regression_data = generate_data("regression")
+classification_data = generate_data("classification")
 
 
 ##############################################################################
@@ -110,26 +108,33 @@ def benchmark_influence(conf):
     prediction_times = []
     prediction_powers = []
     complexities = []
-    for param_value in conf['changing_param_values']:
-        conf['tuned_params'][conf['changing_param']] = param_value
-        estimator = conf['estimator'](**conf['tuned_params'])
+    for param_value in conf["changing_param_values"]:
+        conf["tuned_params"][conf["changing_param"]] = param_value
+        estimator = conf["estimator"](**conf["tuned_params"])
 
         print("Benchmarking %s" % estimator)
-        estimator.fit(conf['data']['X_train'], conf['data']['y_train'])
-        conf['postfit_hook'](estimator)
-        complexity = conf['complexity_computer'](estimator)
+        estimator.fit(conf["data"]["X_train"], conf["data"]["y_train"])
+        conf["postfit_hook"](estimator)
+        complexity = conf["complexity_computer"](estimator)
         complexities.append(complexity)
         start_time = time.time()
-        for _ in range(conf['n_samples']):
-            y_pred = estimator.predict(conf['data']['X_test'])
-        elapsed_time = (time.time() - start_time) / float(conf['n_samples'])
+        for _ in range(conf["n_samples"]):
+            y_pred = estimator.predict(conf["data"]["X_test"])
+        elapsed_time = (time.time() - start_time) / float(conf["n_samples"])
         prediction_times.append(elapsed_time)
-        pred_score = conf['prediction_performance_computer'](
-            conf['data']['y_test'], y_pred)
+        pred_score = conf["prediction_performance_computer"](
+            conf["data"]["y_test"], y_pred
+        )
         prediction_powers.append(pred_score)
-        print("Complexity: %d | %s: %.4f | Pred. Time: %fs\n" % (
-            complexity, conf['prediction_performance_label'], pred_score,
-            elapsed_time))
+        print(
+            "Complexity: %d | %s: %.4f | Pred. Time: %fs\n"
+            % (
+                complexity,
+                conf["prediction_performance_label"],
+                pred_score,
+                elapsed_time,
+            )
+        )
     return prediction_powers, prediction_times, complexities
 
 
@@ -147,46 +152,58 @@ def benchmark_influence(conf):
 # different data.
 #
 
+
 def _count_nonzero_coefficients(estimator):
     a = estimator.coef_.toarray()
     return np.count_nonzero(a)
 
 
 configurations = [
-    {'estimator': SGDClassifier,
-     'tuned_params': {'penalty': 'elasticnet', 'alpha': 0.001, 'loss':
-                      'modified_huber', 'fit_intercept': True, 'tol': 1e-3},
-     'changing_param': 'l1_ratio',
-     'changing_param_values': [0.25, 0.5, 0.75, 0.9],
-     'complexity_label': 'non_zero coefficients',
-     'complexity_computer': _count_nonzero_coefficients,
-     'prediction_performance_computer': hamming_loss,
-     'prediction_performance_label': 'Hamming Loss (Misclassification Ratio)',
-     'postfit_hook': lambda x: x.sparsify(),
-     'data': classification_data,
-     'n_samples': 30},
-    {'estimator': NuSVR,
-     'tuned_params': {'C': 1e3, 'gamma': 2 ** -15},
-     'changing_param': 'nu',
-     'changing_param_values': [0.1, 0.25, 0.5, 0.75, 0.9],
-     'complexity_label': 'n_support_vectors',
-     'complexity_computer': lambda x: len(x.support_vectors_),
-     'data': regression_data,
-     'postfit_hook': lambda x: x,
-     'prediction_performance_computer': mean_squared_error,
-     'prediction_performance_label': 'MSE',
-     'n_samples': 30},
-    {'estimator': GradientBoostingRegressor,
-     'tuned_params': {'loss': 'squared_error'},
-     'changing_param': 'n_estimators',
-     'changing_param_values': [10, 50, 100, 200, 500],
-     'complexity_label': 'n_trees',
-     'complexity_computer': lambda x: x.n_estimators,
-     'data': regression_data,
-     'postfit_hook': lambda x: x,
-     'prediction_performance_computer': mean_squared_error,
-     'prediction_performance_label': 'MSE',
-     'n_samples': 30},
+    {
+        "estimator": SGDClassifier,
+        "tuned_params": {
+            "penalty": "elasticnet",
+            "alpha": 0.001,
+            "loss": "modified_huber",
+            "fit_intercept": True,
+            "tol": 1e-3,
+        },
+        "changing_param": "l1_ratio",
+        "changing_param_values": [0.25, 0.5, 0.75, 0.9],
+        "complexity_label": "non_zero coefficients",
+        "complexity_computer": _count_nonzero_coefficients,
+        "prediction_performance_computer": hamming_loss,
+        "prediction_performance_label": "Hamming Loss (Misclassification Ratio)",
+        "postfit_hook": lambda x: x.sparsify(),
+        "data": classification_data,
+        "n_samples": 30,
+    },
+    {
+        "estimator": NuSVR,
+        "tuned_params": {"C": 1e3, "gamma": 2 ** -15},
+        "changing_param": "nu",
+        "changing_param_values": [0.1, 0.25, 0.5, 0.75, 0.9],
+        "complexity_label": "n_support_vectors",
+        "complexity_computer": lambda x: len(x.support_vectors_),
+        "data": regression_data,
+        "postfit_hook": lambda x: x,
+        "prediction_performance_computer": mean_squared_error,
+        "prediction_performance_label": "MSE",
+        "n_samples": 30,
+    },
+    {
+        "estimator": GradientBoostingRegressor,
+        "tuned_params": {"loss": "squared_error"},
+        "changing_param": "n_estimators",
+        "changing_param_values": [10, 50, 100, 200, 500],
+        "complexity_label": "n_trees",
+        "complexity_computer": lambda x: x.n_estimators,
+        "data": regression_data,
+        "postfit_hook": lambda x: x,
+        "prediction_performance_computer": mean_squared_error,
+        "prediction_performance_label": "MSE",
+        "n_samples": 30,
+    },
 ]
 
 
@@ -209,6 +226,7 @@ def _count_nonzero_coefficients(estimator):
 # ensemble is not as detrimental.
 #
 
+
 def plot_influence(conf, mse_values, prediction_times, complexities):
     """
     Plot influence of model complexity on both accuracy and latency.
@@ -219,38 +237,37 @@ def plot_influence(conf, mse_values, prediction_times, complexities):
 
     # first axes (prediction error)
     ax1 = fig.add_subplot(111)
-    line1 = ax1.plot(complexities, mse_values, c='tab:blue', ls='-')[0]
-    ax1.set_xlabel('Model Complexity (%s)' % conf['complexity_label'])
-    y1_label = conf['prediction_performance_label']
+    line1 = ax1.plot(complexities, mse_values, c="tab:blue", ls="-")[0]
+    ax1.set_xlabel("Model Complexity (%s)" % conf["complexity_label"])
+    y1_label = conf["prediction_performance_label"]
     ax1.set_ylabel(y1_label)
 
-    ax1.spines['left'].set_color(line1.get_color())
+    ax1.spines["left"].set_color(line1.get_color())
     ax1.yaxis.label.set_color(line1.get_color())
-    ax1.tick_params(axis='y', colors=line1.get_color())
+    ax1.tick_params(axis="y", colors=line1.get_color())
 
     # second axes (latency)
     ax2 = fig.add_subplot(111, sharex=ax1, frameon=False)
-    line2 = ax2.plot(complexities, prediction_times, c='tab:orange', ls='-')[0]
+    line2 = ax2.plot(complexities, prediction_times, c="tab:orange", ls="-")[0]
    ax2.yaxis.tick_right()
     ax2.yaxis.set_label_position("right")
     y2_label = "Time (s)"
     ax2.set_ylabel(y2_label)
-    ax1.spines['right'].set_color(line2.get_color())
+    ax1.spines["right"].set_color(line2.get_color())
     ax2.yaxis.label.set_color(line2.get_color())
-    ax2.tick_params(axis='y', colors=line2.get_color())
+    ax2.tick_params(axis="y", colors=line2.get_color())
 
-    plt.legend((line1, line2), ("prediction error", "latency"),
-               loc='upper right')
+    plt.legend((line1, line2), ("prediction error", "latency"), loc="upper right")
 
-    plt.title("Influence of varying '%s' on %s" % (conf['changing_param'],
-                                                   conf['estimator'].__name__))
+    plt.title(
+        "Influence of varying '%s' on %s"
+        % (conf["changing_param"], conf["estimator"].__name__)
+    )
 
 
 for conf in configurations:
-    prediction_performances, prediction_times, complexities = \
-        benchmark_influence(conf)
-    plot_influence(conf, prediction_performances, prediction_times,
-                   complexities)
+    prediction_performances, prediction_times, complexities = benchmark_influence(conf)
+    plot_influence(conf, prediction_performances, prediction_times, complexities)
     plt.show()
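Most of this file is the configuration-dict pattern consumed by benchmark_influence: each entry names an estimator class, the parameter being swept, and callables that measure model complexity and prediction error. The stripped-down sketch below reproduces only the NuSVR entry of that loop on a tiny synthetic regression problem; the data shapes and the shortened list of nu values are assumptions made for brevity.

# Stripped-down sketch of the config-dict benchmark loop; synthetic data only.
import time

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.svm import NuSVR

rng = np.random.RandomState(0)
X = rng.uniform(size=(200, 10))
y = X @ rng.uniform(size=10) + rng.normal(scale=0.1, size=200)
data = {"X_train": X[:160], "y_train": y[:160], "X_test": X[160:], "y_test": y[160:]}

conf = {
    "estimator": NuSVR,
    "tuned_params": {"C": 1e3, "gamma": 2 ** -15},
    "changing_param": "nu",
    "changing_param_values": [0.1, 0.5, 0.9],
    "complexity_computer": lambda est: len(est.support_vectors_),
    "data": data,
}

for param_value in conf["changing_param_values"]:
    conf["tuned_params"][conf["changing_param"]] = param_value
    estimator = conf["estimator"](**conf["tuned_params"])
    estimator.fit(conf["data"]["X_train"], conf["data"]["y_train"])
    start = time.time()
    y_pred = estimator.predict(conf["data"]["X_test"])
    elapsed = time.time() - start
    print(
        "nu=%.2f | support vectors: %d | MSE: %.4f | %.6fs"
        % (
            param_value,
            conf["complexity_computer"](estimator),
            mean_squared_error(conf["data"]["y_test"], y_pred),
            elapsed,
        )
    )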

0 commit comments