.. _LFW: http://vis-www.cs.umass.edu/lfw/

- Expected results for the most represented people in the dataset:
-
- ================== ============ ======= ========== =======
-                    precision    recall  f1-score   support
- ================== ============ ======= ========== =======
-      Ariel Sharon       0.67     0.92      0.77       13
-      Colin Powell       0.75     0.78      0.76       60
-   Donald Rumsfeld       0.78     0.67      0.72       27
-     George W Bush       0.86     0.86      0.86      146
- Gerhard Schroeder       0.76     0.76      0.76       25
-       Hugo Chavez       0.67     0.67      0.67       15
-        Tony Blair       0.81     0.69      0.75       36
-
-       avg / total       0.80     0.80      0.80      322
- ================== ============ ======= ========== =======
-
"""
-
+ # %%
from time import time
- import logging
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
- from sklearn.model_selection import GridSearchCV
+ from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
- from sklearn.metrics import confusion_matrix
+ from sklearn.metrics import ConfusionMatrixDisplay
+ from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
+ from sklearn.utils.fixes import loguniform


- # Display progress logs on stdout
- logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
-
-
- # #############################################################################
+ # %%
# Download the data, if not already on disk and load it as numpy arrays

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

print("n_classes: %d" % n_classes)
- # #############################################################################
- # Split into a training set and a test set using a stratified k fold
+ # %%
+ # Split into a training set and a test set, keeping 25% of the data for testing.

- # split into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

+ scaler = StandardScaler()
+ X_train = scaler.fit_transform(X_train)
+ X_test = scaler.transform(X_test)
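Note that the scaler is fit on the training split only and merely applied to the test split, so no test-set statistics leak into preprocessing. As an aside that is not part of this commit, the same guarantee can be obtained by chaining the steps into a single estimator with make_pipeline, which is convenient if the whole chain is later cross-validated. A minimal sketch, assuming the raw unscaled split and the 150 components used further below:

# Illustration only: the scaling / PCA / SVC chain expressed as one pipeline.
from sklearn.pipeline import make_pipeline

model = make_pipeline(
    StandardScaler(),
    PCA(n_components=150, whiten=True),
    SVC(kernel="rbf", class_weight="balanced"),
)
model.fit(X_train, y_train)  # every step is (re)fit on the training data only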
- # #############################################################################
+ # %%
# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction
+
n_components = 150

print(

print("done in %0.3fs" % (time() - t0))
- # #############################################################################
+ # %%
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {
-     "C": [1e3, 5e3, 1e4, 5e4, 1e5],
-     "gamma": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
+     "C": loguniform(1e3, 1e5),
+     "gamma": loguniform(1e-4, 1e-1),
}
- clf = GridSearchCV(SVC(kernel="rbf", class_weight="balanced"), param_grid)
+ clf = RandomizedSearchCV(
+     SVC(kernel="rbf", class_weight="balanced"), param_grid, n_iter=10
+ )
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)
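The change above swaps the exhaustive 30-cell grid of C and gamma values for RandomizedSearchCV, which instead draws n_iter=10 candidate pairs from continuous log-uniform distributions. To see which candidates were sampled and which one won, the fitted search object can be inspected; the snippet below is a usage illustration, not part of the commit (it also assumes pandas is available):

# Illustration only: inspect the sampled candidates and the selected parameters.
import pandas as pd

print(clf.best_params_)  # the C / gamma pair with the best cross-validation score
results = pd.DataFrame(clf.cv_results_)
print(
    results[["param_C", "param_gamma", "mean_test_score"]]
    .sort_values("mean_test_score", ascending=False)
)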
- # #############################################################################
+ # %%
# Quantitative evaluation of the model quality on the test set

print("Predicting people's names on the test set")

print("done in %0.3fs" % (time() - t0))

print(classification_report(y_test, y_pred, target_names=target_names))
- print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))
+ ConfusionMatrixDisplay.from_estimator(
+     clf, X_test_pca, y_test, display_labels=target_names, xticks_rotation="vertical"
+ )
+ plt.tight_layout()
+ plt.show()
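ConfusionMatrixDisplay.from_estimator also accepts a normalize argument; with classes as imbalanced as these, row-normalized counts can be easier to read. An optional variation, not part of the commit:

# Optional variation: each cell shows the fraction of its true class, i.e. per-class recall.
ConfusionMatrixDisplay.from_estimator(
    clf,
    X_test_pca,
    y_test,
    display_labels=target_names,
    xticks_rotation="vertical",
    normalize="true",
)
plt.tight_layout()
plt.show()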
- # #############################################################################
+ # %%
# Qualitative evaluation of the predictions using matplotlib

@@ -143,6 +133,7 @@ def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
        plt.yticks(())

+ # %%
# plot the result of the prediction on a portion of the test set

@@ -157,10 +148,16 @@ def title(y_pred, y_test, target_names, i):
]

plot_gallery(X_test, prediction_titles, h, w)
-
+ # %%
# plot the gallery of the most significant eigenfaces

eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
plot_gallery(eigenfaces, eigenface_titles, h, w)

plt.show()
+
+ # %%
+ # The face recognition problem would be solved much more effectively by training
+ # convolutional neural networks, but this family of models is outside of the scope of
+ # the scikit-learn library. Interested readers should instead try to use PyTorch or
+ # TensorFlow to implement such models.
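As a pointer for such readers, the outline of a small convolutional classifier in PyTorch is sketched below. This is an illustrative sketch only, not part of the example or of scikit-learn; it assumes grayscale face crops of shape (h, w) like those loaded above and the same integer labels:

# Illustration only: a minimal CNN classifier in PyTorch for the same task.
import torch
from torch import nn


class TinyFaceCNN(nn.Module):
    def __init__(self, h, w, n_classes):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        # Two 2x2 max-pools shrink each spatial dimension by a factor of 4.
        self.classifier = nn.Linear(32 * (h // 4) * (w // 4), n_classes)

    def forward(self, x):  # x has shape (batch, 1, h, w)
        x = self.features(x)
        return self.classifier(x.flatten(1))


# Training would follow the usual PyTorch loop: nn.CrossEntropyLoss on the integer
# labels, an optimizer such as torch.optim.Adam, and minibatches built from the
# (n_samples, h, w) image array via torch.from_numpy.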