|
12 | 12 | MLPClassifier trained on the MNIST dataset.
|
13 | 13 |
|
14 | 14 | The input data consists of 28x28 pixel handwritten digits, leading to 784
|
15 |
| -features in the dataset. Therefore the first layer weight matrix have the shape |
| 15 | +features in the dataset. Therefore the first layer weight matrix has the shape |
16 | 16 | (784, hidden_layer_sizes[0]). We can therefore visualize a single column of
|
17 | 17 | the weight matrix as a 28x28 pixel image.
|
18 | 18 |
|
19 | 19 | To make the example run faster, we use very few hidden units, and train only
|
20 | 20 | for a very short time. Training longer would result in weights with a much
|
21 | 21 | smoother spatial appearance. The example will throw a warning because it
|
22 |
| -doesn't converge, in this case this is what we want because of CI's time |
23 |
| -constraints. |
24 |
| -
|
| 22 | +doesn't converge; in this case this is what we want because of resource |
| 23 | +usage constraints on our Continuous Integration infrastructure that is used |
| 24 | +to build this documentation on a regular basis. |
25 | 25 | """
|
26 | 26 |
|
27 | 27 | import warnings
|
28 |
| - |
29 | 28 | import matplotlib.pyplot as plt
|
30 | 29 | from sklearn.datasets import fetch_openml
|
31 | 30 | from sklearn.exceptions import ConvergenceWarning
|
32 | 31 | from sklearn.neural_network import MLPClassifier
|
| 32 | +from sklearn.model_selection import train_test_split |
33 | 33 |
|
34 | 34 | # Load data from https://www.openml.org/d/554
|
35 | 35 | X, y = fetch_openml("mnist_784", version=1, return_X_y=True)
|
36 | 36 | X = X / 255.0
|
37 | 37 |
|
38 |
| -# rescale the data, use the traditional train/test split |
39 |
| -X_train, X_test = X[:60000], X[60000:] |
40 |
| -y_train, y_test = y[:60000], y[60000:] |
| 38 | +# Split data into train partition and test partition |
| 39 | +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.7) |
41 | 40 |
|
42 | 41 | mlp = MLPClassifier(
|
43 |
| - hidden_layer_sizes=(50,), |
44 |
| - max_iter=10, |
| 42 | + hidden_layer_sizes=(40,), |
| 43 | + max_iter=8, |
45 | 44 | alpha=1e-4,
|
46 | 45 | solver="sgd",
|
47 | 46 | verbose=10,
|
48 | 47 | random_state=1,
|
49 |
| - learning_rate_init=0.1, |
| 48 | + learning_rate_init=0.2, |
50 | 49 | )
|
51 | 50 |
|
52 |
| -# this example won't converge because of CI's time constraints, so we catch the |
53 |
| -# warning and are ignore it here |
| 51 | +# this example won't converge because of resource usage constraints on |
| 52 | +# our Continuous Integration infrastructure, so we catch the warning and |
| 53 | +# ignore it here |
54 | 54 | with warnings.catch_warnings():
|
55 | 55 | warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
|
56 | 56 | mlp.fit(X_train, y_train)
|
|
0 commit comments