
Commit 0bf790d

Pushing the docs to dev/ for branch: master, commit f13c9c07527413145894fbb72d561db491d3a281
1 parent 44f5354 commit 0bf790d

File tree

696 files changed: +1628 / -1640 lines


dev/_downloads/033a4a86b1b4dcbc54b1fb804950489e/plot_huber_vs_ridge.py

Lines changed: 2 additions & 3 deletions
@@ -45,14 +45,13 @@
 x = np.linspace(X.min(), X.max(), 7)
 epsilon_values = [1.35, 1.5, 1.75, 1.9]
 for k, epsilon in enumerate(epsilon_values):
-    huber = HuberRegressor(fit_intercept=True, alpha=0.0, max_iter=100,
-                           epsilon=epsilon)
+    huber = HuberRegressor(alpha=0.0, epsilon=epsilon)
     huber.fit(X, y)
     coef_ = huber.coef_ * x + huber.intercept_
     plt.plot(x, coef_, colors[k], label="huber loss, %s" % epsilon)
 
 # Fit a ridge regressor to compare it to huber regressor.
-ridge = Ridge(fit_intercept=True, alpha=0.0, random_state=0, normalize=True)
+ridge = Ridge(alpha=0.0, random_state=0, normalize=True)
 ridge.fit(X, y)
 coef_ridge = ridge.coef_
 coef_ = ridge.coef_ * x + ridge.intercept_
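
Note (not part of the commit): in scikit-learn at this point, fit_intercept=True and max_iter=100 are the constructor defaults for HuberRegressor, and fit_intercept=True is the default for Ridge, so the trimmed calls should fit identical models. A minimal sketch checking that assumption:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import HuberRegressor

# Toy data mirroring the example's setup.
X, y = make_regression(n_samples=20, n_features=1, random_state=0,
                       noise=4.0, bias=100.0)

# Fully spelled-out call (old) vs. the call relying on defaults (new).
old = HuberRegressor(fit_intercept=True, alpha=0.0, max_iter=100,
                     epsilon=1.35).fit(X, y)
new = HuberRegressor(alpha=0.0, epsilon=1.35).fit(X, y)

# The solver is deterministic, so the two fits should coincide
# if the dropped arguments really are the defaults.
assert np.allclose(old.coef_, new.coef_)
assert np.isclose(old.intercept_, new.intercept_)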

dev/_downloads/275c1a8902428a3a52b079bb6f13591a/plot_sgd_separating_hyperplane.py

Lines changed: 2 additions & 2 deletions
@@ -18,8 +18,8 @@
 X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
 
 # fit the model
-clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200,
-                    fit_intercept=True)
+clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200)
+
 clf.fit(X, Y)
 
 # plot the line, the points, and the nearest vectors to the plane
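
Note (not part of the commit): fit_intercept likewise defaults to True for SGDClassifier, so the trimmed call should be equivalent. A sketch under that assumption; random_state is pinned here, unlike in the example, purely so the two stochastic fits can be compared:

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.linear_model import SGDClassifier

X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)

old = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200,
                    fit_intercept=True, random_state=0).fit(X, Y)
new = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200,
                    random_state=0).fit(X, Y)

# With identical seeds and identical effective parameters, the learned
# hyperplanes should match.
assert np.allclose(old.coef_, new.coef_)
assert np.allclose(old.intercept_, new.intercept_)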

dev/_downloads/28477181ee2a477248e703cf646f97f1/plot_sparse_logistic_regression_20newsgroups.ipynb

Lines changed: 1 addition & 1 deletion
Only the embedded cell "source" string changes, in the same way as plot_sparse_logistic_regression_20newsgroups.py below: the C=1, and fit_intercept=True, arguments are dropped from the LogisticRegression(...) call.

@@ -26,7 +26,7 @@ (excerpt of the changed call within the cell source)
-        lr = LogisticRegression(solver=solver,
-                                multi_class=model,
-                                C=1,
-                                penalty='l1',
-                                fit_intercept=True,
-                                max_iter=this_max_iter,
-                                random_state=42,
-                                )
+        lr = LogisticRegression(solver=solver,
+                                multi_class=model,
+                                penalty='l1',
+                                max_iter=this_max_iter,
+                                random_state=42,
+                                )

dev/_downloads/95a10731332acfd8486f17af106608ce/plot_prediction_latency.py

Lines changed: 1 addition & 2 deletions
@@ -278,8 +278,7 @@ def plot_benchmark_throughput(throughputs, configuration):
     'estimators': [
         {'name': 'Linear Model',
          'instance': SGDRegressor(penalty='elasticnet', alpha=0.01,
-                                  l1_ratio=0.25, fit_intercept=True,
-                                  tol=1e-4),
+                                  l1_ratio=0.25, tol=1e-4),
          'complexity_label': 'non-zero coefficients',
          'complexity_computer': lambda clf: np.count_nonzero(clf.coef_)},
         {'name': 'RandomForest',
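
Note (not part of the commit): SGDRegressor also defaults to fit_intercept=True, so the shorter constructor should configure the same estimator. A quick check of that assumption:

from sklearn.linear_model import SGDRegressor

# The trimmed call from the diff; fit_intercept is expected to come
# back True because it is the constructor default.
reg = SGDRegressor(penalty='elasticnet', alpha=0.01, l1_ratio=0.25, tol=1e-4)
print(reg.get_params()['fit_intercept'])  # True if the default holds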

dev/_downloads/9ebcf2c453d21566c66a5b17116942cc/plot_prediction_latency.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dev/_downloads/b748a906772598c4cb707f23574f214b/plot_huber_vs_ridge.ipynb

Lines changed: 1 addition & 1 deletion
Only the embedded cell "source" string changes, in the same way as plot_huber_vs_ridge.py above: the HuberRegressor call drops fit_intercept=True and max_iter=100, and the Ridge call drops fit_intercept=True.

@@ -26,7 +26,7 @@ (excerpts of the changed calls within the cell source)
-    huber = HuberRegressor(fit_intercept=True, alpha=0.0, max_iter=100,
-                           epsilon=epsilon)
+    huber = HuberRegressor(alpha=0.0, epsilon=epsilon)
 
-ridge = Ridge(fit_intercept=True, alpha=0.0, random_state=0, normalize=True)
+ridge = Ridge(alpha=0.0, random_state=0, normalize=True)

dev/_downloads/c71ac54967b00fcbbf51b1c05b17f31f/plot_sgd_separating_hyperplane.ipynb

Lines changed: 1 addition & 1 deletion
Only the embedded cell "source" string changes, in the same way as plot_sgd_separating_hyperplane.py above:

@@ -26,7 +26,7 @@ (excerpt of the changed call within the cell source)
-clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200,
-                    fit_intercept=True)
+clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200)

dev/_downloads/e6b0467dcb3937291eefb6297ca0db77/plot_sparse_logistic_regression_20newsgroups.py

Lines changed: 0 additions & 2 deletions
@@ -74,9 +74,7 @@
               (model_params['name'], solver, this_max_iter))
         lr = LogisticRegression(solver=solver,
                                 multi_class=model,
-                                C=1,
                                 penalty='l1',
-                                fit_intercept=True,
                                 max_iter=this_max_iter,
                                 random_state=42,
                                 )
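
Note (not part of the commit): LogisticRegression defaults to C=1.0 and fit_intercept=True, so dropping the two explicit arguments should leave the fitted model unchanged. A minimal sketch checking those defaults (max_iter is shortened here for brevity):

from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(solver='saga', multi_class='multinomial',
                        penalty='l1', max_iter=1, random_state=42)

params = lr.get_params()
print(params['C'], params['fit_intercept'])  # expected: 1.0 True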

dev/_downloads/scikit-learn-docs.pdf

-5.52 KB (binary file not shown)

dev/_images/iris.png

0 Bytes (iris.png unchanged in size); followed by small size deltas for a few dozen other regenerated images under dev/_images/ whose filenames were not captured (deltas range from about -1.04 KB to +624 Bytes).

dev/_sources/auto_examples/applications/plot_face_recognition.rst.txt

Lines changed: 15 additions & 15 deletions
