Skip to content

Commit b14dd6c

Browse files
committed
Merge branch 'master' into lars
2 parents c8d4e54 + 6986e9b commit b14dd6c

38 files changed

+298
-148
lines changed

.travis.yml

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,62 +3,60 @@ sudo: false
33

44
language: python
55

6-
# Pre-install packages for the ubuntu distribution
76
cache:
87
apt: true
98
directories:
109
- $HOME/.cache/pip
11-
addons:
12-
apt:
13-
packages:
14-
# these only required by the DISTRIB="ubuntu" builds:
15-
- python-scipy
16-
- libatlas3gf-base
17-
- libatlas-dev
10+
1811
dist: trusty
12+
1913
env:
2014
global:
2115
# Directory where tests are run from
2216
- TEST_DIR=/tmp/sklearn
2317
- OMP_NUM_THREADS=4
2418
- OPENBLAS_NUM_THREADS=4
25-
matrix:
19+
20+
matrix:
21+
include:
2622
# This environment tests that scikit-learn can be built against
2723
# versions of numpy, scipy with ATLAS that comes with Ubuntu Trusty 14.04
28-
- DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4"
29-
COVERAGE=true
24+
- env: DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4"
25+
COVERAGE=true
26+
addons:
27+
apt:
28+
packages:
29+
# these only required by the DISTRIB="ubuntu" builds:
30+
- python-scipy
31+
- libatlas3gf-base
32+
- libatlas-dev
3033
# This environment tests the oldest supported anaconda env
31-
- DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
32-
NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.4"
33-
COVERAGE=true
34+
- env: DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
35+
NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.4"
36+
COVERAGE=true
3437
# This environment tests the newest supported Anaconda release (4.3.1)
3538
# It also runs tests requiring Pandas.
36-
- DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
37-
NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
38-
CYTHON_VERSION="0.25.2" COVERAGE=true
39+
- env: DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
40+
NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
41+
CYTHON_VERSION="0.25.2" COVERAGE=true
3942
# This environment uses pytest to run the tests. It uses the newest
4043
# supported Anaconda release (4.3.1). It also runs tests requiring Pandas.
41-
- USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
42-
NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
43-
CYTHON_VERSION="0.25.2"
44+
# - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6"
45+
# INSTALL_MKL="true" NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1"
46+
# PANDAS_VERSION="0.19.2" CYTHON_VERSION="0.25.2"
4447
# flake8 linting on diff wrt common ancestor with upstream/master
45-
- RUN_FLAKE8="true" SKIP_TESTS="true"
46-
DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
47-
NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
48-
49-
50-
matrix:
51-
allow_failures:
52-
# allow_failures seems to be keyed on the python version
53-
# We are using this to allow failures for DISTRIB=scipy-dev-wheels
54-
- python: 3.5
55-
56-
include:
48+
- env: RUN_FLAKE8="true" SKIP_TESTS="true"
49+
DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
50+
NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
5751
# This environment tests scikit-learn against numpy and scipy master
5852
# installed from their CI wheels in a virtualenv with the Python
5953
# interpreter provided by travis.
60-
- python: 3.5
61-
env: DISTRIB="scipy-dev-wheels"
54+
# - python: 3.5
55+
# env: DISTRIB="scipy-dev-wheels"
56+
# allow_failures:
57+
# # allow_failures seems to be keyed on the python version
58+
# # We are using this to allow failures for DISTRIB=scipy-dev-wheels
59+
# - python: 3.5
6260

6361
install: source build_tools/travis/install.sh
6462
script: bash build_tools/travis/test_script.sh

doc/conf.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,7 @@
241241
'sklearn': None,
242242
'matplotlib': 'http://matplotlib.org',
243243
'numpy': 'http://docs.scipy.org/doc/numpy-1.8.1',
244-
'scipy': 'http://docs.scipy.org/doc/scipy-0.13.3/reference'},
245-
'expected_failing_examples': [
246-
'../examples/applications/plot_stock_market.py']
244+
'scipy': 'http://docs.scipy.org/doc/scipy-0.13.3/reference'}
247245
}
248246

249247

doc/modules/grid_search.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ estimator classes. Typical examples include ``C``, ``kernel`` and ``gamma``
1414
for Support Vector Classifier, ``alpha`` for Lasso, etc.
1515

1616
It is possible and recommended to search the hyper-parameter space for the
17-
best :ref:`cross_validation` score.
17+
best :ref:`cross validation <cross_validation>` score.
1818

1919
Any parameter provided when constructing an estimator may be optimized in this
2020
manner. Specifically, to find the names and current values for all parameters

doc/whats_new.rst

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ Enhancements
178178
removed by setting it to `None`.
179179
:issue:`7674` by :user:`Yichuan Liu <yl565>`.
180180

181+
- Prevent cast from float32 to float64 in
182+
:class:`sklearn.linear_model.LogisticRegression` when using newton-cg solver
183+
by :user:`Joan Massich <massich>`.
184+
181185
Bug fixes
182186
.........
183187
- Fixed a bug where :class:`sklearn.ensemble.IsolationForest` uses an
@@ -317,6 +321,9 @@ Bug fixes
317321
classes, and some values proposed in the docstring could raise errors.
318322
:issue:`5359` by `Tom Dupre la Tour`_.
319323

324+
- Fixed a memory leak in our LibLinear implementation. :issue:`9024` by
325+
:user:`Sergei Lebedev <superbobry>`.
326+
320327
API changes summary
321328
-------------------
322329

@@ -822,8 +829,8 @@ Model evaluation and meta-estimators
822829
- Added support for substituting or disabling :class:`pipeline.Pipeline`
823830
and :class:`pipeline.FeatureUnion` components using the ``set_params``
824831
interface that powers :mod:`sklearn.grid_search`.
825-
See :ref:`sphx_glr_plot_compare_reduction.py`. By `Joel Nothman`_ and
826-
:user:`Robert McGibbon <rmcgibbo>`.
832+
See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`.
833+
By `Joel Nothman`_ and :user:`Robert McGibbon <rmcgibbo>`.
827834

828835
- The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV`
829836
(and :class:`model_selection.RandomizedSearchCV`) can be easily imported

examples/applications/plot_stock_market.py

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,27 +64,60 @@
6464
# Author: Gael Varoquaux gael.varoquaux@normalesup.org
6565
# License: BSD 3 clause
6666

67-
import datetime
67+
from datetime import datetime
6868

6969
import numpy as np
7070
import matplotlib.pyplot as plt
71-
try:
72-
from matplotlib.finance import quotes_historical_yahoo_ochl
73-
except ImportError:
74-
# quotes_historical_yahoo_ochl was named quotes_historical_yahoo before matplotlib 1.4
75-
from matplotlib.finance import quotes_historical_yahoo as quotes_historical_yahoo_ochl
7671
from matplotlib.collections import LineCollection
72+
from six.moves.urllib.request import urlopen
73+
from six.moves.urllib.parse import urlencode
7774
from sklearn import cluster, covariance, manifold
7875

76+
7977
###############################################################################
8078
# Retrieve the data from Internet
8179

80+
def quotes_historical_google(symbol, date1, date2):
81+
"""Get the historical data from Google finance.
82+
83+
Parameters
84+
----------
85+
symbol : str
86+
Ticker symbol to query for, for example ``"DELL"``.
87+
date1 : datetime.datetime
88+
Start date.
89+
date2 : datetime.datetime
90+
End date.
91+
92+
Returns
93+
-------
94+
X : array
95+
The columns are ``date`` -- datetime, ``open``, ``high``,
96+
``low``, ``close`` and ``volume`` of type float.
97+
"""
98+
params = urlencode({
99+
'q': symbol,
100+
'startdate': date1.strftime('%b %d, %Y'),
101+
'enddate': date2.strftime('%b %d, %Y'),
102+
'output': 'csv'
103+
})
104+
url = 'http://www.google.com/finance/historical?' + params
105+
with urlopen(url) as response:
106+
dtype = {
107+
'names': ['date', 'open', 'high', 'low', 'close', 'volume'],
108+
'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']
109+
}
110+
converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}
111+
return np.genfromtxt(response, delimiter=',', skip_header=1,
112+
dtype=dtype, converters=converters,
113+
missing_values='-', filling_values=-1)
114+
115+
82116
# Choose a time period reasonably calm (not too long ago so that we get
83117
# high-tech firms, and before the 2008 crash)
84-
d1 = datetime.datetime(2003, 1, 1)
85-
d2 = datetime.datetime(2008, 1, 1)
118+
d1 = datetime(2003, 1, 1)
119+
d2 = datetime(2008, 1, 1)
86120

87-
# kraft symbol has now changed from KFT to MDLZ in yahoo
88121
symbol_dict = {
89122
'TOT': 'Total',
90123
'XOM': 'Exxon',
@@ -102,7 +135,6 @@
102135
'AMZN': 'Amazon',
103136
'TM': 'Toyota',
104137
'CAJ': 'Canon',
105-
'MTU': 'Mitsubishi',
106138
'SNE': 'Sony',
107139
'F': 'Ford',
108140
'HMC': 'Honda',
@@ -111,9 +143,8 @@
111143
'BA': 'Boeing',
112144
'KO': 'Coca Cola',
113145
'MMM': '3M',
114-
'MCD': 'Mc Donalds',
146+
'MCD': 'McDonald\'s',
115147
'PEP': 'Pepsi',
116-
'MDLZ': 'Kraft Foods',
117148
'K': 'Kellogg',
118149
'UN': 'Unilever',
119150
'MAR': 'Marriott',
@@ -129,11 +160,9 @@
129160
'AAPL': 'Apple',
130161
'SAP': 'SAP',
131162
'CSCO': 'Cisco',
132-
'TXN': 'Texas instruments',
163+
'TXN': 'Texas Instruments',
133164
'XRX': 'Xerox',
134-
'LMT': 'Lookheed Martin',
135165
'WMT': 'Wal-Mart',
136-
'WBA': 'Walgreen',
137166
'HD': 'Home Depot',
138167
'GSK': 'GlaxoSmithKline',
139168
'PFE': 'Pfizer',
@@ -149,14 +178,16 @@
149178

150179
symbols, names = np.array(list(symbol_dict.items())).T
151180

152-
quotes = [quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)
153-
for symbol in symbols]
181+
quotes = [
182+
quotes_historical_google(symbol, d1, d2) for symbol in symbols
183+
]
154184

155-
open = np.array([q.open for q in quotes]).astype(np.float)
156-
close = np.array([q.close for q in quotes]).astype(np.float)
185+
close_prices = np.stack([q['close'] for q in quotes])
186+
open_prices = np.stack([q['open'] for q in quotes])
157187

158188
# The daily variations of the quotes are what carry most information
159-
variation = close - open
189+
variation = close_prices - open_prices
190+
160191

161192
###############################################################################
162193
# Learn a graphical structure from the correlations
@@ -209,7 +240,7 @@
209240

210241
# Plot the edges
211242
start_idx, end_idx = np.where(non_zero)
212-
#a sequence of (*line0*, *line1*, *line2*), where::
243+
# a sequence of (*line0*, *line1*, *line2*), where::
213244
# linen = (x0, y0), (x1, y1), ... (xm, ym)
214245
segments = [[embedding[:, start], embedding[:, stop]]
215246
for start, stop in zip(start_idx, end_idx)]

examples/feature_selection/plot_f_test_vs_mi.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
We consider 3 features x_1, x_2, x_3 distributed uniformly over [0, 1], the
1010
target depends on them as follows:
1111
12-
y = x_1 + sin(6 * pi * x_2) + 0.1 * N(0, 1), that is the third features is completely irrelevant.
12+
y = x_1 + sin(6 * pi * x_2) + 0.1 * N(0, 1), that is, the third feature is
13+
completely irrelevant.
1314
1415
The code below plots the dependency of y against individual x_i and normalized
1516
values of univariate F-test statistics and mutual information.
@@ -39,11 +40,10 @@
3940
plt.figure(figsize=(15, 5))
4041
for i in range(3):
4142
plt.subplot(1, 3, i + 1)
42-
plt.scatter(X[:, i], y)
43+
plt.scatter(X[:, i], y, edgecolor='black', s=20)
4344
plt.xlabel("$x_{}$".format(i + 1), fontsize=14)
4445
if i == 0:
4546
plt.ylabel("$y$", fontsize=14)
4647
plt.title("F-test={:.2f}, MI={:.2f}".format(f_test[i], mi[i]),
4748
fontsize=16)
4849
plt.show()
49-

examples/feature_selection/plot_feature_selection.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@
5454
scores = -np.log10(selector.pvalues_)
5555
scores /= scores.max()
5656
plt.bar(X_indices - .45, scores, width=.2,
57-
label=r'Univariate score ($-Log(p_{value})$)', color='darkorange')
57+
label=r'Univariate score ($-Log(p_{value})$)', color='darkorange',
58+
edgecolor='black')
5859

5960
###############################################################################
6061
# Compare to the weights of an SVM
@@ -65,7 +66,7 @@
6566
svm_weights /= svm_weights.max()
6667

6768
plt.bar(X_indices - .25, svm_weights, width=.2, label='SVM weight',
68-
color='navy')
69+
color='navy', edgecolor='black')
6970

7071
clf_selected = svm.SVC(kernel='linear')
7172
clf_selected.fit(selector.transform(X), y)
@@ -74,7 +75,8 @@
7475
svm_weights_selected /= svm_weights_selected.max()
7576

7677
plt.bar(X_indices[selector.get_support()] - .05, svm_weights_selected,
77-
width=.2, label='SVM weights after selection', color='c')
78+
width=.2, label='SVM weights after selection', color='c',
79+
edgecolor='black')
7880

7981

8082
plt.title("Comparing feature selection")

examples/feature_selection/plot_permutation_test_for_classification.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,14 @@
4949

5050
###############################################################################
5151
# View histogram of permutation scores
52-
plt.hist(permutation_scores, 20, label='Permutation scores')
52+
plt.hist(permutation_scores, 20, label='Permutation scores',
53+
edgecolor='black')
5354
ylim = plt.ylim()
5455
# BUG: vlines(..., linestyle='--') fails on older versions of matplotlib
55-
#plt.vlines(score, ylim[0], ylim[1], linestyle='--',
56+
# plt.vlines(score, ylim[0], ylim[1], linestyle='--',
5657
# color='g', linewidth=3, label='Classification Score'
5758
# ' (pvalue %s)' % pvalue)
58-
#plt.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',
59+
# plt.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',
5960
# color='k', linewidth=3, label='Luck')
6061
plt.plot(2 * [score], ylim, '--g', linewidth=3,
6162
label='Classification Score'

examples/linear_model/plot_bayesian_ridge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@
7272

7373
plt.figure(figsize=(6, 5))
7474
plt.title("Histogram of the weights")
75-
plt.hist(clf.coef_, bins=n_features, color='gold', log=True)
75+
plt.hist(clf.coef_, bins=n_features, color='gold', log=True,
76+
edgecolor='black')
7677
plt.scatter(clf.coef_[relevant_features], 5 * np.ones(len(relevant_features)),
7778
color='navy', label="Relevant features")
7879
plt.ylabel("Features")

examples/linear_model/plot_logistic_multinomial.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@
5050
colors = "bry"
5151
for i, color in zip(clf.classes_, colors):
5252
idx = np.where(y == i)
53-
plt.scatter(X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired)
53+
plt.scatter(X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired,
54+
edgecolor='black', s=20)
5455

5556
# Plot the three one-against-all classifiers
5657
xmin, xmax = plt.xlim()

0 commit comments

Comments
 (0)