scikit-learn
diff --git a/‎.travis.yml
Lines changed: 32 additions & 34 deletions b/‎.travis.yml
Lines changed: 32 additions & 34 deletions
diff --git a/‎doc/conf.py
Lines changed: 1 addition & 3 deletions b/‎doc/conf.py
Lines changed: 1 addition & 3 deletions
diff --git a/‎doc/modules/grid_search.rst
Lines changed: 1 addition & 1 deletion b/‎doc/modules/grid_search.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/whats_new.rst
Lines changed: 9 additions & 2 deletions b/‎doc/whats_new.rst
Lines changed: 9 additions & 2 deletions
diff --git a/‎examples/applications/plot_stock_market.py
Lines changed: 52 additions & 21 deletions b/‎examples/applications/plot_stock_market.py
Lines changed: 52 additions & 21 deletions
diff --git a/‎examples/feature_selection/plot_f_test_vs_mi.py
Lines changed: 3 additions & 3 deletions b/‎examples/feature_selection/plot_f_test_vs_mi.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎examples/feature_selection/plot_feature_selection.py
Lines changed: 5 additions & 3 deletions b/‎examples/feature_selection/plot_feature_selection.py
Lines changed: 5 additions & 3 deletions
diff --git a/‎examples/feature_selection/plot_permutation_test_for_classification.py
Lines changed: 4 additions & 3 deletions b/‎examples/feature_selection/plot_permutation_test_for_classification.py
Lines changed: 4 additions & 3 deletions
diff --git a/‎examples/linear_model/plot_bayesian_ridge.py
Lines changed: 2 additions & 1 deletion b/‎examples/linear_model/plot_bayesian_ridge.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/linear_model/plot_logistic_multinomial.py
Lines changed: 2 additions & 1 deletion b/‎examples/linear_model/plot_logistic_multinomial.py
Lines changed: 2 additions & 1 deletion
@@ -3,62 +3,60 @@ sudo: false
 
 language: python
 
-# Pre-install packages for the ubuntu distribution
 cache:
   apt: true
   directories:
   - $HOME/.cache/pip
-addons:
-  apt:
-    packages:
-      # these only required by the DISTRIB="ubuntu" builds:
-      - python-scipy
-      - libatlas3gf-base
-      - libatlas-dev
+
 dist: trusty
+
 env:
   global:
     # Directory where tests are run from
     - TEST_DIR=/tmp/sklearn
     - OMP_NUM_THREADS=4
     - OPENBLAS_NUM_THREADS=4
-  matrix:
+
+matrix:
+  include:
     # This environment tests that scikit-learn can be built against
     # versions of numpy, scipy with ATLAS that comes with Ubuntu Trusty 14.04
-    - DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4"
-      COVERAGE=true
+    - env: DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4"
+           COVERAGE=true
+      addons:
+        apt:
+          packages:
+            # these only required by the DISTRIB="ubuntu" builds:
+            - python-scipy
+            - libatlas3gf-base
+            - libatlas-dev
     # This environment tests the oldest supported anaconda env
-    - DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
-      NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.4"
-      COVERAGE=true
+    - env: DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
+           NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.4"
+           COVERAGE=true
     # This environment tests the newest supported Anaconda release (4.3.1)
     # It also runs tests requiring Pandas.
-    - DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
-      NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
-      CYTHON_VERSION="0.25.2" COVERAGE=true
+    - env: DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
+           NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
+           CYTHON_VERSION="0.25.2" COVERAGE=true
     # This environment uses pytest to run the tests. It uses the newest
     # supported Anaconda release (4.3.1). It also runs tests requiring Pandas.
-    - USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
-      NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
-      CYTHON_VERSION="0.25.2"
+    # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6"
+    #        INSTALL_MKL="true" NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1"
+    #        PANDAS_VERSION="0.19.2" CYTHON_VERSION="0.25.2"
     # flake8 linting on diff wrt common ancestor with upstream/master
-    - RUN_FLAKE8="true" SKIP_TESTS="true"
-      DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
-      NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
-
-
-matrix:
-  allow_failures:
-    # allow_failures seems to be keyed on the python version
-    # We are using this to allow failures for DISTRIB=scipy-dev-wheels
-    - python: 3.5
-
-  include:
+    - env: RUN_FLAKE8="true" SKIP_TESTS="true"
+           DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
+           NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
     # This environment tests scikit-learn against numpy and scipy master
     # installed from their CI wheels in a virtualenv with the Python
     # interpreter provided by travis.
-    -  python: 3.5
-       env: DISTRIB="scipy-dev-wheels"
+  #   -  python: 3.5
+  #      env: DISTRIB="scipy-dev-wheels"
+  # allow_failures:
+  #   # allow_failures seems to be keyed on the python version
+  #   # We are using this to allow failures for DISTRIB=scipy-dev-wheels
+  #   - python: 3.5
 
 install: source build_tools/travis/install.sh
 script: bash build_tools/travis/test_script.sh
 
@@ -241,9 +241,7 @@
         'sklearn': None,
         'matplotlib': 'http://matplotlib.org',
         'numpy': 'http://docs.scipy.org/doc/numpy-1.8.1',
-        'scipy': 'http://docs.scipy.org/doc/scipy-0.13.3/reference'},
-    'expected_failing_examples': [
-        '../examples/applications/plot_stock_market.py']
+        'scipy': 'http://docs.scipy.org/doc/scipy-0.13.3/reference'}
 }
 
 
 
@@ -14,7 +14,7 @@ estimator classes. Typical examples include ``C``, ``kernel`` and ``gamma``
 for Support Vector Classifier, ``alpha`` for Lasso, etc.
 
 It is possible and recommended to search the hyper-parameter space for the
-best :ref:`cross_validation` score.
+best :ref:`cross validation <cross_validation>` score.
 
 Any parameter provided when constructing an estimator may be optimized in this
 manner. Specifically, to find the names and current values for all parameters
 
@@ -178,6 +178,10 @@ Enhancements
      removed by setting it to `None`.
      :issue:`7674` by :user:`Yichuan Liu <yl565>`.
 
+   - Prevent cast from float32 to float64 in
+     :class:`sklearn.linear_model.LogisticRegression` when using newton-cg
+     solver by :user:`Joan Massich <massich>`.
+
 Bug fixes
 .........
    - Fixed a bug where :class:`sklearn.ensemble.IsolationForest` uses an
@@ -317,6 +321,9 @@ Bug fixes
      classes, and some values proposed in the docstring could raise errors.
      :issue:`5359` by `Tom Dupre la Tour`_.
 
+   - Fixed a memory leak in our LibLinear implementation. :issue:`9024` by
+     :user:`Sergei Lebedev <superbobry>`.
+
 API changes summary
 -------------------
 
@@ -822,8 +829,8 @@ Model evaluation and meta-estimators
    - Added support for substituting or disabling :class:`pipeline.Pipeline`
      and :class:`pipeline.FeatureUnion` components using the ``set_params``
      interface that powers :mod:`sklearn.grid_search`.
-     See :ref:`sphx_glr_plot_compare_reduction.py`. By `Joel Nothman`_ and
-     :user:`Robert McGibbon <rmcgibbo>`.
+     See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`.
+     By `Joel Nothman`_ and :user:`Robert McGibbon <rmcgibbo>`.
 
    - The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV`
      (and :class:`model_selection.RandomizedSearchCV`) can be easily imported
 
@@ -64,27 +64,60 @@
 # Author: Gael Varoquaux gael.varoquaux@normalesup.org
 # License: BSD 3 clause
 
-import datetime
+from datetime import datetime
 
 import numpy as np
 import matplotlib.pyplot as plt
-try:
-     from matplotlib.finance import quotes_historical_yahoo_ochl
-except ImportError:
-     # quotes_historical_yahoo_ochl was named quotes_historical_yahoo before matplotlib 1.4
-     from matplotlib.finance import quotes_historical_yahoo as quotes_historical_yahoo_ochl
 from matplotlib.collections import LineCollection
+from six.moves.urllib.request import urlopen
+from six.moves.urllib.parse import urlencode
 from sklearn import cluster, covariance, manifold
 
+
 ###############################################################################
 # Retrieve the data from Internet
 
+def quotes_historical_google(symbol, date1, date2):
+    """Get the historical data from Google finance.
+
+    Parameters
+    ----------
+    symbol : str
+        Ticker symbol to query for, for example ``"DELL"``.
+    date1 : datetime.datetime
+        Start date.
+    date2 : datetime.datetime
+        End date.
+
+    Returns
+    -------
+    X : array
+        Structured array with the fields ``date`` -- datetime, ``open``,
+        ``high``, ``low``, ``close`` and ``volume`` of type float. Fields
+        given as ``-`` in the CSV are treated as missing and filled
+        with ``-1``.
+    """
+    # Local import: keeps this helper self-contained.
+    from contextlib import closing
+
+    params = urlencode({
+        'q': symbol,
+        'startdate': date1.strftime('%b %d, %Y'),
+        'enddate': date2.strftime('%b %d, %Y'),
+        'output': 'csv'
+    })
+    url = 'http://www.google.com/finance/historical?' + params
+    dtype = {
+        'names': ['date', 'open', 'high', 'low', 'close', 'volume'],
+        'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']
+    }
+    # The first CSV column holds dates such as ``31-Dec-07``; it arrives as
+    # bytes and must be decoded before parsing.
+    converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}
+    # The object returned by ``urlopen`` is not a context manager on
+    # Python 2, so wrap it in ``closing`` to release the connection on both
+    # Python 2 and Python 3.
+    with closing(urlopen(url)) as response:
+        return np.genfromtxt(response, delimiter=',', skip_header=1,
+                             dtype=dtype, converters=converters,
+                             missing_values='-', filling_values=-1)
+
+
 # Choose a time period reasonably calm (not too long ago so that we get
 # high-tech firms, and before the 2008 crash)
-d1 = datetime.datetime(2003, 1, 1)
-d2 = datetime.datetime(2008, 1, 1)
+d1 = datetime(2003, 1, 1)
+d2 = datetime(2008, 1, 1)
 
-# kraft symbol has now changed from KFT to MDLZ in yahoo
 symbol_dict = {
     'TOT': 'Total',
     'XOM': 'Exxon',
@@ -102,7 +135,6 @@
     'AMZN': 'Amazon',
     'TM': 'Toyota',
     'CAJ': 'Canon',
-    'MTU': 'Mitsubishi',
     'SNE': 'Sony',
     'F': 'Ford',
     'HMC': 'Honda',
@@ -111,9 +143,8 @@
     'BA': 'Boeing',
     'KO': 'Coca Cola',
     'MMM': '3M',
-    'MCD': 'Mc Donalds',
+    'MCD': 'McDonald\'s',
     'PEP': 'Pepsi',
-    'MDLZ': 'Kraft Foods',
     'K': 'Kellogg',
     'UN': 'Unilever',
     'MAR': 'Marriott',
@@ -129,11 +160,9 @@
     'AAPL': 'Apple',
     'SAP': 'SAP',
     'CSCO': 'Cisco',
-    'TXN': 'Texas instruments',
+    'TXN': 'Texas Instruments',
     'XRX': 'Xerox',
-    'LMT': 'Lookheed Martin',
     'WMT': 'Wal-Mart',
-    'WBA': 'Walgreen',
     'HD': 'Home Depot',
     'GSK': 'GlaxoSmithKline',
     'PFE': 'Pfizer',
@@ -149,14 +178,16 @@
 
 symbols, names = np.array(list(symbol_dict.items())).T
 
-quotes = [quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)
-          for symbol in symbols]
+quotes = [
+    quotes_historical_google(symbol, d1, d2) for symbol in symbols
+]
 
-open = np.array([q.open for q in quotes]).astype(np.float)
-close = np.array([q.close for q in quotes]).astype(np.float)
+close_prices = np.stack([q['close'] for q in quotes])
+open_prices = np.stack([q['open'] for q in quotes])
 
 # The daily variations of the quotes are what carry most information
-variation = close - open
+variation = close_prices - open_prices
+
 
 ###############################################################################
 # Learn a graphical structure from the correlations
@@ -209,7 +240,7 @@
 
 # Plot the edges
 start_idx, end_idx = np.where(non_zero)
-#a sequence of (*line0*, *line1*, *line2*), where::
+# a sequence of (*line0*, *line1*, *line2*), where::
 #            linen = (x0, y0), (x1, y1), ... (xm, ym)
 segments = [[embedding[:, start], embedding[:, stop]]
             for start, stop in zip(start_idx, end_idx)]
 
@@ -9,7 +9,8 @@
 We consider 3 features x_1, x_2, x_3 distributed uniformly over [0, 1], the
 target depends on them as follows:
 
-y = x_1 + sin(6 * pi * x_2) + 0.1 * N(0, 1), that is the third features is completely irrelevant.
+y = x_1 + sin(6 * pi * x_2) + 0.1 * N(0, 1), that is the third feature is
+completely irrelevant.
 
 The code below plots the dependency of y against individual x_i and normalized
 values of univariate F-tests statistics and mutual information.
@@ -39,11 +40,10 @@
 plt.figure(figsize=(15, 5))
 for i in range(3):
     plt.subplot(1, 3, i + 1)
-    plt.scatter(X[:, i], y)
+    plt.scatter(X[:, i], y, edgecolor='black', s=20)
     plt.xlabel("$x_{}$".format(i + 1), fontsize=14)
     if i == 0:
         plt.ylabel("$y$", fontsize=14)
     plt.title("F-test={:.2f}, MI={:.2f}".format(f_test[i], mi[i]),
               fontsize=16)
 plt.show()
-
@@ -54,7 +54,8 @@
 scores = -np.log10(selector.pvalues_)
 scores /= scores.max()
 plt.bar(X_indices - .45, scores, width=.2,
-        label=r'Univariate score ($-Log(p_{value})$)', color='darkorange')
+        label=r'Univariate score ($-Log(p_{value})$)', color='darkorange',
+        edgecolor='black')
 
 ###############################################################################
 # Compare to the weights of an SVM
@@ -65,7 +66,7 @@
 svm_weights /= svm_weights.max()
 
 plt.bar(X_indices - .25, svm_weights, width=.2, label='SVM weight',
-        color='navy')
+        color='navy', edgecolor='black')
 
 clf_selected = svm.SVC(kernel='linear')
 clf_selected.fit(selector.transform(X), y)
@@ -74,7 +75,8 @@
 svm_weights_selected /= svm_weights_selected.max()
 
 plt.bar(X_indices[selector.get_support()] - .05, svm_weights_selected,
-        width=.2, label='SVM weights after selection', color='c')
+        width=.2, label='SVM weights after selection', color='c',
+        edgecolor='black')
 
 
 plt.title("Comparing feature selection")
 
@@ -49,13 +49,14 @@
 
 ###############################################################################
 # View histogram of permutation scores
-plt.hist(permutation_scores, 20, label='Permutation scores')
+plt.hist(permutation_scores, 20, label='Permutation scores',
+         edgecolor='black')
 ylim = plt.ylim()
 # BUG: vlines(..., linestyle='--') fails on older versions of matplotlib
-#plt.vlines(score, ylim[0], ylim[1], linestyle='--',
+# plt.vlines(score, ylim[0], ylim[1], linestyle='--',
 #          color='g', linewidth=3, label='Classification Score'
 #          ' (pvalue %s)' % pvalue)
-#plt.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',
+# plt.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',
 #          color='k', linewidth=3, label='Luck')
 plt.plot(2 * [score], ylim, '--g', linewidth=3,
          label='Classification Score'
 
@@ -72,7 +72,8 @@
 
 plt.figure(figsize=(6, 5))
 plt.title("Histogram of the weights")
-plt.hist(clf.coef_, bins=n_features, color='gold', log=True)
+plt.hist(clf.coef_, bins=n_features, color='gold', log=True,
+         edgecolor='black')
 plt.scatter(clf.coef_[relevant_features], 5 * np.ones(len(relevant_features)),
             color='navy', label="Relevant features")
 plt.ylabel("Features")
 
@@ -50,7 +50,8 @@
     colors = "bry"
     for i, color in zip(clf.classes_, colors):
         idx = np.where(y == i)
-        plt.scatter(X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired)
+        plt.scatter(X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired,
+                    edgecolor='black', s=20)
 
     # Plot the three one-against-all classifiers
     xmin, xmax = plt.xlim()
Original file line number	Diff line number	Diff line change
`@@ -241,9 +241,7 @@`
`241`	`241`	`'sklearn': None,`
`242`	`242`	`'matplotlib': 'http://matplotlib.org',`
`243`	`243`	`'numpy': 'http://docs.scipy.org/doc/numpy-1.8.1',`
`244`		`- 'scipy': 'http://docs.scipy.org/doc/scipy-0.13.3/reference'},`
`245`		`- 'expected_failing_examples': [`
`246`		`- '../examples/applications/plot_stock_market.py']`
	`244`	`+ 'scipy': 'http://docs.scipy.org/doc/scipy-0.13.3/reference'}`
`247`	`245`	`}`
`248`	`246`
`249`	`247`