diff --git a/.travis.yml b/.travis.yml
index 4022f78aa0928..2f0cbf91067dd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -36,14 +36,14 @@ matrix:
     # Python 3.4 build
     - env: DISTRIB="conda" PYTHON_VERSION="3.4" INSTALL_MKL="false"
            NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2"
-           COVERAGE=true
+           PILLOW_VERSION="4.0.0" COVERAGE=true
       if: type != cron
     # This environment tests the newest supported Anaconda release (5.0.0)
     # It also runs tests requiring Pandas and PyAMG
     - env: DISTRIB="conda" PYTHON_VERSION="3.6.2" INSTALL_MKL="true"
            NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" PANDAS_VERSION="0.20.3"
-           CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" COVERAGE=true
-           CHECK_PYTEST_SOFT_DEPENDENCY="true"
+           CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" PILLOW_VERSION="4.3.0"
+           COVERAGE=true CHECK_PYTEST_SOFT_DEPENDENCY="true"
       if: type != cron
     # flake8 linting on diff wrt common ancestor with upstream/master
     - env: RUN_FLAKE8="true" SKIP_TESTS="true"
diff --git a/build_tools/appveyor/requirements.txt b/build_tools/appveyor/requirements.txt
index b01693343441f..a14f4032d770c 100644
--- a/build_tools/appveyor/requirements.txt
+++ b/build_tools/appveyor/requirements.txt
@@ -13,3 +13,4 @@ cython
 pytest
 wheel
 wheelhouse_uploader
+pillow
diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
index 76cd3221cb009..9a5b65ce225bd 100755
--- a/build_tools/travis/install.sh
+++ b/build_tools/travis/install.sh
@@ -55,6 +55,10 @@ if [[ "$DISTRIB" == "conda" ]]; then
         TO_INSTALL="$TO_INSTALL pyamg=$PYAMG_VERSION"
     fi
 
+    if [[ -n "$PILLOW_VERSION" ]]; then
+        TO_INSTALL="$TO_INSTALL pillow=$PILLOW_VERSION"
+    fi
+
     conda create -n testenv --yes $TO_INSTALL
     source activate testenv
 
diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py
index 506f727c06583..c42a102f59bc6 100644
--- a/sklearn/datasets/base.py
+++ b/sklearn/datasets/base.py
@@ -767,16 +767,9 @@ def load_sample_images():
     >>> first_img_data.dtype               #doctest: +SKIP
     dtype('uint8')
     """
-    # Try to import imread from scipy. We do this lazily here to prevent
-    # this module from depending on PIL.
-    try:
-        try:
-            from scipy.misc import imread
-        except ImportError:
-            from scipy.misc.pilutil import imread
-    except ImportError:
-        raise ImportError("The Python Imaging Library (PIL) "
-                          "is required to load data from jpeg files")
+    # import PIL only when needed
+    from ..externals._pilutil import imread
+
     module_path = join(dirname(__file__), "images")
     with open(join(module_path, 'README.txt')) as f:
         descr = f.read()
diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py
index 51850ad6c8898..843e7f15dce0f 100644
--- a/sklearn/datasets/lfw.py
+++ b/sklearn/datasets/lfw.py
@@ -32,7 +32,6 @@
 from .base import get_data_home, _fetch_remote, RemoteFileMetadata
 from ..utils import Bunch
 from ..externals.joblib import Memory
-
 from ..externals.six import b
 
 logger = logging.getLogger(__name__)
@@ -136,18 +135,8 @@ def check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True):
 
 def _load_imgs(file_paths, slice_, color, resize):
     """Internally used to load images"""
-
-    # Try to import imread and imresize from PIL. We do this here to prevent
-    # the whole sklearn.datasets module from depending on PIL.
-    try:
-        try:
-            from scipy.misc import imread
-        except ImportError:
-            from scipy.misc.pilutil import imread
-        from scipy.misc import imresize
-    except ImportError:
-        raise ImportError("The Python Imaging Library (PIL)"
-                          " is required to load data from jpeg files")
+    # import PIL only when needed
+    from ..externals._pilutil import imread, imresize
 
     # compute the portion of the images to load to respect the slice_ parameter
     # given by the caller
diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py
index 30f0416e72200..a2e75e98eccf8 100644
--- a/sklearn/datasets/tests/test_base.py
+++ b/sklearn/datasets/tests/test_base.py
@@ -21,6 +21,7 @@
 from sklearn.datasets.base import Bunch
 
 from sklearn.externals.six import b, u
+from sklearn.externals._pilutil import pillow_installed
 
 from sklearn.utils.testing import assert_false
 from sklearn.utils.testing import assert_true
@@ -161,15 +162,7 @@ def test_load_sample_image():
 
 
 def test_load_missing_sample_image_error():
-    have_PIL = True
-    try:
-        try:
-            from scipy.misc import imread
-        except ImportError:
-            from scipy.misc.pilutil import imread  # noqa
-    except ImportError:
-        have_PIL = False
-    if have_PIL:
+    if pillow_installed:
         assert_raises(AttributeError, load_sample_image,
                       'blop.jpg')
     else:
diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index ac6395c4958be..97e4f08540265 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -14,14 +14,7 @@
 import tempfile
 import numpy as np
 from sklearn.externals import six
-try:
-    try:
-        from scipy.misc import imsave
-    except ImportError:
-        from scipy.misc.pilutil import imsave
-except ImportError:
-    imsave = None
-
+from sklearn.externals._pilutil import pillow_installed, imsave
 from sklearn.datasets import fetch_lfw_pairs
 from sklearn.datasets import fetch_lfw_people
 
@@ -48,7 +41,7 @@
 
 def setup_module():
     """Test fixture run once and common to all tests of this module"""
-    if imsave is None:
+    if not pillow_installed:
         raise SkipTest("PIL not installed.")
 
     if not os.path.exists(LFW_HOME):
diff --git a/sklearn/externals/_pilutil.py b/sklearn/externals/_pilutil.py
new file mode 100644
index 0000000000000..f5c9b43d92a98
--- /dev/null
+++ b/sklearn/externals/_pilutil.py
@@ -0,0 +1,498 @@
+"""
+A collection of image utilities using the Python Imaging Library (PIL).
+
+This is a local version of utility functions from scipy that are wrapping PIL
+functionality. These functions are deprecated in scipy 1.0.0 and will be
+removed in scipy 1.2.0. Therefore, the functionality used in sklearn is copied
+here. This file is taken from scipy/misc/pilutil.py in scipy
+1.0.0. Modifications include: making this module importable if pillow is not
+installed, removal of DeprecationWarning, removal of functions scikit-learn
+does not need.
+
+Copyright (c) 2001, 2002 Enthought, Inc.
+All rights reserved.
+
+Copyright (c) 2003-2017 SciPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+  a. Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+  b. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+  c. Neither the name of Enthought nor the names of the SciPy Developers
+     may be used to endorse or promote products derived from this software
+     without specific prior written permission.
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGE.
+"""
+from __future__ import division, print_function, absolute_import
+
+
+import numpy
+import tempfile
+
+from numpy import (amin, amax, ravel, asarray, arange, ones, newaxis,
+                   transpose, iscomplexobj, uint8, issubdtype, array)
+
+# Modification of original scipy pilutil.py to make this module importable if
+# pillow is not installed. If pillow is not installed, functions will raise
+# ImportError when called.
+try:
+    try:
+        from PIL import Image
+    except ImportError:
+        import Image
+    pillow_installed = True
+    if not hasattr(Image, 'frombytes'):
+        Image.frombytes = Image.fromstring
+except ImportError:
+    pillow_installed = False
+
+__all__ = ['bytescale', 'imread', 'imsave', 'fromimage', 'toimage', 'imresize']
+
+
+def bytescale(data, cmin=None, cmax=None, high=255, low=0):
+    """
+    Byte scales an array (image).
+
+    Byte scaling means converting the input image to uint8 dtype and scaling
+    the range to ``(low, high)`` (default 0-255).
+    If the input image already has dtype uint8, no scaling is done.
+
+    Unlike the other functions here, `bytescale` only needs numpy, not PIL.
+
+    Parameters
+    ----------
+    data : ndarray
+        PIL image data array.
+    cmin : scalar, optional
+        Bias scaling of small values. Default is ``data.min()``.
+    cmax : scalar, optional
+        Bias scaling of large values. Default is ``data.max()``.
+    high : scalar, optional
+        Scale max value to `high`. Default is 255.
+    low : scalar, optional
+        Scale min value to `low`. Default is 0.
+
+    Returns
+    -------
+    img_array : uint8 ndarray
+        The byte-scaled array.
+
+    Examples
+    --------
+    >>> from sklearn.externals._pilutil import bytescale
+    >>> img = np.array([[ 91.06794177,   3.39058326,  84.4221549 ],
+    ...                 [ 73.88003259,  80.91433048,   4.88878881],
+    ...                 [ 51.53875334,  34.45808177,  27.5873488 ]])
+    >>> bytescale(img)
+    array([[255,   0, 236],
+           [205, 225,   4],
+           [140,  90,  70]], dtype=uint8)
+    >>> bytescale(img, high=200, low=100)
+    array([[200, 100, 192],
+           [180, 188, 102],
+           [155, 135, 128]], dtype=uint8)
+    >>> bytescale(img, cmin=0, cmax=255)
+    array([[91,  3, 84],
+           [74, 81,  5],
+           [52, 34, 28]], dtype=uint8)
+
+    """
+    if data.dtype == uint8:
+        return data
+
+    if high > 255:
+        raise ValueError("`high` should be less than or equal to 255.")
+    if low < 0:
+        raise ValueError("`low` should be greater than or equal to 0.")
+    if high < low:
+        raise ValueError("`high` should be greater than or equal to `low`.")
+
+    if cmin is None:
+        cmin = data.min()
+    if cmax is None:
+        cmax = data.max()
+
+    cscale = cmax - cmin
+    if cscale < 0:
+        raise ValueError("`cmax` should be larger than `cmin`.")
+    elif cscale == 0:
+        cscale = 1
+
+    scale = float(high - low) / cscale
+    bytedata = (data - cmin) * scale + low
+    return (bytedata.clip(low, high) + 0.5).astype(uint8)
+
+
+def imread(name, flatten=False, mode=None):
+    """
+    Read an image from a file as an array.
+
+    This function is only available if Python Imaging Library (PIL) is installed.
+
+    Parameters
+    ----------
+    name : str or file object
+        The file name or file object to be read.
+    flatten : bool, optional
+        If True, flattens the color layers into a single gray-scale layer.
+    mode : str, optional
+        Mode to convert image to, e.g. ``'RGB'``. See the Notes for more
+        details.
+
+    Returns
+    -------
+    imread : ndarray
+        The array obtained by reading the image.
+
+    Notes
+    -----
+    `imread` uses the Python Imaging Library (PIL) to read an image.
+    The following notes are from the PIL documentation.
+
+    `mode` can be one of the following strings:
+
+    * 'L' (8-bit pixels, black and white)
+    * 'P' (8-bit pixels, mapped to any other mode using a color palette)
+    * 'RGB' (3x8-bit pixels, true color)
+    * 'RGBA' (4x8-bit pixels, true color with transparency mask)
+    * 'CMYK' (4x8-bit pixels, color separation)
+    * 'YCbCr' (3x8-bit pixels, color video format)
+    * 'I' (32-bit signed integer pixels)
+    * 'F' (32-bit floating point pixels)
+
+    PIL also provides limited support for a few special modes, including
+    'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa'
+    (true color with premultiplied alpha).
+
+    When translating a color image to black and white (mode 'L', 'I' or
+    'F'), the library uses the ITU-R 601-2 luma transform::
+
+        L = R * 299/1000 + G * 587/1000 + B * 114/1000
+
+    When `flatten` is True, the image is converted using mode 'F'.
+    When `mode` is not None and `flatten` is True, the image is first
+    converted according to `mode`, and the result is then flattened using
+    mode 'F'.
+
+    """
+    if not pillow_installed:
+        raise ImportError("The Python Imaging Library (PIL) "
+                          "is required to load data from jpeg files")
+
+    im = Image.open(name)
+    return fromimage(im, flatten=flatten, mode=mode)
+
+
+def imsave(name, arr, format=None):
+    """
+    Save an array as an image.
+
+    This function is only available if Python Imaging Library (PIL) is installed.
+
+    .. warning::
+
+        This function uses `bytescale` under the hood to rescale images to use
+        the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.
+        It will also cast data for 2-D images to ``uint32`` for ``mode=None``
+        (which is the default).
+
+    Parameters
+    ----------
+    name : str or file object
+        Output file name or file object.
+    arr : ndarray, MxN or MxNx3 or MxNx4
+        Array containing image values. If the shape is ``MxN``, the array
+        represents a grey-level image. Shape ``MxNx3`` stores the red, green
+        and blue bands along the last dimension. An alpha layer may be
+        included, specified as the last colour band of an ``MxNx4`` array.
+    format : str
+        Image format. If omitted, the format to use is determined from the
+        file name extension. If a file object was used instead of a file name,
+        this parameter should always be used.
+
+    Examples
+    --------
+    Construct an array of gradient intensity values and save to file:
+
+    >>> from sklearn.externals._pilutil import imsave
+    >>> x = np.zeros((255, 255), dtype=np.uint8)
+    >>> x[:] = np.arange(255)
+    >>> imsave('gradient.png', x)
+
+    Construct an array with three colour bands (R, G, B) and store to file:
+
+    >>> rgb = np.zeros((255, 255, 3), dtype=np.uint8)
+    >>> rgb[..., 0] = np.arange(255)
+    >>> rgb[..., 1] = 55
+    >>> rgb[..., 2] = 1 - np.arange(255)
+    >>> imsave('rgb_gradient.png', rgb)
+
+    """
+    im = toimage(arr, channel_axis=2)
+    if format is None:
+        im.save(name)
+    else:
+        im.save(name, format)
+    return
+
+
+def fromimage(im, flatten=False, mode=None):
+    """
+    Return a copy of a PIL image as a numpy array.
+
+    This function is only available if Python Imaging Library (PIL) is installed.
+
+    Parameters
+    ----------
+    im : PIL image
+        Input image.
+    flatten : bool
+        If true, convert the output to grey-scale.
+    mode : str, optional
+        Mode to convert image to, e.g. ``'RGB'``. See the Notes of the
+        `imread` docstring for more details.
+
+    Returns
+    -------
+    fromimage : ndarray
+        The different colour bands/channels are stored in the
+        third dimension, such that a grey-image is MxN, an
+        RGB-image MxNx3 and an RGBA-image MxNx4.
+
+    """
+    if not pillow_installed:
+        raise ImportError("The Python Imaging Library (PIL) "
+                          "is required to load data from jpeg files")
+
+    if not Image.isImageType(im):
+        raise TypeError("Input is not a PIL image.")
+
+    if mode is not None:
+        if mode != im.mode:
+            im = im.convert(mode)
+    elif im.mode == 'P':
+        # Mode 'P' means there is an indexed "palette". If we leave the mode
+        # as 'P', then when we do `a = array(im)` below, `a` will be a 2-D
+        # containing the indices into the palette, and not a 3-D array
+        # containing the RGB or RGBA values.
+        if 'transparency' in im.info:
+            im = im.convert('RGBA')
+        else:
+            im = im.convert('RGB')
+
+    if flatten:
+        im = im.convert('F')
+    elif im.mode == '1':
+        # Workaround for crash in PIL. When im is 1-bit, the call array(im)
+        # can cause a seg. fault, or generate garbage. See
+        # https://github.com/scipy/scipy/issues/2138 and
+        # https://github.com/python-pillow/Pillow/issues/350.
+        #
+        # This converts im from a 1-bit image to an 8-bit image.
+        im = im.convert('L')
+
+    a = array(im)
+    return a
+
+_errstr = "Mode is unknown or incompatible with input array shape."
+
+
+def toimage(arr, high=255, low=0, cmin=None, cmax=None, pal=None,
+            mode=None, channel_axis=None):
+    """Takes a numpy array and returns a PIL image.
+
+    This function is only available if Python Imaging Library (PIL) is installed.
+
+    The mode of the PIL image depends on the array shape and the `pal` and
+    `mode` keywords.
+
+    For 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values
+    (from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode
+    is given as 'F' or 'I' in which case a float and/or integer array is made.
+
+    .. warning::
+
+        This function uses `bytescale` under the hood to rescale images to use
+        the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.
+        It will also cast data for 2-D images to ``uint32`` for ``mode=None``
+        (which is the default).
+
+    Notes
+    -----
+    For 3-D arrays, the `channel_axis` argument tells which dimension of the
+    array holds the channel data.
+
+    For 3-D arrays if one of the dimensions is 3, the mode is 'RGB'
+    by default or 'YCbCr' if selected.
+
+    The numpy array must be either 2 dimensional or 3 dimensional.
+
+    """
+    if not pillow_installed:
+        raise ImportError("The Python Imaging Library (PIL) "
+                          "is required to load data from jpeg files")
+
+    data = asarray(arr)
+    if iscomplexobj(data):
+        raise ValueError("Cannot convert a complex-valued array.")
+    shape = list(data.shape)
+    valid = len(shape) == 2 or ((len(shape) == 3) and
+                                ((3 in shape) or (4 in shape)))
+    if not valid:
+        raise ValueError("'arr' does not have a suitable array shape for "
+                         "any mode.")
+    if len(shape) == 2:
+        shape = (shape[1], shape[0])  # columns show up first
+        if mode == 'F':
+            data32 = data.astype(numpy.float32)
+            image = Image.frombytes(mode, shape, data32.tostring())
+            return image
+        if mode in [None, 'L', 'P']:
+            bytedata = bytescale(data, high=high, low=low,
+                                 cmin=cmin, cmax=cmax)
+            image = Image.frombytes('L', shape, bytedata.tostring())
+            if pal is not None:
+                image.putpalette(asarray(pal, dtype=uint8).tostring())
+                # Becomes a mode='P' automagically.
+            elif mode == 'P':  # default gray-scale
+                pal = (arange(0, 256, 1, dtype=uint8)[:, newaxis] *
+                       ones((3,), dtype=uint8)[newaxis, :])
+                image.putpalette(asarray(pal, dtype=uint8).tostring())
+            return image
+        if mode == '1':  # high input gives threshold for 1
+            bytedata = (data > high)
+            image = Image.frombytes('1', shape, bytedata.tostring())
+            return image
+        if cmin is None:
+            cmin = amin(ravel(data))
+        if cmax is None:
+            cmax = amax(ravel(data))
+        # Constant image => cmax == cmin; use a unit range like bytescale.
+        data = (data*1.0 - cmin)*(high - low)/((cmax - cmin) or 1) + low
+        if mode == 'I':
+            data32 = data.astype(numpy.uint32)
+            image = Image.frombytes(mode, shape, data32.tostring())
+        else:
+            raise ValueError(_errstr)
+        return image
+
+    # if here then 3-d array with a 3 or a 4 in the shape length.
+    # Check for 3 in datacube shape --- 'RGB' or 'YCbCr'
+    if channel_axis is None:
+        if (3 in shape):
+            ca = numpy.flatnonzero(asarray(shape) == 3)[0]
+        else:
+            ca = numpy.flatnonzero(asarray(shape) == 4)
+            if len(ca):
+                ca = ca[0]
+            else:
+                raise ValueError("Could not find channel dimension.")
+    else:
+        ca = channel_axis
+
+    numch = shape[ca]
+    if numch not in [3, 4]:
+        raise ValueError("Channel axis dimension is not valid.")
+
+    bytedata = bytescale(data, high=high, low=low, cmin=cmin, cmax=cmax)
+    if ca == 2:
+        strdata = bytedata.tostring()
+        shape = (shape[1], shape[0])
+    elif ca == 1:
+        strdata = transpose(bytedata, (0, 2, 1)).tostring()
+        shape = (shape[2], shape[0])
+    elif ca == 0:
+        strdata = transpose(bytedata, (1, 2, 0)).tostring()
+        shape = (shape[2], shape[1])
+    if mode is None:
+        if numch == 3:
+            mode = 'RGB'
+        else:
+            mode = 'RGBA'
+
+    if mode not in ['RGB', 'RGBA', 'YCbCr', 'CMYK']:
+        raise ValueError(_errstr)
+
+    if mode in ['RGB', 'YCbCr']:
+        if numch != 3:
+            raise ValueError("Invalid array shape for mode.")
+    if mode in ['RGBA', 'CMYK']:
+        if numch != 4:
+            raise ValueError("Invalid array shape for mode.")
+
+    # Here we know data and mode is correct
+    image = Image.frombytes(mode, shape, strdata)
+    return image
+
+
+def imresize(arr, size, interp='bilinear', mode=None):
+    """
+    Resize an image.
+
+    This function is only available if Python Imaging Library (PIL) is installed.
+
+    .. warning::
+
+        This function uses `bytescale` under the hood to rescale images to use
+        the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.
+        It will also cast data for 2-D images to ``uint32`` for ``mode=None``
+        (which is the default).
+
+    Parameters
+    ----------
+    arr : ndarray
+        The array of image to be resized.
+    size : int, float or tuple
+        * int   - Percentage of current size.
+        * float - Fraction of current size.
+        * tuple - Size of the output image (height, width).
+
+    interp : str, optional
+        Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear',
+        'bicubic' or 'cubic').
+    mode : str, optional
+        The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing.
+        If ``mode=None`` (the default), 2-D images will be treated like
+        ``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays,
+        `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively.
+
+    Returns
+    -------
+    imresize : ndarray
+        The resized array of image.
+
+    See Also
+    --------
+    toimage : Implicitly used to convert `arr` according to `mode`.
+    scipy.ndimage.zoom : More generic implementation that does not use PIL.
+
+    """
+    im = toimage(arr, mode=mode)
+    ts = type(size)
+    if issubdtype(ts, numpy.signedinteger):
+        percent = size / 100.0
+        size = tuple((array(im.size)*percent).astype(int))
+    elif issubdtype(ts, numpy.floating):
+        size = tuple((array(im.size)*size).astype(int))
+    else:
+        size = (size[1], size[0])
+    func = {'nearest': 0, 'lanczos': 1, 'bilinear': 2, 'bicubic': 3, 'cubic': 3}
+    imnew = im.resize(size, resample=func[interp])
+    return fromimage(imnew)