diff --git a/sklearn/conftest.py b/sklearn/conftest.py index a9cc21d1c6949..f4db3431cf533 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -9,7 +9,6 @@ from sklearn.utils import _IS_32BIT from sklearn.utils._openmp_helpers import _openmp_effective_n_threads -from sklearn.externals import _pilutil from sklearn._min_dependencies import PYTEST_MIN_VERSION from sklearn.utils.fixes import parse_version from sklearn.datasets import fetch_20newsgroups @@ -169,7 +168,14 @@ def pytest_collection_modifyitems(config, items): # details. if item.name != "sklearn._config.config_context": item.add_marker(skip_marker) - elif not _pilutil.pillow_installed: + try: + import PIL # noqa + + pillow_installed = True + except ImportError: + pillow_installed = False + + if not pillow_installed: skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") for item in items: if item.name in [ diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index 4f7a31790ffdb..a09077217bac8 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -1387,8 +1387,15 @@ def load_sample_images(): >>> first_img_data.dtype #doctest: +SKIP dtype('uint8') """ - # import PIL only when needed - from ..externals._pilutil import imread + try: + from PIL import Image + except ImportError: + raise ImportError( + "The Python Imaging Library (PIL) is required to load data " + "from jpeg files. Please refer to " + "https://pillow.readthedocs.io/en/stable/installation.html " + "for installing PIL." 
+ ) descr = load_descr("README.txt", descr_module=IMAGES_MODULE) @@ -1397,7 +1404,8 @@ def load_sample_images(): if filename.endswith(".jpg"): filenames.append(filename) with resources.open_binary(IMAGES_MODULE, filename) as image_file: - image = imread(image_file) + pil_image = Image.open(image_file) + image = np.asarray(pil_image) images.append(image) return Bunch(images=images, filenames=filenames, DESCR=descr) diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index bc0b334267849..dc1267af59f96 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -116,8 +116,15 @@ def _check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): def _load_imgs(file_paths, slice_, color, resize): """Internally used to load images""" - # import PIL only when needed - from ..externals._pilutil import imread, imresize + try: + from PIL import Image + except ImportError: + raise ImportError( + "The Python Imaging Library (PIL) is required to load data " + "from jpeg files. Please refer to " + "https://pillow.readthedocs.io/en/stable/installation.html " + "for installing PIL." + ) # compute the portion of the images to load to respect the slice_ parameter # given by the caller @@ -151,17 +158,19 @@ def _load_imgs(file_paths, slice_, color, resize): # Checks if jpeg reading worked. Refer to issue #3594 for more # details. 
- img = imread(file_path) - if img.ndim == 0: + pil_img = Image.open(file_path) + box = (w_slice.start, h_slice.start, w_slice.stop, h_slice.stop) + pil_img = pil_img.crop(box) # crop() returns a new image; keep the result + if resize is not None: + pil_img = pil_img.resize((w, h)) + face = np.asarray(pil_img, dtype=np.float32) + if face.ndim == 0: raise RuntimeError( "Failed to read the image file %s, " "Please make sure that libjpeg is installed" % file_path ) - face = np.asarray(img[slice_], dtype=np.float32) face /= 255.0 # scale uint8 coded colors to the [0.0, 1.0] floats - if resize is not None: - face = imresize(face, resize) if not color: # average the color channels to compute a gray levels # representation diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 010fe8011a421..2eeb2fc570094 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -30,8 +30,6 @@ from sklearn.utils._testing import SkipTest from sklearn.datasets.tests.test_common import check_as_frame -from sklearn.externals._pilutil import pillow_installed - def _remove_dir(path): if os.path.isdir(path): @@ -226,11 +224,9 @@ def test_load_sample_image(): def test_load_missing_sample_image_error(): - if pillow_installed: - with pytest.raises(AttributeError): - load_sample_image("blop.jpg") - else: - warnings.warn("Could not load sample images, PIL is not available.") + pytest.importorskip("PIL") + with pytest.raises(AttributeError): + load_sample_image("blop.jpg") def test_load_diabetes_raw(): diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index d7852ab99361a..fba3949befb1a 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -15,12 +15,10 @@ import numpy as np import pytest from functools import partial -from sklearn.externals._pilutil import pillow_installed, imsave from sklearn.datasets import fetch_lfw_pairs from sklearn.datasets import fetch_lfw_people from sklearn.utils._testing import 
assert_array_equal -from sklearn.utils._testing import SkipTest from sklearn.datasets.tests.test_common import check_return_X_y @@ -41,8 +39,7 @@ def setup_module(): """Test fixture run once and common to all tests of this module""" - if not pillow_installed: - raise SkipTest("PIL not installed.") + Image = pytest.importorskip("PIL.Image") global SCIKIT_LEARN_DATA, SCIKIT_LEARN_EMPTY_DATA, LFW_HOME @@ -69,10 +66,8 @@ def setup_module(): for i in range(n_faces): file_path = os.path.join(folder_name, name + "_%04d.jpg" % i) uniface = np_rng.randint(0, 255, size=(250, 250, 3)) - try: - imsave(file_path, uniface) - except ImportError: - raise SkipTest("PIL not installed") + img = Image.fromarray(uniface.astype(np.uint8)) + img.save(file_path) # add some random file pollution to test robustness with open(os.path.join(LFW_HOME, "lfw_funneled", ".test.swp"), "wb") as f: diff --git a/sklearn/externals/_pilutil.py b/sklearn/externals/_pilutil.py deleted file mode 100644 index 45457684a9f72..0000000000000 --- a/sklearn/externals/_pilutil.py +++ /dev/null @@ -1,502 +0,0 @@ -""" -A collection of image utilities using the Python Imaging Library (PIL). - -This is a local version of utility functions from scipy that are wrapping PIL -functionality. These functions are deprecated in scipy 1.0.0 and will be -removed in scipy 1.2.0. Therefore, the functionality used in sklearn is copied -here. This file is taken from scipy/misc/pilutil.py in scipy -1.0.0. Modifications include: making this module importable if pillow is not -installed, removal of DeprecationWarning, removal of functions scikit-learn -does not need. - -Copyright (c) 2001, 2002 Enthought, Inc. -All rights reserved. - -Copyright (c) 2003-2017 SciPy Developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - a. 
Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - b. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - c. Neither the name of Enthought nor the names of the SciPy Developers - may be used to endorse or promote products derived from this software - without specific prior written permission. - - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, -OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -""" - -import numpy - -from numpy import (amin, amax, ravel, asarray, arange, ones, newaxis, - transpose, iscomplexobj, uint8, issubdtype, array) - -# Modification of original scipy pilutil.py to make this module importable if -# pillow is not installed. If pillow is not installed, functions will raise -# ImportError when called. 
-try: - try: - from PIL import Image - except ImportError: - import Image - pillow_installed = True - if not hasattr(Image, 'frombytes'): - Image.frombytes = Image.fromstring -except ImportError: - pillow_installed = False - -__all__ = ['bytescale', 'imread', 'imsave', 'fromimage', 'toimage', 'imresize'] - - -PILLOW_ERROR_MESSAGE = ( - "The Python Imaging Library (PIL) is required to load data " - "from jpeg files. Please refer to " - "https://pillow.readthedocs.io/en/stable/installation.html " - "for installing PIL." -) - - -def bytescale(data, cmin=None, cmax=None, high=255, low=0): - """ - Byte scales an array (image). - - Byte scaling means converting the input image to uint8 dtype and scaling - the range to ``(low, high)`` (default 0-255). - If the input image already has dtype uint8, no scaling is done. - - This function is only available if Python Imaging Library (PIL) is installed. - - Parameters - ---------- - data : ndarray - PIL image data array. - cmin : scalar, default=None - Bias scaling of small values. Default is ``data.min()``. - cmax : scalar, default=None - Bias scaling of large values. Default is ``data.max()``. - high : scalar, default=None - Scale max value to `high`. Default is 255. - low : scalar, default=None - Scale min value to `low`. Default is 0. - - Returns - ------- - img_array : uint8 ndarray - The byte-scaled array. - - Examples - -------- - >>> import numpy as np - >>> from scipy.misc import bytescale - >>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ], - ... [ 73.88003259, 80.91433048, 4.88878881], - ... 
[ 51.53875334, 34.45808177, 27.5873488 ]]) - >>> bytescale(img) - array([[255, 0, 236], - [205, 225, 4], - [140, 90, 70]], dtype=uint8) - >>> bytescale(img, high=200, low=100) - array([[200, 100, 192], - [180, 188, 102], - [155, 135, 128]], dtype=uint8) - >>> bytescale(img, cmin=0, cmax=255) - array([[91, 3, 84], - [74, 81, 5], - [52, 34, 28]], dtype=uint8) - - """ - if data.dtype == uint8: - return data - - if high > 255: - raise ValueError("`high` should be less than or equal to 255.") - if low < 0: - raise ValueError("`low` should be greater than or equal to 0.") - if high < low: - raise ValueError("`high` should be greater than or equal to `low`.") - - if cmin is None: - cmin = data.min() - if cmax is None: - cmax = data.max() - - cscale = cmax - cmin - if cscale < 0: - raise ValueError("`cmax` should be larger than `cmin`.") - elif cscale == 0: - cscale = 1 - - scale = float(high - low) / cscale - bytedata = (data - cmin) * scale + low - return (bytedata.clip(low, high) + 0.5).astype(uint8) - - -def imread(name, flatten=False, mode=None): - """ - Read an image from a file as an array. - - This function is only available if Python Imaging Library (PIL) is installed. - - Parameters - ---------- - name : str or file object - The file name or file object to be read. - flatten : bool, default=False - If True, flattens the color layers into a single gray-scale layer. - mode : str, default=None - Mode to convert image to, e.g. ``'RGB'``. See the Notes for more - details. - - Returns - ------- - imread : ndarray - The array obtained by reading the image. - - Notes - ----- - `imread` uses the Python Imaging Library (PIL) to read an image. - The following notes are from the PIL documentation. 
- - `mode` can be one of the following strings: - - * 'L' (8-bit pixels, black and white) - * 'P' (8-bit pixels, mapped to any other mode using a color palette) - * 'RGB' (3x8-bit pixels, true color) - * 'RGBA' (4x8-bit pixels, true color with transparency mask) - * 'CMYK' (4x8-bit pixels, color separation) - * 'YCbCr' (3x8-bit pixels, color video format) - * 'I' (32-bit signed integer pixels) - * 'F' (32-bit floating point pixels) - - PIL also provides limited support for a few special modes, including - 'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa' - (true color with premultiplied alpha). - - When translating a color image to black and white (mode 'L', 'I' or - 'F'), the library uses the ITU-R 601-2 luma transform:: - - L = R * 299/1000 + G * 587/1000 + B * 114/1000 - - When `flatten` is True, the image is converted using mode 'F'. - When `mode` is not None and `flatten` is True, the image is first - converted according to `mode`, and the result is then flattened using - mode 'F'. - - """ - if not pillow_installed: - raise ImportError(PILLOW_ERROR_MESSAGE) - - im = Image.open(name) - return fromimage(im, flatten=flatten, mode=mode) - - -def imsave(name, arr, format=None): - """ - Save an array as an image. - - This function is only available if Python Imaging Library (PIL) is installed. - - .. warning:: - - This function uses `bytescale` under the hood to rescale images to use - the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``. - It will also cast data for 2-D images to ``uint32`` for ``mode=None`` - (which is the default). - - Parameters - ---------- - name : str or file object - Output file name or file object. - arr : ndarray, MxN or MxNx3 or MxNx4 - Array containing image values. If the shape is ``MxN``, the array - represents a grey-level image. Shape ``MxNx3`` stores the red, green - and blue bands along the last dimension. An alpha layer may be - included, specified as the last colour band of an ``MxNx4`` array. 
- format : str, default=None - Image format. If omitted, the format to use is determined from the - file name extension. If a file object was used instead of a file name, - this parameter should always be used. - - Examples - -------- - Construct an array of gradient intensity values and save to file: - - >>> import numpy as np - >>> from scipy.misc import imsave - >>> x = np.zeros((255, 255)) - >>> x = np.zeros((255, 255), dtype=np.uint8) - >>> x[:] = np.arange(255) - >>> imsave('gradient.png', x) - - Construct an array with three colour bands (R, G, B) and store to file: - - >>> rgb = np.zeros((255, 255, 3), dtype=np.uint8) - >>> rgb[..., 0] = np.arange(255) - >>> rgb[..., 1] = 55 - >>> rgb[..., 2] = 1 - np.arange(255) - >>> imsave('rgb_gradient.png', rgb) - - """ - im = toimage(arr, channel_axis=2) - if format is None: - im.save(name) - else: - im.save(name, format) - return - - -def fromimage(im, flatten=False, mode=None): - """ - Return a copy of a PIL image as a numpy array. - - This function is only available if Python Imaging Library (PIL) is installed. - - Parameters - ---------- - im : PIL image - Input image. - flatten : bool, default=False - If true, convert the output to grey-scale. - mode : str, default=None - Mode to convert image to, e.g. ``'RGB'``. See the Notes of the - `imread` docstring for more details. - - Returns - ------- - fromimage : ndarray - The different colour bands/channels are stored in the - third dimension, such that a grey-image is MxN, an - RGB-image MxNx3 and an RGBA-image MxNx4. - - """ - if not pillow_installed: - raise ImportError(PILLOW_ERROR_MESSAGE) - - if not Image.isImageType(im): - raise TypeError("Input is not a PIL image.") - - if mode is not None: - if mode != im.mode: - im = im.convert(mode) - elif im.mode == 'P': - # Mode 'P' means there is an indexed "palette". 
If we leave the mode - # as 'P', then when we do `a = array(im)` below, `a` will be a 2-D - # containing the indices into the palette, and not a 3-D array - # containing the RGB or RGBA values. - if 'transparency' in im.info: - im = im.convert('RGBA') - else: - im = im.convert('RGB') - - if flatten: - im = im.convert('F') - elif im.mode == '1': - # Workaround for crash in PIL. When im is 1-bit, the call array(im) - # can cause a seg. fault, or generate garbage. See - # https://github.com/scipy/scipy/issues/2138 and - # https://github.com/python-pillow/Pillow/issues/350. - # - # This converts im from a 1-bit image to an 8-bit image. - im = im.convert('L') - - a = array(im) - return a - -_errstr = "Mode is unknown or incompatible with input array shape." - - -def toimage(arr, high=255, low=0, cmin=None, cmax=None, pal=None, - mode=None, channel_axis=None): - """Takes a numpy array and returns a PIL image. - - This function is only available if Python Imaging Library (PIL) is installed. - - The mode of the PIL image depends on the array shape and the `pal` and - `mode` keywords. - - For 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values - (from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode - is given as 'F' or 'I' in which case a float and/or integer array is made. - - .. warning:: - - This function uses `bytescale` under the hood to rescale images to use - the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``. - It will also cast data for 2-D images to ``uint32`` for ``mode=None`` - (which is the default). - - Notes - ----- - For 3-D arrays, the `channel_axis` argument tells which dimension of the - array holds the channel data. - - For 3-D arrays if one of the dimensions is 3, the mode is 'RGB' - by default or 'YCbCr' if selected. - - The numpy array must be either 2 dimensional or 3 dimensional. 
- - """ - if not pillow_installed: - raise ImportError(PILLOW_ERROR_MESSAGE) - - data = asarray(arr) - if iscomplexobj(data): - raise ValueError("Cannot convert a complex-valued array.") - shape = list(data.shape) - valid = len(shape) == 2 or ((len(shape) == 3) and - ((3 in shape) or (4 in shape))) - if not valid: - raise ValueError("'arr' does not have a suitable array shape for " - "any mode.") - if len(shape) == 2: - shape = (shape[1], shape[0]) # columns show up first - if mode == 'F': - data32 = data.astype(numpy.float32) - image = Image.frombytes(mode, shape, data32.tobytes()) - return image - if mode in [None, 'L', 'P']: - bytedata = bytescale(data, high=high, low=low, - cmin=cmin, cmax=cmax) - image = Image.frombytes('L', shape, bytedata.tobytes()) - if pal is not None: - image.putpalette(asarray(pal, dtype=uint8).tobytes()) - # Becomes a mode='P' automagically. - elif mode == 'P': # default gray-scale - pal = (arange(0, 256, 1, dtype=uint8)[:, newaxis] * - ones((3,), dtype=uint8)[newaxis, :]) - image.putpalette(asarray(pal, dtype=uint8).tobytes()) - return image - if mode == '1': # high input gives threshold for 1 - bytedata = (data > high) - image = Image.frombytes('1', shape, bytedata.tobytes()) - return image - if cmin is None: - cmin = amin(ravel(data)) - if cmax is None: - cmax = amax(ravel(data)) - data = (data*1.0 - cmin)*(high - low)/(cmax - cmin) + low - if mode == 'I': - data32 = data.astype(numpy.uint32) - image = Image.frombytes(mode, shape, data32.tobytes()) - else: - raise ValueError(_errstr) - return image - - # if here then 3-d array with a 3 or a 4 in the shape length. 
- # Check for 3 in datacube shape --- 'RGB' or 'YCbCr' - if channel_axis is None: - if (3 in shape): - ca = numpy.flatnonzero(asarray(shape) == 3)[0] - else: - ca = numpy.flatnonzero(asarray(shape) == 4) - if len(ca): - ca = ca[0] - else: - raise ValueError("Could not find channel dimension.") - else: - ca = channel_axis - - numch = shape[ca] - if numch not in [3, 4]: - raise ValueError("Channel axis dimension is not valid.") - - bytedata = bytescale(data, high=high, low=low, cmin=cmin, cmax=cmax) - if ca == 2: - strdata = bytedata.tobytes() - shape = (shape[1], shape[0]) - elif ca == 1: - strdata = transpose(bytedata, (0, 2, 1)).tobytes() - shape = (shape[2], shape[0]) - elif ca == 0: - strdata = transpose(bytedata, (1, 2, 0)).tobytes() - shape = (shape[2], shape[1]) - if mode is None: - if numch == 3: - mode = 'RGB' - else: - mode = 'RGBA' - - if mode not in ['RGB', 'RGBA', 'YCbCr', 'CMYK']: - raise ValueError(_errstr) - - if mode in ['RGB', 'YCbCr']: - if numch != 3: - raise ValueError("Invalid array shape for mode.") - if mode in ['RGBA', 'CMYK']: - if numch != 4: - raise ValueError("Invalid array shape for mode.") - - # Here we know data and mode is correct - image = Image.frombytes(mode, shape, strdata) - return image - - -def imresize(arr, size, interp='bilinear', mode=None): - """ - Resize an image. - - This function is only available if Python Imaging Library (PIL) is installed. - - .. warning:: - - This function uses `bytescale` under the hood to rescale images to use - the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``. - It will also cast data for 2-D images to ``uint32`` for ``mode=None`` - (which is the default). - - Parameters - ---------- - arr : ndarray - The array of image to be resized. - size : int, float or tuple - * int - Percentage of current size. - * float - Fraction of current size. - * tuple - Size of the output image (height, width). 
- - interp : str, default='bilinear' - Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear', - 'bicubic' or 'cubic'). - mode : str, default=None - The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing. - If ``mode=None`` (the default), 2-D images will be treated like - ``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays, - `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively. - - Returns - ------- - imresize : ndarray - The resized array of image. - - See Also - -------- - toimage : Implicitly used to convert `arr` according to `mode`. - scipy.ndimage.zoom : More generic implementation that does not use PIL. - - """ - im = toimage(arr, mode=mode) - ts = type(size) - if issubdtype(ts, numpy.signedinteger): - percent = size / 100.0 - size = tuple((array(im.size)*percent).astype(int)) - elif issubdtype(type(size), numpy.floating): - size = tuple((array(im.size)*size).astype(int)) - else: - size = (size[1], size[0]) - func = {'nearest': 0, 'lanczos': 1, 'bilinear': 2, 'bicubic': 3, 'cubic': 3} - imnew = im.resize(size, resample=func[interp]) - return fromimage(imnew)