From d396bc907edeb4da4dd3c0223c55e4c5c806c0eb Mon Sep 17 00:00:00 2001 From: Jonathan Tammo Siebert Date: Sun, 7 Jan 2018 17:23:52 +0100 Subject: [PATCH 1/6] Added scipy.misc.pilutil functions. --- sklearn/datasets/base.py | 16 +- sklearn/datasets/lfw.py | 20 +- sklearn/datasets/tests/test_base.py | 11 +- sklearn/datasets/tests/test_lfw.py | 13 +- sklearn/externals/pilutil.py | 322 ++++++++++++++++++++++++++++ 5 files changed, 337 insertions(+), 45 deletions(-) create mode 100644 sklearn/externals/pilutil.py diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index 506f727c06583..40a3795bae1ab 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -767,16 +767,10 @@ def load_sample_images(): >>> first_img_data.dtype #doctest: +SKIP dtype('uint8') """ - # Try to import imread from scipy. We do this lazily here to prevent - # this module from depending on PIL. - try: - try: - from scipy.misc import imread - except ImportError: - from scipy.misc.pilutil import imread - except ImportError: - raise ImportError("The Python Imaging Library (PIL) " - "is required to load data from jpeg files") + # Locally import _imread to prevent the whole sklearn.datasets module from + # depending on PIL. + from ..externals.pilutil import _imread + module_path = join(dirname(__file__), "images") with open(join(module_path, 'README.txt')) as f: descr = f.read() @@ -784,7 +778,7 @@ def load_sample_images(): for filename in os.listdir(module_path) if filename.endswith(".jpg")] # Load image data for each image in the source folder. - images = [imread(filename) for filename in filenames] + images = [_imread(filename) for filename in filenames] return Bunch(images=images, filenames=filenames, diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index 51850ad6c8898..9737d4b800362 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -32,7 +32,6 @@ from .base import get_data_home, _fetch_remote, RemoteFileMetadata from ..utils import Bunch from ..externals.joblib import Memory - from ..externals.six import b logger = logging.getLogger(__name__) @@ -136,18 +135,9 @@ def check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): def _load_imgs(file_paths, slice_, color, resize): """Internally used to load images""" - - # Try to import imread and imresize from PIL. We do this here to prevent - # the whole sklearn.datasets module from depending on PIL. - try: - try: - from scipy.misc import imread - except ImportError: - from scipy.misc.pilutil import imread - from scipy.misc import imresize - except ImportError: - raise ImportError("The Python Imaging Library (PIL)" - " is required to load data from jpeg files") + # Locally import _imread and _imresize to prevent the whole + # sklearn.datasets module from depending on PIL. + from ..externals.pilutil import _imread, _imresize # compute the portion of the images to load to respect the slice_ parameter # given by the caller @@ -181,7 +171,7 @@ def _load_imgs(file_paths, slice_, color, resize): # Checks if jpeg reading worked. Refer to issue #3594 for more # details. - img = imread(file_path) + img = _imread(file_path) if img.ndim is 0: raise RuntimeError("Failed to read the image file %s, " "Please make sure that libjpeg is installed" @@ -190,7 +180,7 @@ def _load_imgs(file_paths, slice_, color, resize): face = np.asarray(img[slice_], dtype=np.float32) face /= 255.0 # scale uint8 coded colors to the [0.0, 1.0] floats if resize is not None: - face = imresize(face, resize) + face = _imresize(face, resize) if not color: # average the color channels to compute a gray levels # representation diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 30f0416e72200..92a829fe2691a 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -21,6 +21,7 @@ from sklearn.datasets.base import Bunch from sklearn.externals.six import b, u +from sklearn.externals.pilutil import _have_image from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_true @@ -161,15 +162,7 @@ def test_load_sample_image(): def test_load_missing_sample_image_error(): - have_PIL = True - try: - try: - from scipy.misc import imread - except ImportError: - from scipy.misc.pilutil import imread # noqa - except ImportError: - have_PIL = False - if have_PIL: + if _have_image: assert_raises(AttributeError, load_sample_image, 'blop.jpg') else: diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index ac6395c4958be..4cc6cd3417fc8 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -14,14 +14,7 @@ import tempfile import numpy as np from sklearn.externals import six -try: - try: - from scipy.misc import imsave - except ImportError: - from scipy.misc.pilutil import imsave -except ImportError: - imsave = None - +from sklearn.externals.pilutil import _have_image, _imsave from sklearn.datasets import fetch_lfw_pairs from sklearn.datasets import fetch_lfw_people @@ -48,7 +41,7 @@ def setup_module(): """Test fixture run once and common to all tests of this module""" - if imsave is None: + if not _have_image: raise SkipTest("PIL not installed.") if not os.path.exists(LFW_HOME): @@ -70,7 +63,7 @@ def setup_module(): file_path = os.path.join(folder_name, name + '_%04d.jpg' % i) uniface = np_rng.randint(0, 255, size=(250, 250, 3)) try: - imsave(file_path, uniface) + _imsave(file_path, uniface) except ImportError: raise SkipTest("PIL not installed") diff --git a/sklearn/externals/pilutil.py b/sklearn/externals/pilutil.py new file mode 100644 index 0000000000000..b5e63ed984a94 --- /dev/null +++ b/sklearn/externals/pilutil.py @@ -0,0 +1,322 @@ +""" +Utility functions wrapping PIL functions + +This is a local version of utility functions from scipy that are wrapping PIL +functionality. These functions are deprecated in scipy 1.0.0 and will be +removed in scipy 1.2.0. Therefore, the functionality used in sklearn is +copied here. Origin is the file scipy/misc/pilutil.py. Parameters and +functions that are not used in sklearn were removed. + +Copyright (c) 2001, 2002 Enthought, Inc. +All rights reserved. + +Copyright (c) 2003-2017 SciPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of Enthought nor the names of the SciPy Developers + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. +""" +from __future__ import division, print_function, absolute_import + + +__all__ = ['_have_image', '_bytescale', '_imread', '_imsave', + '_fromimage', '_toimage', '_imresize'] + + +import numpy as np + +_have_image = True +try: + try: + from PIL import Image + except ImportError: + import Image + if not hasattr(Image, 'frombytes'): + Image.frombytes = Image.fromstring +except ImportError: + _have_image = False + + +def _bytescale(data): + """ + Byte scales an array (image). + + Byte scaling means converting the input image to uint8 dtype and scaling + the range to ``(0, 255)``. + + If the input image already has dtype uint8, no scaling is done. + This function is only available if Python Imaging Library (PIL) is + installed. + + Parameters + ---------- + data : ndarray + PIL image data array. + + Returns + ------- + img_array : uint8 ndarray + The byte-scaled array. + + Examples + -------- + >>> from sklearn.externals.pilutil import _bytescale + >>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ], + ... [ 73.88003259, 80.91433048, 4.88878881], + ... [ 51.53875334, 34.45808177, 27.5873488 ]]) + >>> _bytescale(img) + array([[255, 0, 236], + [205, 225, 4], + [140, 90, 70]], dtype=uint8) + """ + if data.dtype == np.uint8: + return data + + cmin = data.min() + cmax = data.max() + + cscale = cmax - cmin + if cscale == 0: + cscale = 1 + + scale = 255. / cscale + bytedata = (data - cmin) * scale + return (bytedata.clip(0, 255) + 0.5).astype(np.uint8) + + +def _imread(name): + """ + Read an image from a file as an array. + + This function is only available if Python Imaging Library (PIL) is + installed. + + Parameters + ---------- + name : str or file object + The file name or file object to be read. + + Returns + ------- + imread : ndarray + The array obtained by reading the image. + + Notes + ----- + This is a simplified combination of scipy's scipy.misc.pilutil.imread and + scipy.misc.pilutil.fromimage, which are deprecated in scipy 1.0.0 and will + be removed from scipy in version 1.2.0. + """ + if not _have_image: + raise ImportError("The Python Imaging Library (PIL) " + "is required to load data from jpeg files") + pil_image = Image.open(name) + + return _fromimage(pil_image) + + +def _imsave(name, arr): + """ + Save an array as an image. + This function is only available if Python Imaging Library (PIL) is installed. + .. warning:: + This function uses `bytescale` under the hood to rescale images to use + the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``. + It will also cast data for 2-D images to ``uint32`` for ``mode=None`` + (which is the default). + + Parameters + ---------- + name : str or file object + Output file name or file object. + arr : ndarray, MxN or MxNx3 or MxNx4 + Array containing image values. If the shape is ``MxN``, the array + represents a grey-level image. Shape ``MxNx3`` stores the red, green + and blue bands along the last dimension. An alpha layer may be + included, specified as the last colour band of an ``MxNx4`` array. + """ + pil_image = _toimage(arr, channel_axis=2) + pil_image.save(name) + return + + +def _fromimage(pil_image): + """ + Return a copy of a PIL image as a numpy array. + + This function is only available if Python Imaging Library (PIL) is + installed. + + Parameters + ---------- + im : PIL image + Input image. + + Returns + ------- + fromimage : ndarray + The different colour bands/channels are stored in the + third dimension, such that a grey-image is MxN, an + RGB-image MxNx3 and an RGBA-image MxNx4. + """ + if not _have_image: + raise ImportError("The Python Imaging Library (PIL) " + "is required to load data from jpeg files") + if not Image.isImageType(pil_image): + raise TypeError("Input is not a PIL image.") + + if pil_image.mode == 'P': + # Mode 'P' means there is an indexed "palette". If we leave the mode + # as 'P', then when we do `a = array(pil_image)` below, `a` will be a + # 2-D containing the indices into the palette, and not a 3-D array + # containing the RGB or RGBA values. + if 'transparency' in pil_image.info: + pil_image = pil_image.convert('RGBA') + else: + pil_image = pil_image.convert('RGB') + + if pil_image.mode == '1': + # Workaround for crash in PIL. When pil_image is 1-bit, the cal + # array(pil_image) can cause a seg. fault, or generate garbage. See + # https://github.com/scipy/scipy/issues/2138 and + # https://github.com/python-pillow/Pillow/issues/350. + # + # This converts im from a 1-bit image to an 8-bit image. + pil_image = pil_image.convert('L') + + return np.array(pil_image) + + +def _toimage(arr, channel_axis=None): + """ + Takes a numpy array and returns a PIL image. + + This function is only available if Python Imaging Library (PIL) is + installed. + .. warning:: + This function uses `_bytescale` under the hood to rescale images to + use the full (0, 255) range. It will also cast data for 2-D images to + ``uint32``. + + Notes + ----- + For 3-D arrays if one of the dimensions is 3, the mode is 'RGB' + by default or 'YCbCr' if selected. + The numpy array must be either 2 dimensional or 3 dimensional. + """ + if not _have_image: + raise ImportError("The Python Imaging Library (PIL) " + "is required to load data from jpeg files") + data = np.asarray(arr) + if np.iscomplexobj(data): + raise ValueError("Cannot convert a complex-valued array.") + shape = list(data.shape) + valid = len(shape) == 2 or ((len(shape) == 3) and + ((3 in shape) or (4 in shape))) + if not valid: + raise ValueError("'arr' does not have a suitable array shape for " + "any mode.") + if len(shape) == 2: + shape = (shape[1], shape[0]) + bytedata = _bytescale(data) + image = Image.frombytes('L', shape, bytedata.tostring()) + return image + + # if here then 3-d array with a 3 or a 4 in the shape length. + # Check for 3 in datacube shape --- 'RGB' or 'YCbCr' + if channel_axis is None: + if 3 in shape: + ca = np.flatnonzero(np.asarray(shape) == 3)[0] + else: + ca = np.flatnonzero(np.asarray(shape) == 4) + if not ca: + ca = ca[0] + else: + raise ValueError("Could not find channel dimension.") + else: + ca = channel_axis + + numch = shape[ca] + if numch not in [3, 4]: + raise ValueError("Channel axis dimension is not valid.") + + bytedata = _bytescale(data) + if ca == 2: + strdata = bytedata.tostring() + shape = (shape[1], shape[0]) + elif ca == 1: + strdata = np.transpose(bytedata, (0, 2, 1)).tostring() + shape = (shape[2], shape[0]) + elif ca == 0: + strdata = np.transpose(bytedata, (1, 2, 0)).tostring() + shape = (shape[2], shape[1]) + else: + raise ValueError("Invalid channel dimension.") + + if numch == 3: + mode = 'RGB' + else: + mode = 'RGBA' + + # Here we know data and mode is correct + return Image.frombytes(mode, shape, strdata) + + +def _imresize(arr, size): + """ + Resize an image. + + This function is only available if Python Imaging Library (PIL) is + installed. + .. warning:: + This function uses `_bytescale` under the hood to rescale images to + use the full (0, 255) range. + It will also cast data for 2-D images to ``uint32``. + + Parameters + ---------- + arr : ndarray + The array of image to be resized. + size : int, float or tuple + * int - Percentage of current size. + * float - Fraction of current size. + * tuple - Size of the output image (height, width). + + Returns + ------- + imresize : ndarray + The resized array of image. + """ + im = _toimage(arr) + ts = type(size) + if np.issubdtype(ts, np.signedinteger): + percent = size / 100.0 + size = tuple((np.array(im.size) * percent).astype(int)) + elif np.issubdtype(type(size), np.floating): + size = tuple((np.array(im.size) * size).astype(int)) + else: + size = (size[1], size[0]) + imnew = im.resize(size, resample=2) + return _fromimage(imnew) From 2ad69e406df3b01bbee5338c0378900756e6b9b4 Mon Sep 17 00:00:00 2001 From: Jonathan Tammo Siebert Date: Tue, 9 Jan 2018 06:41:11 +0100 Subject: [PATCH 2/6] Added pillow to Travis and Appveyor for pilutils --- .travis.yml | 6 +++--- build_tools/appveyor/requirements.txt | 1 + build_tools/travis/install.sh | 4 ++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4022f78aa0928..2f0cbf91067dd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,14 +36,14 @@ matrix: # Python 3.4 build - env: DISTRIB="conda" PYTHON_VERSION="3.4" INSTALL_MKL="false" NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2" - COVERAGE=true + PILLOW_VERSION="4.0.0" COVERAGE=true if: type != cron # This environment tests the newest supported Anaconda release (5.0.0) # It also runs tests requiring Pandas and PyAMG - env: DISTRIB="conda" PYTHON_VERSION="3.6.2" INSTALL_MKL="true" NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" PANDAS_VERSION="0.20.3" - CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" COVERAGE=true - CHECK_PYTEST_SOFT_DEPENDENCY="true" + CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" PILLOW_VERSION="4.3.0" + COVERAGE=true CHECK_PYTEST_SOFT_DEPENDENCY="true" if: type != cron # flake8 linting on diff wrt common ancestor with upstream/master - env: RUN_FLAKE8="true" SKIP_TESTS="true" diff --git a/build_tools/appveyor/requirements.txt b/build_tools/appveyor/requirements.txt index b01693343441f..a14f4032d770c 100644 --- a/build_tools/appveyor/requirements.txt +++ b/build_tools/appveyor/requirements.txt @@ -13,3 +13,4 @@ cython pytest wheel wheelhouse_uploader +pillow diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 76cd3221cb009..9a5b65ce225bd 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -55,6 +55,10 @@ if [[ "$DISTRIB" == "conda" ]]; then TO_INSTALL="$TO_INSTALL pyamg=$PYAMG_VERSION" fi + if [[ -n "$PILLOW_VERSION" ]]; then + TO_INSTALL="$TO_INSTALL pillow=$PILLOW_VERSION" + fi + conda create -n testenv --yes $TO_INSTALL source activate testenv From 1e851a9365e321a891e0867ada1a14acc701e0f8 Mon Sep 17 00:00:00 2001 From: Jonathan Tammo Siebert Date: Wed, 17 Jan 2018 07:00:05 +0100 Subject: [PATCH 3/6] Added back unused parts to reduce diff. --- sklearn/datasets/base.py | 4 +- sklearn/datasets/lfw.py | 8 +- sklearn/datasets/tests/test_base.py | 2 +- sklearn/datasets/tests/test_lfw.py | 4 +- sklearn/externals/_pilutil.py | 473 ++++++++++++++++++++++++++++ sklearn/externals/pilutil.py | 322 ------------------- 6 files changed, 482 insertions(+), 331 deletions(-) create mode 100644 sklearn/externals/_pilutil.py delete mode 100644 sklearn/externals/pilutil.py diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index 40a3795bae1ab..2817792e8d526 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -769,7 +769,7 @@ def load_sample_images(): """ # Locally import _imread to prevent the whole sklearn.datasets module from # depending on PIL. - from ..externals.pilutil import _imread + from ..externals._pilutil import imread module_path = join(dirname(__file__), "images") with open(join(module_path, 'README.txt')) as f: @@ -778,7 +778,7 @@ def load_sample_images(): for filename in os.listdir(module_path) if filename.endswith(".jpg")] # Load image data for each image in the source folder. - images = [_imread(filename) for filename in filenames] + images = [imread(filename) for filename in filenames] return Bunch(images=images, filenames=filenames, diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index 9737d4b800362..f4da7950da0a4 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -135,9 +135,9 @@ def check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): def _load_imgs(file_paths, slice_, color, resize): """Internally used to load images""" - # Locally import _imread and _imresize to prevent the whole + # Locally import imread and imresize to prevent the whole # sklearn.datasets module from depending on PIL. - from ..externals.pilutil import _imread, _imresize + from ..externals._pilutil import imread, imresize # compute the portion of the images to load to respect the slice_ parameter # given by the caller @@ -171,7 +171,7 @@ def _load_imgs(file_paths, slice_, color, resize): # Checks if jpeg reading worked. Refer to issue #3594 for more # details. - img = _imread(file_path) + img = imread(file_path) if img.ndim is 0: raise RuntimeError("Failed to read the image file %s, " "Please make sure that libjpeg is installed" @@ -180,7 +180,7 @@ def _load_imgs(file_paths, slice_, color, resize): face = np.asarray(img[slice_], dtype=np.float32) face /= 255.0 # scale uint8 coded colors to the [0.0, 1.0] floats if resize is not None: - face = _imresize(face, resize) + face = imresize(face, resize) if not color: # average the color channels to compute a gray levels # representation diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 92a829fe2691a..dbdcb41291ca9 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -21,7 +21,7 @@ from sklearn.datasets.base import Bunch from sklearn.externals.six import b, u -from sklearn.externals.pilutil import _have_image +from sklearn.externals._pilutil import _have_image from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_true diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 4cc6cd3417fc8..0773a57f48928 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -14,7 +14,7 @@ import tempfile import numpy as np from sklearn.externals import six -from sklearn.externals.pilutil import _have_image, _imsave +from sklearn.externals._pilutil import _have_image, imsave from sklearn.datasets import fetch_lfw_pairs from sklearn.datasets import fetch_lfw_people @@ -63,7 +63,7 @@ def setup_module(): file_path = os.path.join(folder_name, name + '_%04d.jpg' % i) uniface = np_rng.randint(0, 255, size=(250, 250, 3)) try: - _imsave(file_path, uniface) + imsave(file_path, uniface) except ImportError: raise SkipTest("PIL not installed") diff --git a/sklearn/externals/_pilutil.py b/sklearn/externals/_pilutil.py new file mode 100644 index 0000000000000..621d778f46ff6 --- /dev/null +++ b/sklearn/externals/_pilutil.py @@ -0,0 +1,473 @@ +""" +Utility functions wrapping PIL functions + +This is a local version of utility functions from scipy that are wrapping PIL +functionality. These functions are deprecated in scipy 1.0.0 and will be +removed in scipy 1.2.0. Therefore, the functionality used in sklearn is +copied here. Origin is the file scipy/misc/pilutil.py. Functions that are +not used in sklearn were removed. + +Copyright (c) 2001, 2002 Enthought, Inc. +All rights reserved. + +Copyright (c) 2003-2017 SciPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of Enthought nor the names of the SciPy Developers + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. +""" +from __future__ import division, print_function, absolute_import + + +__all__ = ['_have_image', 'bytescale', 'imread', 'imsave', + 'fromimage', 'toimage', 'imresize'] + +import numpy + +from numpy import (amin, amax, ravel, asarray, arange, ones, newaxis, + transpose, iscomplexobj, uint8, issubdtype, array) + +_have_image = True +try: + try: + from PIL import Image + except ImportError: + import Image + if not hasattr(Image, 'frombytes'): + Image.frombytes = Image.fromstring +except ImportError: + _have_image = False + + +def bytescale(data, cmin=None, cmax=None, high=255, low=0): + """ + Byte scales an array (image). + + Byte scaling means converting the input image to uint8 dtype and scaling + the range to ``(low, high)`` (default 0-255). + If the input image already has dtype uint8, no scaling is done. + + This function is only available if Python Imaging Library (PIL) is installed. + + Parameters + ---------- + data : ndarray + PIL image data array. + cmin : scalar, optional + Bias scaling of small values. Default is ``data.min()``. + cmax : scalar, optional + Bias scaling of large values. Default is ``data.max()``. + high : scalar, optional + Scale max value to `high`. Default is 255. + low : scalar, optional + Scale min value to `low`. Default is 0. + + Returns + ------- + img_array : uint8 ndarray + The byte-scaled array. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.externals._pilutil import bytescale + >>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ], + ... [ 73.88003259, 80.91433048, 4.88878881], + ... [ 51.53875334, 34.45808177, 27.5873488 ]]) + >>> bytescale(img) + array([[255, 0, 236], + [205, 225, 4], + [140, 90, 70]], dtype=uint8) + >>> bytescale(img, high=200, low=100) + array([[200, 100, 192], + [180, 188, 102], + [155, 135, 128]], dtype=uint8) + >>> bytescale(img, cmin=0, cmax=255) + array([[91, 3, 84], + [74, 81, 5], + [52, 34, 28]], dtype=uint8) + + """ + if data.dtype == uint8: + return data + + if high > 255: + raise ValueError("`high` should be less than or equal to 255.") + if low < 0: + raise ValueError("`low` should be greater than or equal to 0.") + if high < low: + raise ValueError("`high` should be greater than or equal to `low`.") + + if cmin is None: + cmin = data.min() + if cmax is None: + cmax = data.max() + + cscale = cmax - cmin + if cscale < 0: + raise ValueError("`cmax` should be larger than `cmin`.") + elif cscale == 0: + cscale = 1 + + scale = float(high - low) / cscale + bytedata = (data - cmin) * scale + low + return (bytedata.clip(low, high) + 0.5).astype(uint8) + + +def imread(name, flatten=False, mode=None): + """ + Read an image from a file as an array. + + This function is only available if Python Imaging Library (PIL) is installed. + + Parameters + ---------- + name : str or file object + The file name or file object to be read. + flatten : bool, optional + If True, flattens the color layers into a single gray-scale layer. + mode : str, optional + Mode to convert image to, e.g. ``'RGB'``. See the Notes for more + details. + + Returns + ------- + imread : ndarray + The array obtained by reading the image. + + Notes + ----- + `imread` uses the Python Imaging Library (PIL) to read an image. + The following notes are from the PIL documentation. + + `mode` can be one of the following strings: + + * 'L' (8-bit pixels, black and white) + * 'P' (8-bit pixels, mapped to any other mode using a color palette) + * 'RGB' (3x8-bit pixels, true color) + * 'RGBA' (4x8-bit pixels, true color with transparency mask) + * 'CMYK' (4x8-bit pixels, color separation) + * 'YCbCr' (3x8-bit pixels, color video format) + * 'I' (32-bit signed integer pixels) + * 'F' (32-bit floating point pixels) + + PIL also provides limited support for a few special modes, including + 'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa' + (true color with premultiplied alpha). + + When translating a color image to black and white (mode 'L', 'I' or + 'F'), the library uses the ITU-R 601-2 luma transform:: + + L = R * 299/1000 + G * 587/1000 + B * 114/1000 + + When `flatten` is True, the image is converted using mode 'F'. + When `mode` is not None and `flatten` is True, the image is first + converted according to `mode`, and the result is then flattened using + mode 'F'. + + """ + if not _have_image: + raise ImportError("The Python Imaging Library (PIL) " + "is required to load data from jpeg files") + + im = Image.open(name) + return fromimage(im, flatten=flatten, mode=mode) + + +def imsave(name, arr, format=None): + """ + Save an array as an image. + + This function is only available if Python Imaging Library (PIL) is installed. + + .. warning:: + + This function uses `bytescale` under the hood to rescale images to use + the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``. + It will also cast data for 2-D images to ``uint32`` for ``mode=None`` + (which is the default). + + Parameters + ---------- + name : str or file object + Output file name or file object. + arr : ndarray, MxN or MxNx3 or MxNx4 + Array containing image values. If the shape is ``MxN``, the array + represents a grey-level image. Shape ``MxNx3`` stores the red, green + and blue bands along the last dimension. An alpha layer may be + included, specified as the last colour band of an ``MxNx4`` array. + format : str + Image format. If omitted, the format to use is determined from the + file name extension. If a file object was used instead of a file name, + this parameter should always be used. + + """ + im = toimage(arr, channel_axis=2) + if format is None: + im.save(name) + else: + im.save(name, format) + return + + +def fromimage(im, flatten=False, mode=None): + """ + Return a copy of a PIL image as a numpy array. + + This function is only available if Python Imaging Library (PIL) is installed. + + Parameters + ---------- + im : PIL image + Input image. + flatten : bool + If true, convert the output to grey-scale. + mode : str, optional + Mode to convert image to, e.g. ``'RGB'``. See the Notes of the + `imread` docstring for more details. + + Returns + ------- + fromimage : ndarray + The different colour bands/channels are stored in the + third dimension, such that a grey-image is MxN, an + RGB-image MxNx3 and an RGBA-image MxNx4. + + """ + if not _have_image: + raise ImportError("The Python Imaging Library (PIL) " + "is required to load data from jpeg files") + + if not Image.isImageType(im): + raise TypeError("Input is not a PIL image.") + + if mode is not None: + if mode != im.mode: + im = im.convert(mode) + elif im.mode == 'P': + # Mode 'P' means there is an indexed "palette". If we leave the mode + # as 'P', then when we do `a = array(im)` below, `a` will be a 2-D + # containing the indices into the palette, and not a 3-D array + # containing the RGB or RGBA values. + if 'transparency' in im.info: + im = im.convert('RGBA') + else: + im = im.convert('RGB') + + if flatten: + im = im.convert('F') + elif im.mode == '1': + # Workaround for crash in PIL. When im is 1-bit, the call array(im) + # can cause a seg. fault, or generate garbage. See + # https://github.com/scipy/scipy/issues/2138 and + # https://github.com/python-pillow/Pillow/issues/350. + # + # This converts im from a 1-bit image to an 8-bit image. + im = im.convert('L') + + a = array(im) + return a + + +_errstr = "Mode is unknown or incompatible with input array shape." + + +def toimage(arr, high=255, low=0, cmin=None, cmax=None, pal=None, + mode=None, channel_axis=None): + """Takes a numpy array and returns a PIL image. + + This function is only available if Python Imaging Library (PIL) is installed. + + The mode of the PIL image depends on the array shape and the `pal` and + `mode` keywords. + + For 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values + (from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode + is given as 'F' or 'I' in which case a float and/or integer array is made. + + .. warning:: + + This function uses `bytescale` under the hood to rescale images to use + the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``. + It will also cast data for 2-D images to ``uint32`` for ``mode=None`` + (which is the default). + + Notes + ----- + For 3-D arrays, the `channel_axis` argument tells which dimension of the + array holds the channel data. + + For 3-D arrays if one of the dimensions is 3, the mode is 'RGB' + by default or 'YCbCr' if selected. + + The numpy array must be either 2 dimensional or 3 dimensional. + + """ + if not _have_image: + raise ImportError("The Python Imaging Library (PIL) " + "is required to load data from jpeg files") + + data = asarray(arr) + if iscomplexobj(data): + raise ValueError("Cannot convert a complex-valued array.") + shape = list(data.shape) + valid = len(shape) == 2 or ((len(shape) == 3) and + ((3 in shape) or (4 in shape))) + if not valid: + raise ValueError("'arr' does not have a suitable array shape for " + "any mode.") + if len(shape) == 2: + shape = (shape[1], shape[0]) # columns show up first + if mode == 'F': + data32 = data.astype(numpy.float32) + image = Image.frombytes(mode, shape, data32.tostring()) + return image + if mode in [None, 'L', 'P']: + bytedata = bytescale(data, high=high, low=low, + cmin=cmin, cmax=cmax) + image = Image.frombytes('L', shape, bytedata.tostring()) + if pal is not None: + image.putpalette(asarray(pal, dtype=uint8).tostring()) + # Becomes a mode='P' automagically. + elif mode == 'P': # default gray-scale + pal = (arange(0, 256, 1, dtype=uint8)[:, newaxis] * + ones((3,), dtype=uint8)[newaxis, :]) + image.putpalette(asarray(pal, dtype=uint8).tostring()) + return image + if mode == '1': # high input gives threshold for 1 + bytedata = (data > high) + image = Image.frombytes('1', shape, bytedata.tostring()) + return image + if cmin is None: + cmin = amin(ravel(data)) + if cmax is None: + cmax = amax(ravel(data)) + data = (data * 1.0 - cmin) * (high - low) / (cmax - cmin) + low + if mode == 'I': + data32 = data.astype(numpy.uint32) + image = Image.frombytes(mode, shape, data32.tostring()) + else: + raise ValueError(_errstr) + return image + + # if here then 3-d array with a 3 or a 4 in the shape length. + # Check for 3 in datacube shape --- 'RGB' or 'YCbCr' + if channel_axis is None: + if (3 in shape): + ca = numpy.flatnonzero(asarray(shape) == 3)[0] + else: + ca = numpy.flatnonzero(asarray(shape) == 4) + if len(ca): + ca = ca[0] + else: + raise ValueError("Could not find channel dimension.") + else: + ca = channel_axis + + numch = shape[ca] + if numch not in [3, 4]: + raise ValueError("Channel axis dimension is not valid.") + + bytedata = bytescale(data, high=high, low=low, cmin=cmin, cmax=cmax) + if ca == 2: + strdata = bytedata.tostring() + shape = (shape[1], shape[0]) + elif ca == 1: + strdata = transpose(bytedata, (0, 2, 1)).tostring() + shape = (shape[2], shape[0]) + elif ca == 0: + strdata = transpose(bytedata, (1, 2, 0)).tostring() + shape = (shape[2], shape[1]) + if mode is None: + if numch == 3: + mode = 'RGB' + else: + mode = 'RGBA' + + if mode not in ['RGB', 'RGBA', 'YCbCr', 'CMYK']: + raise ValueError(_errstr) + + if mode in ['RGB', 'YCbCr']: + if numch != 3: + raise ValueError("Invalid array shape for mode.") + if mode in ['RGBA', 'CMYK']: + if numch != 4: + raise ValueError("Invalid array shape for mode.") + + # Here we know data and mode is correct + image = Image.frombytes(mode, shape, strdata) + return image + + +def imresize(arr, size, interp='bilinear', mode=None): + """ + Resize an image. + + This function is only available if Python Imaging Library (PIL) is installed. + + .. warning:: + + This function uses `bytescale` under the hood to rescale images to use + the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``. + It will also cast data for 2-D images to ``uint32`` for ``mode=None`` + (which is the default). + + Parameters + ---------- + arr : ndarray + The array of image to be resized. + size : int, float or tuple + * int - Percentage of current size. + * float - Fraction of current size. + * tuple - Size of the output image (height, width). + + interp : str, optional + Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear', + 'bicubic' or 'cubic'). + mode : str, optional + The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing. + If ``mode=None`` (the default), 2-D images will be treated like + ``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays, + `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively. + + Returns + ------- + imresize : ndarray + The resized array of image. + + """ + im = toimage(arr, mode=mode) + ts = type(size) + if issubdtype(ts, numpy.signedinteger): + percent = size / 100.0 + size = tuple((array(im.size) * percent).astype(int)) + elif issubdtype(type(size), numpy.floating): + size = tuple((array(im.size) * size).astype(int)) + else: + size = (size[1], size[0]) + func = {'nearest': 0, 'lanczos': 1, + 'bilinear': 2, 'bicubic': 3, 'cubic': 3} + imnew = im.resize(size, resample=func[interp]) + return fromimage(imnew) diff --git a/sklearn/externals/pilutil.py b/sklearn/externals/pilutil.py deleted file mode 100644 index b5e63ed984a94..0000000000000 --- a/sklearn/externals/pilutil.py +++ /dev/null @@ -1,322 +0,0 @@ -""" -Utility functions wrapping PIL functions - -This is a local version of utility functions from scipy that are wrapping PIL -functionality. These functions are deprecated in scipy 1.0.0 and will be -removed in scipy 1.2.0. Therefore, the functionality used in sklearn is -copied here. Origin is the file scipy/misc/pilutil.py. Parameters and -functions that are not used in sklearn were removed. - -Copyright (c) 2001, 2002 Enthought, Inc. -All rights reserved. - -Copyright (c) 2003-2017 SciPy Developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - a. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - b. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - c. Neither the name of Enthought nor the names of the SciPy Developers - may be used to endorse or promote products derived from this software - without specific prior written permission. - - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, -OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. -""" -from __future__ import division, print_function, absolute_import - - -__all__ = ['_have_image', '_bytescale', '_imread', '_imsave', - '_fromimage', '_toimage', '_imresize'] - - -import numpy as np - -_have_image = True -try: - try: - from PIL import Image - except ImportError: - import Image - if not hasattr(Image, 'frombytes'): - Image.frombytes = Image.fromstring -except ImportError: - _have_image = False - - -def _bytescale(data): - """ - Byte scales an array (image). - - Byte scaling means converting the input image to uint8 dtype and scaling - the range to ``(0, 255)``. - - If the input image already has dtype uint8, no scaling is done. - This function is only available if Python Imaging Library (PIL) is - installed. - - Parameters - ---------- - data : ndarray - PIL image data array. - - Returns - ------- - img_array : uint8 ndarray - The byte-scaled array. - - Examples - -------- - >>> from sklearn.externals.pilutil import _bytescale - >>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ], - ... [ 73.88003259, 80.91433048, 4.88878881], - ... [ 51.53875334, 34.45808177, 27.5873488 ]]) - >>> _bytescale(img) - array([[255, 0, 236], - [205, 225, 4], - [140, 90, 70]], dtype=uint8) - """ - if data.dtype == np.uint8: - return data - - cmin = data.min() - cmax = data.max() - - cscale = cmax - cmin - if cscale == 0: - cscale = 1 - - scale = 255. / cscale - bytedata = (data - cmin) * scale - return (bytedata.clip(0, 255) + 0.5).astype(np.uint8) - - -def _imread(name): - """ - Read an image from a file as an array. - - This function is only available if Python Imaging Library (PIL) is - installed. - - Parameters - ---------- - name : str or file object - The file name or file object to be read. - - Returns - ------- - imread : ndarray - The array obtained by reading the image. - - Notes - ----- - This is a simplified combination of scipy's scipy.misc.pilutil.imread and - scipy.misc.pilutil.fromimage, which are deprecated in scipy 1.0.0 and will - be removed from scipy in version 1.2.0. - """ - if not _have_image: - raise ImportError("The Python Imaging Library (PIL) " - "is required to load data from jpeg files") - pil_image = Image.open(name) - - return _fromimage(pil_image) - - -def _imsave(name, arr): - """ - Save an array as an image. - This function is only available if Python Imaging Library (PIL) is installed. - .. warning:: - This function uses `bytescale` under the hood to rescale images to use - the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``. - It will also cast data for 2-D images to ``uint32`` for ``mode=None`` - (which is the default). - - Parameters - ---------- - name : str or file object - Output file name or file object. - arr : ndarray, MxN or MxNx3 or MxNx4 - Array containing image values. If the shape is ``MxN``, the array - represents a grey-level image. Shape ``MxNx3`` stores the red, green - and blue bands along the last dimension. An alpha layer may be - included, specified as the last colour band of an ``MxNx4`` array. - """ - pil_image = _toimage(arr, channel_axis=2) - pil_image.save(name) - return - - -def _fromimage(pil_image): - """ - Return a copy of a PIL image as a numpy array. - - This function is only available if Python Imaging Library (PIL) is - installed. - - Parameters - ---------- - im : PIL image - Input image. - - Returns - ------- - fromimage : ndarray - The different colour bands/channels are stored in the - third dimension, such that a grey-image is MxN, an - RGB-image MxNx3 and an RGBA-image MxNx4. - """ - if not _have_image: - raise ImportError("The Python Imaging Library (PIL) " - "is required to load data from jpeg files") - if not Image.isImageType(pil_image): - raise TypeError("Input is not a PIL image.") - - if pil_image.mode == 'P': - # Mode 'P' means there is an indexed "palette". If we leave the mode - # as 'P', then when we do `a = array(pil_image)` below, `a` will be a - # 2-D containing the indices into the palette, and not a 3-D array - # containing the RGB or RGBA values. - if 'transparency' in pil_image.info: - pil_image = pil_image.convert('RGBA') - else: - pil_image = pil_image.convert('RGB') - - if pil_image.mode == '1': - # Workaround for crash in PIL. When pil_image is 1-bit, the cal - # array(pil_image) can cause a seg. fault, or generate garbage. See - # https://github.com/scipy/scipy/issues/2138 and - # https://github.com/python-pillow/Pillow/issues/350. - # - # This converts im from a 1-bit image to an 8-bit image. - pil_image = pil_image.convert('L') - - return np.array(pil_image) - - -def _toimage(arr, channel_axis=None): - """ - Takes a numpy array and returns a PIL image. - - This function is only available if Python Imaging Library (PIL) is - installed. - .. warning:: - This function uses `_bytescale` under the hood to rescale images to - use the full (0, 255) range. It will also cast data for 2-D images to - ``uint32``. - - Notes - ----- - For 3-D arrays if one of the dimensions is 3, the mode is 'RGB' - by default or 'YCbCr' if selected. - The numpy array must be either 2 dimensional or 3 dimensional. - """ - if not _have_image: - raise ImportError("The Python Imaging Library (PIL) " - "is required to load data from jpeg files") - data = np.asarray(arr) - if np.iscomplexobj(data): - raise ValueError("Cannot convert a complex-valued array.") - shape = list(data.shape) - valid = len(shape) == 2 or ((len(shape) == 3) and - ((3 in shape) or (4 in shape))) - if not valid: - raise ValueError("'arr' does not have a suitable array shape for " - "any mode.") - if len(shape) == 2: - shape = (shape[1], shape[0]) - bytedata = _bytescale(data) - image = Image.frombytes('L', shape, bytedata.tostring()) - return image - - # if here then 3-d array with a 3 or a 4 in the shape length. - # Check for 3 in datacube shape --- 'RGB' or 'YCbCr' - if channel_axis is None: - if 3 in shape: - ca = np.flatnonzero(np.asarray(shape) == 3)[0] - else: - ca = np.flatnonzero(np.asarray(shape) == 4) - if not ca: - ca = ca[0] - else: - raise ValueError("Could not find channel dimension.") - else: - ca = channel_axis - - numch = shape[ca] - if numch not in [3, 4]: - raise ValueError("Channel axis dimension is not valid.") - - bytedata = _bytescale(data) - if ca == 2: - strdata = bytedata.tostring() - shape = (shape[1], shape[0]) - elif ca == 1: - strdata = np.transpose(bytedata, (0, 2, 1)).tostring() - shape = (shape[2], shape[0]) - elif ca == 0: - strdata = np.transpose(bytedata, (1, 2, 0)).tostring() - shape = (shape[2], shape[1]) - else: - raise ValueError("Invalid channel dimension.") - - if numch == 3: - mode = 'RGB' - else: - mode = 'RGBA' - - # Here we know data and mode is correct - return Image.frombytes(mode, shape, strdata) - - -def _imresize(arr, size): - """ - Resize an image. - - This function is only available if Python Imaging Library (PIL) is - installed. - .. warning:: - This function uses `_bytescale` under the hood to rescale images to - use the full (0, 255) range. - It will also cast data for 2-D images to ``uint32``. - - Parameters - ---------- - arr : ndarray - The array of image to be resized. - size : int, float or tuple - * int - Percentage of current size. - * float - Fraction of current size. - * tuple - Size of the output image (height, width). - - Returns - ------- - imresize : ndarray - The resized array of image. - """ - im = _toimage(arr) - ts = type(size) - if np.issubdtype(ts, np.signedinteger): - percent = size / 100.0 - size = tuple((np.array(im.size) * percent).astype(int)) - elif np.issubdtype(type(size), np.floating): - size = tuple((np.array(im.size) * size).astype(int)) - else: - size = (size[1], size[0]) - imnew = im.resize(size, resample=2) - return _fromimage(imnew) From c16a55f4572f48ed617dbbfbae418bb2114b4067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 17 Jan 2018 10:30:47 +0100 Subject: [PATCH 4/6] Tweaks to _pilutil.py to reduce differences from original scipy pilutil.py --- sklearn/datasets/tests/test_base.py | 4 +- sklearn/datasets/tests/test_lfw.py | 4 +- sklearn/externals/_pilutil.py | 66 ++++++++++++++++++++--------- 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index dbdcb41291ca9..a2e75e98eccf8 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -21,7 +21,7 @@ from sklearn.datasets.base import Bunch from sklearn.externals.six import b, u -from sklearn.externals._pilutil import _have_image +from sklearn.externals._pilutil import pillow_installed from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_true @@ -162,7 +162,7 @@ def test_load_sample_image(): def test_load_missing_sample_image_error(): - if _have_image: + if pillow_installed: assert_raises(AttributeError, load_sample_image, 'blop.jpg') else: diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 0773a57f48928..97e4f08540265 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -14,7 +14,7 @@ import tempfile import numpy as np from sklearn.externals import six -from sklearn.externals._pilutil import _have_image, imsave +from sklearn.externals._pilutil import pillow_installed, imsave from sklearn.datasets import fetch_lfw_pairs from sklearn.datasets import fetch_lfw_people @@ -41,7 +41,7 @@ def setup_module(): """Test fixture run once and common to all tests of this module""" - if not _have_image: + if not pillow_installed: raise SkipTest("PIL not installed.") if not os.path.exists(LFW_HOME): diff --git a/sklearn/externals/_pilutil.py b/sklearn/externals/_pilutil.py index 621d778f46ff6..1f00dd527d8b9 100644 --- a/sklearn/externals/_pilutil.py +++ b/sklearn/externals/_pilutil.py @@ -1,11 +1,13 @@ """ -Utility functions wrapping PIL functions +A collection of image utilities using the Python Imaging Library (PIL). This is a local version of utility functions from scipy that are wrapping PIL functionality. These functions are deprecated in scipy 1.0.0 and will be -removed in scipy 1.2.0. Therefore, the functionality used in sklearn is -copied here. Origin is the file scipy/misc/pilutil.py. Functions that are -not used in sklearn were removed. +removed in scipy 1.2.0. Therefore, the functionality used in sklearn is copied +here. This file is taken from scipy/misc/pilutil.py in scipy +1.0.0. Modifications include: making this module importable if pillow is not +installed, removal of DeprecationWarning, removal of functions scikit-learn +does not need. Copyright (c) 2001, 2002 Enthought, Inc. All rights reserved. @@ -41,25 +43,28 @@ from __future__ import division, print_function, absolute_import -__all__ = ['_have_image', 'bytescale', 'imread', 'imsave', - 'fromimage', 'toimage', 'imresize'] - import numpy +import tempfile from numpy import (amin, amax, ravel, asarray, arange, ones, newaxis, transpose, iscomplexobj, uint8, issubdtype, array) -_have_image = True +# Modification of original scipy pilutil.py to make this module importable if +# pillow is not installed. If pillow is not installed, functions will raise +# ImportError when called. try: try: from PIL import Image except ImportError: import Image + pillow_installed = True if not hasattr(Image, 'frombytes'): Image.frombytes = Image.fromstring + except ImportError: - _have_image = False + pillow_installed = False +__all__ = ['bytescale', 'imread', 'imsave', 'fromimage', 'toimage', 'imresize'] def bytescale(data, cmin=None, cmax=None, high=255, low=0): """ @@ -91,8 +96,7 @@ def bytescale(data, cmin=None, cmax=None, high=255, low=0): Examples -------- - >>> import numpy as np - >>> from sklearn.externals._pilutil import bytescale + >>> from scipy.misc import bytescale >>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ], ... [ 73.88003259, 80.91433048, 4.88878881], ... [ 51.53875334, 34.45808177, 27.5873488 ]]) @@ -188,7 +192,7 @@ def imread(name, flatten=False, mode=None): mode 'F'. """ - if not _have_image: + if not pillow_installed: raise ImportError("The Python Imaging Library (PIL) " "is required to load data from jpeg files") @@ -223,6 +227,24 @@ def imsave(name, arr, format=None): file name extension. If a file object was used instead of a file name, this parameter should always be used. + Examples + -------- + Construct an array of gradient intensity values and save to file: + + >>> from scipy.misc import imsave + >>> x = np.zeros((255, 255)) + >>> x = np.zeros((255, 255), dtype=np.uint8) + >>> x[:] = np.arange(255) + >>> imsave('gradient.png', x) + + Construct an array with three colour bands (R, G, B) and store to file: + + >>> rgb = np.zeros((255, 255, 3), dtype=np.uint8) + >>> rgb[..., 0] = np.arange(255) + >>> rgb[..., 1] = 55 + >>> rgb[..., 2] = 1 - np.arange(255) + >>> imsave('rgb_gradient.png', rgb) + """ im = toimage(arr, channel_axis=2) if format is None: @@ -256,7 +278,7 @@ def fromimage(im, flatten=False, mode=None): RGB-image MxNx3 and an RGBA-image MxNx4. """ - if not _have_image: + if not pillow_installed: raise ImportError("The Python Imaging Library (PIL) " "is required to load data from jpeg files") @@ -290,7 +312,6 @@ def fromimage(im, flatten=False, mode=None): a = array(im) return a - _errstr = "Mode is unknown or incompatible with input array shape." @@ -325,7 +346,7 @@ def toimage(arr, high=255, low=0, cmin=None, cmax=None, pal=None, The numpy array must be either 2 dimensional or 3 dimensional. """ - if not _have_image: + if not pillow_installed: raise ImportError("The Python Imaging Library (PIL) " "is required to load data from jpeg files") @@ -364,7 +385,7 @@ def toimage(arr, high=255, low=0, cmin=None, cmax=None, pal=None, cmin = amin(ravel(data)) if cmax is None: cmax = amax(ravel(data)) - data = (data * 1.0 - cmin) * (high - low) / (cmax - cmin) + low + data = (data*1.0 - cmin)*(high - low)/(cmax - cmin) + low if mode == 'I': data32 = data.astype(numpy.uint32) image = Image.frombytes(mode, shape, data32.tostring()) @@ -457,17 +478,22 @@ def imresize(arr, size, interp='bilinear', mode=None): imresize : ndarray The resized array of image. + See Also + -------- + toimage : Implicitly used to convert `arr` according to `mode`. + scipy.ndimage.zoom : More generic implementation that does not use PIL. + """ im = toimage(arr, mode=mode) ts = type(size) if issubdtype(ts, numpy.signedinteger): percent = size / 100.0 - size = tuple((array(im.size) * percent).astype(int)) + size = tuple((array(im.size)*percent).astype(int)) elif issubdtype(type(size), numpy.floating): - size = tuple((array(im.size) * size).astype(int)) + size = tuple((array(im.size)*size).astype(int)) else: size = (size[1], size[0]) - func = {'nearest': 0, 'lanczos': 1, - 'bilinear': 2, 'bicubic': 3, 'cubic': 3} + func = {'nearest': 0, 'lanczos': 1, 'bilinear': 2, 'bicubic': 3, 'cubic': 3} imnew = im.resize(size, resample=func[interp]) return fromimage(imnew) + From a7e1c2617f2d87530728ebbaa2f93314e12da8d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 17 Jan 2018 10:43:17 +0100 Subject: [PATCH 5/6] Tweak comments --- sklearn/datasets/base.py | 3 +-- sklearn/datasets/lfw.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index 2817792e8d526..c42a102f59bc6 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -767,8 +767,7 @@ def load_sample_images(): >>> first_img_data.dtype #doctest: +SKIP dtype('uint8') """ - # Locally import _imread to prevent the whole sklearn.datasets module from - # depending on PIL. + # import PIL only when needed from ..externals._pilutil import imread module_path = join(dirname(__file__), "images") diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index f4da7950da0a4..843e7f15dce0f 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -135,8 +135,7 @@ def check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): def _load_imgs(file_paths, slice_, color, resize): """Internally used to load images""" - # Locally import imread and imresize to prevent the whole - # sklearn.datasets module from depending on PIL. + # import PIL only when needed from ..externals._pilutil import imread, imresize # compute the portion of the images to load to respect the slice_ parameter From bb89d20ee74d187591554bfa35b2049c6505f4c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 17 Jan 2018 11:30:14 +0100 Subject: [PATCH 6/6] Minor tweaks --- sklearn/externals/_pilutil.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/externals/_pilutil.py b/sklearn/externals/_pilutil.py index 1f00dd527d8b9..f5c9b43d92a98 100644 --- a/sklearn/externals/_pilutil.py +++ b/sklearn/externals/_pilutil.py @@ -60,12 +60,12 @@ pillow_installed = True if not hasattr(Image, 'frombytes'): Image.frombytes = Image.fromstring - except ImportError: pillow_installed = False __all__ = ['bytescale', 'imread', 'imsave', 'fromimage', 'toimage', 'imresize'] + def bytescale(data, cmin=None, cmax=None, high=255, low=0): """ Byte scales an array (image). @@ -496,4 +496,3 @@ def imresize(arr, size, interp='bilinear', mode=None): func = {'nearest': 0, 'lanczos': 1, 'bilinear': 2, 'bicubic': 3, 'cubic': 3} imnew = im.resize(size, resample=func[interp]) return fromimage(imnew) -