Skip to content

[WIP] Add utility function to read an image from a file, as scipy.misc.imread is deprecated #10149

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions sklearn/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from ..utils import Bunch
from ..utils import check_random_state
from sklearn.utils.image_read import _imread

import numpy as np

Expand Down Expand Up @@ -742,24 +743,14 @@ def load_sample_images():
>>> first_img_data.dtype #doctest: +SKIP
dtype('uint8')
"""
# Try to import imread from scipy. We do this lazily here to prevent
# this module from depending on PIL.
try:
try:
from scipy.misc import imread
except ImportError:
from scipy.misc.pilutil import imread
except ImportError:
raise ImportError("The Python Imaging Library (PIL) "
"is required to load data from jpeg files")
module_path = join(dirname(__file__), "images")
with open(join(module_path, 'README.txt')) as f:
descr = f.read()
filenames = [join(module_path, filename)
for filename in os.listdir(module_path)
if filename.endswith(".jpg")]
# Load image data for each image in the source folder.
images = [imread(filename) for filename in filenames]
images = [_imread(filename) for filename in filenames]

return Bunch(images=images,
filenames=filenames,
Expand Down
12 changes: 4 additions & 8 deletions sklearn/datasets/lfw.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from .base import get_data_home, _fetch_remote, RemoteFileMetadata
from ..utils import Bunch
from ..externals.joblib import Memory

from sklearn.utils.image_read import _imread
from ..externals.six import b

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -137,13 +137,9 @@ def check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True):
def _load_imgs(file_paths, slice_, color, resize):
"""Internally used to load images"""

# Try to import imread and imresize from PIL. We do this here to prevent
# the whole sklearn.datasets module from depending on PIL.
# Try to import imread from scipy. We do this lazily here to prevent
# this module from depending on PIL.
try:
try:
from scipy.misc import imread
except ImportError:
from scipy.misc.pilutil import imread
from scipy.misc import imresize
except ImportError:
raise ImportError("The Python Imaging Library (PIL)"
Expand Down Expand Up @@ -181,7 +177,7 @@ def _load_imgs(file_paths, slice_, color, resize):

# Checks if jpeg reading worked. Refer to issue #3594 for more
# details.
img = imread(file_path)
img = _imread(file_path)
if img.ndim is 0:
raise RuntimeError("Failed to read the image file %s, "
"Please make sure that libjpeg is installed"
Expand Down
5 changes: 1 addition & 4 deletions sklearn/datasets/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,7 @@ def test_load_sample_image():
def test_load_missing_sample_image_error():
have_PIL = True
try:
try:
from scipy.misc import imread
except ImportError:
from scipy.misc.pilutil import imread # noqa
from sklearn.utils.image_read import _imread
except ImportError:
have_PIL = False
if have_PIL:
Expand Down
62 changes: 62 additions & 0 deletions sklearn/utils/image_read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Utility to read image from file as an array."""
import numpy as np

# Try to import from PIL as imread is deprecated in SciPy 1.0.0,
# and will be removed in 1.2.0.
# Github Issue: https://github.com/scikit-learn/scikit-learn/issues/10147
try:
try:
from PIL import Image
except ImportError:
import Image
except ImportError:
raise ImportError("The Python Imaging Library (PIL) "
"is required to load data from jpeg files")


def _imread(name, flatten=False, mode=None):
    """Read an image from a file as an array.

    This function is only available if Python Imaging Library (PIL) is
    installed.

    Parameters
    ----------
    name : str or file object
        The file name or file object to be read.
    flatten : bool, optional
        If True, flattens the color layers into a single gray-scale layer.
    mode : str, optional
        Mode to convert image to, e.g. ``'RGB'``. See the Notes for more
        details.

    Returns
    -------
    imread : ndarray
        The array obtained by reading the image.

    Raises
    ------
    TypeError
        If the object obtained from PIL is not a PIL image.

    Notes
    -----
    `_imread` uses the Python Imaging Library (PIL) to read an image.
    The following notes are from the PIL documentation.

    `mode` can be one of the following strings:

    * 'L' (8-bit pixels, black and white)
    * 'P' (8-bit pixels, mapped to any other mode using a color palette)
    * 'RGB' (3x8-bit pixels, true color)
    * 'RGBA' (4x8-bit pixels, true color with transparency mask)
    * 'CMYK' (4x8-bit pixels, color separation)
    * 'YCbCr' (3x8-bit pixels, color video format)
    * 'I' (32-bit signed integer pixels)
    * 'F' (32-bit floating point pixels)

    PIL also provides limited support for a few special modes, including
    'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa'
    (true color with premultiplied alpha).

    When translating a color image to black and white (mode 'L', 'I' or
    'F'), the library uses the ITU-R 601-2 luma transform::

        L = R * 299/1000 + G * 587/1000 + B * 114/1000

    When `flatten` is True, the image is converted using mode 'F'.
    When `mode` is not None and `flatten` is True, the image is first
    converted according to `mode`, and the result is then flattened using
    mode 'F'.
    """
    im = Image.open(name)

    # Check that PIL actually handed back an image object. ``isinstance`` is
    # used rather than ``Image.isImageType``, which newer Pillow releases
    # deprecate.
    if not isinstance(im, Image.Image):
        raise TypeError("Input is not a PIL image.")

    # Honour the requested mode first, as documented in the Notes; skip the
    # conversion when the image is already in that mode.
    if mode is not None and mode != im.mode:
        im = im.convert(mode)

    # Flattening always goes through 32-bit float gray-scale (mode 'F'),
    # applied after any explicit ``mode`` conversion.
    if flatten:
        im = im.convert('F')

    return np.asarray(im)