Skip to content

FIX: pandas indexing error #5556

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 30, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 55 additions & 44 deletions lib/matplotlib/axes/_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2331,11 +2331,11 @@ def broken_barh(self, xranges, yrange, **kwargs):
"""
# process the unit information
if len(xranges):
xdata = six.next(iter(xranges))
xdata = cbook.safe_first_element(xranges)
else:
xdata = None
if len(yrange):
ydata = six.next(iter(yrange))
ydata = cbook.safe_first_element(yrange)
else:
ydata = None
self._process_unit_info(xdata=xdata,
Expand Down Expand Up @@ -3016,7 +3016,7 @@ def xywhere(xs, ys, mask):

if ecolor is None:
if l0 is None and 'color' in self._get_lines._prop_keys:
ecolor = six.next(self._get_lines.prop_cycler)['color']
ecolor = next(self._get_lines.prop_cycler)['color']
else:
ecolor = l0.get_color()

Expand Down Expand Up @@ -5875,6 +5875,41 @@ def hist(self, x, bins=10, range=None, normed=False, weights=None,
.. plot:: mpl_examples/statistics/histogram_demo_features.py

"""
def _normalize_input(inp, ename='input'):
    """
    Normalize 1D or 2D histogram input into a sequence of datasets.

    The return type depends on the layout of `inp`; in both cases
    iterating over the result yields one 1D array per dataset:

    - If `inp` is an `np.ndarray`, or its first element is not
      iterable (a flat sequence of scalars), `inp` is converted with
      `np.asarray` and a 2D array with one *row* per dataset is
      returned.  2D input is transposed (its columns are the
      datasets); 1D input becomes a single row.
    - Otherwise `inp` is treated as a collection of datasets that may
      differ in length, and a list of `np.ndarray` is returned.

    Parameters
    ----------
    inp : iterable
        The data to normalize.
    ename : str, optional
        Name to use in ValueError if `inp` can not be normalized

    Returns
    -------
    np.ndarray or list of np.ndarray
        A 2D array (datasets as rows) or a list with one array per
        dataset, as described above.

    Raises
    ------
    ValueError
        If array-like input is neither 1D nor 2D.
    """
    # Classify `inp` itself rather than the enclosing `x`: this helper
    # is also applied to `weights`, whose container type may differ.
    if (isinstance(inp, np.ndarray) or
            not iterable(cbook.safe_first_element(inp))):
        # TODO: support masked arrays;
        inp = np.asarray(inp)
        if inp.ndim == 2:
            # 2-D input with columns as datasets; switch to rows
            inp = inp.T
        elif inp.ndim == 1:
            # new view, single row
            inp = inp.reshape(1, inp.shape[0])
        else:
            raise ValueError(
                "{ename} must be 1D or 2D".format(ename=ename))
        # More datasets (rows) than samples per dataset (columns)
        # suggests the caller passed the data transposed.
        if inp.shape[1] < inp.shape[0]:
            warnings.warn(
                '2D hist input should be nsamples x nvariables;\n '
                'this looks transposed '
                '(shape is %d x %d)' % inp.shape[::-1])
    else:
        # multiple hist with data of different length
        inp = [np.asarray(xi) for xi in inp]

    return inp

if not self._hold:
self.cla()

Expand Down Expand Up @@ -5918,58 +5953,34 @@ def hist(self, x, bins=10, range=None, normed=False, weights=None,
input_empty = len(flat) == 0

# Massage 'x' for processing.
# NOTE: Be sure any changes here is also done below to 'weights'
if input_empty:
x = np.array([[]])
elif isinstance(x, np.ndarray) or not iterable(x[0]):
# TODO: support masked arrays;
x = np.asarray(x)
if x.ndim == 2:
x = x.T # 2-D input with columns as datasets; switch to rows
elif x.ndim == 1:
x = x.reshape(1, x.shape[0]) # new view, single row
else:
raise ValueError("x must be 1D or 2D")
if x.shape[1] < x.shape[0]:
warnings.warn(
'2D hist input should be nsamples x nvariables;\n '
'this looks transposed (shape is %d x %d)' % x.shape[::-1])
else:
# multiple hist with data of different length
x = [np.asarray(xi) for xi in x]

x = _normalize_input(x, 'x')
nx = len(x) # number of datasets

# We need to do to 'weights' what was done to 'x'
if weights is not None:
w = _normalize_input(weights, 'weights')
else:
w = [None]*nx

if len(w) != nx:
raise ValueError('weights should have the same shape as x')

for xi, wi in zip(x, w):
if wi is not None and len(wi) != len(xi):
raise ValueError(
'weights should have the same shape as x')

if color is None and 'color' in self._get_lines._prop_keys:
color = [six.next(self._get_lines.prop_cycler)['color']
color = [next(self._get_lines.prop_cycler)['color']
for i in xrange(nx)]
else:
color = mcolors.colorConverter.to_rgba_array(color)
if len(color) != nx:
raise ValueError("color kwarg must have one color per dataset")

# We need to do to 'weights' what was done to 'x'
if weights is not None:
if isinstance(weights, np.ndarray) or not iterable(weights[0]):
w = np.array(weights)
if w.ndim == 2:
w = w.T
elif w.ndim == 1:
w.shape = (1, w.shape[0])
else:
raise ValueError("weights must be 1D or 2D")
else:
w = [np.asarray(wi) for wi in weights]

if len(w) != nx:
raise ValueError('weights should have the same shape as x')
for i in xrange(nx):
if len(w[i]) != len(x[i]):
raise ValueError(
'weights should have the same shape as x')
else:
w = [None]*nx

# Save the datalimits for the same reason:
_saved_bounds = self.dataLim.bounds

Expand All @@ -5985,7 +5996,7 @@ def hist(self, x, bins=10, range=None, normed=False, weights=None,
xmax = max(xmax, xi.max())
bin_range = (xmin, xmax)

#hist_kwargs = dict(range=range, normed=bool(normed))
# hist_kwargs = dict(range=range, normed=bool(normed))
# We will handle the normed kwarg within mpl until we
# get to the point of requiring numpy >= 1.5.
hist_kwargs = dict(range=bin_range)
Expand Down
2 changes: 1 addition & 1 deletion lib/matplotlib/axes/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import matplotlib.image as mimage
from matplotlib.offsetbox import OffsetBox
from matplotlib.artist import allow_rasterization
from matplotlib.cbook import iterable, index_of

from matplotlib.rcsetup import cycler

rcParams = matplotlib.rcParams
Expand Down
2 changes: 1 addition & 1 deletion lib/matplotlib/backends/backend_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1846,7 +1846,7 @@ def draw_tex(self, gc, x, y, s, prop, angle, ismath='TeX!', mtext=None):
fontsize = prop.get_size_in_points()
dvifile = texmanager.make_dvi(s, fontsize)
dvi = dviread.Dvi(dvifile, 72)
page = six.next(iter(dvi))
page = next(iter(dvi))
dvi.close()

# Gather font information and do some setup for combining
Expand Down
8 changes: 8 additions & 0 deletions lib/matplotlib/cbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from matplotlib.externals import six
from matplotlib.externals.six.moves import xrange, zip
from itertools import repeat
import collections

import datetime
import errno
Expand Down Expand Up @@ -2536,6 +2537,13 @@ def index_of(y):
return np.arange(y.shape[0], dtype=float), y


def safe_first_element(obj):
    """
    Return the first element of *obj* using the iteration protocol.

    The element is fetched with ``next(iter(obj))`` rather than
    ``obj[0]``, so it does not depend on what key ``0`` means for the
    container.

    Parameters
    ----------
    obj : iterable
        A container that can be iterated more than once.

    Returns
    -------
    The first item produced by iterating over *obj*.

    Raises
    ------
    RuntimeError
        If *obj* is itself an iterator (for example a generator);
        peeking at it would consume its first item.
    StopIteration
        If *obj* is empty.
    TypeError
        If *obj* is not iterable.
    """
    # The ABC aliases in ``collections`` were removed in Python 3.10;
    # ``collections.abc`` does not exist on Python 2, hence the fallback.
    try:
        from collections.abc import Iterator
    except ImportError:
        from collections import Iterator

    if isinstance(obj, Iterator):
        raise RuntimeError("matplotlib does not support generators "
                           "as input")
    return next(iter(obj))


def get_label(y, default_name):
try:
return y.name
Expand Down
4 changes: 2 additions & 2 deletions lib/matplotlib/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1561,8 +1561,8 @@ def default_units(x, axis):
x = x.ravel()

try:
x = x[0]
except (TypeError, IndexError):
x = cbook.safe_first_element(x)
except (TypeError, StopIteration):
pass

try:
Expand Down
31 changes: 31 additions & 0 deletions lib/matplotlib/tests/test_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import io

from nose.tools import assert_equal, assert_raises, assert_false, assert_true
from nose.plugins.skip import SkipTest

import datetime

Expand Down Expand Up @@ -4183,6 +4184,36 @@ def test_broken_barh_empty():
ax.broken_barh([], (.1, .5))


@cleanup
def test_pandas_indexing_dates():
    """Smoke test: plot date data from a DataFrame lacking index label 0."""
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not installed")

    day_stamps = np.arange('2005-02', '2005-03', dtype='datetime64[D]')
    sine_values = np.sin(np.array(range(len(day_stamps))))
    frame = pd.DataFrame({'dates': day_stamps, 'values': sine_values})

    ax = plt.gca()

    # Keep only the rows with odd index labels, so label 0 is absent.
    odd_rows = frame[np.array(frame.index) % 2 == 1].copy()
    ax.plot('dates', 'values', data=odd_rows)


@cleanup
def test_pandas_indexing_hist():
    """Smoke test: histogram a Series whose index does not start at 0."""
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not installed")

    full_series = pd.Series(data=[1, 2, 2, 3, 3, 4, 4, 4, 4, 5])
    # Dropping the first row leaves an index that begins at label 1.
    tail_series = full_series.iloc[1:]
    fig, axes = plt.subplots()
    axes.hist(tail_series)


if __name__ == '__main__':
import nose
import sys
Expand Down