Skip to content

Plotting datetime values from Pandas dataframe #5550

Closed
@arc-jim

Description

@arc-jim

This appears to be a new issue in matplotlib 1.5.0.

The script below attempts to plot two 2-D graphs whose X and Y values are pandas Series. The issue seems to occur when pyplot is passed a datetime column whose index does not contain the label 0 - note how the second dataframe contains only odd indices (1, 3, 5, etc.).

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# create sample dataframe with preset dates and values columns
dates = np.arange('2005-02', '2005-03', dtype='datetime64[D]')
values = np.sin(np.array(range(len(dates))))
df = pd.DataFrame({'dates': dates, 'values': values})

# matplotlib figure + two subplots for comparison
fig, axes = plt.subplots(1, 2)

# create two dataframes for comparison - one with all indices, including 0, and one with only odd indices
with_zero_index = df.copy()
without_zero_index = df[np.array(df.index) % 2 == 1].copy()

# plot both - note how second plot fails without a 0 index
axes[0].plot(with_zero_index['dates'], with_zero_index['values'])
axes[1].plot(without_zero_index['dates'], without_zero_index['values'])

Stack trace:

KeyError                                  Traceback (most recent call last)
<ipython-input-29-28e878247c17> in <module>()
     17 # plot both - note how second plot fails without a 0 index
     18 axes[0].plot(with_zero_index['dates'], with_zero_index['values'])
---> 19 axes[1].plot(without_zero_index['dates'], without_zero_index['values'])

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/matplotlib/__init__.pyc in inner(ax, *args, **kwargs)
   1809                     warnings.warn(msg % (label_namer, func.__name__),
   1810                                   RuntimeWarning, stacklevel=2)
-> 1811             return func(ax, *args, **kwargs)
   1812         pre_doc = inner.__doc__
   1813         if pre_doc is None:

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/matplotlib/axes/_axes.pyc in plot(self, *args, **kwargs)
   1425             kwargs['color'] = c
   1426 
-> 1427         for line in self._get_lines(*args, **kwargs):
   1428             self.add_line(line)
   1429             lines.append(line)

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/matplotlib/axes/_base.pyc in _grab_next_args(self, *args, **kwargs)
    384                 return
    385             if len(remaining) <= 3:
--> 386                 for seg in self._plot_args(remaining, kwargs):
    387                     yield seg
    388                 return

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/matplotlib/axes/_base.pyc in _plot_args(self, tup, kwargs)
    362             x, y = index_of(tup[-1])
    363 
--> 364         x, y = self._xy_from_xy(x, y)
    365 
    366         if self.command == 'plot':

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/matplotlib/axes/_base.pyc in _xy_from_xy(self, x, y)
    195     def _xy_from_xy(self, x, y):
    196         if self.axes.xaxis is not None and self.axes.yaxis is not None:
--> 197             bx = self.axes.xaxis.update_units(x)
    198             by = self.axes.yaxis.update_units(y)
    199 

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/matplotlib/axis.pyc in update_units(self, data)
   1387         neednew = self.converter != converter
   1388         self.converter = converter
-> 1389         default = self.converter.default_units(data, self)
   1390         if default is not None and self.units is None:
   1391             self.set_units(default)

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/matplotlib/dates.pyc in default_units(x, axis)
   1562 
   1563         try:
-> 1564             x = x[0]
   1565         except (TypeError, IndexError):
   1566             pass

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/pandas/core/series.pyc in __getitem__(self, key)
    519     def __getitem__(self, key):
    520         try:
--> 521             result = self.index.get_value(self, key)
    522 
    523             if not np.isscalar(result):

/home/jim/arcemweb/venv/local/lib/python2.7/site-packages/pandas/core/index.pyc in get_value(self, series, key)
   1593 
   1594         try:
-> 1595             return self._engine.get_value(s, k)
   1596         except KeyError as e1:
   1597             if len(self) > 0 and self.inferred_type in ['integer','boolean']:

pandas/index.pyx in pandas.index.IndexEngine.get_value (pandas/index.c:3113)()

pandas/index.pyx in pandas.index.IndexEngine.get_value (pandas/index.c:2844)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3704)()

pandas/hashtable.pyx in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:7224)()

pandas/hashtable.pyx in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:7162)()

KeyError: 0

It looks like matplotlib's dates.py module is attempting to access the first value in the datetime series - but when x is a pandas Series, x[0] is a label-based lookup for index 0, which a) might not exist, and b) isn't necessarily the first value in the series! The lookup raises KeyError, which the existing `except (TypeError, IndexError)` clause in default_units does not catch. A possible fix might be to also catch KeyError and attempt an x.iloc[0] call instead.

The script above worked fine in matplotlib 1.4.3, but fails in 1.5.0.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions