Skip to content

Modified rrulewraper to handle timezone-aware datetimes. #9028

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 92 additions & 6 deletions lib/matplotlib/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
import time
import math
import datetime
import functools

import warnings

Expand Down Expand Up @@ -732,20 +733,105 @@ def __call__(self, x, pos=None):


class rrulewrapper(object):
def __init__(self, freq, tzinfo=None, **kwargs):
kwargs['freq'] = freq
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually have no clue if it is always safe to modify a **kwargs. Does it never modify a passed in dictionary?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be pretty weird if it could modify a passed in dictionary, since it's an expansion of the keywords. I assume that the only way there would even be a dictionary to pass in would be if you passed it like f(**kwargs). Testing it out on Python 3 seems to indicate that does not modify the dictionary. I can try and read up on it later, though.

def test_func(*args, **kwargs):
    kwargs['demo'] = 9

if __name__ == "__main__":
    kwargs = {}
    test_func(**kwargs)
    print(kwargs)    # {}

    kwargs2 = {'demo': 6}
    test_func(**kwargs)
    print(kwargs)    # {}

Copy link
Member Author

@pganssle pganssle Aug 25, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have looked into this a bit and I don't see anyone actually saying this is safe, but it also doesn't seem like the question has come up before - I would guess that it would be surprising if it were unsafe and someone would have commented on it by now. My understanding of how these things are supposed to work makes me fairly certain that this will not be a problem, but I've opened a question on SO just in case.

Either way, if we find that there's some weird situation where it could be a problem, we can just have a second PR that adds kwargs = kwargs.copy() at the top of the function, so I don't see any reason why uncertainty in this regard should be blocking.

Edit: I should have waited a few more minutes, the first answer on the SO says that it's always safe based on the spec.

self._base_tzinfo = tzinfo

def __init__(self, freq, **kwargs):
self._construct = kwargs.copy()
self._construct["freq"] = freq
self._rrule = rrule(**self._construct)
self._update_rrule(**kwargs)

def set(self, **kwargs):
self._construct.update(kwargs)

self._update_rrule(**self._construct)

def _update_rrule(self, **kwargs):
tzinfo = self._base_tzinfo

# rrule does not play nicely with time zones - especially pytz time
# zones, it's best to use naive zones and attach timezones once the
# datetimes are returned
if 'dtstart' in kwargs:
dtstart = kwargs['dtstart']
if dtstart.tzinfo is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could do dtstart = kwargs.get('dtstart', None), and therefore be able to reduce all of this by an indent.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is fine. I think the main reason I did it this way is that it avoids the unnecessary replace(tzinfo=None) call at the end, but I'm fine with it either way (I really doubt anyone will be bogged down by any potential performance hit in that case).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WeatherGod Actually, looking at this, I don't think you can reduce all this by an indent, because if dtstart is None, the dtstart.tzinfo is None and dtstart.replace() calls will fail.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually, it'll fail for a different reason. dtstart would be None and it wouldn't have the tzinfo attribute. Don't know why I didn't see that originally.

if tzinfo is None:
tzinfo = dtstart.tzinfo
else:
dtstart = dtstart.astimezone(tzinfo)

kwargs['dtstart'] = dtstart.replace(tzinfo=None)

if 'until' in kwargs:
until = kwargs['until']
if until.tzinfo is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: until = kwargs.get('until', None)

if tzinfo is not None:
until = until.astimezone(tzinfo)
else:
raise ValueError('until cannot be aware if dtstart '
'is naive and tzinfo is None')

kwargs['until'] = until.replace(tzinfo=None)

self._construct = kwargs.copy()
self._tzinfo = tzinfo
self._rrule = rrule(**self._construct)

def _attach_tzinfo(self, dt, tzinfo):
# pytz zones are attached by "localizing" the datetime
if hasattr(tzinfo, 'localize'):
return tzinfo.localize(dt, is_dst=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to hard-code is_dst?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WeatherGod The default for pytz.localize is essentially the same as hard-coding is_dst=False, which is inconsistent with PEP-495, since rrule will never set the fold attribute.

In the end I think very few people will need the feature of choosing which side of a DST fold their ticks are on, but it would not be difficult to add in a later version.


return dt.replace(tzinfo=tzinfo)

def _aware_return_wrapper(self, f, returns_list=False):
"""Decorator function that allows rrule methods to handle tzinfo."""
# This is only necessary if we're actually attaching a tzinfo
if self._tzinfo is None:
return f

# All datetime arguments must be naive. If they are not naive, they are
# converted to the _tzinfo zone before dropping the zone.
def normalize_arg(arg):
if isinstance(arg, datetime.datetime) and arg.tzinfo is not None:
if arg.tzinfo is not self._tzinfo:
arg = arg.astimezone(self._tzinfo)

return arg.replace(tzinfo=None)

return arg

def normalize_args(args, kwargs):
args = tuple(normalize_arg(arg) for arg in args)
kwargs = {kw: normalize_arg(arg) for kw, arg in kwargs.items()}

return args, kwargs

# There are two kinds of functions we care about - ones that return
# dates and ones that return lists of dates.
if not returns_list:
def inner_func(*args, **kwargs):
args, kwargs = normalize_args(args, kwargs)
dt = f(*args, **kwargs)
return self._attach_tzinfo(dt, self._tzinfo)
else:
def inner_func(*args, **kwargs):
args, kwargs = normalize_args(args, kwargs)
dts = f(*args, **kwargs)
return [self._attach_tzinfo(dt, self._tzinfo) for dt in dts]

return functools.wraps(f)(inner_func)

def __getattr__(self, name):
if name in self.__dict__:
return self.__dict__[name]
return getattr(self._rrule, name)

f = getattr(self._rrule, name)

if name in {'after', 'before'}:
return self._aware_return_wrapper(f)
elif name in {'xafter', 'xbefore', 'between'}:
return self._aware_return_wrapper(f, returns_list=True)
else:
return f

def __setstate__(self, state):
self.__dict__.update(state)
Expand Down Expand Up @@ -1226,7 +1312,7 @@ def __init__(self, bymonth=None, bymonthday=1, interval=1, tz=None):
bymonth = [x.item() for x in bymonth.astype(int)]

rule = rrulewrapper(MONTHLY, bymonth=bymonth, bymonthday=bymonthday,
interval=interval, **self.hms0d)
interval=interval, **self.hms0d)
RRuleLocator.__init__(self, rule, tz)


Expand Down
18 changes: 18 additions & 0 deletions lib/matplotlib/tests/test_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,24 @@ def tz_convert(*args):
_test_date2num_dst(pd.date_range, tz_convert)


@pytest.mark.parametrize("attach_tz, get_tz", [
(lambda dt, zi: zi.localize(dt), lambda n: pytz.timezone(n)),
(lambda dt, zi: dt.replace(tzinfo=zi), lambda n: dateutil.tz.gettz(n))])
def test_rrulewrapper(attach_tz, get_tz):
SYD = get_tz('Australia/Sydney')

dtstart = attach_tz(datetime.datetime(2017, 4, 1, 0), SYD)
dtend = attach_tz(datetime.datetime(2017, 4, 4, 0), SYD)

rule = mdates.rrulewrapper(freq=dateutil.rrule.DAILY, dtstart=dtstart)

act = rule.between(dtstart, dtend)
exp = [datetime.datetime(2017, 4, 1, 13, tzinfo=dateutil.tz.tzutc()),
datetime.datetime(2017, 4, 2, 14, tzinfo=dateutil.tz.tzutc())]

assert act == exp


def test_DayLocator():
with pytest.raises(ValueError):
mdates.DayLocator(interval=-1)
Expand Down