From 0efaba68046f3e681bba1d103b3c7575f743bcf1 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 21 Jan 2020 23:06:52 +0100 Subject: [PATCH] Remove the private, unused _csv2rec. It was only left as a helper for the deprecated and now removed plotfile(). --- doc/faq/howto_faq.rst | 38 +--- lib/matplotlib/mlab.py | 281 ------------------------------ lib/matplotlib/pyplot.py | 2 +- lib/matplotlib/tests/test_mlab.py | 49 ------ 4 files changed, 8 insertions(+), 362 deletions(-) diff --git a/doc/faq/howto_faq.rst b/doc/faq/howto_faq.rst index 212bd6badb3f..6ffa28963670 100644 --- a/doc/faq/howto_faq.rst +++ b/doc/faq/howto_faq.rst @@ -336,37 +336,13 @@ setting in the right subplots. Skip dates where there is no data --------------------------------- -When plotting time series, e.g., financial time series, one often wants -to leave out days on which there is no data, e.g., weekends. By passing -in dates on the x-xaxis, you get large horizontal gaps on periods when -there is not data. The solution is to pass in some proxy x-data, e.g., -evenly sampled indices, and then use a custom formatter to format -these as dates. The example below shows how to use an 'index formatter' -to achieve the desired plot:: - - import numpy as np - import matplotlib.pyplot as plt - import matplotlib.mlab as mlab - import matplotlib.ticker as ticker - - r = mlab.csv2rec('../data/aapl.csv') - r.sort() - r = r[-30:] # get the last 30 days - - N = len(r) - ind = np.arange(N) # the evenly spaced plot indices - - def format_date(x, pos=None): - thisind = np.clip(int(x+0.5), 0, N-1) - return r.date[thisind].strftime('%Y-%m-%d') - - fig = plt.figure() - ax = fig.add_subplot(111) - ax.plot(ind, r.adj_close, 'o-') - ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date)) - fig.autofmt_xdate() - - plt.show() +When plotting time series, e.g., financial time series, one often wants to +leave out days on which there is no data, e.g., weekends. 
By passing in +dates on the x-axis, you get large horizontal gaps on periods when there +is no data. The solution is to pass in some proxy x-data, e.g., evenly +sampled indices, and then use a custom formatter to format these as dates. +:doc:`/gallery/text_labels_and_annotations/date_index_formatter` demonstrates +how to use an 'index formatter' to achieve the desired plot. .. _howto-set-zorder: diff --git a/lib/matplotlib/mlab.py b/lib/matplotlib/mlab.py index 7c1fb194f7de..fe9be26c0002 100644 --- a/lib/matplotlib/mlab.py +++ b/lib/matplotlib/mlab.py @@ -53,7 +53,6 @@ Apply a window along a given axis """ -import csv import functools from numbers import Number @@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning, return Cxy, f -def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',', - converterd=None, names=None, missing='', missingd=None, - use_mrecords=False, dayfirst=False, yearfirst=False): - """ - Load data from comma/space/tab delimited file in *fname* into a - numpy record array and return the record array. - - If *names* is *None*, a header row is required to automatically - assign the recarray names. The headers will be lower cased, - spaces will be converted to underscores, and illegal attribute - name characters removed. If *names* is not *None*, it is a - sequence of names to use for the column names. In this case, it - is assumed there is no header row. - - - - *fname*: can be a filename or a file handle. Support for gzipped - files is automatic, if the filename ends in '.gz' - - - *comments*: the character used to indicate the start of a comment - in the file, or *None* to switch off the removal of comments - - - *skiprows*: is the number of rows from the top to skip - - - *checkrows*: is the number of rows to check to validate the column - data type. When set to zero all rows are validated. 
- - - *converterd*: if not *None*, is a dictionary mapping column number or - munged column name to a converter function. - - - *names*: if not None, is a list of header names. In this case, no - header will be read from the file - - - *missingd* is a dictionary mapping munged column names to field values - which signify that the field does not contain actual data and should - be masked, e.g., '0000-00-00' or 'unused' - - - *missing*: a string whose value signals a missing field regardless of - the column it appears in - - - *use_mrecords*: if True, return an mrecords.fromrecords record array if - any of the data are missing - - - *dayfirst*: default is False so that MM-DD-YY has precedence over - DD-MM-YY. See - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 - for further information. - - - *yearfirst*: default is False so that MM-DD-YY has precedence over - YY-MM-DD. See - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 - for further information. - - If no rows are found, *None* is returned - """ - - if converterd is None: - converterd = dict() - - if missingd is None: - missingd = {} - - import dateutil.parser - import datetime - - fh = cbook.to_filehandle(fname) - - delimiter = str(delimiter) - - class FH: - """ - For space-delimited files, we want different behavior than - comma or tab. Generally, we want multiple spaces to be - treated as a single separator, whereas with comma and tab we - want multiple commas to return multiple (empty) fields. The - join/strip trick below effects this. 
- """ - def __init__(self, fh): - self.fh = fh - - def close(self): - self.fh.close() - - def seek(self, arg): - self.fh.seek(arg) - - def fix(self, s): - return ' '.join(s.split()) - - def __next__(self): - return self.fix(next(self.fh)) - - def __iter__(self): - for line in self.fh: - yield self.fix(line) - - if delimiter == ' ': - fh = FH(fh) - - reader = csv.reader(fh, delimiter=delimiter) - - def process_skiprows(reader): - if skiprows: - for i, row in enumerate(reader): - if i >= (skiprows-1): - break - - return fh, reader - - process_skiprows(reader) - - def ismissing(name, val): - """Return whether the value val in column name should be masked.""" - return val == missing or val == missingd.get(name) or val == '' - - def with_default_value(func, default): - def newfunc(name, val): - if ismissing(name, val): - return default - else: - return func(val) - return newfunc - - def mybool(x): - if x == 'True': - return True - elif x == 'False': - return False - else: - raise ValueError('invalid bool') - - dateparser = dateutil.parser.parse - - def mydateparser(x): - # try and return a datetime object - d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) - return d - - mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1)) - - myfloat = with_default_value(float, np.nan) - myint = with_default_value(int, -1) - mystr = with_default_value(str, '') - mybool = with_default_value(mybool, None) - - def mydate(x): - # try and return a date object - d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) - - if d.hour > 0 or d.minute > 0 or d.second > 0: - raise ValueError('not a date') - return d.date() - mydate = with_default_value(mydate, datetime.date(1, 1, 1)) - - def get_func(name, item, func): - # promote functions in this order - funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr] - for func in funcs[funcs.index(func):]: - try: - func(name, item) - except Exception: - continue - return func - raise ValueError('Could not find a 
working conversion function') - - # map column names that clash with builtins -- TODO - extend this list - itemd = { - 'return': 'return_', - 'file': 'file_', - 'print': 'print_', - } - - def get_converters(reader, comments): - - converters = None - i = 0 - for row in reader: - if (len(row) and comments is not None and - row[0].startswith(comments)): - continue - if i == 0: - converters = [mybool]*len(row) - if checkrows and i > checkrows: - break - i += 1 - - for j, (name, item) in enumerate(zip(names, row)): - func = converterd.get(j) - if func is None: - func = converterd.get(name) - if func is None: - func = converters[j] - if len(item.strip()): - func = get_func(name, item, func) - else: - # how should we handle custom converters and defaults? - func = with_default_value(func, None) - converters[j] = func - return converters - - # Get header and remove invalid characters - needheader = names is None - - if needheader: - for row in reader: - if (len(row) and comments is not None and - row[0].startswith(comments)): - continue - headers = row - break - - # remove these chars - delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""") - delete.add('"') - - names = [] - seen = dict() - for i, item in enumerate(headers): - item = item.strip().lower().replace(' ', '_') - item = ''.join([c for c in item if c not in delete]) - if not len(item): - item = 'column%d' % i - - item = itemd.get(item, item) - cnt = seen.get(item, 0) - if cnt > 0: - names.append(item + '_%d' % cnt) - else: - names.append(item) - seen[item] = cnt+1 - - else: - if isinstance(names, str): - names = [n.strip() for n in names.split(',')] - - # get the converter functions by inspecting checkrows - converters = get_converters(reader, comments) - if converters is None: - raise ValueError('Could not find any valid data in CSV file') - - # reset the reader and start over - fh.seek(0) - reader = csv.reader(fh, delimiter=delimiter) - process_skiprows(reader) - - if needheader: - while True: - # skip past any 
comments and consume one line of column header - row = next(reader) - if (len(row) and comments is not None and - row[0].startswith(comments)): - continue - break - - # iterate over the remaining rows and convert the data to date - # objects, ints, or floats as appropriate - rows = [] - rowmasks = [] - for i, row in enumerate(reader): - if not len(row): - continue - if comments is not None and row[0].startswith(comments): - continue - # Ensure that the row returned always has the same nr of elements - row.extend([''] * (len(converters) - len(row))) - rows.append([func(name, val) - for func, name, val in zip(converters, names, row)]) - rowmasks.append([ismissing(name, val) - for name, val in zip(names, row)]) - fh.close() - - if not len(rows): - return None - - if use_mrecords and np.any(rowmasks): - r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks) - else: - r = np.rec.fromrecords(rows, names=names) - return r - - class GaussianKDE: """ Representation of a kernel-density estimate using Gaussian kernels. 
diff --git a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py index 01b196045886..1d1e294e6a0a 100644 --- a/lib/matplotlib/pyplot.py +++ b/lib/matplotlib/pyplot.py @@ -45,7 +45,7 @@ from matplotlib.artist import Artist from matplotlib.axes import Axes, Subplot from matplotlib.projections import PolarAxes -from matplotlib import mlab # for _csv2rec, detrend_none, window_hanning +from matplotlib import mlab # for detrend_none, window_hanning from matplotlib.scale import get_scale_docs, get_scale_names from matplotlib import cm diff --git a/lib/matplotlib/tests/test_mlab.py b/lib/matplotlib/tests/test_mlab.py index 86be071c2a4f..745045708531 100644 --- a/lib/matplotlib/tests/test_mlab.py +++ b/lib/matplotlib/tests/test_mlab.py @@ -1,9 +1,6 @@ -import tempfile - from numpy.testing import (assert_allclose, assert_almost_equal, assert_array_equal, assert_array_almost_equal_nulp) import numpy as np -import datetime as datetime import pytest import matplotlib.mlab as mlab @@ -140,52 +137,6 @@ def test_stride_ensure_integer_type(self): assert_array_equal(y_strided, 0.3) -@pytest.fixture -def tempcsv(): - with tempfile.TemporaryFile(suffix='csv', mode="w+", newline='') as fd: - yield fd - - -def test_csv2rec_names_with_comments(tempcsv): - tempcsv.write('# comment\n1,2,3\n4,5,6\n') - tempcsv.seek(0) - array = mlab._csv2rec(tempcsv, names='a,b,c') - assert len(array) == 2 - assert len(array.dtype) == 3 - - -@pytest.mark.parametrize('input, kwargs', [ - ('01/11/14\n' - '03/05/76 12:00:01 AM\n' - '07/09/83 5:17:34 PM\n' - '06/20/2054 2:31:45 PM\n' - '10/31/00 11:50:23 AM\n', - {}), - ('11/01/14\n' - '05/03/76 12:00:01 AM\n' - '09/07/83 5:17:34 PM\n' - '20/06/2054 2:31:45 PM\n' - '31/10/00 11:50:23 AM\n', - {'dayfirst': True}), - ('14/01/11\n' - '76/03/05 12:00:01 AM\n' - '83/07/09 5:17:34 PM\n' - '2054/06/20 2:31:45 PM\n' - '00/10/31 11:50:23 AM\n', - {'yearfirst': True}), -], ids=['usdate', 'dayfirst', 'yearfirst']) -def test_csv2rec_dates(tempcsv, input, kwargs): - 
tempcsv.write(input) - expected = [datetime.datetime(2014, 1, 11, 0, 0), - datetime.datetime(1976, 3, 5, 0, 0, 1), - datetime.datetime(1983, 7, 9, 17, 17, 34), - datetime.datetime(2054, 6, 20, 14, 31, 45), - datetime.datetime(2000, 10, 31, 11, 50, 23)] - tempcsv.seek(0) - array = mlab._csv2rec(tempcsv, names='a', **kwargs) - assert_array_equal(array['a'].tolist(), expected) - - def _apply_window(*args, **kwargs): with pytest.warns(MatplotlibDeprecationWarning): return mlab.apply_window(*args, **kwargs)