Skip to content

Remove the private, unused _csv2rec. #16288

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 7 additions & 31 deletions doc/faq/howto_faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -336,37 +336,13 @@ setting in the right subplots.
Skip dates where there is no data
---------------------------------

When plotting time series, e.g., financial time series, one often wants
to leave out days on which there is no data, e.g., weekends. By passing
in dates on the x-axis, you get large horizontal gaps in periods when
there is no data. The solution is to pass in some proxy x-data, e.g.,
evenly sampled indices, and then use a custom formatter to format
these as dates. The example below shows how to use an 'index formatter'
to achieve the desired plot::

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.ticker as ticker

# Load the quotes, sort the records and keep only the last 30 of them.
r = mlab.csv2rec('../data/aapl.csv')
r.sort()
r = r[-30:]

N = len(r)
ind = np.arange(N)  # evenly spaced proxy x-values, one per record


def format_date(x, pos=None):
    """Return the '%Y-%m-%d' date label for tick position *x*."""
    # Turn the tick position into an index, then clamp it to [0, N-1] so
    # that ticks outside the data range still map to a valid record.
    nearest = int(x + 0.5)
    idx = np.clip(nearest, 0, N - 1)
    return r.date[idx].strftime('%Y-%m-%d')


# Plot against the indices; the formatter turns them back into dates.
formatter = ticker.FuncFormatter(format_date)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ind, r.adj_close, 'o-')
ax.xaxis.set_major_formatter(formatter)
fig.autofmt_xdate()

plt.show()
When plotting time series, e.g., financial time series, one often wants to
leave out days on which there is no data, e.g., weekends. By passing in
dates on the x-axis, you get large horizontal gaps in periods when there
is no data. The solution is to pass in some proxy x-data, e.g., evenly
sampled indices, and then use a custom formatter to format these as dates.
:doc:`/gallery/text_labels_and_annotations/date_index_formatter` demonstrates
how to use an 'index formatter' to achieve the desired plot.

.. _howto-set-zorder:

Expand Down
281 changes: 0 additions & 281 deletions lib/matplotlib/mlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
Apply a window along a given axis
"""

import csv
import functools
from numbers import Number

Expand Down Expand Up @@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning,
return Cxy, f


def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
             converterd=None, names=None, missing='', missingd=None,
             use_mrecords=False, dayfirst=False, yearfirst=False):
    """
    Load data from comma/space/tab delimited file in *fname* into a
    numpy record array and return the record array.

    If *names* is *None*, a header row is required to automatically
    assign the recarray names.  The headers will be lower cased,
    spaces will be converted to underscores, and illegal attribute
    name characters removed.  If *names* is not *None*, it is a
    sequence of names to use for the column names.  In this case, it
    is assumed there is no header row.

    The file is read twice: a first pass infers a converter for every
    column (trying bool, int, float, date, datetime and finally str, in
    that order), then the file is rewound and a second pass converts the
    data.

    - *fname*: can be a filename or a file handle.  Support for gzipped
      files is automatic, if the filename ends in '.gz'

    - *comments*: the character used to indicate the start of a comment
      in the file, or *None* to switch off the removal of comments

    - *skiprows*: is the number of rows from the top to skip

    - *checkrows*: is the number of rows to check to validate the column
      data type.  When set to zero all rows are validated.

    - *delimiter*: the field separator.  For ``' '``, runs of whitespace
      are collapsed so that they count as a single separator.

    - *converterd*: if not *None*, is a dictionary mapping column number or
      munged column name to a converter function.

    - *names*: if not None, is a list of header names (or a single
      comma-separated string of names).  In this case, no header will be
      read from the file

    - *missingd* is a dictionary mapping munged column names to field values
      which signify that the field does not contain actual data and should
      be masked, e.g., '0000-00-00' or 'unused'

    - *missing*: a string whose value signals a missing field regardless of
      the column it appears in

    - *use_mrecords*: if True, return an mrecords.fromrecords record array if
      any of the data are missing

    - *dayfirst*: default is False so that MM-DD-YY has precedence over
      DD-MM-YY.  See
      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
      for further information.

    - *yearfirst*: default is False so that MM-DD-YY has precedence over
      YY-MM-DD.  See
      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
      for further information.

    If no data rows are found, *None* is returned.

    Raises ValueError if a header row is required but the file contains
    none, or if the file contains no rows from which column types can be
    inferred.

    The file handle is always closed before returning, including when an
    exception is raised.
    """
    import datetime

    import dateutil.parser

    if converterd is None:
        converterd = {}

    if missingd is None:
        missingd = {}

    delimiter = str(delimiter)

    class FH:
        """
        For space-delimited files, we want different behavior than
        comma or tab.  Generally, we want multiple spaces to be
        treated as a single separator, whereas with comma and tab we
        want multiple commas to return multiple (empty) fields.  The
        join/strip trick below effects this.
        """
        def __init__(self, fh):
            self.fh = fh

        def close(self):
            self.fh.close()

        def seek(self, arg):
            self.fh.seek(arg)

        def fix(self, s):
            return ' '.join(s.split())

        def __next__(self):
            return self.fix(next(self.fh))

        def __iter__(self):
            for line in self.fh:
                yield self.fix(line)

    def process_skiprows(reader):
        """Consume the first *skiprows* rows of *reader*."""
        if skiprows:
            for i, row in enumerate(reader):
                if i >= (skiprows-1):
                    break

    def iscomment(row):
        """Return whether *row* is a non-empty row starting a comment."""
        return (len(row) and comments is not None and
                row[0].startswith(comments))

    def ismissing(name, val):
        """Return whether the value val in column name should be masked."""
        return val == missing or val == missingd.get(name) or val == ''

    def with_default_value(func, default):
        # Wrap the one-argument converter *func* into a (name, val)
        # converter that yields *default* for missing values.
        def newfunc(name, val):
            if ismissing(name, val):
                return default
            else:
                return func(val)
        return newfunc

    def mybool(x):
        if x == 'True':
            return True
        elif x == 'False':
            return False
        else:
            raise ValueError('invalid bool')

    dateparser = dateutil.parser.parse

    def mydateparser(x):
        # try and return a datetime object
        return dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)

    def mydate(x):
        # try and return a date object
        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)

        if d.hour > 0 or d.minute > 0 or d.second > 0:
            raise ValueError('not a date')
        return d.date()

    mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1))
    myfloat = with_default_value(float, np.nan)
    myint = with_default_value(int, -1)
    mystr = with_default_value(str, '')
    mybool = with_default_value(mybool, None)
    mydate = with_default_value(mydate, datetime.date(1, 1, 1))

    def get_func(name, item, func):
        # promote functions in this order, starting from the converter
        # currently assigned to the column (never demote)
        funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr]
        for candidate in funcs[funcs.index(func):]:
            try:
                candidate(name, item)
            except Exception:
                # Deliberately broad: any failure just means "this type
                # does not fit", so try the next, more general one.
                continue
            return candidate
        raise ValueError('Could not find a working conversion function')

    # map column names that clash with builtins -- TODO - extend this list
    itemd = {
        'return': 'return_',
        'file': 'file_',
        'print': 'print_',
    }

    def get_converters(reader):
        """
        Infer one converter per column from the rows left in *reader*.

        Return *None* if *reader* yields no rows other than comments.
        """
        converters = None
        i = 0
        for row in reader:
            if iscomment(row):
                continue
            if not row:
                # Blank lines carry no type information (and are skipped
                # by the conversion pass as well).  Still record that a
                # row was seen, so that a file holding only blank lines
                # yields None from _csv2rec instead of raising.
                if converters is None:
                    converters = []
                continue
            if i == 0:
                converters = [mybool]*len(row)
            # NOTE: as written this inspects up to checkrows + 1 rows.
            if checkrows and i > checkrows:
                break
            i += 1

            for j, (name, item) in enumerate(zip(names, row)):
                func = converterd.get(j)
                if func is None:
                    func = converterd.get(name)
                if func is None:
                    # No user-supplied converter: keep (or promote) the
                    # one inferred so far for this column.
                    func = converters[j]
                    if len(item.strip()):
                        func = get_func(name, item, func)
                else:
                    # how should we handle custom converters and defaults?
                    func = with_default_value(func, None)
                converters[j] = func
        return converters

    fh = cbook.to_filehandle(fname)
    if delimiter == ' ':
        fh = FH(fh)

    # Everything that touches the file lives in the try block so that the
    # handle is released even when parsing fails.
    try:
        reader = csv.reader(fh, delimiter=delimiter)
        process_skiprows(reader)

        # Get header and remove invalid characters
        needheader = names is None

        if needheader:
            headers = None
            for row in reader:
                if iscomment(row):
                    continue
                headers = row
                break
            if headers is None:
                raise ValueError('Could not find any valid data in CSV file')

            # remove these chars
            delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""")
            delete.add('"')

            names = []
            seen = {}
            for i, item in enumerate(headers):
                item = item.strip().lower().replace(' ', '_')
                item = ''.join([c for c in item if c not in delete])
                if not item:
                    item = 'column%d' % i

                item = itemd.get(item, item)
                # Disambiguate repeated column names with a numeric suffix.
                cnt = seen.get(item, 0)
                if cnt > 0:
                    names.append(item + '_%d' % cnt)
                else:
                    names.append(item)
                seen[item] = cnt+1

        elif isinstance(names, str):
            names = [n.strip() for n in names.split(',')]

        # get the converter functions by inspecting checkrows
        converters = get_converters(reader)
        if converters is None:
            raise ValueError('Could not find any valid data in CSV file')

        # reset the reader and start over
        fh.seek(0)
        reader = csv.reader(fh, delimiter=delimiter)
        process_skiprows(reader)

        if needheader:
            # skip past any comments and consume one line of column header
            for row in reader:
                if not iscomment(row):
                    break

        # iterate over the remaining rows and convert the data to date
        # objects, ints, or floats as appropriate
        rows = []
        rowmasks = []
        for row in reader:
            if not len(row):
                continue
            if comments is not None and row[0].startswith(comments):
                continue
            # Ensure that the row returned always has the same nr of elements
            row.extend([''] * (len(converters) - len(row)))
            rows.append([func(name, val)
                         for func, name, val in zip(converters, names, row)])
            rowmasks.append([ismissing(name, val)
                             for name, val in zip(names, row)])
    finally:
        fh.close()

    if not len(rows):
        return None

    if use_mrecords and np.any(rowmasks):
        r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks)
    else:
        r = np.rec.fromrecords(rows, names=names)
    return r


class GaussianKDE:
"""
Representation of a kernel-density estimate using Gaussian kernels.
Expand Down
2 changes: 1 addition & 1 deletion lib/matplotlib/pyplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from matplotlib.artist import Artist
from matplotlib.axes import Axes, Subplot
from matplotlib.projections import PolarAxes
from matplotlib import mlab # for _csv2rec, detrend_none, window_hanning
from matplotlib import mlab # for detrend_none, window_hanning
from matplotlib.scale import get_scale_docs, get_scale_names

from matplotlib import cm
Expand Down
Loading