Skip to content

Commit 8b57539

Browse files
committed
added read/load csv,h5,excel functions using pandas dataframe methods
1 parent fda9019 commit 8b57539

File tree

2 files changed

+98
-55
lines changed

2 files changed

+98
-55
lines changed

larray.py

Lines changed: 96 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@
7575
import sys
7676

7777
import numpy as np
78+
import pandas as pd
79+
7880
import tables
7981

8082
from utils import prod, table2str, table2csv, table2iode, timed, unique
@@ -292,6 +294,17 @@ def __new__(cls, data, axes=None):
292294
axes = list(axes)
293295
obj.axes = axes
294296
return obj
297+
298+
def as_dataframe(self):
299+
axes_labels = [a.labels.tolist() for a in self.axes[:-1]]
300+
axes_names = [a.name for a in self.axes[:-1]]
301+
axes_names[-1] = axes_names[-1] + '\\' + self.axes[-1].name
302+
columns = self.axes[-1].labels.tolist()
303+
full_index=[i for i in product(*axes_labels)]
304+
index = pd.MultiIndex.from_tuples(full_index, names=axes_names)
305+
df = pd.DataFrame(self.reshape(len(full_index), len(columns)), index, columns)
306+
return df
307+
295308

296309
#noinspection PyAttributeOutsideInit
297310
def __array_finalize__(self, obj):
@@ -401,39 +414,27 @@ def __str__(self):
401414
def as_table(self):
402415
if not self.ndim:
403416
return []
417+
418+
#ert | unit | geo\time | 2012 | 2011 | 2010
419+
#NEER27 | I05 | AT | 101.41 | 101.63 | 101.63
420+
#NEER27 | I05 | AU | 134.86 | 125.29 | 117.08
404421

405-
# gender | |
406-
# False | True | total
407-
# 20 | 16 | 35
408-
409-
# dead | gender | |
410-
# | False | True | total
411-
# False | 20 | 15 | 35
412-
# True | 0 | 1 | 1
413-
# total | 20 | 16 | 36
414-
415-
# agegroup | gender | dead | |
416-
# | | False | True | total
417-
# 5 | False | 20 | 15 | xx
418-
# 5 | True | 0 | 1 | xx
419-
# 10 | False | 25 | 10 | xx
420-
# 10 | True | 1 | 1 | xx
421-
# | total | xx | xx | xx
422422
width = self.shape[-1]
423423
height = prod(self.shape[:-1])
424424
if self.axes is not None:
425+
#axes_names = [axis.name for axis in self.axes]
425426
axes_names = [axis.name for axis in self.axes]
427+
if len(axes_names) > 1:
428+
axes_names[-2] = '\\'.join(axes_names[-2:])
429+
axes_names.pop()
430+
426431
axes_labels = [axis.labels for axis in self.axes]
427432
else:
428433
axes_names = None
429434
axes_labels = None
430435

431436
if axes_names is not None:
432-
result = [axes_names +
433-
[''] * (width - 1),
434-
# 2nd line
435-
[''] * (self.ndim - 1) +
436-
list(axes_labels[-1])]
437+
result = [axes_names + list(axes_labels[-1])]
437438
#if self.row_totals is not None:
438439
# result[0].append('')
439440
# result[1].append('total')
@@ -659,44 +660,82 @@ def ToAv(self, filename):
659660

660661

661662
def parse(s):
662-
s = s.lower()
663-
if s in ('0', '1', 'false', 'true'):
664-
return s in ('1', 'true')
665-
elif s.isdigit():
666-
return int(s)
663+
if(isinstance(s, str)):
664+
s = s.lower()
665+
if s in ('0', '1', 'false', 'true'):
666+
return s in ('1', 'true')
667+
elif s.isdigit():
668+
return int(s)
669+
else:
670+
try:
671+
return float(s)
672+
except ValueError:
673+
return s
667674
else:
668-
try:
669-
return float(s)
670-
except ValueError:
671-
return s
675+
return s
672676

677+
def df_aslarray(df, na=np.nan):
678+
axes_labels = [list(unique(level[labels]))
679+
for level, labels in zip(df.index.levels, df.index.labels)]
680+
axes_names = df.index.names
681+
laxis = axes_names[-1].split('\\')
682+
if len(laxis) > 0:
683+
axes_names[-1] = laxis[0]
684+
axes = [Axis(name, labels) for name, labels in zip(axes_names, axes_labels)]
685+
# pandas treats the "time" labels as column names (strings) so we need to
686+
# convert them to values
687+
if len(laxis) > 0:
688+
axes_names[-1] = laxis[0]
689+
axes.append(Axis(laxis[1], [parse(cell) for cell in df.columns.values]))
690+
else:
691+
axes.append(Axis('time', [parse(cell) for cell in df.columns.values]))
692+
sdf = df.reindex([i for i in product(*axes_labels)], df.columns.values)
693+
if na != np.nan:
694+
sdf.fillna(na,inplace=True)
695+
data = sdf.values.reshape([len(axis.labels) for axis in axes])
696+
return LArray(data, axes)
673697

674-
def read_csv(filepath):
675-
import pandas as pd
676698

699+
# CSV functions
700+
def read_csv(filepath, sep=',', na=np.nan):
677701
# read the first line to determine how many axes (time excluded) we have
678702
with open(filepath, 'rb') as f:
679-
reader = csv.reader(f)
703+
reader = csv.reader(f, delimiter=sep)
680704
header = [parse(cell) for cell in reader.next()]
681705
axes_names = [cell for cell in header if isinstance(cell, basestring)]
682-
df = pd.read_csv(filepath, index_col=range(len(axes_names)))
706+
df = pd.read_csv(filepath, index_col=range(len(axes_names)), sep=sep)
683707
assert df.index.names == axes_names, "%s != %s" % (df.index.names,
684708
axes_names)
685-
686-
# labels in index.levels are sorted, but the data is not, so we need to
687-
# compute the "unsorted" labels !
688-
# alternatives are to either use "df = df.sort_index()", or
689-
# "df.index.get_level_values(level)" but they are both slower.
690-
axes_labels = [list(unique(level[labels]))
691-
for level, labels in zip(df.index.levels, df.index.labels)]
692-
axes = [Axis(name, labels) for name, labels in zip(axes_names, axes_labels)]
693-
# pandas treats the "time" labels as column names (strings) so we need to
694-
# convert them to values
695-
axes.append(Axis('time', [parse(cell) for cell in df.columns.values]))
696-
data = df.values.reshape([len(axis.labels) for axis in axes])
697-
return LArray(data, axes)
698-
699-
709+
return df_aslarray(df, na)
710+
711+
def save_csv(l_array, filepath, sep=',', na=np.nan):
712+
df = l_array.as_dataframe()
713+
df.to_csv(filepath, sep=sep)
714+
715+
# HDF5 functions
716+
def save_h5(l_array, name, filepath):
717+
df = l_array.as_dataframe()
718+
store = pd.HDFStore(filepath)
719+
store.put(name, df)
720+
store.close()
721+
722+
def read_h5(name, filepath):
723+
store = pd.HDFStore(filepath)
724+
df = store.get(name)
725+
store.close()
726+
return df_aslarray(df)
727+
728+
#EXCEL functions
729+
def save_excel(l_array, name, filepath):
730+
df = l_array.as_dataframe()
731+
writer = pd.ExcelWriter(filepath)
732+
df.to_excel(writer, name)
733+
writer.save()
734+
735+
def read_excel(name, filepath, index_col):
736+
df=pd.read_excel(filepath, name, index_col=index_col)
737+
return df_aslarray(df)
738+
700739
def SaveMatrices(h5_filename):
701740
try:
702741
h5file = tables.openFile(h5_filename, mode="w", title="IodeMatrix")
@@ -755,4 +794,10 @@ def LoadMatrix(h5_filename, matname):
755794
if __name__ == '__main__':
756795
#reg.Collapse('c:/tmp/reg.csv')
757796
#reg.ToAv('reg.av')
758-
pass
797+
test = read_csv('ert_eff_ic_a.tsv', '\t', 0)
798+
test.ToCsv('brol.csv')
799+
save_csv(test, 'brolpd.csv')
800+
save_excel(test, "TEST", "test.xls")
801+
test_xls = read_excel("TEST", "test.xls", index_col=[0,1,2])
802+
save_h5(test, 'test', 'store.h5')
803+
test_h5 = read_h5('test', 'store.h5')

test_la.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44

55
print "numpy", np.__version__
66

7-
vla = 'A11,A12,A13,A23,A24,A31,A32,A33,A34,A35,A36,A37,A38,A41,A42,A43,A44,' \
8-
'A45,A46,A71,A72,A73' #.split(',')
9-
wal = 'A25,A51,A52,A53,A54,A55,A56,A57,A61,A62,A63,A64,A65,A81,A82,A83,A84,' \
10-
'A85,A91,A92,A93' #.split(',')
7+
vla = 'A11,A12,A13,A23,A24,A31,A32,A33,A34,A35,A36,A37,A38,A41,A42,A43,A44,A45,A46,A71,A72,A73'.split(',')
8+
wal = 'A25,A51,A52,A53,A54,A55,A56,A57,A61,A62,A63,A64,A65,A81,A82,A83,A84,A85,A91,A92,A93'.split(',')
119
#bru = ['A21']
1210
bru = 'A21'
1311

0 commit comments

Comments
 (0)