Skip to content

Commit 03c1184

Browse files
committed
Merge remote-tracking branch 'origin/master'
Conflicts: larray.py test_la.py
2 parents 651b5e4 + 0cef5f2 commit 03c1184

File tree

1 file changed

+118
-88
lines changed

1 file changed

+118
-88
lines changed

larray.py

Lines changed: 118 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,16 @@
113113
# ? improve Labeler I don't know how though :)
114114
# ? implement dict-like behavior for LArray.axes (to be able to do stuff like
115115
# la.axes['sex'].labels
116+
#
116117

117118
import csv
118119
from itertools import izip, product, chain
119120
import string
120121
import sys
121122

122123
import numpy as np
124+
import pandas as pd
125+
123126
import tables
124127

125128
from orderedset import OrderedSet
@@ -518,6 +521,17 @@ def __new__(cls, data, axes=None):
518521
axes = list(axes)
519522
obj.axes = axes
520523
return obj
524+
525+
def as_dataframe(self):
526+
axes_labels = [a.labels.tolist() for a in self.axes[:-1]]
527+
axes_names = [a.name for a in self.axes[:-1]]
528+
axes_names[-1] = axes_names[-1] + '\\' + self.axes[-1].name
529+
columns = self.axes[-1].labels.tolist()
530+
full_index=[i for i in product(*axes_labels)]
531+
index = pd.MultiIndex.from_tuples(full_index, names=axes_names)
532+
df = pd.DataFrame(self.reshape(len(full_index), len(columns)), index, columns)
533+
return df
534+
521535

522536
#noinspection PyAttributeOutsideInit
523537
def __array_finalize__(self, obj):
@@ -644,39 +658,27 @@ def __str__(self):
644658
def as_table(self):
645659
if not self.ndim:
646660
return []
661+
662+
#ert | unit | geo\time | 2012 | 2011 | 2010
663+
#NEER27 | I05 | AT | 101.41 | 101.63 | 101.63
664+
#NEER27 | I05 | AU | 134.86 | 125.29 | 117.08
647665

648-
# gender | |
649-
# False | True | total
650-
# 20 | 16 | 35
651-
652-
# dead | gender | |
653-
# | False | True | total
654-
# False | 20 | 15 | 35
655-
# True | 0 | 1 | 1
656-
# total | 20 | 16 | 36
657-
658-
# agegroup | gender | dead | |
659-
# | | False | True | total
660-
# 5 | False | 20 | 15 | xx
661-
# 5 | True | 0 | 1 | xx
662-
# 10 | False | 25 | 10 | xx
663-
# 10 | True | 1 | 1 | xx
664-
# | total | xx | xx | xx
665666
width = self.shape[-1]
666667
height = prod(self.shape[:-1])
667668
if self.axes is not None:
669+
#axes_names = [axis.name for axis in self.axes]
668670
axes_names = [axis.name for axis in self.axes]
671+
if len(axes_names) > 1:
672+
axes_names[-2] = '\\'.join(axes_names[-2:])
673+
axes_names.pop()
674+
669675
axes_labels = [axis.labels for axis in self.axes]
670676
else:
671677
axes_names = None
672678
axes_labels = None
673679

674680
if axes_names is not None:
675-
result = [axes_names +
676-
[''] * (width - 1),
677-
# 2nd line
678-
[''] * (self.ndim - 1) +
679-
list(axes_labels[-1])]
681+
result = [axes_names + list(axes_labels[-1])]
680682
#if self.row_totals is not None:
681683
# result[0].append('')
682684
# result[1].append('total')
@@ -854,58 +856,39 @@ def sum(self, *args, **kwargs):
854856

855857
#XXX: sep argument does not seem very useful
856858
def to_excel(self, filename, sep=None):
857-
import ExcelCom as ec
859+
import xlsxwriter as xl
858860

859861
if sep is None:
860862
sep = '_'
861863
#sep = self.sep
862-
xl = ec.comExcel()
863-
xl.load(filename)
864+
workbook = xl.Workbook(filename)
864865
if self.ndim > 2:
865866
for key in product(*[axis.labels for axis in self.axes[:-2]]):
866867
sheetname = sep.join(str(k) for k in key)
867-
868868
# sheet names must not:
869869
# * contain any of the following characters: : \ / ? * [ ]
870870
#XXX: this will NOT work for unicode strings !
871871
sheetname = sheetname.translate(string.maketrans('[:]', '(-)'),
872872
r'\/?*') # chars to delete
873873
# * exceed 31 characters
874-
sheetname = sheetname[:31]
874+
# sheetname = sheetname[:31]
875875
# * be blank
876876
assert sheetname, "sheet name cannot be blank"
877-
878-
sheetdata = np.asarray(self[key])
879-
880-
xl.addworksheets(sheetname)
881-
882-
#TODO: reuse as_table, possibly adding an argument to
883-
# as_table to determine how many dimensions should be "folded"
884-
885-
# last axis (time) as columns headers (ie the first row)
886-
xl.setRange(sheetname, 2, 1,
887-
(tuple(str(l) for l in self.axes[-1].labels),))
888-
889-
# next to last axis as rows headers (ie the first column)
890-
xl.setRange(sheetname, 1, 2,
891-
tuple((x,) for x in self.axes[-2].labels))
892-
xl.setRange(sheetname, 2, 2, sheetdata)
877+
worksheet = workbook.add_worksheet(sheetname)
878+
worksheet.write_row(0, 1, self.axes[-1].labels)
879+
worksheet.write_column(1, 0, self.axes[-2].labels)
880+
for row, data in enumerate(np.asarray(self[key])):
881+
worksheet.write_row(1+row, 1, data)
882+
893883
else:
894-
xl.addworksheets('Sheet1')
895-
896-
# last axis (time) as columns headers (ie the first row)
897-
xl.setRange('Sheet1', 2, 1,
898-
(tuple(str(l) for l in self.axes[-1].labels),))
884+
worksheet = workbook.add_worksheet('Sheet1')
885+
worksheet.write_row(0, 1, self.axes[-1].labels)
899886
if self.ndim == 2:
900-
# next to last axis as rows headers (ie the first column)
901-
xl.setRange('Sheet1', 1, 2,
902-
tuple((str(x),) for x in self.axes[-2].labels))
903-
xl.setRange('Sheet1', 2, 2, np.asarray(self))
887+
worksheet.write_column(1, 0, self.axes[-2].labels)
888+
for row, data in enumerate(np.asarray(self)):
889+
worksheet.write_row(1+row, 1, data)
890+
904891

905-
xl.save(filename)
906-
xl.close()
907-
xl.end
908-
del xl
909892

910893
def transpose(self, *args):
911894
axes_names = set(axis.name for axis in args)
@@ -937,44 +920,84 @@ def ToAv(self, filename):
937920

938921

939922
def parse(s):
940-
s = s.lower()
941-
if s in ('0', '1', 'false', 'true'):
942-
return s in ('1', 'true')
943-
elif s.isdigit():
944-
return int(s)
923+
#parameters can be strings or numbers
924+
if(isinstance(s, str)):
925+
s = s.lower()
926+
if s in ('0', '1', 'false', 'true'):
927+
return s in ('1', 'true')
928+
elif s.isdigit():
929+
return int(s)
930+
else:
931+
try:
932+
return float(s)
933+
except ValueError:
934+
return s
945935
else:
946-
try:
947-
return float(s)
948-
except ValueError:
949-
return s
950-
951-
952-
def read_csv(filepath):
953-
import pandas as pd
936+
return s
954937

955-
# read the first line to determine how many axes (time excluded) we have
956-
with open(filepath, 'rb') as f:
957-
reader = csv.reader(f)
958-
header = [parse(cell) for cell in reader.next()]
959-
axes_names = [cell for cell in header if isinstance(cell, basestring)]
960-
df = pd.read_csv(filepath, index_col=range(len(axes_names)))
961-
assert df.index.names == axes_names, "%s != %s" % (df.index.names,
962-
axes_names)
963-
964-
# labels in index.levels are sorted, but the data is not, so we need to
965-
# compute the "unsorted" labels !
966-
# alternatives are to either use "df = df.sort_index()", or
967-
# "df.index.get_level_values(level)" but they are both slower.
938+
def df_aslarray(df, na=np.nan):
968939
axes_labels = [list(unique(level[labels]))
969940
for level, labels in zip(df.index.levels, df.index.labels)]
941+
axes_names = df.index.names
942+
laxis = axes_names[-1].split('\\')
943+
if len(laxis) > 0:
944+
axes_names[-1] = laxis[0]
970945
axes = [Axis(name, labels) for name, labels in zip(axes_names, axes_labels)]
971946
# pandas treats the "time" labels as column names (strings) so we need to
972947
# convert them to values
973-
axes.append(Axis('time', [parse(cell) for cell in df.columns.values]))
974-
data = df.values.reshape([len(axis.labels) for axis in axes])
975-
return LArray(data, axes)
948+
if len(laxis) > 0:
949+
axes_names[-1] = laxis[0]
950+
axes.append(Axis(laxis[1], [parse(cell) for cell in df.columns.values]))
951+
else:
952+
axes.append(Axis('time', [parse(cell) for cell in df.columns.values]))
953+
sdf = df.reindex([i for i in product(*axes_labels)], df.columns.values)
954+
if na != np.nan:
955+
sdf.fillna(na,inplace=True)
956+
data = sdf.values.reshape([len(axis.labels) for axis in axes])
957+
return LArray(data, axes)
976958

977959

960+
# CSV functions
961+
def read_csv(filepath, index_col, sep=',', na=np.nan):
962+
# read the first line to determine how many axes (time excluded) we have
963+
# with open(filepath, 'rb') as f:
964+
# reader = csv.reader(f, delimiter=sep)
965+
# header = [parse(cell) for cell in reader.next()]
966+
# axes_names = [cell for cell in header if isinstance(cell, basestring)]
967+
# df = pd.read_csv(filepath, index_col=range(len(axes_names)), sep=sep)
968+
# assert df.index.names == axes_names, "%s != %s" % (df.index.names,
969+
# axes_names)
970+
df = pd.read_csv(filepath, index_col=index_col, sep=sep)
971+
return df_aslarray(df.reindex_axis(sorted(df.columns), axis=1), na)
972+
973+
def save_csv(l_array, filepath, sep=',', na=np.nan):
974+
df = l_array.as_dataframe()
975+
df.to_csv(filepath, sep=sep)
976+
977+
# HDF5 functions
978+
def save_h5(l_array, name, filepath):
979+
df = l_array.as_dataframe()
980+
store = pd.HDFStore(filepath)
981+
store.put(name, df)
982+
store.close()
983+
984+
def read_h5(name, filepath):
985+
store = pd.HDFStore(filepath)
986+
df = store.get(name)
987+
store.close()
988+
return df_aslarray(df)
989+
990+
#EXCEL functions
991+
def save_excel(l_array, name, filepath):
992+
df = l_array.as_dataframe()
993+
writer = pd.ExcelWriter(filepath)
994+
df.to_excel(writer, name)
995+
writer.save()
996+
997+
def read_excel(name, filepath, index_col):
998+
df=pd.read_excel(filepath, name, index_col=index_col)
999+
return df_aslarray(df.reindex_axis(sorted(df.columns), axis=1))
1000+
9781001
def SaveMatrices(h5_filename):
9791002
try:
9801003
h5file = tables.openFile(h5_filename, mode="w", title="IodeMatrix")
@@ -1033,6 +1056,13 @@ def LoadMatrix(h5_filename, matname):
10331056
if __name__ == '__main__':
10341057
#reg.Collapse('c:/tmp/reg.csv')
10351058
#reg.ToAv('reg.av')
1036-
pass
1037-
1038-
1059+
bel = read_csv('bel.csv', index_col=[0,1,2,3])
1060+
test = read_csv('ert_eff_ic_a.tsv', index_col=[0,1,2], sep='\t', na=0)
1061+
test.ToCsv('brol.csv')
1062+
save_csv(test, 'brolpd.csv')
1063+
test_csv = read_csv('brolpd.csv', index_col=[0,1,2])
1064+
save_excel(test, "TEST", "test.xls")
1065+
test_xls = read_excel("TEST", "test.xls", index_col=[0,1,2])
1066+
save_h5(test, 'test', 'store.h5')
1067+
test_h5 = read_h5('test', 'store.h5')
1068+
save_h5(bel, 'bel', 'store.h5')

0 commit comments

Comments
 (0)