75
75
import sys
76
76
77
77
import numpy as np
78
+ import pandas as pd
79
+
78
80
import tables
79
81
80
82
from utils import prod , table2str , table2csv , table2iode , timed , unique
@@ -292,6 +294,17 @@ def __new__(cls, data, axes=None):
292
294
axes = list (axes )
293
295
obj .axes = axes
294
296
return obj
297
+
298
+ def as_dataframe (self ):
299
+ axes_labels = [a .labels .tolist () for a in self .axes [:- 1 ]]
300
+ axes_names = [a .name for a in self .axes [:- 1 ]]
301
+ axes_names [- 1 ] = axes_names [- 1 ] + '\\ ' + self .axes [- 1 ].name
302
+ columns = self .axes [- 1 ].labels .tolist ()
303
+ full_index = [i for i in product (* axes_labels )]
304
+ index = pd .MultiIndex .from_tuples (full_index , names = axes_names )
305
+ df = pd .DataFrame (self .reshape (len (full_index ), len (columns )), index , columns )
306
+ return df
307
+
295
308
296
309
#noinspection PyAttributeOutsideInit
297
310
def __array_finalize__ (self , obj ):
@@ -401,39 +414,27 @@ def __str__(self):
401
414
def as_table (self ):
402
415
if not self .ndim :
403
416
return []
417
+
418
+ #ert | unit | geo\time | 2012 | 2011 | 2010
419
+ #NEER27 | I05 | AT | 101.41 | 101.63 | 101.63
420
+ #NEER27 | I05 | AU | 134.86 | 125.29 | 117.08
404
421
405
- # gender | |
406
- # False | True | total
407
- # 20 | 16 | 35
408
-
409
- # dead | gender | |
410
- # | False | True | total
411
- # False | 20 | 15 | 35
412
- # True | 0 | 1 | 1
413
- # total | 20 | 16 | 36
414
-
415
- # agegroup | gender | dead | |
416
- # | | False | True | total
417
- # 5 | False | 20 | 15 | xx
418
- # 5 | True | 0 | 1 | xx
419
- # 10 | False | 25 | 10 | xx
420
- # 10 | True | 1 | 1 | xx
421
- # | total | xx | xx | xx
422
422
width = self .shape [- 1 ]
423
423
height = prod (self .shape [:- 1 ])
424
424
if self .axes is not None :
425
+ #axes_names = [axis.name for axis in self.axes]
425
426
axes_names = [axis .name for axis in self .axes ]
427
+ if len (axes_names ) > 1 :
428
+ axes_names [- 2 ] = '\\ ' .join (axes_names [- 2 :])
429
+ axes_names .pop ()
430
+
426
431
axes_labels = [axis .labels for axis in self .axes ]
427
432
else :
428
433
axes_names = None
429
434
axes_labels = None
430
435
431
436
if axes_names is not None :
432
- result = [axes_names +
433
- ['' ] * (width - 1 ),
434
- # 2nd line
435
- ['' ] * (self .ndim - 1 ) +
436
- list (axes_labels [- 1 ])]
437
+ result = [axes_names + list (axes_labels [- 1 ])]
437
438
#if self.row_totals is not None:
438
439
# result[0].append('')
439
440
# result[1].append('total')
@@ -659,44 +660,82 @@ def ToAv(self, filename):
659
660
660
661
661
662
def parse(s):
    """Convert a CSV cell to a bool, int or float when possible.

    Non-string values pass through unchanged; strings that cannot be
    interpreted as a number or boolean are returned lower-cased.
    """
    if not isinstance(s, str):
        # already-typed cell (e.g. a number from pandas): nothing to do
        return s
    lowered = s.lower()
    if lowered in ('0', '1', 'false', 'true'):
        # boolean flags may be spelled as 0/1 or false/true
        return lowered in ('1', 'true')
    if lowered.isdigit():
        return int(lowered)
    try:
        return float(lowered)
    except ValueError:
        return lowered
672
676
677
def df_aslarray(df, na=np.nan):
    """Convert a pandas DataFrame with a row MultiIndex into an LArray.

    The last level of the row index may carry a combined name of the
    form "rowaxis\\colaxis"; in that case the part after the backslash
    names the columns axis, otherwise the columns axis is named 'time'.

    na: replacement value for missing data.  Defaults to NaN, meaning
    missing values are left as-is.
    """
    # labels in index.levels are sorted, but the data is not, so we need
    # to recompute the labels in their original (file) order
    axes_labels = [list(unique(level[labels]))
                   for level, labels in zip(df.index.levels,
                                            df.index.labels)]
    # copy so we do not mutate the caller's df.index.names in-place
    axes_names = list(df.index.names)
    laxis = axes_names[-1].split('\\')
    # bug fix: split() always returns at least one element, so the old
    # "len(laxis) > 0" test was always true (making the 'time' fallback
    # unreachable and laxis[1] an IndexError when there is no backslash)
    if len(laxis) > 1:
        axes_names[-1] = laxis[0]
    axes = [Axis(name, labels)
            for name, labels in zip(axes_names, axes_labels)]
    # pandas treats the "time" labels as column names (strings) so we
    # need to convert them to values
    colaxis_name = laxis[1] if len(laxis) > 1 else 'time'
    axes.append(Axis(colaxis_name,
                     [parse(cell) for cell in df.columns.values]))
    # reindex on the full cartesian product so the data can be reshaped
    # into a dense array
    sdf = df.reindex(list(product(*axes_labels)), df.columns.values)
    # bug fix: "na != np.nan" was always True (NaN compares unequal to
    # itself); only fill when a real replacement value was requested
    if not (isinstance(na, float) and np.isnan(na)):
        sdf.fillna(na, inplace=True)
    data = sdf.values.reshape([len(axis.labels) for axis in axes])
    return LArray(data, axes)
673
697
674
# CSV functions
def read_csv(filepath, sep=',', na=np.nan):
    """Read a CSV file into an LArray.

    The header must list the axes names first (string cells), followed
    by the labels of the last (columns) axis.  `na` is forwarded to
    df_aslarray as the missing-value replacement.
    """
    # peek at the header: the non-numeric cells are the axes names, the
    # remaining header cells are the column labels
    with open(filepath, 'rb') as f:
        header = [parse(cell)
                  for cell in next(csv.reader(f, delimiter=sep))]
    axes_names = [cell for cell in header if isinstance(cell, basestring)]
    df = pd.read_csv(filepath, index_col=range(len(axes_names)), sep=sep)
    assert df.index.names == axes_names, \
        "%s != %s" % (df.index.names, axes_names)
    return df_aslarray(df, na)
710
+
711
def save_csv(l_array, filepath, sep=',', na=np.nan):
    """Write an LArray to a CSV file via its DataFrame representation.

    na: representation used for missing values in the output file.
    Bug fix: the original implementation accepted `na` but silently
    ignored it; it is now forwarded to pandas as na_rep.  The default
    (NaN) keeps pandas' behavior of writing empty cells.
    """
    df = l_array.as_dataframe()
    if isinstance(na, float) and np.isnan(na):
        # default: let pandas write missing values as empty cells
        df.to_csv(filepath, sep=sep)
    else:
        df.to_csv(filepath, sep=sep, na_rep=str(na))
714
+
715
# HDF5 functions
def save_h5(l_array, name, filepath):
    """Store an LArray under key `name` in the HDF5 file at `filepath`.

    Bug fix: the store is now closed in a finally block so the file
    handle is released even when put() raises.
    """
    df = l_array.as_dataframe()
    store = pd.HDFStore(filepath)
    try:
        store.put(name, df)
    finally:
        store.close()
721
+
722
def read_h5(name, filepath):
    """Read the DataFrame stored under key `name` in an HDF5 file and
    convert it to an LArray.

    Bug fix: the store is now closed in a finally block so the file
    handle is released even when get() raises.
    """
    store = pd.HDFStore(filepath)
    try:
        df = store.get(name)
    finally:
        store.close()
    return df_aslarray(df)
727
+
728
#EXCEL functions
def save_excel(l_array, name, filepath):
    """Write an LArray as sheet `name` of the Excel file at `filepath`."""
    writer = pd.ExcelWriter(filepath)
    l_array.as_dataframe().to_excel(writer, name)
    writer.save()
734
+
735
def read_excel(name, filepath, index_col):
    """Read sheet `name` of an Excel file into an LArray.

    index_col: column number(s) forming the row (Multi)Index, forwarded
    to pandas.read_excel.
    """
    frame = pd.read_excel(filepath, name, index_col=index_col)
    return df_aslarray(frame)
738
+
700
739
def SaveMatrices (h5_filename ):
701
740
try :
702
741
h5file = tables .openFile (h5_filename , mode = "w" , title = "IodeMatrix" )
@@ -755,4 +794,10 @@ def LoadMatrix(h5_filename, matname):
755
794
if __name__ == '__main__':
    #reg.Collapse('c:/tmp/reg.csv')
    #reg.ToAv('reg.av')
    # ad-hoc round-trip smoke test: read a tab-separated file (missing
    # values replaced by 0), then write it back out and re-read it
    # through the CSV, Excel and HDF5 back-ends
    test = read_csv('ert_eff_ic_a.tsv', '\t', 0)
    test.ToCsv('brol.csv')
    save_csv(test, 'brolpd.csv')
    save_excel(test, "TEST", "test.xls")
    # NOTE(review): results of the re-reads are bound but never compared
    # against `test` — presumably inspected manually; confirm
    test_xls = read_excel("TEST", "test.xls", index_col=[0, 1, 2])
    save_h5(test, 'test', 'store.h5')
    test_h5 = read_h5('test', 'store.h5')
0 commit comments