20
20
import pandas as pd
21
21
22
22
from apache_beam .dataframe import doctests
23
+ from apache_beam .dataframe .frames import PD_VERSION
23
24
from apache_beam .dataframe .pandas_top_level_functions import _is_top_level_function
24
25
25
26
@@ -68,7 +69,8 @@ def test_ndframe_tests(self):
68
69
"df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})"
69
70
],
70
71
'pandas.core.generic.NDFrame.fillna' : [
71
- "df.fillna(method='ffill')" ,
72
+ 'df.fillna(method=\'ffill\')' ,
73
+ 'df.fillna(method="ffill")' ,
72
74
'df.fillna(value=values, limit=1)' ,
73
75
],
74
76
'pandas.core.generic.NDFrame.sort_values' : ['*' ],
@@ -164,7 +166,8 @@ def test_dataframe_tests(self):
164
166
'pandas.core.frame.DataFrame.cumprod' : ['*' ],
165
167
'pandas.core.frame.DataFrame.diff' : ['*' ],
166
168
'pandas.core.frame.DataFrame.fillna' : [
167
- "df.fillna(method='ffill')" ,
169
+ 'df.fillna(method=\'ffill\')' ,
170
+ 'df.fillna(method="ffill")' ,
168
171
'df.fillna(value=values, limit=1)' ,
169
172
],
170
173
'pandas.core.frame.DataFrame.items' : ['*' ],
@@ -237,13 +240,17 @@ def test_dataframe_tests(self):
237
240
# reindex not supported
238
241
's2 = s.reindex([1, 0, 2, 3])' ,
239
242
],
243
+ 'pandas.core.frame.DataFrame.resample' : ['*' ],
244
+ 'pandas.core.frame.DataFrame.values' : ['*' ],
240
245
},
241
246
not_implemented_ok = {
242
247
'pandas.core.frame.DataFrame.transform' : [
243
248
# str arg not supported. Tested with np.sum in
244
249
# frames_test.py::DeferredFrameTest::test_groupby_transform_sum
245
250
"df.groupby('Date')['Data'].transform('sum')" ,
246
251
],
252
+ 'pandas.core.frame.DataFrame.swaplevel' : ['*' ],
253
+ 'pandas.core.frame.DataFrame.melt' : ['*' ],
247
254
'pandas.core.frame.DataFrame.reindex_axis' : ['*' ],
248
255
'pandas.core.frame.DataFrame.round' : [
249
256
'df.round(decimals)' ,
@@ -267,13 +274,20 @@ def test_dataframe_tests(self):
267
274
'pandas.core.frame.DataFrame.set_index' : [
268
275
"df.set_index([s, s**2])" ,
269
276
],
277
+
278
+ # TODO(BEAM-12495)
279
+ 'pandas.core.frame.DataFrame.value_counts' : [
280
+ 'df.value_counts(dropna=False)'
281
+ ],
270
282
},
271
283
skip = {
272
284
# s2 created with reindex
273
285
'pandas.core.frame.DataFrame.dot' : [
274
286
'df.dot(s2)' ,
275
287
],
276
288
289
+ 'pandas.core.frame.DataFrame.resample' : ['df' ],
290
+ 'pandas.core.frame.DataFrame.asfreq' : ['*' ],
277
291
# Throws NotImplementedError when modifying df
278
292
'pandas.core.frame.DataFrame.axes' : [
279
293
# Returns deferred index.
@@ -302,6 +316,14 @@ def test_dataframe_tests(self):
302
316
'pandas.core.frame.DataFrame.to_markdown' : ['*' ],
303
317
'pandas.core.frame.DataFrame.to_parquet' : ['*' ],
304
318
319
+ # Raises the right exception, but the testing framework has trouble matching it.
320
+ # Tested in `frames_test.py`.
321
+ 'pandas.core.frame.DataFrame.insert' : [
322
+ 'df' ,
323
+ 'df.insert(1, "newcol", [99, 99])' ,
324
+ 'df.insert(0, "col1", [100, 100], allow_duplicates=True)'
325
+ ],
326
+
305
327
'pandas.core.frame.DataFrame.to_records' : [
306
328
'df.index = df.index.rename("I")' ,
307
329
'index_dtypes = f"<S{df.index.str.len().max()}"' , # 1.x
@@ -385,7 +407,8 @@ def test_series_tests(self):
385
407
's.dot(arr)' , # non-deferred result
386
408
],
387
409
'pandas.core.series.Series.fillna' : [
388
- "df.fillna(method='ffill')" ,
410
+ 'df.fillna(method=\'ffill\')' ,
411
+ 'df.fillna(method="ffill")' ,
389
412
'df.fillna(value=values, limit=1)' ,
390
413
],
391
414
'pandas.core.series.Series.items' : ['*' ],
@@ -434,11 +457,11 @@ def test_series_tests(self):
434
457
's.drop_duplicates()' ,
435
458
"s.drop_duplicates(keep='last')" ,
436
459
],
437
- 'pandas.core.series.Series.repeat' : [
438
- 's.repeat([1, 2, 3])'
439
- ],
440
460
'pandas.core.series.Series.reindex' : ['*' ],
441
461
'pandas.core.series.Series.autocorr' : ['*' ],
462
+ 'pandas.core.series.Series.repeat' : ['s.repeat([1, 2, 3])' ],
463
+ 'pandas.core.series.Series.resample' : ['*' ],
464
+ 'pandas.core.series.Series' : ['ser.iloc[0] = 999' ],
442
465
},
443
466
not_implemented_ok = {
444
467
'pandas.core.series.Series.transform' : [
@@ -451,15 +474,19 @@ def test_series_tests(self):
451
474
'ser.groupby(["a", "b", "a", np.nan]).mean()' ,
452
475
'ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()' ,
453
476
],
477
+ 'pandas.core.series.Series.swaplevel' :['*' ]
454
478
},
455
479
skip = {
480
+ # Relies on setting values with iloc
481
+ 'pandas.core.series.Series' : ['ser' , 'r' ],
456
482
'pandas.core.series.Series.groupby' : [
457
483
# TODO(BEAM-11393): This example requires aligning two series
458
484
# with non-unique indexes. It only works in pandas because
459
485
# pandas can recognize the indexes are identical and elide the
460
486
# alignment.
461
487
'ser.groupby(ser > 100).mean()' ,
462
488
],
489
+ 'pandas.core.series.Series.asfreq' : ['*' ],
463
490
# error formatting
464
491
'pandas.core.series.Series.append' : [
465
492
's1.append(s2, verify_integrity=True)' ,
@@ -491,12 +518,12 @@ def test_series_tests(self):
491
518
# Inspection after modification.
492
519
's'
493
520
],
521
+ 'pandas.core.series.Series.resample' : ['df' ],
494
522
})
495
523
self .assertEqual (result .failed , 0 )
496
524
497
525
def test_string_tests (self ):
498
- PD_VERSION = tuple (int (v ) for v in pd .__version__ .split ('.' ))
499
- if PD_VERSION < (1 , 2 , 0 ):
526
+ if PD_VERSION < (1 , 2 ):
500
527
module = pd .core .strings
501
528
else :
502
529
# Definitions were moved to accessor in pandas 1.2.0
@@ -668,11 +695,13 @@ def test_groupby_tests(self):
668
695
'pandas.core.groupby.generic.SeriesGroupBy.diff' : ['*' ],
669
696
'pandas.core.groupby.generic.DataFrameGroupBy.hist' : ['*' ],
670
697
'pandas.core.groupby.generic.DataFrameGroupBy.fillna' : [
671
- "df.fillna(method='ffill')" ,
698
+ 'df.fillna(method=\'ffill\')' ,
699
+ 'df.fillna(method="ffill")' ,
672
700
'df.fillna(value=values, limit=1)' ,
673
701
],
674
702
'pandas.core.groupby.generic.SeriesGroupBy.fillna' : [
675
- "df.fillna(method='ffill')" ,
703
+ 'df.fillna(method=\'ffill\')' ,
704
+ 'df.fillna(method="ffill")' ,
676
705
'df.fillna(value=values, limit=1)' ,
677
706
],
678
707
},
@@ -682,6 +711,7 @@ def test_groupby_tests(self):
682
711
'pandas.core.groupby.generic.SeriesGroupBy.transform' : ['*' ],
683
712
'pandas.core.groupby.generic.SeriesGroupBy.idxmax' : ['*' ],
684
713
'pandas.core.groupby.generic.SeriesGroupBy.idxmin' : ['*' ],
714
+ 'pandas.core.groupby.generic.SeriesGroupBy.apply' : ['*' ],
685
715
},
686
716
skip = {
687
717
'pandas.core.groupby.generic.SeriesGroupBy.cov' : [
@@ -698,6 +728,14 @@ def test_groupby_tests(self):
698
728
# These examples rely on grouping by a list
699
729
'pandas.core.groupby.generic.SeriesGroupBy.aggregate' : ['*' ],
700
730
'pandas.core.groupby.generic.DataFrameGroupBy.aggregate' : ['*' ],
731
+ 'pandas.core.groupby.generic.SeriesGroupBy.transform' : [
732
+ # Dropping invalid columns during a transform is unsupported.
733
+ 'grouped.transform(lambda x: (x - x.mean()) / x.std())'
734
+ ],
735
+ 'pandas.core.groupby.generic.DataFrameGroupBy.transform' : [
736
+ # Dropping invalid columns during a transform is unsupported.
737
+ 'grouped.transform(lambda x: (x - x.mean()) / x.std())'
738
+ ],
701
739
})
702
740
self .assertEqual (result .failed , 0 )
703
741
0 commit comments