20
20
import pandas as pd
21
21
22
22
from apache_beam .dataframe import doctests
23
- from apache_beam .dataframe .frames import PD_VERSION
24
23
from apache_beam .dataframe .pandas_top_level_functions import _is_top_level_function
25
24
26
25
@@ -69,8 +68,7 @@ def test_ndframe_tests(self):
69
68
"df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})"
70
69
],
71
70
'pandas.core.generic.NDFrame.fillna' : [
72
- 'df.fillna(method=\' ffill\' )' ,
73
- 'df.fillna(method="ffill")' ,
71
+ "df.fillna(method='ffill')" ,
74
72
'df.fillna(value=values, limit=1)' ,
75
73
],
76
74
'pandas.core.generic.NDFrame.sort_values' : ['*' ],
@@ -166,8 +164,7 @@ def test_dataframe_tests(self):
166
164
'pandas.core.frame.DataFrame.cumprod' : ['*' ],
167
165
'pandas.core.frame.DataFrame.diff' : ['*' ],
168
166
'pandas.core.frame.DataFrame.fillna' : [
169
- 'df.fillna(method=\' ffill\' )' ,
170
- 'df.fillna(method="ffill")' ,
167
+ "df.fillna(method='ffill')" ,
171
168
'df.fillna(value=values, limit=1)' ,
172
169
],
173
170
'pandas.core.frame.DataFrame.items' : ['*' ],
@@ -240,17 +237,13 @@ def test_dataframe_tests(self):
240
237
# reindex not supported
241
238
's2 = s.reindex([1, 0, 2, 3])' ,
242
239
],
243
- 'pandas.core.frame.DataFrame.resample' : ['*' ],
244
- 'pandas.core.frame.DataFrame.values' : ['*' ],
245
240
},
246
241
not_implemented_ok = {
247
242
'pandas.core.frame.DataFrame.transform' : [
248
243
# str arg not supported. Tested with np.sum in
249
244
# frames_test.py::DeferredFrameTest::test_groupby_transform_sum
250
245
"df.groupby('Date')['Data'].transform('sum')" ,
251
246
],
252
- 'pandas.core.frame.DataFrame.swaplevel' : ['*' ],
253
- 'pandas.core.frame.DataFrame.melt' : ['*' ],
254
247
'pandas.core.frame.DataFrame.reindex_axis' : ['*' ],
255
248
'pandas.core.frame.DataFrame.round' : [
256
249
'df.round(decimals)' ,
@@ -274,20 +267,13 @@ def test_dataframe_tests(self):
274
267
'pandas.core.frame.DataFrame.set_index' : [
275
268
"df.set_index([s, s**2])" ,
276
269
],
277
-
278
- # TODO(BEAM-12495)
279
- 'pandas.core.frame.DataFrame.value_counts' : [
280
- 'df.value_counts(dropna=False)'
281
- ],
282
270
},
283
271
skip = {
284
272
# s2 created with reindex
285
273
'pandas.core.frame.DataFrame.dot' : [
286
274
'df.dot(s2)' ,
287
275
],
288
276
289
- 'pandas.core.frame.DataFrame.resample' : ['df' ],
290
- 'pandas.core.frame.DataFrame.asfreq' : ['*' ],
291
277
# Throws NotImplementedError when modifying df
292
278
'pandas.core.frame.DataFrame.axes' : [
293
279
# Returns deferred index.
@@ -316,14 +302,6 @@ def test_dataframe_tests(self):
316
302
'pandas.core.frame.DataFrame.to_markdown' : ['*' ],
317
303
'pandas.core.frame.DataFrame.to_parquet' : ['*' ],
318
304
319
- # Raises right exception, but testing framework has matching issues.
320
- # Tested in `frames_test.py`.
321
- 'pandas.core.frame.DataFrame.insert' : [
322
- 'df' ,
323
- 'df.insert(1, "newcol", [99, 99])' ,
324
- 'df.insert(0, "col1", [100, 100], allow_duplicates=True)'
325
- ],
326
-
327
305
'pandas.core.frame.DataFrame.to_records' : [
328
306
'df.index = df.index.rename("I")' ,
329
307
'index_dtypes = f"<S{df.index.str.len().max()}"' , # 1.x
@@ -407,8 +385,7 @@ def test_series_tests(self):
407
385
's.dot(arr)' , # non-deferred result
408
386
],
409
387
'pandas.core.series.Series.fillna' : [
410
- 'df.fillna(method=\' ffill\' )' ,
411
- 'df.fillna(method="ffill")' ,
388
+ "df.fillna(method='ffill')" ,
412
389
'df.fillna(value=values, limit=1)' ,
413
390
],
414
391
'pandas.core.series.Series.items' : ['*' ],
@@ -457,11 +434,11 @@ def test_series_tests(self):
457
434
's.drop_duplicates()' ,
458
435
"s.drop_duplicates(keep='last')" ,
459
436
],
437
+ 'pandas.core.series.Series.repeat' : [
438
+ 's.repeat([1, 2, 3])'
439
+ ],
460
440
'pandas.core.series.Series.reindex' : ['*' ],
461
441
'pandas.core.series.Series.autocorr' : ['*' ],
462
- 'pandas.core.series.Series.repeat' : ['s.repeat([1, 2, 3])' ],
463
- 'pandas.core.series.Series.resample' : ['*' ],
464
- 'pandas.core.series.Series' : ['ser.iloc[0] = 999' ],
465
442
},
466
443
not_implemented_ok = {
467
444
'pandas.core.series.Series.transform' : [
@@ -474,19 +451,15 @@ def test_series_tests(self):
474
451
'ser.groupby(["a", "b", "a", np.nan]).mean()' ,
475
452
'ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()' ,
476
453
],
477
- 'pandas.core.series.Series.swaplevel' :['*' ]
478
454
},
479
455
skip = {
480
- # Relies on setting values with iloc
481
- 'pandas.core.series.Series' : ['ser' , 'r' ],
482
456
'pandas.core.series.Series.groupby' : [
483
457
# TODO(BEAM-11393): This example requires aligning two series
484
458
# with non-unique indexes. It only works in pandas because
485
459
# pandas can recognize the indexes are identical and elide the
486
460
# alignment.
487
461
'ser.groupby(ser > 100).mean()' ,
488
462
],
489
- 'pandas.core.series.Series.asfreq' : ['*' ],
490
463
# error formatting
491
464
'pandas.core.series.Series.append' : [
492
465
's1.append(s2, verify_integrity=True)' ,
@@ -518,12 +491,12 @@ def test_series_tests(self):
518
491
# Inspection after modification.
519
492
's'
520
493
],
521
- 'pandas.core.series.Series.resample' : ['df' ],
522
494
})
523
495
self .assertEqual (result .failed , 0 )
524
496
525
497
def test_string_tests (self ):
526
- if PD_VERSION < (1 , 2 ):
498
+ PD_VERSION = tuple (int (v ) for v in pd .__version__ .split ('.' ))
499
+ if PD_VERSION < (1 , 2 , 0 ):
527
500
module = pd .core .strings
528
501
else :
529
502
# Definitions were moved to accessor in pandas 1.2.0
@@ -695,13 +668,11 @@ def test_groupby_tests(self):
695
668
'pandas.core.groupby.generic.SeriesGroupBy.diff' : ['*' ],
696
669
'pandas.core.groupby.generic.DataFrameGroupBy.hist' : ['*' ],
697
670
'pandas.core.groupby.generic.DataFrameGroupBy.fillna' : [
698
- 'df.fillna(method=\' ffill\' )' ,
699
- 'df.fillna(method="ffill")' ,
671
+ "df.fillna(method='ffill')" ,
700
672
'df.fillna(value=values, limit=1)' ,
701
673
],
702
674
'pandas.core.groupby.generic.SeriesGroupBy.fillna' : [
703
- 'df.fillna(method=\' ffill\' )' ,
704
- 'df.fillna(method="ffill")' ,
675
+ "df.fillna(method='ffill')" ,
705
676
'df.fillna(value=values, limit=1)' ,
706
677
],
707
678
},
@@ -711,7 +682,6 @@ def test_groupby_tests(self):
711
682
'pandas.core.groupby.generic.SeriesGroupBy.transform' : ['*' ],
712
683
'pandas.core.groupby.generic.SeriesGroupBy.idxmax' : ['*' ],
713
684
'pandas.core.groupby.generic.SeriesGroupBy.idxmin' : ['*' ],
714
- 'pandas.core.groupby.generic.SeriesGroupBy.apply' : ['*' ],
715
685
},
716
686
skip = {
717
687
'pandas.core.groupby.generic.SeriesGroupBy.cov' : [
@@ -728,14 +698,6 @@ def test_groupby_tests(self):
728
698
# These examples rely on grouping by a list
729
699
'pandas.core.groupby.generic.SeriesGroupBy.aggregate' : ['*' ],
730
700
'pandas.core.groupby.generic.DataFrameGroupBy.aggregate' : ['*' ],
731
- 'pandas.core.groupby.generic.SeriesGroupBy.transform' : [
732
- # Dropping invalid columns during a transform is unsupported.
733
- 'grouped.transform(lambda x: (x - x.mean()) / x.std())'
734
- ],
735
- 'pandas.core.groupby.generic.DataFrameGroupBy.transform' : [
736
- # Dropping invalid columns during a transform is unsupported.
737
- 'grouped.transform(lambda x: (x - x.mean()) / x.std())'
738
- ],
739
701
})
740
702
self .assertEqual (result .failed , 0 )
741
703
0 commit comments