From 7e54e3041bbf01d49500a9cab949bc33291be019 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Fri, 20 Oct 2023 21:29:55 +0000 Subject: [PATCH 1/6] docs: add runnable code samples for DataFrames I/O methods and property --- .../bigframes_vendored/pandas/core/frame.py | 139 +++++++++++++++++- 1 file changed, 131 insertions(+), 8 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index a5c12d7b32..9e69054bc6 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -33,7 +33,18 @@ class DataFrame(NDFrame): @property def shape(self) -> tuple[int, int]: - """Return a tuple representing the dimensionality of the DataFrame.""" + """ + Return a tuple representing the dimensionality of the DataFrame. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.shape + (2, 2) + """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @property @@ -44,14 +55,14 @@ def axes(self) -> list: It has the row axis labels and column axis labels as the only members. They are returned in that order. - Examples + **Examples:** - .. code-block:: + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None - df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) - df.axes - [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], - dtype='object')] + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.axes[1:] + [Index(['col1', 'col2'], dtype='object')] """ return [self.index, self.columns] @@ -59,6 +70,16 @@ def axes(self) -> list: def values(self) -> np.ndarray: """Return the values of DataFrame in the form of a NumPy array. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.values + array([[1, 3], + [2, 4]], dtype=object) + Args: dytype (default None): The dtype to pass to `numpy.asarray()`. @@ -76,6 +97,16 @@ def to_numpy(self, dtype=None, copy=False, na_value=None, **kwargs) -> np.ndarra """ Convert the DataFrame to a NumPy array. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_numpy() + array([[1, 3], + [2, 4]], dtype=object) + Args: dtype (None): The dtype to pass to `numpy.asarray()`. @@ -101,6 +132,15 @@ def to_gbq( ) -> None: """Write a DataFrame to a BigQuery table. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> # destination_table = PROJECT_ID + "." + DATASET_ID + "." + TABLE_NAME + >>> df.to_gbq("bigframes-dev.birds.test-numbers", if_exists="replace") + Args: destination_table (str): Name of table to be written, in the form ``dataset.tablename`` @@ -137,6 +177,15 @@ def to_parquet( This function writes the dataframe as a `parquet file <https://parquet.apache.org/>`_ to Cloud Storage. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> gcs_bucket = "gs://bigframes-dev-testing/sample_parquet*.parquet" + >>> df.to_parquet(path=gcs_bucket) + Args: path (str): Destination URI(s) of Cloud Storage files(s) to store the extracted dataframe @@ -171,6 +220,15 @@ def to_dict( The type of the key-value pairs can be customized with the parameters (see below). 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_dict() + {'col1': {0: 1, 1: 2}, 'col2': {0: 3, 1: 4}} + Args: orient (str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}): Determines the type of the values of the dictionary. @@ -213,6 +271,15 @@ def to_excel(self, excel_writer, sheet_name: str = "Sheet1", **kwargs) -> None: Note that creating an `ExcelWriter` object with a file name that already exists will result in the contents of the existing file being erased. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import tempfile + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_excel(tempfile.TemporaryFile()) + Args: excel_writer (path-like, file-like, or ExcelWriter object): File path or existing ExcelWriter. @@ -231,6 +298,16 @@ def to_latex( into a main LaTeX document or read from an external file with ``\input{{table.tex}}``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_latex() + '\\begin{tabular}{lrr}\n\\toprule\n & col1 & col2 \\\\\n\\midrule\n0 & 1 & 3 \\\\\n1 & 2 & 4 \\\\\n\\bottomrule\n\\end{tabular}\n' + + Args: buf (str, Path or StringIO-like, optional, default None): Buffer to write to. If None, the output is returned as a string. @@ -253,6 +330,16 @@ def to_records( Index will be included as the first field of the record array if requested. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_records() + rec.array([(0, 1, 3), (1, 2, 4)], + dtype=[('index', 'O'), ('col1', 'O'), ('col2', 'O')]) + Args: index (bool, default True): Include index in resulting record array, stored in 'index' @@ -298,6 +385,15 @@ def to_string( ): """Render a DataFrame to a console-friendly tabular output. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_string() + ' col1 col2\\n0 1 3\\n1 2 4' + Args: buf (str, Path or StringIO-like, optional, default None): Buffer to write to. If None, the output is returned as a string. @@ -363,6 +459,15 @@ def to_markdown( ): """Print DataFrame in Markdown-friendly format. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_string() + ' col1 col2\\n0 1 3\\n1 2 4' + Args: buf (str, Path or StringIO-like, optional, default None): Buffer to write to. If None, the output is returned as a string. @@ -371,7 +476,7 @@ def to_markdown( index (bool, optional, default True): Add index (row) labels. **kwargs - These parameters will be passed to `tabulate <https://pypi.org/project/tabulate>`_. + These parameters will be passed to `tabulate <https://pypi.org/project/tabulate>`_. Returns: DataFrame in Markdown-friendly format. @@ -381,6 +486,15 @@ def to_markdown( def to_pickle(self, path, **kwargs) -> None: """Pickle (serialize) object to file. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> gcs_bucket = "gs://bigframes-dev-testing/sample_pickle_gcs.pkl" + >>> df.to_pickle(path=gcs_bucket) + Args: path (str): File path where the pickled object will be stored. 
@@ -391,6 +505,15 @@ def to_orc(self, path=None, **kwargs) -> bytes | None: """ Write a DataFrame to the ORC format. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> import tempfile + >>> df.to_orc(tempfile.TemporaryFile()) + Args: path (str, file-like object or None, default None): If a string, it will be used as Root Directory path From 47a665116f61b557a5d8896485e002612d5807a0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 23 Oct 2023 12:05:58 -0500 Subject: [PATCH 2/6] fix: expose `bigframes.pandas.reset_session` as a public API (#128) --- bigframes/pandas/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 971d40f801..5c1928e6f0 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -484,4 +484,5 @@ def read_gbq_function(function_name: str): # Session management APIs "get_global_session", "close_session", + "reset_session", ] From 5fcd8a1bf6d5e612805e56f30a6ad3d439e25a38 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Mon, 23 Oct 2023 18:55:15 +0000 Subject: [PATCH 3/6] fix: address the comment --- third_party/bigframes_vendored/pandas/core/frame.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 9e69054bc6..fd89aa8aea 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -41,9 +41,10 @@ def shape(self) -> tuple[int, int]: >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df = bpd.DataFrame({'col1': [1, 2, 3], + ... 
'col2': [4, 5, 6]}) >>> df.shape - (2, 2) + (3, 2) """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 90b68131a6a082fc59bdf05ebe095d371369437f Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Mon, 23 Oct 2023 20:32:33 +0000 Subject: [PATCH 4/6] Empty commit From dad54b0499175f374345db355774b37a4f2b157f Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Mon, 23 Oct 2023 23:49:38 +0000 Subject: [PATCH 5/6] fix: address comments for better visualization of the output --- .../bigframes_vendored/pandas/core/frame.py | 46 ++++++++++++++++--- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index fd89aa8aea..13a81b4645 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -230,6 +230,26 @@ def to_dict( >>> df.to_dict() {'col1': {0: 1, 1: 2}, 'col2': {0: 3, 1: 4}} + You can specify the return orientation. + + >>> df.to_dict('series') + {'col1': 0 1 + 1 2 + Name: col1, dtype: Int64, + 'col2': 0 3 + 1 4 + Name: col2, dtype: Int64} + + >>> df.to_dict('split') + {'index': [0, 1], 'columns': ['col1', 'col2'], 'data': [[1, 3], [2, 4]]} + + >>> df.to_dict("tight") + {'index': [0, 1], + 'columns': ['col1', 'col2'], + 'data': [[1, 3], [2, 4]], + 'index_names': [None], + 'column_names': [None]} + Args: orient (str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}): Determines the type of the values of the dictionary. 
@@ -305,9 +325,16 @@ def to_latex( >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) - >>> df.to_latex() - '\\begin{tabular}{lrr}\n\\toprule\n & col1 & col2 \\\\\n\\midrule\n0 & 1 & 3 \\\\\n1 & 2 & 4 \\\\\n\\bottomrule\n\\end{tabular}\n' - + >>> print(df.to_latex()) + \begin{tabular}{lrr} + \toprule + & col1 & col2 \\ + \midrule + 0 & 1 & 3 \\ + 1 & 2 & 4 \\ + \bottomrule + \end{tabular} + Args: buf (str, Path or StringIO-like, optional, default None): @@ -392,8 +419,10 @@ def to_string( >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) - >>> df.to_string() - ' col1 col2\\n0 1 3\\n1 2 4' + >>> print(df.to_string()) + col1 col2 + 0 1 3 + 1 2 4 Args: buf (str, Path or StringIO-like, optional, default None): @@ -466,8 +495,11 @@ def to_markdown( >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) - >>> df.to_string() - ' col1 col2\\n0 1 3\\n1 2 4' + >>> print(df.to_markdown()) + | | col1 | col2 | + |---:|-------:|-------:| + | 0 | 1 | 3 | + | 1 | 2 | 4 | Args: buf (str, Path or StringIO-like, optional, default None): From 17f5a8dfddb7ad28918002f01145b12eaf4fee23 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 24 Oct 2023 04:35:41 +0000 Subject: [PATCH 6/6] Empty commit