Skip to content

docs: add runnable code samples for DataFrames I/O methods and property #129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 25, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 164 additions & 8 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,19 @@ class DataFrame(NDFrame):

@property
def shape(self) -> tuple[int, int]:
"""Return a tuple representing the dimensionality of the DataFrame."""
"""
Return a tuple representing the dimensionality of the DataFrame.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2, 3],
... 'col2': [4, 5, 6]})
>>> df.shape
(3, 2)
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
Expand All @@ -44,21 +56,31 @@ def axes(self) -> list:
It has the row axis labels and column axis labels as the only members.
They are returned in that order.

Examples
**Examples:**

.. code-block::
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
df.axes
[RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'],
dtype='object')]
>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> df.axes[1:]
[Index(['col1', 'col2'], dtype='object')]
"""
return [self.index, self.columns]

@property
def values(self) -> np.ndarray:
"""Return the values of DataFrame in the form of a NumPy array.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> df.values
array([[1, 3],
[2, 4]], dtype=object)

Args:
dtype (default None):
The dtype to pass to `numpy.asarray()`.
Expand All @@ -76,6 +98,16 @@ def to_numpy(self, dtype=None, copy=False, na_value=None, **kwargs) -> np.ndarra
"""
Convert the DataFrame to a NumPy array.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> df.to_numpy()
array([[1, 3],
[2, 4]], dtype=object)

Args:
dtype (None):
The dtype to pass to `numpy.asarray()`.
Expand All @@ -101,6 +133,15 @@ def to_gbq(
) -> None:
"""Write a DataFrame to a BigQuery table.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> # destination_table = PROJECT_ID + "." + DATASET_ID + "." + TABLE_NAME
>>> df.to_gbq("bigframes-dev.birds.test-numbers", if_exists="replace")

Args:
destination_table (str):
Name of table to be written, in the form ``dataset.tablename``
Expand Down Expand Up @@ -137,6 +178,15 @@ def to_parquet(
This function writes the dataframe as a `parquet file
<https://parquet.apache.org/>`_ to Cloud Storage.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> gcs_bucket = "gs://bigframes-dev-testing/sample_parquet*.parquet"
>>> df.to_parquet(path=gcs_bucket)

Args:
path (str):
Destination URI(s) of Cloud Storage file(s) to store the extracted dataframe
Expand Down Expand Up @@ -171,6 +221,35 @@ def to_dict(
The type of the key-value pairs can be customized with the parameters
(see below).

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> df.to_dict()
{'col1': {0: 1, 1: 2}, 'col2': {0: 3, 1: 4}}

You can specify the return orientation.

>>> df.to_dict('series')
{'col1': 0 1
1 2
Name: col1, dtype: Int64,
'col2': 0 3
1 4
Name: col2, dtype: Int64}

>>> df.to_dict('split')
{'index': [0, 1], 'columns': ['col1', 'col2'], 'data': [[1, 3], [2, 4]]}

>>> df.to_dict("tight")
{'index': [0, 1],
'columns': ['col1', 'col2'],
'data': [[1, 3], [2, 4]],
'index_names': [None],
'column_names': [None]}

Args:
orient (str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}):
Determines the type of the values of the dictionary.
Expand Down Expand Up @@ -213,6 +292,15 @@ def to_excel(self, excel_writer, sheet_name: str = "Sheet1", **kwargs) -> None:
Note that creating an `ExcelWriter` object with a file name that already
exists will result in the contents of the existing file being erased.

**Examples:**

>>> import bigframes.pandas as bpd
>>> import tempfile
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> df.to_excel(tempfile.TemporaryFile())

Args:
excel_writer (path-like, file-like, or ExcelWriter object):
File path or existing ExcelWriter.
Expand All @@ -231,6 +319,23 @@ def to_latex(
into a main LaTeX document or read from an external file
with ``\input{{table.tex}}``.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> print(df.to_latex())
\begin{tabular}{lrr}
\toprule
& col1 & col2 \\
\midrule
0 & 1 & 3 \\
1 & 2 & 4 \\
\bottomrule
\end{tabular}
<BLANKLINE>

Args:
buf (str, Path or StringIO-like, optional, default None):
Buffer to write to. If None, the output is returned as a string.
Expand All @@ -253,6 +358,16 @@ def to_records(
Index will be included as the first field of the record array if
requested.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> df.to_records()
rec.array([(0, 1, 3), (1, 2, 4)],
dtype=[('index', 'O'), ('col1', 'O'), ('col2', 'O')])

Args:
index (bool, default True):
Include index in resulting record array, stored in 'index'
Expand Down Expand Up @@ -298,6 +413,17 @@ def to_string(
):
"""Render a DataFrame to a console-friendly tabular output.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> print(df.to_string())
col1 col2
0 1 3
1 2 4

Args:
buf (str, Path or StringIO-like, optional, default None):
Buffer to write to. If None, the output is returned as a string.
Expand Down Expand Up @@ -363,6 +489,18 @@ def to_markdown(
):
"""Print DataFrame in Markdown-friendly format.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> print(df.to_markdown())
| | col1 | col2 |
|---:|-------:|-------:|
| 0 | 1 | 3 |
| 1 | 2 | 4 |

Args:
buf (str, Path or StringIO-like, optional, default None):
Buffer to write to. If None, the output is returned as a string.
Expand All @@ -371,7 +509,7 @@ def to_markdown(
index (bool, optional, default True):
Add index (row) labels.
**kwargs
These parameters will be passed to `tabulate <https://pypi.org/project/tabulate>`_.
These parameters will be passed to `tabulate <https://pypi.org/project/tabulate>`_.

Returns:
DataFrame in Markdown-friendly format.
Expand All @@ -381,6 +519,15 @@ def to_markdown(
def to_pickle(self, path, **kwargs) -> None:
"""Pickle (serialize) object to file.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> gcs_bucket = "gs://bigframes-dev-testing/sample_pickle_gcs.pkl"
>>> df.to_pickle(path=gcs_bucket)

Args:
path (str):
File path where the pickled object will be stored.
Expand All @@ -391,6 +538,15 @@ def to_orc(self, path=None, **kwargs) -> bytes | None:
"""
Write a DataFrame to the ORC format.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> import tempfile
>>> df.to_orc(tempfile.TemporaryFile())

Args:
path (str, file-like object or None, default None):
If a string, it will be used as Root Directory path
Expand Down