Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,8 @@ repos:
additional_dependencies: [types-requests, types-tabulate, types-PyYAML, pandas-stubs<=2.2.3.241126]
exclude: "^third_party"
args: ["--check-untyped-defs", "--explicit-package-bases", "--ignore-missing-imports"]
- repo: https://github.com/biomejs/pre-commit
rev: v2.0.2
hooks:
- id: biome-check
files: '\.js$'
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# Generated by synthtool. DO NOT EDIT!
include README.rst LICENSE
recursive-include third_party/bigframes_vendored *
recursive-include bigframes *.json *.proto py.typed
recursive-include bigframes *.json *.proto *.js py.typed
recursive-include tests *
global-exclude *.py[co]
global-exclude __pycache__
Expand Down
42 changes: 25 additions & 17 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,22 +779,7 @@ def _repr_html_(self) -> str:
if opts.repr_mode == "deferred":
return formatter.repr_query_job(self._compute_dry_run())

if opts.repr_mode == "anywidget":
import anywidget # type: ignore

# create an iterator for the data batches
batches = self.to_pandas_batches()

# get the first page result
try:
first_page = next(iter(batches))
except StopIteration:
first_page = pandas.DataFrame(columns=self.columns)

# Instantiate and return the widget. The widget's frontend will
# handle the display of the table and pagination
return anywidget.AnyWidget(dataframe=first_page)

# Process blob columns first, regardless of display mode
self._cached()
df = self.copy()
if bigframes.options.display.blob_display:
Expand All @@ -806,7 +791,31 @@ def _repr_html_(self) -> str:
for col in blob_cols:
# TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data.
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
else:
blob_cols = []

if opts.repr_mode == "anywidget":
try:
from IPython.display import display as ipython_display

from bigframes import display

# Always create a new widget instance for each display call
# This ensures that each cell gets its own widget and prevents
# unintended sharing between cells
widget = display.TableWidget(df.copy())

ipython_display(widget)
return "" # Return empty string since we used display()

except (AttributeError, ValueError, ImportError):
# Fallback if anywidget is not available
warnings.warn(
"Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode."
)
return formatter.repr_query_job(self._compute_dry_run())

# Continue with regular HTML rendering for non-anywidget modes
# TODO(swast): pass max_columns and get the true column count back. Maybe
# get 1 more column than we have requested so that pandas can add the
# ... for us?
Expand All @@ -815,7 +824,6 @@ def _repr_html_(self) -> str:
)

self._set_internal_query_job(query_job)

column_count = len(pandas_df.columns)

with display_options.pandas_repr(opts):
Expand Down
24 changes: 24 additions & 0 deletions bigframes/display/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

# TableWidget is only exported when the optional `anywidget` dependency and
# the widget module both import cleanly. Any failure is deliberately ignored
# (best effort): in that case this package simply exposes no `TableWidget`
# attribute and defines no `__all__`.
try:
    import anywidget  # noqa

    from bigframes.display.anywidget import TableWidget
except Exception:
    pass
else:
    __all__ = ["TableWidget"]
179 changes: 179 additions & 0 deletions bigframes/display/anywidget.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from importlib import resources
import functools
import math
from typing import Any, Dict, Iterator, List, Optional, Type
import uuid

import pandas as pd

import bigframes

# anywidget and traitlets are optional dependencies. The guarded import below
# records whether both are importable instead of letting the failure propagate,
# which keeps the surface these packages can affect small, makes unit testing
# easier, and ensures they don't accidentally become required packages.
# NOTE(review): the TableWidget class body further down still references
# `traitlets` directly at class-definition time, so importing this module
# without the optional packages presumably still fails there -- confirm.
try:
    import anywidget
    import traitlets
except Exception:
    ANYWIDGET_INSTALLED = False
else:
    ANYWIDGET_INSTALLED = True

# Base class for TableWidget: the real AnyWidget class when the optional
# packages imported, otherwise plain `object` as a placeholder.
WIDGET_BASE: Type[Any] = anywidget.AnyWidget if ANYWIDGET_INSTALLED else object


class TableWidget(WIDGET_BASE):
    """An interactive, paginated table widget for BigFrames DataFrames.

    The Python side pulls pandas batches from the DataFrame on demand, keeps
    them in a cache, and renders the rows of the current page to HTML. The
    ``page``, ``page_size``, ``row_count`` and ``table_html`` traits are tagged
    ``sync=True`` so the frontend module returned by ``_esm`` can read them
    and request page changes.
    """

    def __init__(self, dataframe: bigframes.dataframe.DataFrame):
        """Initialize the TableWidget.

        Args:
            dataframe: The BigFrames DataFrame to display in the widget.

        Raises:
            ImportError: If the optional anywidget/traitlets packages are not
                installed.
        """
        if not ANYWIDGET_INSTALLED:
            raise ImportError(
                "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
            )

        super().__init__()
        self._dataframe = dataframe

        # Respect display options.
        self.page_size = bigframes.options.display.max_rows

        # Source of pandas batches; iterated lazily through `_batch_iterator`.
        self._batches = dataframe.to_pandas_batches(page_size=self.page_size)

        # Keep a list of DataFrames to avoid memory copies from concatenating
        # on every fetch.
        self._cached_batches: List[pd.DataFrame] = []

        # Unique identifier for the HTML table element.
        self._table_id = str(uuid.uuid4())
        self._all_data_loaded = False
        # Named `_batch_iter` so it does not collide with the
        # `_batch_iterator` property that lazily creates it.
        self._batch_iter: Optional[Iterator[pd.DataFrame]] = None

        # len(dataframe) is expensive, since it will trigger a
        # SELECT COUNT(*) query. It is a must have however.
        # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
        # before we get here so that the count might already be cached.
        self.row_count = len(dataframe)

        # Render the initial page.
        self._set_table_html()

    @functools.cached_property
    def _esm(self):
        """Load the frontend JavaScript code from the packaged file."""
        return resources.read_text(bigframes.display, "table_widget.js")

    # Widget state shared with the frontend (all tagged sync=True).
    page = traitlets.Int(0).tag(sync=True)
    page_size = traitlets.Int(25).tag(sync=True)
    row_count = traitlets.Int(0).tag(sync=True)
    table_html = traitlets.Unicode().tag(sync=True)

    @traitlets.validate("page")
    def _validate_page(self, proposal: Dict[str, Any]):
        """Validate and clamp the page number to a valid range.

        Args:
            proposal: A dictionary from the traitlets library containing the
                proposed change. The new value is in proposal["value"].

        Returns:
            The proposed page clamped to ``[0, max_page]``, or 0 when there
            are no rows or the page size is 0.
        """
        value = proposal["value"]
        if self.row_count == 0 or self.page_size == 0:
            return 0

        # Calculate the zero-indexed maximum page number.
        max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)

        # Clamp the proposed value to the valid range [0, max_page].
        return max(0, min(value, max_page))

    def _get_next_batch(self) -> bool:
        """Fetch the next batch from the iterator and append it to the cache.

        Returns:
            True if a batch was successfully loaded, False if the data was
            already (or has just been found to be) exhausted.
        """
        if self._all_data_loaded:
            return False

        try:
            iterator = self._batch_iterator
            batch = next(iterator)
            self._cached_batches.append(batch)
            return True
        except StopIteration:
            self._all_data_loaded = True
            return False

    @property
    def _batch_iterator(self) -> Iterator[pd.DataFrame]:
        """Lazily initialize and return the batch iterator."""
        if self._batch_iter is None:
            self._batch_iter = iter(self._batches)
        return self._batch_iter

    @property
    def _cached_data(self) -> pd.DataFrame:
        """Combine all cached batches into a single DataFrame.

        Returns an empty frame with the source DataFrame's columns when
        nothing has been cached yet. Every access concatenates the whole
        cache, so callers should read it once rather than in a loop.
        """
        if not self._cached_batches:
            return pd.DataFrame(columns=self._dataframe.columns)
        return pd.concat(self._cached_batches, ignore_index=True)

    def _set_table_html(self):
        """Render the rows of the current page into ``table_html``."""
        start = self.page * self.page_size
        end = start + self.page_size

        # Fetch more data while the requested page is outside the cache and
        # the source is not exhausted. The cached row count is tracked
        # incrementally so the cache is not re-concatenated after every batch.
        cached_rows = sum(len(batch) for batch in self._cached_batches)
        while cached_rows < end and self._get_next_batch():
            cached_rows += len(self._cached_batches[-1])

        # Concatenate once, then slice out the current page.
        page_data = self._cached_data.iloc[start:end]

        # Generate the HTML table.
        # NOTE(review): escape=False emits cell contents as raw markup; make
        # sure the displayed values are trusted or sanitized.
        self.table_html = page_data.to_html(
            index=False,
            max_rows=None,
            table_id=f"table-{self._table_id}",
            classes="table table-striped table-hover",
            escape=False,
        )

    @traitlets.observe("page")
    def _page_changed(self, change):
        """Re-render the table when the page number changes."""
        self._set_table_html()
95 changes: 95 additions & 0 deletions bigframes/display/table_widget.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Names of the widget model properties read and written by this module.
 * @enum {string}
 */
const ModelProperty = {
TABLE_HTML: "table_html",
ROW_COUNT: "row_count",
PAGE_SIZE: "page_size",
PAGE: "page",
};

/**
 * Event names used by this module: the model change event for the table
 * HTML property and the DOM click event for the paging buttons.
 * @enum {string}
 */
const Event = {
CHANGE_TABLE_HTML: `change:${ModelProperty.TABLE_HTML}`,
CLICK: "click",
};

/**
 * Renders a paginated table and its controls into a given element.
 *
 * The table markup comes from the model's `table_html` property. The
 * "Prev" and "Next" buttons update the model's `page` property, and the table
 * is redrawn whenever `table_html` changes.
 *
 * @param {{
 *   model: !Backbone.Model,
 *   el: !HTMLElement
 * }} options
 */
function render({ model, el }) {
  const container = document.createElement("div");
  // Note: Using innerHTML can be a security risk if the content is
  // user-generated. Ensure 'table_html' is properly sanitized.
  container.innerHTML = model.get(ModelProperty.TABLE_HTML);

  const buttonContainer = document.createElement("div");
  const prevPage = document.createElement("button");
  const label = document.createElement("span");
  const nextPage = document.createElement("button");

  prevPage.type = "button";
  nextPage.type = "button";
  prevPage.textContent = "Prev";
  nextPage.textContent = "Next";

  /** Updates the button states and page label based on the model. */
  function updateButtonStates() {
    const rowCount = model.get(ModelProperty.ROW_COUNT);
    const pageSize = model.get(ModelProperty.PAGE_SIZE);
    // An empty table or a zero page size is shown as a single page. Without
    // this guard the label would read "Page 1 of 0", or the division by a
    // zero page size would produce Infinity/NaN.
    const totalPages =
      rowCount > 0 && pageSize > 0 ? Math.ceil(rowCount / pageSize) : 1;
    const currentPage = model.get(ModelProperty.PAGE);

    label.textContent = `Page ${currentPage + 1} of ${totalPages}`;
    prevPage.disabled = currentPage === 0;
    nextPage.disabled = currentPage >= totalPages - 1;
  }

  /**
   * Updates the page in the model.
   * @param {number} direction -1 for previous, 1 for next.
   */
  function handlePageChange(direction) {
    const currentPage = model.get(ModelProperty.PAGE);
    // Never go below the first page; skip the round trip if nothing changes.
    const newPage = Math.max(0, currentPage + direction);
    if (newPage !== currentPage) {
      model.set(ModelProperty.PAGE, newPage);
      model.save_changes();
    }
  }

  prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1));
  nextPage.addEventListener(Event.CLICK, () => handlePageChange(1));

  model.on(Event.CHANGE_TABLE_HTML, () => {
    // Note: Using innerHTML can be a security risk if the content is
    // user-generated. Ensure 'table_html' is properly sanitized.
    container.innerHTML = model.get(ModelProperty.TABLE_HTML);
    updateButtonStates();
  });

  // Initial setup
  updateButtonStates();

  buttonContainer.appendChild(prevPage);
  buttonContainer.appendChild(label);
  buttonContainer.appendChild(nextPage);
  el.appendChild(container);
  el.appendChild(buttonContainer);
}

export default { render };
Loading