Skip to content

feat: Add config option to set partial ordering mode #855

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions bigframes/_config/bigquery_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

from __future__ import annotations

from typing import Optional
from enum import Enum
from typing import Literal, Optional
import warnings

import google.api_core.exceptions
Expand All @@ -26,6 +27,12 @@
import bigframes.constants
import bigframes.exceptions


class OrderingMode(Enum):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mailed #870

STRICT = "strict"
PARTIAL = "partial"


SESSION_STARTED_MESSAGE = (
"Cannot change '{attribute}' once a session has started. "
"Call bigframes.pandas.close_session() first, if you are using the bigframes.pandas API."
Expand Down Expand Up @@ -57,6 +64,14 @@ def _validate_location(value: Optional[str]):
)


def _validate_ordering_mode(value: str) -> OrderingMode:
    """Convert a user-supplied ordering mode string into an ``OrderingMode``.

    The comparison is case-insensitive, so ``"Strict"`` and ``"PARTIAL"`` are
    accepted alongside the lowercase spellings.

    Args:
        value: The requested mode, ``"strict"`` or ``"partial"`` in any casing.

    Returns:
        The ``OrderingMode`` member whose value matches ``value``.

    Raises:
        ValueError: If ``value`` does not name a supported ordering mode,
            including when it is not a string at all.
    """
    # Guard non-strings (e.g. None) so bad input always surfaces as the
    # ValueError below rather than an AttributeError from ``.casefold()``.
    if isinstance(value, str):
        folded = value.casefold()
        for mode in (OrderingMode.STRICT, OrderingMode.PARTIAL):
            if folded == mode.value.casefold():
                return mode
    raise ValueError("Ordering mode must be one of 'strict' or 'partial'.")


class BigQueryOptions:
"""Encapsulates configuration for working with a session."""

Expand All @@ -71,7 +86,7 @@ def __init__(
kms_key_name: Optional[str] = None,
skip_bq_connection_check: bool = False,
*,
_strictly_ordered: bool = True,
ordering_mode: Literal["strict", "partial"] = "strict",
):
self._credentials = credentials
self._project = project
Expand All @@ -82,8 +97,8 @@ def __init__(
self._kms_key_name = kms_key_name
self._skip_bq_connection_check = skip_bq_connection_check
self._session_started = False
# Determines the ordering strictness for the session. For internal use only.
self._strictly_ordered_internal = _strictly_ordered
# Determines the ordering strictness for the session.
self._ordering_mode = _validate_ordering_mode(ordering_mode)

@property
def application_name(self) -> Optional[str]:
Expand Down Expand Up @@ -241,6 +256,10 @@ def kms_key_name(self, value: str):
self._kms_key_name = value

@property
def _strictly_ordered(self) -> bool:
"""Internal use only. Controls whether total row order is always maintained for DataFrame/Series."""
return self._strictly_ordered_internal
def ordering_mode(self) -> Literal["strict", "partial"]:
"""Controls whether total row order is always maintained for DataFrame/Series."""
return self._ordering_mode.value

@ordering_mode.setter
def ordering_mode(self, ordering_mode: Literal["strict", "partial"]) -> None:
self._ordering_mode = _validate_ordering_mode(ordering_mode)
12 changes: 9 additions & 3 deletions bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,21 @@ def __init__(
self._execution_count = 0
# Whether this session treats objects as totally ordered.
# Will expose as feature later, only False for internal testing
self._strictly_ordered: bool = context._strictly_ordered
self._strictly_ordered: bool = context.ordering_mode != "partial"
if not self._strictly_ordered:
warnings.warn(
"Partial ordering mode is a preview feature and is subject to change.",
bigframes.exceptions.PreviewWarning,
)

# Sequential index needs total ordering to generate, so use null index with unstrict ordering.
self._default_index_type: bigframes.enums.DefaultIndexKind = (
bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
if context._strictly_ordered
if self._strictly_ordered
else bigframes.enums.DefaultIndexKind.NULL
)
self._compiler = bigframes.core.compile.SQLCompiler(
strict=context._strictly_ordered
strict=self._strictly_ordered
)

self._remote_function_session = bigframes_rf._RemoteFunctionSession()
Expand Down
6 changes: 2 additions & 4 deletions tests/system/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,17 +141,15 @@ def session() -> Generator[bigframes.Session, None, None]:

@pytest.fixture(scope="session", params=["ordered", "unordered"])
def maybe_ordered_session(request) -> Generator[bigframes.Session, None, None]:
    """Yield a US-location session once per ordering mode.

    The fixture is parametrized: the ``"ordered"`` run uses strict ordering
    and the ``"unordered"`` run uses partial ordering. The session is closed
    after the tests that use it have finished.
    """
    # Derive the mode from the fixture parameter; hard-coding "partial" here
    # would make both parametrized runs exercise the same ordering mode.
    context = bigframes.BigQueryOptions(
        location="US",
        ordering_mode="strict" if request.param == "ordered" else "partial",
    )
    session = bigframes.Session(context=context)
    yield session
    session.close()  # close the generated session at cleanup time


@pytest.fixture(scope="session")
def unordered_session() -> Generator[bigframes.Session, None, None]:
    """Yield a US-location session configured with partial ordering mode.

    The session is closed after the tests that use it have finished.
    """
    context = bigframes.BigQueryOptions(location="US", ordering_mode="partial")
    session = bigframes.Session(context=context)
    yield session
    session.close()  # close the generated session at cleanup time
Expand Down