googleapis · tswast · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025
@@ -118,6 +118,8 @@ def __init__(
     ):
         global bigframes
 
+        self._query_job: Optional[bigquery.QueryJob] = None
+
         if copy is not None and not copy:
             raise ValueError(
                 f"DataFrame constructor only supports copy=True. {constants.FEEDBACK_LINK}"
@@ -182,7 +184,6 @@ def __init__(
             if dtype:
                 bf_dtype = bigframes.dtypes.bigframes_type(dtype)
                 block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
-            self._block = block
 
         else:
             import bigframes.pandas
@@ -194,10 +195,14 @@ def __init__(
                 dtype=dtype,  # type:ignore
             )
             if session:
-                self._block = session.read_pandas(pd_dataframe)._get_block()
+                block = session.read_pandas(pd_dataframe)._get_block()
             else:
-                self._block = bigframes.pandas.read_pandas(pd_dataframe)._get_block()
-        self._query_job: Optional[bigquery.QueryJob] = None
+                block = bigframes.pandas.read_pandas(pd_dataframe)._get_block()
+
+        # We use _block as an indicator in __getattr__ and __setattr__ to see
+        # if the object is fully initialized, so make sure we set the _block
+        # attribute last.
+        self._block = block
         self._block.session._register_object(self)
 
     def __dir__(self):
@@ -625,13 +630,17 @@ def _getitem_bool_series(self, key: bigframes.series.Series) -> DataFrame:
         return DataFrame(block)
 
     def __getattr__(self, key: str):
-        # Protect against recursion errors with uninitialized DataFrame
-        # objects. See:
+        # To allow subclasses to set private attributes before the class is
+        # fully initialized, protect against recursion errors with
+        # uninitialized DataFrame objects. Note: this comes at the downside
+        # that columns with a leading `_` won't be treated as columns.
+        #
+        # See:
         # https://github.com/googleapis/python-bigquery-dataframes/issues/728
         # and
         # https://nedbatchelder.com/blog/201010/surprising_getattr_recursion.html
         if key == "_block":
-            raise AttributeError("_block")
+            raise AttributeError(key)
 
         if key in self._block.column_labels:
             return self.__getitem__(key)
@@ -651,26 +660,36 @@ def __getattr__(self, key: str):
         raise AttributeError(key)
 
     def __setattr__(self, key: str, value):
-        if key in ["_block", "_query_job"]:
+        if key == "_block":
+            object.__setattr__(self, key, value)
+            return
+
+        # To allow subclasses to set private attributes before the class is
+        # fully initialized, assume anything set before `_block` is initialized
+        # is a regular attribute.
+        if not hasattr(self, "_block"):
             object.__setattr__(self, key, value)
             return
-        # Can this be removed???
+
+        # If someone has a column named the same as a normal attribute
+        # (e.g. index), we want to set the normal attribute, not the column.
+        # To do that, check if there is a normal attribute by using
+        # __getattribute__ (not __getattr__, because that includes columns).
+        # If that returns a value without raising, then we know this is a
+        # normal attribute and we should prefer that.
         try:
-            # boring attributes go through boring old path
             object.__getattribute__(self, key)
             return object.__setattr__(self, key, value)
         except AttributeError:
             pass
 
-        # if this fails, go on to more involved attribute setting
-        # (note that this matches __getattr__, above).
-        try:
-            if key in self.columns:
-                self[key] = value
-            else:
-                object.__setattr__(self, key, value)
-        # Can this be removed?
-        except (AttributeError, TypeError):
+        # If we made it here, then we know that it's not a regular attribute
+        # already, so it might be a column to update. Note: we don't allow
+        # adding new columns using __setattr__, only __setitem__, that way we
+        # can still add regular new attributes.
+        if key in self._block.column_labels:
+            self[key] = value
+        else:
             object.__setattr__(self, key, value)
 
     def __repr__(self) -> str:

@@ -41,6 +41,15 @@ def test_dataframe_repr_with_uninitialized_object():
     assert "DataFrame" in got
 
 
+def test_dataframe_setattr_with_uninitialized_object():
+    """Ensures DataFrame can be subclassed without trying to set attributes as columns."""
+    # Avoid calling __init__ since it might be called later in a subclass.
+    # https://stackoverflow.com/a/6384982/101923
+    dataframe = bigframes.dataframe.DataFrame.__new__(bigframes.dataframe.DataFrame)
+    dataframe.lineage = "my-test-value"
+    assert dataframe.lineage == "my-test-value"  # Should just be a regular attribute.
+
+
 def test_dataframe_to_gbq_invalid_destination(monkeypatch: pytest.MonkeyPatch):
     dataframe = resources.create_dataframe(monkeypatch)