Skip to content

Commit f4a48b5

Browse files
committed
fix(table): fix _Row.cells can raise IndexError
The original implementation of `_Row.cells` did not take into account the fact that rows could include unoccupied grid cells at the beginning and/or end of the row. This "advanced" feature of tables is sometimes used by the Word table layout algorithm when the user does not carefully align the right boundary of cells during resizing, so while quite unusual to be used on purpose, this arises with some frequency in human-authored documents in the wild. The prior implementation of `_Row.cells` assumed that `Table.cells()` was uniform and the cells for a row could be reliably be computed from the table column-count and row and column offsets. That assumption does not always hold and can raise `IndexError` when omitted cells are present. This reimplementation remedies that situation. As a side-effect it should also perform much better when reading large tables.
1 parent 512f269 commit f4a48b5

File tree

2 files changed

+66
-11
lines changed

2 files changed

+66
-11
lines changed

src/docx/table.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from __future__ import annotations
44

5-
from typing import TYPE_CHECKING, cast, overload
5+
from typing import TYPE_CHECKING, Iterator, cast, overload
66

77
from typing_extensions import TypeAlias
88

@@ -102,7 +102,10 @@ def columns(self):
102102
return _Columns(self._tbl, self)
103103

104104
def row_cells(self, row_idx: int) -> list[_Cell]:
105-
"""Sequence of cells in the row at `row_idx` in this table."""
105+
"""DEPRECATED: Use `table.rows[row_idx].cells` instead.
106+
107+
Sequence of cells in the row at `row_idx` in this table.
108+
"""
106109
column_count = self._column_count
107110
start = row_idx * column_count
108111
end = start + column_count
@@ -403,7 +406,36 @@ def cells(self) -> tuple[_Cell, ...]:
403406
layout-grid positions using `.grid_cols_before` and `.grid_cols_after`.
404407
405408
"""
406-
return tuple(self.table.row_cells(self._index))
409+
410+
def iter_tc_cells(tc: CT_Tc) -> Iterator[_Cell]:
411+
"""Generate a cell object for each layout-grid cell in `tc`.
412+
413+
In particular, a `<w:tc>` element with a horizontal "span" with generate the same cell
414+
multiple times, one for each grid-cell being spanned. This approximates a row in a
415+
"uniform" table, where each row has a cell for each column in the table.
416+
"""
417+
# -- a cell comprising the second or later row of a vertical span is indicated by
418+
# -- tc.vMerge="continue" (the default value of the `w:vMerge` attribute, when it is
419+
# -- present in the XML). The `w:tc` element at the same grid-offset in the prior row
420+
# -- is guaranteed to be the same width (gridSpan). So we can delegate content
421+
# -- discovery to that prior-row `w:tc` element (recursively) until we arrive at the
422+
# -- "root" cell -- for the vertical span.
423+
if tc.vMerge == "continue":
424+
yield from iter_tc_cells(tc._tc_above) # pyright: ignore[reportPrivateUsage]
425+
return
426+
427+
# -- Otherwise, vMerge is either "restart" or None, meaning this `tc` holds the actual
428+
# -- content of the cell (whether it is vertically merged or not).
429+
cell = _Cell(tc, self.table)
430+
for _ in range(tc.grid_span):
431+
yield cell
432+
433+
def _iter_row_cells() -> Iterator[_Cell]:
434+
"""Generate `_Cell` instance for each populated layout-grid cell in this row."""
435+
for tc in self._tr.tc_lst:
436+
yield from iter_tc_cells(tc)
437+
438+
return tuple(_iter_row_cells())
407439

408440
@property
409441
def grid_cols_after(self) -> int:

tests/test_table.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -782,18 +782,41 @@ def it_can_change_its_height_rule(
782782
row.height_rule = new_value
783783
assert row._tr.xml == xml(expected_cxml)
784784

785+
@pytest.mark.parametrize(
786+
("tbl_cxml", "row_idx", "expected_len"),
787+
[
788+
# -- cell corresponds to single layout-grid cell --
789+
("w:tbl/w:tr/w:tc/w:p", 0, 1),
790+
# -- cell has a horizontal span --
791+
("w:tbl/w:tr/w:tc/(w:tcPr/w:gridSpan{w:val=2},w:p)", 0, 2),
792+
# -- cell is in latter row of vertical span --
793+
(
794+
"w:tbl/(w:tr/w:tc/(w:tcPr/w:vMerge{w:val=restart},w:p),"
795+
"w:tr/w:tc/(w:tcPr/w:vMerge,w:p))",
796+
1,
797+
1,
798+
),
799+
# -- cell both has horizontal span and is latter row of vertical span --
800+
(
801+
"w:tbl/(w:tr/w:tc/(w:tcPr/(w:gridSpan{w:val=2},w:vMerge{w:val=restart}),w:p),"
802+
"w:tr/w:tc/(w:tcPr/(w:gridSpan{w:val=2},w:vMerge),w:p))",
803+
1,
804+
2,
805+
),
806+
],
807+
)
785808
def it_provides_access_to_its_cells(
786-
self, _index_prop_: Mock, table_prop_: Mock, table_: Mock, parent_: Mock
809+
self, tbl_cxml: str, row_idx: int, expected_len: int, parent_: Mock
787810
):
788-
row = _Row(cast(CT_Row, element("w:tr")), parent_)
789-
_index_prop_.return_value = row_idx = 6
790-
expected_cells = (1, 2, 3)
791-
table_.row_cells.return_value = list(expected_cells)
811+
tbl = cast(CT_Tbl, element(tbl_cxml))
812+
tr = tbl.tr_lst[row_idx]
813+
table = Table(tbl, parent_)
814+
row = _Row(tr, table)
792815

793816
cells = row.cells
794817

795-
table_.row_cells.assert_called_once_with(row_idx)
796-
assert cells == expected_cells
818+
assert len(cells) == expected_len
819+
assert all(type(c) is _Cell for c in cells)
797820

798821
def it_provides_access_to_the_table_it_belongs_to(self, parent_: Mock, table_: Mock):
799822
parent_.table = table_
@@ -821,7 +844,7 @@ def table_(self, request: FixtureRequest):
821844

822845
@pytest.fixture
823846
def table_prop_(self, request: FixtureRequest, table_: Mock):
824-
return property_mock(request, _Row, "table", return_value=table_)
847+
return property_mock(request, _Row, "table")
825848

826849

827850
class Describe_Rows:

0 commit comments

Comments
 (0)