Skip to content

Commit 0ff8bbd

Browse files
committed
Accept slightly-filled pages for tuples larger than fillfactor.
We always inserted a larger-than-fillfactor tuple into a newly-extended page, even when existing pages were empty or contained nothing but an unused line pointer. This was unnecessary relation extension. Start tolerating page usage up to 1/8 the maximum space that could be taken up by line pointers. This is somewhat arbitrary, but it should allow more cases to reuse pages. This has no effect on tables with fillfactor=100 (the default). John Naylor and Floris van Nee. Reviewed by Matthias van de Meent. Reported by Floris van Nee. Discussion: https://postgr.es/m/6e263217180649339720afe2176c50aa@opammb0562.comp.optiver.com
1 parent 7ef64e7 commit 0ff8bbd

File tree

4 files changed

+74
-18
lines changed

4 files changed

+74
-18
lines changed

src/backend/access/heap/hio.c

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -317,10 +317,10 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
317317
* BULKWRITE buffer selection strategy object to the buffer manager.
318318
* Passing NULL for bistate selects the default behavior.
319319
*
320-
* We always try to avoid filling existing pages further than the fillfactor.
321-
* This is OK since this routine is not consulted when updating a tuple and
322-
* keeping it on the same page, which is the scenario fillfactor is meant
323-
* to reserve space for.
320+
* We don't fill existing pages further than the fillfactor, except for large
321+
* tuples in nearly-empty pages. This is OK since this routine is not
322+
* consulted when updating a tuple and keeping it on the same page, which is
323+
* the scenario fillfactor is meant to reserve space for.
324324
*
325325
* ereport(ERROR) is allowed here, so this routine *must* be called
326326
* before any (unlogged) changes are made in buffer pool.
@@ -334,8 +334,10 @@ RelationGetBufferForTuple(Relation relation, Size len,
334334
bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
335335
Buffer buffer = InvalidBuffer;
336336
Page page;
337-
Size pageFreeSpace = 0,
338-
saveFreeSpace = 0;
337+
Size nearlyEmptyFreeSpace,
338+
pageFreeSpace = 0,
339+
saveFreeSpace = 0,
340+
targetFreeSpace = 0;
339341
BlockNumber targetBlock,
340342
otherBlock;
341343
bool needLock;
@@ -358,6 +360,19 @@ RelationGetBufferForTuple(Relation relation, Size len,
358360
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
359361
HEAP_DEFAULT_FILLFACTOR);
360362

363+
/*
364+
* Since pages without tuples can still have line pointers, we consider
365+
* pages "empty" when the unavailable space is slight. This threshold is
366+
* somewhat arbitrary, but it should prevent most unnecessary relation
367+
* extensions while inserting large tuples into low-fillfactor tables.
368+
*/
369+
nearlyEmptyFreeSpace = MaxHeapTupleSize -
370+
(MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData));
371+
if (len + saveFreeSpace > nearlyEmptyFreeSpace)
372+
targetFreeSpace = Max(len, nearlyEmptyFreeSpace);
373+
else
374+
targetFreeSpace = len + saveFreeSpace;
375+
361376
if (otherBuffer != InvalidBuffer)
362377
otherBlock = BufferGetBlockNumber(otherBuffer);
363378
else
@@ -376,13 +391,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
376391
* When use_fsm is false, we either put the tuple onto the existing target
377392
* page or extend the relation.
378393
*/
379-
if (len + saveFreeSpace > MaxHeapTupleSize)
380-
{
381-
/* can't fit, don't bother asking FSM */
382-
targetBlock = InvalidBlockNumber;
383-
use_fsm = false;
384-
}
385-
else if (bistate && bistate->current_buf != InvalidBuffer)
394+
if (bistate && bistate->current_buf != InvalidBuffer)
386395
targetBlock = BufferGetBlockNumber(bistate->current_buf);
387396
else
388397
targetBlock = RelationGetTargetBlock(relation);
@@ -393,7 +402,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
393402
* We have no cached target page, so ask the FSM for an initial
394403
* target.
395404
*/
396-
targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
405+
targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
397406
}
398407

399408
/*
@@ -517,7 +526,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
517526
}
518527

519528
pageFreeSpace = PageGetHeapFreeSpace(page);
520-
if (len + saveFreeSpace <= pageFreeSpace)
529+
if (targetFreeSpace <= pageFreeSpace)
521530
{
522531
/* use this page as future insert target, too */
523532
RelationSetTargetBlock(relation, targetBlock);
@@ -550,7 +559,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
550559
targetBlock = RecordAndGetPageWithFreeSpace(relation,
551560
targetBlock,
552561
pageFreeSpace,
553-
len + saveFreeSpace);
562+
targetFreeSpace);
554563
}
555564

556565
/*
@@ -582,7 +591,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
582591
* Check if some other backend has extended a block for us while
583592
* we were waiting on the lock.
584593
*/
585-
targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
594+
targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
586595

587596
/*
588597
* If some other waiter has already extended the relation, we

src/backend/access/heap/rewriteheap.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,11 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
676676

677677
if (len + saveFreeSpace > pageFreeSpace)
678678
{
679-
/* Doesn't fit, so write out the existing page */
679+
/*
680+
* Doesn't fit, so write out the existing page. It always
681+
* contains a tuple. Hence, unlike RelationGetBufferForTuple(),
682+
* enforce saveFreeSpace unconditionally.
683+
*/
680684

681685
/* XLOG stuff */
682686
if (RelationNeedsWAL(state->rs_new_rel))

src/test/regress/expected/insert.out

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,27 @@ select col1, col2, char_length(col3) from inserttest;
8282

8383
drop table inserttest;
8484
--
85+
-- tuple larger than fillfactor
86+
--
87+
CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10);
88+
ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain;
89+
-- create page w/ free space in range [nearlyEmptyFreeSpace, MaxHeapTupleSize)
90+
INSERT INTO large_tuple_test (select 1, NULL);
91+
-- should still fit on the page
92+
INSERT INTO large_tuple_test (select 2, repeat('a', 1000));
93+
SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
94+
pg_size_pretty
95+
----------------
96+
8192 bytes
97+
(1 row)
98+
99+
-- add small record to the second page
100+
INSERT INTO large_tuple_test (select 3, NULL);
101+
-- now this tuple won't fit on the second page, but the insert should
102+
-- still succeed by extending the relation
103+
INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
104+
DROP TABLE large_tuple_test;
105+
--
85106
-- check indirection (field/array assignment), cf bug #14265
86107
--
87108
-- these tests are aware that transformInsertStmt has 3 separate code paths

src/test/regress/sql/insert.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,28 @@ select col1, col2, char_length(col3) from inserttest;
3737

3838
drop table inserttest;
3939

40+
--
41+
-- tuple larger than fillfactor
42+
--
43+
CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10);
44+
ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain;
45+
46+
-- create page w/ free space in range [nearlyEmptyFreeSpace, MaxHeapTupleSize)
47+
INSERT INTO large_tuple_test (select 1, NULL);
48+
49+
-- should still fit on the page
50+
INSERT INTO large_tuple_test (select 2, repeat('a', 1000));
51+
SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
52+
53+
-- add small record to the second page
54+
INSERT INTO large_tuple_test (select 3, NULL);
55+
56+
-- now this tuple won't fit on the second page, but the insert should
57+
-- still succeed by extending the relation
58+
INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
59+
60+
DROP TABLE large_tuple_test;
61+
4062
--
4163
-- check indirection (field/array assignment), cf bug #14265
4264
--

0 commit comments

Comments
 (0)