Skip to content

Commit 85e2ced

Browse files
committed
Improve bulk-insert performance by keeping the current target buffer pinned
(but not locked, as that would risk deadlocks). Also, make it work in a small ring of buffers to avoid having bulk inserts trash the whole buffer arena. Robert Haas, after an idea of Simon Riggs'.
1 parent cdc197c commit 85e2ced

File tree

12 files changed

+201
-82
lines changed

12 files changed

+201
-82
lines changed

src/backend/access/heap/heapam.c

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.269 2008/11/06 20:51:14 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -1799,23 +1799,53 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
17991799
}
18001800

18011801

1802+
/*
1803+
* GetBulkInsertState - prepare status object for a bulk insert
1804+
*/
1805+
BulkInsertState
1806+
GetBulkInsertState(void)
1807+
{
1808+
BulkInsertState bistate;
1809+
1810+
bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1811+
bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1812+
bistate->current_buf = InvalidBuffer;
1813+
return bistate;
1814+
}
1815+
1816+
/*
1817+
* FreeBulkInsertState - clean up after finishing a bulk insert
1818+
*/
1819+
void
1820+
FreeBulkInsertState(BulkInsertState bistate)
1821+
{
1822+
if (bistate->current_buf != InvalidBuffer)
1823+
ReleaseBuffer(bistate->current_buf);
1824+
FreeAccessStrategy(bistate->strategy);
1825+
pfree(bistate);
1826+
}
1827+
1828+
18021829
/*
18031830
* heap_insert - insert tuple into a heap
18041831
*
18051832
* The new tuple is stamped with current transaction ID and the specified
18061833
* command ID.
18071834
*
1808-
* If use_wal is false, the new tuple is not logged in WAL, even for a
1809-
* non-temp relation. Safe usage of this behavior requires that we arrange
1810-
* that all new tuples go into new pages not containing any tuples from other
1811-
* transactions, and that the relation gets fsync'd before commit.
1812-
* (See also heap_sync() comments)
1835+
* If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
1836+
* logged in WAL, even for a non-temp relation. Safe usage of this behavior
1837+
* requires that we arrange that all new tuples go into new pages not
1838+
* containing any tuples from other transactions, and that the relation gets
1839+
* fsync'd before commit. (See also heap_sync() comments)
1840+
*
1841+
* The HEAP_INSERT_SKIP_FSM option is passed directly to
1842+
* RelationGetBufferForTuple; see that function for more info.
18131843
*
1814-
* use_fsm is passed directly to RelationGetBufferForTuple, which see for
1815-
* more info.
1844+
* Note that these options will be applied when inserting into the heap's
1845+
* TOAST table, too, if the tuple requires any out-of-line data.
18161846
*
1817-
* Note that use_wal and use_fsm will be applied when inserting into the
1818-
* heap's TOAST table, too, if the tuple requires any out-of-line data.
1847+
* The BulkInsertState object (if any; bistate can be NULL for default
1848+
* behavior) is also just passed through to RelationGetBufferForTuple.
18191849
*
18201850
* The return value is the OID assigned to the tuple (either here or by the
18211851
* caller), or InvalidOid if no OID. The header fields of *tup are updated
@@ -1825,7 +1855,7 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
18251855
*/
18261856
Oid
18271857
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
1828-
bool use_wal, bool use_fsm)
1858+
int options, BulkInsertState bistate)
18291859
{
18301860
TransactionId xid = GetCurrentTransactionId();
18311861
HeapTuple heaptup;
@@ -1877,14 +1907,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
18771907
heaptup = tup;
18781908
}
18791909
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
1880-
heaptup = toast_insert_or_update(relation, tup, NULL,
1881-
use_wal, use_fsm);
1910+
heaptup = toast_insert_or_update(relation, tup, NULL, options);
18821911
else
18831912
heaptup = tup;
18841913

18851914
/* Find buffer to insert this tuple into */
18861915
buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
1887-
InvalidBuffer, use_fsm);
1916+
InvalidBuffer, options, bistate);
18881917

18891918
/* NO EREPORT(ERROR) from here till changes are logged */
18901919
START_CRIT_SECTION();
@@ -1905,7 +1934,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
19051934
MarkBufferDirty(buffer);
19061935

19071936
/* XLOG stuff */
1908-
if (use_wal && !relation->rd_istemp)
1937+
if (!(options & HEAP_INSERT_SKIP_WAL) && !relation->rd_istemp)
19091938
{
19101939
xl_heap_insert xlrec;
19111940
xl_heap_header xlhdr;
@@ -2000,7 +2029,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
20002029
Oid
20012030
simple_heap_insert(Relation relation, HeapTuple tup)
20022031
{
2003-
return heap_insert(relation, tup, GetCurrentCommandId(true), true, true);
2032+
return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
20042033
}
20052034

20062035
/*
@@ -2595,8 +2624,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
25952624
if (need_toast)
25962625
{
25972626
/* Note we always use WAL and FSM during updates */
2598-
heaptup = toast_insert_or_update(relation, newtup, &oldtup,
2599-
true, true);
2627+
heaptup = toast_insert_or_update(relation, newtup, &oldtup, 0);
26002628
newtupsize = MAXALIGN(heaptup->t_len);
26012629
}
26022630
else
@@ -2623,7 +2651,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
26232651
{
26242652
/* Assume there's no chance to put heaptup on same page. */
26252653
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
2626-
buffer, true);
2654+
buffer, 0, NULL);
26272655
}
26282656
else
26292657
{
@@ -2640,7 +2668,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
26402668
*/
26412669
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
26422670
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
2643-
buffer, true);
2671+
buffer, 0, NULL);
26442672
}
26452673
else
26462674
{

src/backend/access/heap/hio.c

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.74 2008/11/06 20:51:14 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
1515

1616
#include "postgres.h"
1717

18+
#include "access/heapam.h"
1819
#include "access/hio.h"
1920
#include "storage/bufmgr.h"
2021
#include "storage/freespace.h"
@@ -56,6 +57,43 @@ RelationPutHeapTuple(Relation relation,
5657
((HeapTupleHeader) item)->t_ctid = tuple->t_self;
5758
}
5859

60+
/*
61+
* Read in a buffer, using bulk-insert strategy if bistate isn't NULL.
62+
*/
63+
static Buffer
64+
ReadBufferBI(Relation relation, BlockNumber targetBlock,
65+
BulkInsertState bistate)
66+
{
67+
Buffer buffer;
68+
69+
/* If not bulk-insert, exactly like ReadBuffer */
70+
if (!bistate)
71+
return ReadBuffer(relation, targetBlock);
72+
73+
/* If we have the desired block already pinned, re-pin and return it */
74+
if (bistate->current_buf != InvalidBuffer)
75+
{
76+
if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
77+
{
78+
IncrBufferRefCount(bistate->current_buf);
79+
return bistate->current_buf;
80+
}
81+
/* ... else drop the old buffer */
82+
ReleaseBuffer(bistate->current_buf);
83+
bistate->current_buf = InvalidBuffer;
84+
}
85+
86+
/* Perform a read using the buffer strategy */
87+
buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
88+
RBM_NORMAL, bistate->strategy);
89+
90+
/* Save the selected block as target for future inserts */
91+
IncrBufferRefCount(buffer);
92+
bistate->current_buf = buffer;
93+
94+
return buffer;
95+
}
96+
5997
/*
6098
* RelationGetBufferForTuple
6199
*
@@ -80,20 +118,26 @@ RelationPutHeapTuple(Relation relation,
80118
* happen if space is freed in that page after heap_update finds there's not
81119
* enough there). In that case, the page will be pinned and locked only once.
82120
*
83-
* If use_fsm is true (the normal case), we use FSM to help us find free
84-
* space. If use_fsm is false, we always append a new empty page to the
85-
* end of the relation if the tuple won't fit on the current target page.
121+
* We normally use FSM to help us find free space. However,
122+
* if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to
123+
* the end of the relation if the tuple won't fit on the current target page.
86124
* This can save some cycles when we know the relation is new and doesn't
87125
* contain useful amounts of free space.
88126
*
89-
* The use_fsm = false case is also useful for non-WAL-logged additions to a
127+
* HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a
90128
* relation, if the caller holds exclusive lock and is careful to invalidate
91129
* relation->rd_targblock before the first insertion --- that ensures that
92130
* all insertions will occur into newly added pages and not be intermixed
93131
* with tuples from other transactions. That way, a crash can't risk losing
94132
* any committed data of other transactions. (See heap_insert's comments
95133
* for additional constraints needed for safe usage of this behavior.)
96134
*
135+
* The caller can also provide a BulkInsertState object to optimize many
136+
* insertions into the same relation. This keeps a pin on the current
137+
* insertion target page (to save pin/unpin cycles) and also passes a
138+
* BULKWRITE buffer selection strategy object to the buffer manager.
139+
* Passing NULL for bistate selects the default behavior.
140+
*
97141
* We always try to avoid filling existing pages further than the fillfactor.
98142
* This is OK since this routine is not consulted when updating a tuple and
99143
* keeping it on the same page, which is the scenario fillfactor is meant
@@ -104,8 +148,10 @@ RelationPutHeapTuple(Relation relation,
104148
*/
105149
Buffer
106150
RelationGetBufferForTuple(Relation relation, Size len,
107-
Buffer otherBuffer, bool use_fsm)
151+
Buffer otherBuffer, int options,
152+
struct BulkInsertStateData *bistate)
108153
{
154+
bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
109155
Buffer buffer = InvalidBuffer;
110156
Page page;
111157
Size pageFreeSpace,
@@ -116,6 +162,9 @@ RelationGetBufferForTuple(Relation relation, Size len,
116162

117163
len = MAXALIGN(len); /* be conservative */
118164

165+
/* Bulk insert is not supported for updates, only inserts. */
166+
Assert(otherBuffer == InvalidBuffer || !bistate);
167+
119168
/*
120169
* If we're gonna fail for oversize tuple, do it right away
121170
*/
@@ -137,25 +186,27 @@ RelationGetBufferForTuple(Relation relation, Size len,
137186

138187
/*
139188
* We first try to put the tuple on the same page we last inserted a tuple
140-
* on, as cached in the relcache entry. If that doesn't work, we ask the
141-
* shared Free Space Map to locate a suitable page. Since the FSM's info
142-
* might be out of date, we have to be prepared to loop around and retry
143-
* multiple times. (To insure this isn't an infinite loop, we must update
144-
* the FSM with the correct amount of free space on each page that proves
145-
* not to be suitable.) If the FSM has no record of a page with enough
146-
* free space, we give up and extend the relation.
189+
* on, as cached in the BulkInsertState or relcache entry. If that
190+
* doesn't work, we ask the Free Space Map to locate a suitable page.
191+
* Since the FSM's info might be out of date, we have to be prepared to
192+
* loop around and retry multiple times. (To ensure this isn't an infinite
193+
* loop, we must update the FSM with the correct amount of free space on
194+
* each page that proves not to be suitable.) If the FSM has no record of
195+
* a page with enough free space, we give up and extend the relation.
147196
*
148197
* When use_fsm is false, we either put the tuple onto the existing target
149198
* page or extend the relation.
150199
*/
151-
if (len + saveFreeSpace <= MaxHeapTupleSize)
152-
targetBlock = relation->rd_targblock;
153-
else
200+
if (len + saveFreeSpace > MaxHeapTupleSize)
154201
{
155-
/* can't fit, don't screw up FSM request tracking by trying */
202+
/* can't fit, don't bother asking FSM */
156203
targetBlock = InvalidBlockNumber;
157204
use_fsm = false;
158205
}
206+
else if (bistate && bistate->current_buf != InvalidBuffer)
207+
targetBlock = BufferGetBlockNumber(bistate->current_buf);
208+
else
209+
targetBlock = relation->rd_targblock;
159210

160211
if (targetBlock == InvalidBlockNumber && use_fsm)
161212
{
@@ -189,7 +240,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
189240
if (otherBuffer == InvalidBuffer)
190241
{
191242
/* easy case */
192-
buffer = ReadBuffer(relation, targetBlock);
243+
buffer = ReadBufferBI(relation, targetBlock, bistate);
193244
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
194245
}
195246
else if (otherBlock == targetBlock)
@@ -274,7 +325,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
274325
* it worth keeping an accurate file length in shared memory someplace,
275326
* rather than relying on the kernel to do it for us?
276327
*/
277-
buffer = ReadBuffer(relation, P_NEW);
328+
buffer = ReadBufferBI(relation, P_NEW, bistate);
278329

279330
/*
280331
* We can be certain that locking the otherBuffer first is OK, since it

src/backend/access/heap/rewriteheap.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@
9696
* Portions Copyright (c) 1994-5, Regents of the University of California
9797
*
9898
* IDENTIFICATION
99-
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $
99+
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.16 2008/11/06 20:51:14 tgl Exp $
100100
*
101101
*-------------------------------------------------------------------------
102102
*/
@@ -575,7 +575,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
575575
}
576576
else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
577577
heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
578-
state->rs_use_wal, false);
578+
HEAP_INSERT_SKIP_FSM |
579+
(state->rs_use_wal ?
580+
0 : HEAP_INSERT_SKIP_WAL));
579581
else
580582
heaptup = tup;
581583

0 commit comments

Comments
 (0)