postgres
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
Lines changed: 48 additions & 20 deletions
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
Lines changed: 70 additions & 19 deletions
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
Lines changed: 4 additions & 2 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.269 2008/11/06 20:51:14 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1799,23 +1799,53 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
 }
 
 
+/*
+ * GetBulkInsertState - prepare status object for a bulk insert
+ *
+ * Allocates a BulkInsertStateData with palloc, attaches a BAS_BULKWRITE
+ * buffer access strategy to it, and starts out with no current buffer
+ * (current_buf = InvalidBuffer).  The returned object is what heap_insert
+ * accepts as its bistate argument; release it with FreeBulkInsertState.
+ */
+BulkInsertState
+GetBulkInsertState(void)
+{
+	BulkInsertState bistate;
+
+	bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
+	bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
+	bistate->current_buf = InvalidBuffer;	/* no target page pinned yet */
+	return bistate;
+}
+
+/*
+ * FreeBulkInsertState - clean up after finishing a bulk insert
+ *
+ * Releases the buffer reference held in bistate->current_buf (if any),
+ * frees the buffer access strategy, and finally pfree's the state object
+ * itself.  bistate is dereferenced unconditionally, so it must not be NULL,
+ * and it must not be used again after this call.
+ */
+void
+FreeBulkInsertState(BulkInsertState bistate)
+{
+	if (bistate->current_buf != InvalidBuffer)
+		ReleaseBuffer(bistate->current_buf);		/* drop the pin kept on the current target page */
+	FreeAccessStrategy(bistate->strategy);
+	pfree(bistate);
+}
+
+
 /*
  *	heap_insert		- insert tuple into a heap
  *
  * The new tuple is stamped with current transaction ID and the specified
  * command ID.
  *
- * If use_wal is false, the new tuple is not logged in WAL, even for a
- * non-temp relation.  Safe usage of this behavior requires that we arrange
- * that all new tuples go into new pages not containing any tuples from other
- * transactions, and that the relation gets fsync'd before commit.
- * (See also heap_sync() comments)
+ * If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
+ * logged in WAL, even for a non-temp relation.  Safe usage of this behavior
+ * requires that we arrange that all new tuples go into new pages not
+ * containing any tuples from other transactions, and that the relation gets
+ * fsync'd before commit.  (See also heap_sync() comments)
+ *
+ * The HEAP_INSERT_SKIP_FSM option is passed directly to
+ * RelationGetBufferForTuple, which see for more info.
  *
- * use_fsm is passed directly to RelationGetBufferForTuple, which see for
- * more info.
+ * Note that these options will be applied when inserting into the heap's
+ * TOAST table, too, if the tuple requires any out-of-line data.
  *
- * Note that use_wal and use_fsm will be applied when inserting into the
- * heap's TOAST table, too, if the tuple requires any out-of-line data.
+ * The BulkInsertState object (if any; bistate can be NULL for default
+ * behavior) is also just passed through to RelationGetBufferForTuple.
  *
  * The return value is the OID assigned to the tuple (either here or by the
  * caller), or InvalidOid if no OID.  The header fields of *tup are updated
@@ -1825,7 +1855,7 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
  */
 Oid
 heap_insert(Relation relation, HeapTuple tup, CommandId cid,
-			bool use_wal, bool use_fsm)
+			int options, BulkInsertState bistate)
 {
 	TransactionId xid = GetCurrentTransactionId();
 	HeapTuple	heaptup;
@@ -1877,14 +1907,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 		heaptup = tup;
 	}
 	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
-		heaptup = toast_insert_or_update(relation, tup, NULL,
-										 use_wal, use_fsm);
+		heaptup = toast_insert_or_update(relation, tup, NULL, options);
 	else
 		heaptup = tup;
 
 	/* Find buffer to insert this tuple into */
 	buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
-									   InvalidBuffer, use_fsm);
+									   InvalidBuffer, options, bistate);
 
 	/* NO EREPORT(ERROR) from here till changes are logged */
 	START_CRIT_SECTION();
@@ -1905,7 +1934,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	MarkBufferDirty(buffer);
 
 	/* XLOG stuff */
-	if (use_wal && !relation->rd_istemp)
+	if (!(options & HEAP_INSERT_SKIP_WAL) && !relation->rd_istemp)
 	{
 		xl_heap_insert xlrec;
 		xl_heap_header xlhdr;
@@ -2000,7 +2029,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 Oid
 simple_heap_insert(Relation relation, HeapTuple tup)
 {
-	return heap_insert(relation, tup, GetCurrentCommandId(true), true, true);
+	return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
 }
 
 /*
@@ -2595,8 +2624,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 		if (need_toast)
 		{
 			/* Note we always use WAL and FSM during updates */
-			heaptup = toast_insert_or_update(relation, newtup, &oldtup,
-											 true, true);
+			heaptup = toast_insert_or_update(relation, newtup, &oldtup, 0);
 			newtupsize = MAXALIGN(heaptup->t_len);
 		}
 		else
@@ -2623,7 +2651,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 		{
 			/* Assume there's no chance to put heaptup on same page. */
 			newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
-											   buffer, true);
+											   buffer, 0, NULL);
 		}
 		else
 		{
@@ -2640,7 +2668,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 				 */
 				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 				newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
-												   buffer, true);
+												   buffer, 0, NULL);
 			}
 			else
 			{
 
@@ -8,13 +8,14 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.74 2008/11/06 20:51:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include "access/heapam.h"
 #include "access/hio.h"
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
@@ -56,6 +57,43 @@ RelationPutHeapTuple(Relation relation,
 	((HeapTupleHeader) item)->t_ctid = tuple->t_self;
 }
 
+/*
+ * Read in a buffer, using bulk-insert strategy if bistate isn't NULL.
+ *
+ * With a NULL bistate this is a plain ReadBuffer call.
+ *
+ * With a non-NULL bistate, the returned buffer carries one pin for the
+ * caller, and bistate->current_buf keeps a second pin on the same buffer
+ * so that a later request for the same block can simply re-pin it instead
+ * of reading it again.  If current_buf refers to a different block, its
+ * pin is released before the new block is read with bistate->strategy.
+ */
+static Buffer
+ReadBufferBI(Relation relation, BlockNumber targetBlock,
+			 BulkInsertState bistate)
+{
+	Buffer buffer;
+
+	/* If not bulk-insert, exactly like ReadBuffer */
+	if (!bistate)
+		return ReadBuffer(relation, targetBlock);
+
+	/* If we have the desired block already pinned, re-pin and return it */
+	if (bistate->current_buf != InvalidBuffer)
+	{
+		if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
+		{
+			IncrBufferRefCount(bistate->current_buf);	/* caller's own pin; bistate keeps its pin */
+			return bistate->current_buf;
+		}
+		/* ... else drop the old buffer */
+		ReleaseBuffer(bistate->current_buf);
+		bistate->current_buf = InvalidBuffer;
+	}
+
+	/* Perform a read using the buffer strategy */
+	buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
+								RBM_NORMAL, bistate->strategy);
+
+	/* Save the selected block as target for future inserts */
+	IncrBufferRefCount(buffer);		/* extra pin owned by bistate, on top of the caller's */
+	bistate->current_buf = buffer;
+
+	return buffer;
+}
+
 /*
  * RelationGetBufferForTuple
  *
@@ -80,20 +118,26 @@ RelationPutHeapTuple(Relation relation,
  *	happen if space is freed in that page after heap_update finds there's not
  *	enough there).	In that case, the page will be pinned and locked only once.
  *
- *	If use_fsm is true (the normal case), we use FSM to help us find free
- *	space.	If use_fsm is false, we always append a new empty page to the
- *	end of the relation if the tuple won't fit on the current target page.
+ *	We normally use FSM to help us find free space.	 However,
+ *	if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to
+ *	the end of the relation if the tuple won't fit on the current target page.
  *	This can save some cycles when we know the relation is new and doesn't
  *	contain useful amounts of free space.
  *
- *	The use_fsm = false case is also useful for non-WAL-logged additions to a
+ *	HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a
  *	relation, if the caller holds exclusive lock and is careful to invalidate
  *	relation->rd_targblock before the first insertion --- that ensures that
  *	all insertions will occur into newly added pages and not be intermixed
  *	with tuples from other transactions.  That way, a crash can't risk losing
  *	any committed data of other transactions.  (See heap_insert's comments
  *	for additional constraints needed for safe usage of this behavior.)
  *
+ *	The caller can also provide a BulkInsertState object to optimize many
+ *	insertions into the same relation.  This keeps a pin on the current
+ *	insertion target page (to save pin/unpin cycles) and also passes a
+ *	BULKWRITE buffer selection strategy object to the buffer manager.
+ *	Passing NULL for bistate selects the default behavior.
+ *
  *	We always try to avoid filling existing pages further than the fillfactor.
  *	This is OK since this routine is not consulted when updating a tuple and
  *	keeping it on the same page, which is the scenario fillfactor is meant
@@ -104,8 +148,10 @@ RelationPutHeapTuple(Relation relation,
  */
 Buffer
 RelationGetBufferForTuple(Relation relation, Size len,
-						  Buffer otherBuffer, bool use_fsm)
+						  Buffer otherBuffer, int options,
+						  struct BulkInsertStateData *bistate)
 {
+	bool		use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
 	Buffer		buffer = InvalidBuffer;
 	Page		page;
 	Size		pageFreeSpace,
@@ -116,6 +162,9 @@ RelationGetBufferForTuple(Relation relation, Size len,
 
 	len = MAXALIGN(len);		/* be conservative */
 
+	/* Bulk insert is not supported for updates, only inserts. */
+	Assert(otherBuffer == InvalidBuffer || !bistate);
+
 	/*
 	 * If we're gonna fail for oversize tuple, do it right away
 	 */
@@ -137,25 +186,27 @@ RelationGetBufferForTuple(Relation relation, Size len,
 
 	/*
 	 * We first try to put the tuple on the same page we last inserted a tuple
-	 * on, as cached in the relcache entry.  If that doesn't work, we ask the
-	 * shared Free Space Map to locate a suitable page.  Since the FSM's info
-	 * might be out of date, we have to be prepared to loop around and retry
-	 * multiple times.	(To insure this isn't an infinite loop, we must update
-	 * the FSM with the correct amount of free space on each page that proves
-	 * not to be suitable.)  If the FSM has no record of a page with enough
-	 * free space, we give up and extend the relation.
+	 * on, as cached in the BulkInsertState or relcache entry.  If that
+	 * doesn't work, we ask the Free Space Map to locate a suitable page.
+	 * Since the FSM's info might be out of date, we have to be prepared to
+	 * loop around and retry multiple times. (To ensure this isn't an infinite
+	 * loop, we must update the FSM with the correct amount of free space on
+	 * each page that proves not to be suitable.)  If the FSM has no record of
+	 * a page with enough free space, we give up and extend the relation.
 	 *
 	 * When use_fsm is false, we either put the tuple onto the existing target
 	 * page or extend the relation.
 	 */
-	if (len + saveFreeSpace <= MaxHeapTupleSize)
-		targetBlock = relation->rd_targblock;
-	else
+	if (len + saveFreeSpace > MaxHeapTupleSize)
 	{
-		/* can't fit, don't screw up FSM request tracking by trying */
+		/* can't fit, don't bother asking FSM */
 		targetBlock = InvalidBlockNumber;
 		use_fsm = false;
 	}
+	else if (bistate && bistate->current_buf != InvalidBuffer)
+		targetBlock = BufferGetBlockNumber(bistate->current_buf);
+	else
+		targetBlock = relation->rd_targblock;
 
 	if (targetBlock == InvalidBlockNumber && use_fsm)
 	{
@@ -189,7 +240,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
 		if (otherBuffer == InvalidBuffer)
 		{
 			/* easy case */
-			buffer = ReadBuffer(relation, targetBlock);
+			buffer = ReadBufferBI(relation, targetBlock, bistate);
 			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 		}
 		else if (otherBlock == targetBlock)
@@ -274,7 +325,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	 * it worth keeping an accurate file length in shared memory someplace,
 	 * rather than relying on the kernel to do it for us?
 	 */
-	buffer = ReadBuffer(relation, P_NEW);
+	buffer = ReadBufferBI(relation, P_NEW, bistate);
 
 	/*
 	 * We can be certain that locking the otherBuffer first is OK, since it
 
@@ -96,7 +96,7 @@
  * Portions Copyright (c) 1994-5, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.16 2008/11/06 20:51:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -575,7 +575,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 	}
 	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
 		heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
-										 state->rs_use_wal, false);
+										 HEAP_INSERT_SKIP_FSM |
+										 (state->rs_use_wal ?
+									     0 : HEAP_INSERT_SKIP_WAL));
 	else
 		heaptup = tup;
Original file line number	Diff line number	Diff line change
`@@ -96,7 +96,7 @@`
`96`	`96`	`* Portions Copyright (c) 1994-5, Regents of the University of California`
`97`	`97`	`*`
`98`	`98`	`* IDENTIFICATION`
`99`		`- * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $`
	`99`	`+ * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.16 2008/11/06 20:51:14 tgl Exp $`
`100`	`100`	`*`
`101`	`101`	`*-------------------------------------------------------------------------`
`102`	`102`	`*/`
`@@ -575,7 +575,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)`
`575`	`575`	`}`
`576`	`576`	`else if (HeapTupleHasExternal(tup) \|\| tup->t_len > TOAST_TUPLE_THRESHOLD)`
`577`	`577`	`heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,`
`578`		`- state->rs_use_wal, false);`
	`578`	`+ HEAP_INSERT_SKIP_FSM \|`
	`579`	`+ (state->rs_use_wal ?`
	`580`	`+ 0 : HEAP_INSERT_SKIP_WAL));`
`579`	`581`	`else`
`580`	`582`	`heaptup = tup;`
`581`	`583`