8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.61 2000/07/21 19:21:00 tgl Exp $
11
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.62 2000/08/25 23:13:33 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
@@ -289,9 +289,11 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
289
289
* if the new key is equal to the page's "high key" we can place it on
290
290
* the next page. If it is equal to the high key, and there's not room
291
291
* to insert the new tuple on the current page without splitting, then
292
- * we move right hoping to find more free space and avoid a split.
293
- * Ordinarily, though, we'll insert it before the existing equal keys
294
- * because of the way _bt_binsrch() works.
292
+ * we can move right hoping to find more free space and avoid a split.
293
+ * (We should not move right indefinitely, however, since that leads to
294
+ * O(N^2) insertion behavior in the presence of many equal keys.)
295
+ * Once we have chosen the page to put the key on, we'll insert it before
296
+ * any existing equal keys because of the way _bt_binsrch() works.
295
297
*
296
298
* The locking interactions in this code are critical. You should
297
299
* grok Lehman and Yao's paper before making any changes. In addition,
@@ -351,15 +353,30 @@ _bt_insertonpg(Relation rel,
351
353
}
352
354
else
353
355
{
354
- /*
356
+ /*----------
355
357
* If we will need to split the page to put the item here,
356
358
* check whether we can put the tuple somewhere to the right,
357
- * instead. Keep scanning until we find enough free space or
358
- * reach the last page where the tuple can legally go.
359
+ * instead. Keep scanning right until we
360
+ * (a) find a page with enough free space,
361
+ * (b) reach the last page where the tuple can legally go, or
362
+ * (c) get tired of searching.
363
+ * (c) is not flippant; it is important because if there are many
364
+ * pages' worth of equal keys, it's better to split one of the early
365
+ * pages than to scan all the way to the end of the run of equal keys
366
+ * on every insert. We implement "get tired" as a random choice,
367
+ * since stopping after scanning a fixed number of pages wouldn't work
368
+ * well (we'd never reach the right-hand side of previously split
369
+ * pages). Currently the probability of moving right is set at 0.99,
370
+ * which may seem too high to change the behavior much, but it does an
371
+ * excellent job of preventing O(N^2) behavior with many equal keys.
372
+ *----------
359
373
*/
374
+ bool movedright = false;
375
+
360
376
while (PageGetFreeSpace (page ) < itemsz &&
361
377
!P_RIGHTMOST (lpageop ) &&
362
- _bt_compare (rel , keysz , scankey , page , P_HIKEY ) == 0 )
378
+ _bt_compare (rel , keysz , scankey , page , P_HIKEY ) == 0 &&
379
+ random () > (MAX_RANDOM_VALUE / 100 ))
363
380
{
364
381
/* step right one page */
365
382
BlockNumber rblkno = lpageop -> btpo_next ;
@@ -368,11 +385,17 @@ _bt_insertonpg(Relation rel,
368
385
buf = _bt_getbuf (rel , rblkno , BT_WRITE );
369
386
page = BufferGetPage (buf );
370
387
lpageop = (BTPageOpaque ) PageGetSpecialPointer (page );
388
+ movedright = true;
371
389
}
372
390
/*
373
- * This is it, so find the position...
391
+ * Now we are on the right page, so find the insert position.
392
+ * If we moved right at all, we know we should insert at the
393
+ * start of the page, else must find the position by searching.
374
394
*/
375
- newitemoff = _bt_binsrch (rel , buf , keysz , scankey );
395
+ if (movedright )
396
+ newitemoff = P_FIRSTDATAKEY (lpageop );
397
+ else
398
+ newitemoff = _bt_binsrch (rel , buf , keysz , scankey );
376
399
}
377
400
378
401
/*
0 commit comments