@@ -268,65 +268,20 @@ bool
 hashgettuple(IndexScanDesc scan, ScanDirection dir)
 {
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
-    Relation    rel = scan->indexRelation;
-    Buffer      buf;
-    Page        page;
-    OffsetNumber offnum;
-    ItemPointer current;
     bool        res;
 
     /* Hash indexes are always lossy since we store only the hash code */
     scan->xs_recheck = true;
 
-    /*
-     * We hold pin but not lock on current buffer while outside the hash AM.
-     * Reacquire the read lock here.
-     */
-    if (BufferIsValid(so->hashso_curbuf))
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-
     /*
      * If we've already initialized this scan, we can just advance it in the
      * appropriate direction. If we haven't done so yet, we call a routine to
      * get the first item in the scan.
      */
-    current = &(so->hashso_curpos);
-    if (ItemPointerIsValid(current))
+    if (!HashScanPosIsValid(so->currPos))
+        res = _hash_first(scan, dir);
+    else
     {
-        /*
-         * An insertion into the current index page could have happened while
-         * we didn't have read lock on it. Re-find our position by looking
-         * for the TID we previously returned. (Because we hold a pin on the
-         * primary bucket page, no deletions or splits could have occurred;
-         * therefore we can expect that the TID still exists in the current
-         * index page, at an offset >= where we were.)
-         */
-        OffsetNumber maxoffnum;
-
-        buf = so->hashso_curbuf;
-        Assert(BufferIsValid(buf));
-        page = BufferGetPage(buf);
-
-        /*
-         * We don't need test for old snapshot here as the current buffer is
-         * pinned, so vacuum can't clean the page.
-         */
-        maxoffnum = PageGetMaxOffsetNumber(page);
-        for (offnum = ItemPointerGetOffsetNumber(current);
-             offnum <= maxoffnum;
-             offnum = OffsetNumberNext(offnum))
-        {
-            IndexTuple  itup;
-
-            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
-            if (ItemPointerEquals(&(so->hashso_heappos), &(itup->t_tid)))
-                break;
-        }
-        if (offnum > maxoffnum)
-            elog(ERROR, "failed to re-find scan position within index \"%s\"",
-                 RelationGetRelationName(rel));
-        ItemPointerSetOffsetNumber(current, offnum);
-
         /*
          * Check to see if we should kill the previously-fetched tuple.
          */
@@ -341,47 +296,18 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
              * entries.
              */
             if (so->killedItems == NULL)
-                so->killedItems = palloc(MaxIndexTuplesPerPage *
-                                         sizeof(HashScanPosItem));
+                so->killedItems = (int *)
+                    palloc(MaxIndexTuplesPerPage * sizeof(int));
 
             if (so->numKilled < MaxIndexTuplesPerPage)
-            {
-                so->killedItems[so->numKilled].heapTid = so->hashso_heappos;
-                so->killedItems[so->numKilled].indexOffset =
-                    ItemPointerGetOffsetNumber(&(so->hashso_curpos));
-                so->numKilled++;
-            }
+                so->killedItems[so->numKilled++] = so->currPos.itemIndex;
         }
 
         /*
          * Now continue the scan.
         */
        res = _hash_next(scan, dir);
     }
-    else
-        res = _hash_first(scan, dir);
-
-    /*
-     * Skip killed tuples if asked to.
-     */
-    if (scan->ignore_killed_tuples)
-    {
-        while (res)
-        {
-            offnum = ItemPointerGetOffsetNumber(current);
-            page = BufferGetPage(so->hashso_curbuf);
-            if (!ItemIdIsDead(PageGetItemId(page, offnum)))
-                break;
-            res = _hash_next(scan, dir);
-        }
-    }
-
-    /* Release read lock on current buffer, but keep it pinned */
-    if (BufferIsValid(so->hashso_curbuf))
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
-
-    /* Return current heap TID on success */
-    scan->xs_ctup.t_self = so->hashso_heappos;
 
     return res;
 }
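For reference, the rewritten hashgettuple above relies on scan-position state added elsewhere in this patch (HashScanPosIsValid, HashScanPosInvalidate, currPos.itemIndex, currPos.items[], and killedItems as an array of item indexes). A rough sketch of the shape that usage implies, with the layout approximated from the code here rather than copied from the patched hash.h, is:

    /* Approximate sketch only; see the patched src/include/access/hash.h. */
    typedef struct HashScanPosItem
    {
        ItemPointerData heapTid;        /* TID of referenced heap item */
        OffsetNumber    indexOffset;    /* index item's location within page */
    } HashScanPosItem;

    typedef struct HashScanPosData
    {
        Buffer          buf;            /* if valid, the pinned buffer */
        BlockNumber     currPage;       /* current hash index page */
        int             firstItem;      /* first valid index in items[] */
        int             lastItem;       /* last valid index in items[] */
        int             itemIndex;      /* current index in items[] */
        HashScanPosItem items[MaxIndexTuplesPerPage];   /* MUST BE LAST */
    } HashScanPosData;

With a whole page's worth of matches batched into items[], hashgettuple no longer has to reacquire the buffer lock and re-find its previous TID on every call, and recording a killed tuple reduces to remembering its itemIndex.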
@@ -396,35 +322,21 @@ hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     bool        res;
     int64       ntids = 0;
+    HashScanPosItem *currItem;
 
     res = _hash_first(scan, ForwardScanDirection);
 
     while (res)
     {
-        bool        add_tuple;
+        currItem = &so->currPos.items[so->currPos.itemIndex];
 
         /*
-         * Skip killed tuples if asked to.
+         * _hash_first and _hash_next handle eliminating dead index entries
+         * whenever scan->ignore_killed_tuples is true. Therefore, there's
+         * nothing to do here except add the results to the TIDBitmap.
         */
-        if (scan->ignore_killed_tuples)
-        {
-            Page        page;
-            OffsetNumber offnum;
-
-            offnum = ItemPointerGetOffsetNumber(&(so->hashso_curpos));
-            page = BufferGetPage(so->hashso_curbuf);
-            add_tuple = !ItemIdIsDead(PageGetItemId(page, offnum));
-        }
-        else
-            add_tuple = true;
-
-        /* Save tuple ID, and continue scanning */
-        if (add_tuple)
-        {
-            /* Note we mark the tuple ID as requiring recheck */
-            tbm_add_tuples(tbm, &(so->hashso_heappos), 1, true);
-            ntids++;
-        }
+        tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
+        ntids++;
 
         res = _hash_next(scan, ForwardScanDirection);
     }
@@ -448,12 +360,9 @@ hashbeginscan(Relation rel, int nkeys, int norderbys)
     scan = RelationGetIndexScan(rel, nkeys, norderbys);
 
     so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
-    so->hashso_curbuf = InvalidBuffer;
+    HashScanPosInvalidate(so->currPos);
     so->hashso_bucket_buf = InvalidBuffer;
     so->hashso_split_bucket_buf = InvalidBuffer;
-    /* set position invalid (this will cause _hash_first call) */
-    ItemPointerSetInvalid(&(so->hashso_curpos));
-    ItemPointerSetInvalid(&(so->hashso_heappos));
 
     so->hashso_buc_populated = false;
     so->hashso_buc_split = false;
@@ -476,22 +385,17 @@ hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     Relation    rel = scan->indexRelation;
 
-    /*
-     * Before leaving current page, deal with any killed items. Also, ensure
-     * that we acquire lock on current page before calling _hash_kill_items.
-     */
-    if (so->numKilled > 0)
+    if (HashScanPosIsValid(so->currPos))
     {
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-        _hash_kill_items(scan);
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
+        /* Before leaving current page, deal with any killed items */
+        if (so->numKilled > 0)
+            _hash_kill_items(scan);
     }
 
     _hash_dropscanbuf(rel, so);
 
     /* set position invalid (this will cause _hash_first call) */
-    ItemPointerSetInvalid(&(so->hashso_curpos));
-    ItemPointerSetInvalid(&(so->hashso_heappos));
+    HashScanPosInvalidate(so->currPos);
 
     /* Update scan key, if a new one is given */
     if (scankey && scan->numberOfKeys > 0)
@@ -514,15 +418,11 @@ hashendscan(IndexScanDesc scan)
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     Relation    rel = scan->indexRelation;
 
-    /*
-     * Before leaving current page, deal with any killed items. Also, ensure
-     * that we acquire lock on current page before calling _hash_kill_items.
-     */
-    if (so->numKilled > 0)
+    if (HashScanPosIsValid(so->currPos))
     {
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-        _hash_kill_items(scan);
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
+        /* Before leaving current page, deal with any killed items */
+        if (so->numKilled > 0)
+            _hash_kill_items(scan);
     }
 
     _hash_dropscanbuf(rel, so);
@@ -755,16 +655,15 @@ hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
  * primary bucket page.  The lock won't necessarily be held continuously,
  * though, because we'll release it when visiting overflow pages.
  *
- * It would be very bad if this function cleaned a page while some other
- * backend was in the midst of scanning it, because hashgettuple assumes
- * that the next valid TID will be greater than or equal to the current
- * valid TID.  There can't be any concurrent scans in progress when we first
- * enter this function because of the cleanup lock we hold on the primary
- * bucket page, but as soon as we release that lock, there might be.  We
- * handle that by conspiring to prevent those scans from passing our cleanup
- * scan.  To do that, we lock the next page in the bucket chain before
- * releasing the lock on the previous page.  (This type of lock chaining is
- * not ideal, so we might want to look for a better solution at some point.)
+ * There can't be any concurrent scans in progress when we first enter this
+ * function because of the cleanup lock we hold on the primary bucket page,
+ * but as soon as we release that lock, there might be.  If those scans got
+ * ahead of our cleanup scan, they might see a tuple before we kill it and
+ * wake up only after VACUUM has completed and the TID has been recycled for
+ * an unrelated tuple.  To avoid that calamity, we prevent scans from passing
+ * our cleanup scan by locking the next page in the bucket chain before
+ * releasing the lock on the previous page.  (This type of lock chaining is not
+ * ideal, so we might want to look for a better solution at some point.)
  *
  * We need to retain a pin on the primary bucket to ensure that no concurrent
  * split can start.
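The lock-chaining rule described above (take the lock on the next page in the bucket chain before giving up the lock on the previous one) can be sketched as follows. This is an illustrative outline only, not the actual hashbucketcleanup code, and it assumes the caller already holds a lock and pin on the primary bucket page:

    #include "postgres.h"
    #include "access/hash.h"
    #include "storage/bufmgr.h"

    /* Illustrative sketch of lock chaining over a hash bucket's page chain. */
    static void
    bucket_chain_cleanup_sketch(Relation rel, Buffer bucket_buf)
    {
        Buffer      buf = bucket_buf;

        for (;;)
        {
            Page        page = BufferGetPage(buf);
            HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
            BlockNumber next_blkno = opaque->hasho_nextblkno;
            Buffer      next_buf;

            /* ... remove dead tuples from "page" here ... */

            if (!BlockNumberIsValid(next_blkno))
                break;

            /* Lock the next page in the chain BEFORE letting go of this one. */
            next_buf = _hash_getbuf(rel, next_blkno, HASH_WRITE, LH_OVERFLOW_PAGE);

            if (buf == bucket_buf)
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);    /* keep the primary bucket pinned */
            else
                _hash_relbuf(rel, buf);                 /* drop lock and pin together */

            buf = next_buf;
        }

        if (buf != bucket_buf)
            _hash_relbuf(rel, buf);
    }

With this ordering, a scan positioned behind the cleanup scan must wait for the cleanup scan to release each page before it can follow, so it can never get ahead of it.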