@@ -46,8 +46,6 @@ typedef struct
 	IndexBulkDeleteCallback callback;
 	void	   *callback_state;
 	BTCycleId	cycleid;
-	BlockNumber lastBlockVacuumed;	/* highest blkno actually vacuumed */
-	BlockNumber lastBlockLocked;	/* highest blkno we've cleanup-locked */
 	BlockNumber totFreePages;	/* true total # of free pages */
 	TransactionId oldestBtpoXact;
 	MemoryContext pagedelcontext;
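
For orientation, this is roughly how the trimmed BTVacState reads after this hunk. It is only a sketch: the field names and types are taken from the context lines shown above, while any members declared before callback (not visible in this hunk) are elided.

	typedef struct
	{
		/* ... members above this hunk's context elided (sketch only) ... */
		IndexBulkDeleteCallback callback;	/* reports heap TIDs removed by VACUUM */
		void	   *callback_state;
		BTCycleId	cycleid;
		/* lastBlockVacuumed and lastBlockLocked are gone after this commit */
		BlockNumber totFreePages;	/* true total # of free pages */
		TransactionId oldestBtpoXact;
		MemoryContext pagedelcontext;
	} BTVacState;
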
@@ -978,8 +976,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 	vstate.callback = callback;
 	vstate.callback_state = callback_state;
 	vstate.cycleid = cycleid;
-	vstate.lastBlockVacuumed = BTREE_METAPAGE;	/* Initialise at first block */
-	vstate.lastBlockLocked = BTREE_METAPAGE;
 	vstate.totFreePages = 0;
 	vstate.oldestBtpoXact = InvalidTransactionId;
 
@@ -1040,39 +1036,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 		}
 	}
 
-	/*
-	 * Check to see if we need to issue one final WAL record for this index,
-	 * which may be needed for correctness on a hot standby node when non-MVCC
-	 * index scans could take place.
-	 *
-	 * If the WAL is replayed in hot standby, the replay process needs to get
-	 * cleanup locks on all index leaf pages, just as we've been doing here.
-	 * However, we won't issue any WAL records about pages that have no items
-	 * to be deleted.  For pages between pages we've vacuumed, the replay code
-	 * will take locks under the direction of the lastBlockVacuumed fields in
-	 * the XLOG_BTREE_VACUUM WAL records.  To cover pages after the last one
-	 * we vacuum, we need to issue a dummy XLOG_BTREE_VACUUM WAL record
-	 * against the last leaf page in the index, if that one wasn't vacuumed.
-	 */
-	if (XLogStandbyInfoActive() &&
-		vstate.lastBlockVacuumed < vstate.lastBlockLocked)
-	{
-		Buffer		buf;
-
-		/*
-		 * The page should be valid, but we can't use _bt_getbuf() because we
-		 * want to use a nondefault buffer access strategy.  Since we aren't
-		 * going to delete any items, getting cleanup lock again is probably
-		 * overkill, but for consistency do that anyway.
-		 */
-		buf = ReadBufferExtended(rel, MAIN_FORKNUM, vstate.lastBlockLocked,
-								 RBM_NORMAL, info->strategy);
-		LockBufferForCleanup(buf);
-		_bt_checkpage(rel, buf);
-		_bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
-		_bt_relbuf(rel, buf);
-	}
-
 	MemoryContextDelete(vstate.pagedelcontext);
 
 	/*
@@ -1203,13 +1166,6 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 		LockBufferForCleanup(buf);
 
-		/*
-		 * Remember highest leaf page number we've taken cleanup lock on; see
-		 * notes in btvacuumscan
-		 */
-		if (blkno > vstate->lastBlockLocked)
-			vstate->lastBlockLocked = blkno;
-
 		/*
 		 * Check whether we need to recurse back to earlier pages.  What we
 		 * are concerned about is a page split that happened since we started
@@ -1225,8 +1181,10 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 			recurse_to = opaque->btpo_next;
 
 		/*
-		 * Scan over all items to see which ones need deleted according to the
-		 * callback function.
+		 * When each VACUUM begins, it determines an OldestXmin cutoff value.
+		 * Tuples before the cutoff are removed by VACUUM.  Scan over all
+		 * items to see which ones need to be deleted according to the
+		 * cutoff point, using the callback.
 		 */
 		ndeletable = 0;
 		minoff = P_FIRSTDATAKEY(opaque);
@@ -1245,25 +1203,24 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 				htup = &(itup->t_tid);
 
 				/*
-				 * During Hot Standby we currently assume that
-				 * XLOG_BTREE_VACUUM records do not produce conflicts. That is
-				 * only true as long as the callback function depends only
-				 * upon whether the index tuple refers to heap tuples removed
-				 * in the initial heap scan. When vacuum starts it derives a
-				 * value of OldestXmin. Backends taking later snapshots could
-				 * have a RecentGlobalXmin with a later xid than the vacuum's
-				 * OldestXmin, so it is possible that row versions deleted
-				 * after OldestXmin could be marked as killed by other
-				 * backends. The callback function *could* look at the index
-				 * tuple state in isolation and decide to delete the index
-				 * tuple, though currently it does not. If it ever did, we
-				 * would need to reconsider whether XLOG_BTREE_VACUUM records
-				 * should cause conflicts. If they did cause conflicts they
-				 * would be fairly harsh conflicts, since we haven't yet
-				 * worked out a way to pass a useful value for
-				 * latestRemovedXid on the XLOG_BTREE_VACUUM records. This
-				 * applies to *any* type of index that marks index tuples as
-				 * killed.
+				 * Hot Standby assumes that it's okay that XLOG_BTREE_VACUUM
+				 * records do not produce their own conflicts.  This is safe
+				 * as long as the callback function only considers whether the
+				 * index tuple refers to pre-cutoff heap tuples that were
+				 * certainly already pruned away during VACUUM's initial heap
+				 * scan by the time we get here.  (We can rely on conflicts
+				 * produced by heap pruning, rather than producing our own
+				 * now.)
+				 *
+				 * Backends with snapshots acquired after a VACUUM starts but
+				 * before it finishes could have a RecentGlobalXmin with a
+				 * later xid than the VACUUM's OldestXmin cutoff.  These
+				 * backends might happen to opportunistically mark some index
+				 * tuples LP_DEAD before we reach them, even though they may
+				 * be after our cutoff.  We don't try to kill these "extra"
+				 * index tuples in _bt_delitems_vacuum().  This keeps things
+				 * simple, and allows us to always avoid generating our own
+				 * conflicts.
 				 */
 				if (callback(htup, callback_state))
 					deletable[ndeletable++] = offnum;
@@ -1276,29 +1233,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 		 */
 		if (ndeletable > 0)
 		{
-			/*
-			 * Notice that the issued XLOG_BTREE_VACUUM WAL record includes
-			 * all information to the replay code to allow it to get a cleanup
-			 * lock on all pages between the previous lastBlockVacuumed and
-			 * this page. This ensures that WAL replay locks all leaf pages at
-			 * some point, which is important should non-MVCC scans be
-			 * requested. This is currently unused on standby, but we record
-			 * it anyway, so that the WAL contains the required information.
-			 *
-			 * Since we can visit leaf pages out-of-order when recursing,
-			 * replay might end up locking such pages an extra time, but it
-			 * doesn't seem worth the amount of bookkeeping it'd take to avoid
-			 * that.
-			 */
-			_bt_delitems_vacuum(rel, buf, deletable, ndeletable,
-								vstate->lastBlockVacuumed);
-
-			/*
-			 * Remember highest leaf page number we've issued a
-			 * XLOG_BTREE_VACUUM WAL record for.
-			 */
-			if (blkno > vstate->lastBlockVacuumed)
-				vstate->lastBlockVacuumed = blkno;
+			_bt_delitems_vacuum(rel, buf, deletable, ndeletable);
 
 			stats->tuples_removed += ndeletable;
 			/* must recompute maxoff */
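
Taken together, these hunks leave the leaf-page path in btvacuumpage() with a noticeably simpler shape: take a cleanup lock, let the callback decide which index items point to heap tuples the current VACUUM already pruned, and make a single _bt_delitems_vacuum() call with no lastBlockVacuumed or lastBlockLocked bookkeeping. The rough sketch below paraphrases that flow from the context lines in this diff; it is not compilable on its own, and the loop setup (page, maxoff, offnum, itup) is assumed from the surrounding nbtree code rather than shown here.

	/* Sketch only: simplified leaf-page handling after this change */
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);	/* trade the read lock ... */
	LockBufferForCleanup(buf);				/* ... for a cleanup lock */

	ndeletable = 0;
	minoff = P_FIRSTDATAKEY(opaque);
	for (offnum = minoff; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(page,
													PageGetItemId(page, offnum));

		/*
		 * The callback reports whether the pointed-to heap tuple was already
		 * removed by this VACUUM's heap pass (i.e. precedes the OldestXmin
		 * cutoff).
		 */
		if (callback(&(itup->t_tid), callback_state))
			deletable[ndeletable++] = offnum;
	}

	if (ndeletable > 0)
	{
		/*
		 * One WAL-logged call per leaf page; no dummy XLOG_BTREE_VACUUM
		 * record or lastBlockVacuumed tracking is needed anymore.
		 */
		_bt_delitems_vacuum(rel, buf, deletable, ndeletable);
		stats->tuples_removed += ndeletable;
	}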