Skip to content

Commit c3b23ae

Browse files
committed
Don't to predicate lock for analyze scans, refactor scan option passing.
Before this commit, when ANALYZE was run on a table and serializable was used (either by virtue of an explicit BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE, or default_transaction_isolation being set to serializable) a null pointer dereference lead to a crash. The analyze scan doesn't need a snapshot (nor predicate locking), but before this commit a scan only contained information about being a bitmap or sample scan. Refactor the option passing to the scan_begin callback to use a bitmask instead. Alternatively we could have added a new boolean parameter, but that seems harder to read. Even before this issue various people (Heikki, Tom, Robert) suggested doing so. These changes don't change the scan APIs outside of tableam. The flags argument could be exposed, it's not necessary to fix this problem. Also the wrapper table_beginscan* functions encapsulate most of that complexity. After these changes fixing the bug is trivial, just don't acquire predicate lock for analyze style scans. That was already done for bitmap heap scans. Add an assert that a snapshot is passed when acquiring the predicate lock, so this kind of bug doesn't require running with serializable. Also add a comment about sample scans currently requiring predicate locking the entire relation, that previously wasn't remarked upon. Reported-By: Joe Wildish Author: Andres Freund Discussion: https://postgr.es/m/4EA80A20-E9BF-49F1-9F01-5B66CAB21453@elusive.cx https://postgr.es/m/20190411164947.nkii4gaeilt4bui7@alap3.anarazel.de https://postgr.es/m/20190518203102.g7peu2fianukjuxm@alap3.anarazel.de
1 parent bd1592e commit c3b23ae

File tree

8 files changed

+160
-102
lines changed

8 files changed

+160
-102
lines changed

src/backend/access/heap/heapam.c

Lines changed: 69 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,8 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
245245
if (!RelationUsesLocalBuffers(scan->rs_base.rs_rd) &&
246246
scan->rs_nblocks > NBuffers / 4)
247247
{
248-
allow_strat = scan->rs_base.rs_allow_strat;
249-
allow_sync = scan->rs_base.rs_allow_sync;
248+
allow_strat = (scan->rs_base.rs_flags & SO_ALLOW_STRAT) != 0;
249+
allow_sync = (scan->rs_base.rs_flags & SO_ALLOW_SYNC) != 0;
250250
}
251251
else
252252
allow_strat = allow_sync = false;
@@ -267,7 +267,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
267267
if (scan->rs_base.rs_parallel != NULL)
268268
{
269269
/* For parallel scan, believe whatever ParallelTableScanDesc says. */
270-
scan->rs_base.rs_syncscan = scan->rs_base.rs_parallel->phs_syncscan;
270+
if (scan->rs_base.rs_parallel->phs_syncscan)
271+
scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
272+
else
273+
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
271274
}
272275
else if (keep_startblock)
273276
{
@@ -276,16 +279,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
276279
* so that rewinding a cursor doesn't generate surprising results.
277280
* Reset the active syncscan setting, though.
278281
*/
279-
scan->rs_base.rs_syncscan = (allow_sync && synchronize_seqscans);
282+
if (allow_sync && synchronize_seqscans)
283+
scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
284+
else
285+
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
280286
}
281287
else if (allow_sync && synchronize_seqscans)
282288
{
283-
scan->rs_base.rs_syncscan = true;
289+
scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
284290
scan->rs_startblock = ss_get_location(scan->rs_base.rs_rd, scan->rs_nblocks);
285291
}
286292
else
287293
{
288-
scan->rs_base.rs_syncscan = false;
294+
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
289295
scan->rs_startblock = 0;
290296
}
291297

@@ -305,11 +311,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
305311
memcpy(scan->rs_base.rs_key, key, scan->rs_base.rs_nkeys * sizeof(ScanKeyData));
306312

307313
/*
308-
* Currently, we don't have a stats counter for bitmap heap scans (but the
309-
* underlying bitmap index scans will be counted) or sample scans (we only
310-
* update stats for tuple fetches there)
314+
* Currently, we only have a stats counter for sequential heap scans (but
315+
* e.g for bitmap scans the underlying bitmap index scans will be counted,
316+
* and for sample scans we update stats for tuple fetches).
311317
*/
312-
if (!scan->rs_base.rs_bitmapscan && !scan->rs_base.rs_samplescan)
318+
if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN)
313319
pgstat_count_heap_scan(scan->rs_base.rs_rd);
314320
}
315321

@@ -325,7 +331,8 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk
325331
HeapScanDesc scan = (HeapScanDesc) sscan;
326332

327333
Assert(!scan->rs_inited); /* else too late to change */
328-
Assert(!scan->rs_base.rs_syncscan); /* else rs_startblock is significant */
334+
/* else rs_startblock is significant */
335+
Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
329336

330337
/* Check startBlk is valid (but allow case of zero blocks...) */
331338
Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
@@ -375,7 +382,7 @@ heapgetpage(TableScanDesc sscan, BlockNumber page)
375382
RBM_NORMAL, scan->rs_strategy);
376383
scan->rs_cblock = page;
377384

378-
if (!scan->rs_base.rs_pageatatime)
385+
if (!(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE))
379386
return;
380387

381388
buffer = scan->rs_cbuf;
@@ -574,7 +581,7 @@ heapgettup(HeapScanDesc scan,
574581
* time, and much more likely that we'll just bollix things for
575582
* forward scanners.
576583
*/
577-
scan->rs_base.rs_syncscan = false;
584+
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
578585
/* start from last page of the scan */
579586
if (scan->rs_startblock > 0)
580587
page = scan->rs_startblock - 1;
@@ -738,7 +745,7 @@ heapgettup(HeapScanDesc scan,
738745
* a little bit backwards on every invocation, which is confusing.
739746
* We don't guarantee any specific ordering in general, though.
740747
*/
741-
if (scan->rs_base.rs_syncscan)
748+
if (scan->rs_base.rs_flags & SO_ALLOW_SYNC)
742749
ss_report_location(scan->rs_base.rs_rd, page);
743750
}
744751

@@ -885,7 +892,7 @@ heapgettup_pagemode(HeapScanDesc scan,
885892
* time, and much more likely that we'll just bollix things for
886893
* forward scanners.
887894
*/
888-
scan->rs_base.rs_syncscan = false;
895+
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
889896
/* start from last page of the scan */
890897
if (scan->rs_startblock > 0)
891898
page = scan->rs_startblock - 1;
@@ -1037,7 +1044,7 @@ heapgettup_pagemode(HeapScanDesc scan,
10371044
* a little bit backwards on every invocation, which is confusing.
10381045
* We don't guarantee any specific ordering in general, though.
10391046
*/
1040-
if (scan->rs_base.rs_syncscan)
1047+
if (scan->rs_base.rs_flags & SO_ALLOW_SYNC)
10411048
ss_report_location(scan->rs_base.rs_rd, page);
10421049
}
10431050

@@ -1125,12 +1132,7 @@ TableScanDesc
11251132
heap_beginscan(Relation relation, Snapshot snapshot,
11261133
int nkeys, ScanKey key,
11271134
ParallelTableScanDesc parallel_scan,
1128-
bool allow_strat,
1129-
bool allow_sync,
1130-
bool allow_pagemode,
1131-
bool is_bitmapscan,
1132-
bool is_samplescan,
1133-
bool temp_snap)
1135+
uint32 flags)
11341136
{
11351137
HeapScanDesc scan;
11361138

@@ -1151,33 +1153,39 @@ heap_beginscan(Relation relation, Snapshot snapshot,
11511153
scan->rs_base.rs_rd = relation;
11521154
scan->rs_base.rs_snapshot = snapshot;
11531155
scan->rs_base.rs_nkeys = nkeys;
1154-
scan->rs_base.rs_bitmapscan = is_bitmapscan;
1155-
scan->rs_base.rs_samplescan = is_samplescan;
1156-
scan->rs_strategy = NULL; /* set in initscan */
1157-
scan->rs_base.rs_allow_strat = allow_strat;
1158-
scan->rs_base.rs_allow_sync = allow_sync;
1159-
scan->rs_base.rs_temp_snap = temp_snap;
1156+
scan->rs_base.rs_flags = flags;
11601157
scan->rs_base.rs_parallel = parallel_scan;
1158+
scan->rs_strategy = NULL; /* set in initscan */
11611159

11621160
/*
1163-
* we can use page-at-a-time mode if it's an MVCC-safe snapshot
1161+
* Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
11641162
*/
1165-
scan->rs_base.rs_pageatatime =
1166-
allow_pagemode && snapshot && IsMVCCSnapshot(snapshot);
1163+
if (!(snapshot && IsMVCCSnapshot(snapshot)))
1164+
scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
11671165

11681166
/*
1169-
* For a seqscan in a serializable transaction, acquire a predicate lock
1170-
* on the entire relation. This is required not only to lock all the
1171-
* matching tuples, but also to conflict with new insertions into the
1172-
* table. In an indexscan, we take page locks on the index pages covering
1173-
* the range specified in the scan qual, but in a heap scan there is
1174-
* nothing more fine-grained to lock. A bitmap scan is a different story,
1175-
* there we have already scanned the index and locked the index pages
1176-
* covering the predicate. But in that case we still have to lock any
1177-
* matching heap tuples.
1167+
* For seqscan and sample scans in a serializable transaction, acquire a
1168+
* predicate lock on the entire relation. This is required not only to
1169+
* lock all the matching tuples, but also to conflict with new insertions
1170+
* into the table. In an indexscan, we take page locks on the index pages
1171+
* covering the range specified in the scan qual, but in a heap scan there
1172+
* is nothing more fine-grained to lock. A bitmap scan is a different
1173+
* story, there we have already scanned the index and locked the index
1174+
* pages covering the predicate. But in that case we still have to lock
1175+
* any matching heap tuples. For sample scan we could optimize the locking
1176+
* to be at least page-level granularity, but we'd need to add per-tuple
1177+
* locking for that.
11781178
*/
1179-
if (!is_bitmapscan)
1179+
if (scan->rs_base.rs_flags & (SO_TYPE_SEQSCAN | SO_TYPE_SAMPLESCAN))
1180+
{
1181+
/*
1182+
* Ensure a missing snapshot is noticed reliably, even if the
1183+
* isolation mode means predicate locking isn't performed (and
1184+
* therefore the snapshot isn't used here).
1185+
*/
1186+
Assert(snapshot);
11801187
PredicateLockRelation(relation, snapshot);
1188+
}
11811189

11821190
/* we only need to set this up once */
11831191
scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
@@ -1204,10 +1212,21 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
12041212

12051213
if (set_params)
12061214
{
1207-
scan->rs_base.rs_allow_strat = allow_strat;
1208-
scan->rs_base.rs_allow_sync = allow_sync;
1209-
scan->rs_base.rs_pageatatime =
1210-
allow_pagemode && IsMVCCSnapshot(scan->rs_base.rs_snapshot);
1215+
if (allow_strat)
1216+
scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1217+
else
1218+
scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1219+
1220+
if (allow_sync)
1221+
scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
1222+
else
1223+
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1224+
1225+
if (allow_pagemode && scan->rs_base.rs_snapshot &&
1226+
IsMVCCSnapshot(scan->rs_base.rs_snapshot))
1227+
scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
1228+
else
1229+
scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
12111230
}
12121231

12131232
/*
@@ -1246,7 +1265,7 @@ heap_endscan(TableScanDesc sscan)
12461265
if (scan->rs_strategy != NULL)
12471266
FreeAccessStrategy(scan->rs_strategy);
12481267

1249-
if (scan->rs_base.rs_temp_snap)
1268+
if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
12501269
UnregisterSnapshot(scan->rs_base.rs_snapshot);
12511270

12521271
pfree(scan);
@@ -1288,7 +1307,7 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction)
12881307

12891308
HEAPDEBUG_1; /* heap_getnext( info ) */
12901309

1291-
if (scan->rs_base.rs_pageatatime)
1310+
if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
12921311
heapgettup_pagemode(scan, direction,
12931312
scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
12941313
else
@@ -1335,11 +1354,10 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s
13351354

13361355
HEAPAMSLOTDEBUG_1; /* heap_getnextslot( info ) */
13371356

1338-
if (scan->rs_base.rs_pageatatime)
1339-
heapgettup_pagemode(scan, direction,
1340-
scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1357+
if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1358+
heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
13411359
else
1342-
heapgettup(scan, direction, scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1360+
heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
13431361

13441362
if (scan->rs_ctup.t_data == NULL)
13451363
{

src/backend/access/heap/heapam_handler.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2323,7 +2323,7 @@ heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
23232323
* a little bit backwards on every invocation, which is confusing.
23242324
* We don't guarantee any specific ordering in general, though.
23252325
*/
2326-
if (scan->rs_syncscan)
2326+
if (scan->rs_flags & SO_ALLOW_SYNC)
23272327
ss_report_location(scan->rs_rd, blockno);
23282328

23292329
if (blockno == hscan->rs_startblock)
@@ -2357,7 +2357,7 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
23572357
HeapScanDesc hscan = (HeapScanDesc) scan;
23582358
TsmRoutine *tsm = scanstate->tsmroutine;
23592359
BlockNumber blockno = hscan->rs_cblock;
2360-
bool pagemode = scan->rs_pageatatime;
2360+
bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
23612361

23622362
Page page;
23632363
bool all_visible;
@@ -2504,7 +2504,7 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
25042504
{
25052505
HeapScanDesc hscan = (HeapScanDesc) scan;
25062506

2507-
if (scan->rs_pageatatime)
2507+
if (scan->rs_flags & SO_ALLOW_PAGEMODE)
25082508
{
25092509
/*
25102510
* In pageatatime mode, heapgetpage() already did visibility checks,

src/backend/access/table/tableam.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,13 @@ table_slot_create(Relation relation, List **reglist)
9393
TableScanDesc
9494
table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
9595
{
96+
uint32 flags = SO_TYPE_SEQSCAN |
97+
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE | SO_TEMP_SNAPSHOT;
9698
Oid relid = RelationGetRelid(relation);
9799
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
98100

99101
return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key,
100-
NULL, true, true, true, false,
101-
false, true);
102+
NULL, flags);
102103
}
103104

104105
void
@@ -108,7 +109,7 @@ table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
108109

109110
RegisterSnapshot(snapshot);
110111
scan->rs_snapshot = snapshot;
111-
scan->rs_temp_snap = true;
112+
scan->rs_flags |= SO_TEMP_SNAPSHOT;
112113
}
113114

114115

@@ -156,6 +157,8 @@ TableScanDesc
156157
table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
157158
{
158159
Snapshot snapshot;
160+
uint32 flags = SO_TYPE_SEQSCAN |
161+
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
159162

160163
Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
161164

@@ -165,6 +168,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
165168
snapshot = RestoreSnapshot((char *) parallel_scan +
166169
parallel_scan->phs_snapshot_off);
167170
RegisterSnapshot(snapshot);
171+
flags |= SO_TEMP_SNAPSHOT;
168172
}
169173
else
170174
{
@@ -173,9 +177,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
173177
}
174178

175179
return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
176-
parallel_scan, true, true, true,
177-
false, false,
178-
!parallel_scan->phs_snapshot_any);
180+
parallel_scan, flags);
179181
}
180182

181183

src/include/access/heapam.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,7 @@ typedef enum
110110
extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
111111
int nkeys, ScanKey key,
112112
ParallelTableScanDesc parallel_scan,
113-
bool allow_strat,
114-
bool allow_sync,
115-
bool allow_pagemode,
116-
bool is_bitmapscan,
117-
bool is_samplescan,
118-
bool temp_snap);
113+
uint32 flags);
119114
extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk,
120115
BlockNumber endBlk);
121116
extern void heapgetpage(TableScanDesc scan, BlockNumber page);

src/include/access/relscan.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,12 @@ typedef struct TableScanDescData
3535
struct SnapshotData *rs_snapshot; /* snapshot to see */
3636
int rs_nkeys; /* number of scan keys */
3737
struct ScanKeyData *rs_key; /* array of scan key descriptors */
38-
bool rs_bitmapscan; /* true if this is really a bitmap scan */
39-
bool rs_samplescan; /* true if this is really a sample scan */
40-
bool rs_pageatatime; /* verify visibility page-at-a-time? */
41-
bool rs_allow_strat; /* allow or disallow use of access strategy */
42-
bool rs_allow_sync; /* allow or disallow use of syncscan */
43-
bool rs_temp_snap; /* unregister snapshot at scan end? */
44-
bool rs_syncscan; /* report location to syncscan logic? */
38+
39+
/*
40+
* Information about type and behaviour of the scan, a bitmask of members
41+
* of the ScanOptions enum (see tableam.h).
42+
*/
43+
uint32 rs_flags;
4544

4645
struct ParallelTableScanDescData *rs_parallel; /* parallel scan
4746
* information */

0 commit comments

Comments
 (0)