103
103
104
104
#include "access/brin.h"
105
105
#include "access/gin.h"
106
- #include "access/htup_details.h"
107
- #include "access/sysattr.h"
108
106
#include "access/table.h"
109
107
#include "access/tableam.h"
110
- #include "catalog/index.h"
108
+ #include "access/visibilitymap.h"
111
109
#include "catalog/pg_am.h"
112
110
#include "catalog/pg_collation.h"
113
111
#include "catalog/pg_operator.h"
114
112
#include "catalog/pg_statistic.h"
115
113
#include "catalog/pg_statistic_ext.h"
116
- #include "executor/executor.h"
117
114
#include "executor/nodeAgg.h"
118
115
#include "miscadmin.h"
119
116
#include "nodes/makefuncs.h"
127
124
#include "parser/parse_clause.h"
128
125
#include "parser/parsetree.h"
129
126
#include "statistics/statistics.h"
127
+ #include "storage/bufmgr.h"
130
128
#include "utils/builtins.h"
131
129
#include "utils/date.h"
132
130
#include "utils/datum.h"
133
131
#include "utils/fmgroids.h"
134
132
#include "utils/index_selfuncs.h"
135
133
#include "utils/lsyscache.h"
134
+ #include "utils/memutils.h"
136
135
#include "utils/pg_locale.h"
137
136
#include "utils/rel.h"
138
137
#include "utils/selfuncs.h"
@@ -198,6 +197,15 @@ static bool get_actual_variable_range(PlannerInfo *root,
198
197
VariableStatData * vardata ,
199
198
Oid sortop ,
200
199
Datum * min , Datum * max );
200
+ static bool get_actual_variable_endpoint (Relation heapRel ,
201
+ Relation indexRel ,
202
+ ScanDirection indexscandir ,
203
+ ScanKey scankeys ,
204
+ int16 typLen ,
205
+ bool typByVal ,
206
+ TupleTableSlot * tableslot ,
207
+ MemoryContext outercontext ,
208
+ Datum * endpointDatum );
201
209
static RelOptInfo * find_join_input_rel (PlannerInfo * root , Relids relids );
202
210
203
211
@@ -5180,30 +5188,23 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5180
5188
}
5181
5189
5182
5190
/*
5183
- * Found a suitable index to extract data from. We'll need an EState
5184
- * and a bunch of other infrastructure.
5191
+ * Found a suitable index to extract data from. Set up some data that
5192
+ * can be used by both invocations of get_actual_variable_endpoint.
5185
5193
*/
5186
5194
{
5187
- EState * estate ;
5188
- ExprContext * econtext ;
5189
5195
MemoryContext tmpcontext ;
5190
5196
MemoryContext oldcontext ;
5191
5197
Relation heapRel ;
5192
5198
Relation indexRel ;
5193
- IndexInfo * indexInfo ;
5194
5199
TupleTableSlot * slot ;
5195
5200
int16 typLen ;
5196
5201
bool typByVal ;
5197
5202
ScanKeyData scankeys [1 ];
5198
- IndexScanDesc index_scan ;
5199
- Datum values [INDEX_MAX_KEYS ];
5200
- bool isnull [INDEX_MAX_KEYS ];
5201
- SnapshotData SnapshotNonVacuumable ;
5202
-
5203
- estate = CreateExecutorState ();
5204
- econtext = GetPerTupleExprContext (estate );
5205
- /* Make sure any cruft is generated in the econtext's memory */
5206
- tmpcontext = econtext -> ecxt_per_tuple_memory ;
5203
+
5204
+ /* Make sure any cruft gets recycled when we're done */
5205
+ tmpcontext = AllocSetContextCreate (CurrentMemoryContext ,
5206
+ "get_actual_variable_range workspace" ,
5207
+ ALLOCSET_DEFAULT_SIZES );
5207
5208
oldcontext = MemoryContextSwitchTo (tmpcontext );
5208
5209
5209
5210
/*
@@ -5213,14 +5214,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5213
5214
heapRel = table_open (rte -> relid , NoLock );
5214
5215
indexRel = index_open (index -> indexoid , NoLock );
5215
5216
5216
- /* extract index key information from the index's pg_index info */
5217
- indexInfo = BuildIndexInfo (indexRel );
5218
-
5219
- /* some other stuff */
5217
+ /* build some stuff needed for indexscan execution */
5220
5218
slot = table_slot_create (heapRel , NULL );
5221
- econtext -> ecxt_scantuple = slot ;
5222
5219
get_typlenbyval (vardata -> atttype , & typLen , & typByVal );
5223
- InitNonVacuumableSnapshot (SnapshotNonVacuumable , RecentGlobalXmin );
5224
5220
5225
5221
/* set up an IS NOT NULL scan key so that we ignore nulls */
5226
5222
ScanKeyEntryInitialize (& scankeys [0 ],
@@ -5232,94 +5228,38 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5232
5228
InvalidOid , /* no reg proc for this */
5233
5229
(Datum ) 0 ); /* constant */
5234
5230
5235
- have_data = true;
5236
-
5237
5231
/* If min is requested ... */
5238
5232
if (min )
5239
5233
{
5240
- /*
5241
- * In principle, we should scan the index with our current
5242
- * active snapshot, which is the best approximation we've got
5243
- * to what the query will see when executed. But that won't
5244
- * be exact if a new snap is taken before running the query,
5245
- * and it can be very expensive if a lot of recently-dead or
5246
- * uncommitted rows exist at the beginning or end of the index
5247
- * (because we'll laboriously fetch each one and reject it).
5248
- * Instead, we use SnapshotNonVacuumable. That will accept
5249
- * recently-dead and uncommitted rows as well as normal
5250
- * visible rows. On the other hand, it will reject known-dead
5251
- * rows, and thus not give a bogus answer when the extreme
5252
- * value has been deleted (unless the deletion was quite
5253
- * recent); that case motivates not using SnapshotAny here.
5254
- *
5255
- * A crucial point here is that SnapshotNonVacuumable, with
5256
- * RecentGlobalXmin as horizon, yields the inverse of the
5257
- * condition that the indexscan will use to decide that index
5258
- * entries are killable (see heap_hot_search_buffer()).
5259
- * Therefore, if the snapshot rejects a tuple and we have to
5260
- * continue scanning past it, we know that the indexscan will
5261
- * mark that index entry killed. That means that the next
5262
- * get_actual_variable_range() call will not have to visit
5263
- * that heap entry. In this way we avoid repetitive work when
5264
- * this function is used a lot during planning.
5265
- */
5266
- index_scan = index_beginscan (heapRel , indexRel ,
5267
- & SnapshotNonVacuumable ,
5268
- 1 , 0 );
5269
- index_rescan (index_scan , scankeys , 1 , NULL , 0 );
5270
-
5271
- /* Fetch first tuple in sortop's direction */
5272
- if (index_getnext_slot (index_scan , indexscandir , slot ))
5273
- {
5274
- /* Extract the index column values from the slot */
5275
- FormIndexDatum (indexInfo , slot , estate ,
5276
- values , isnull );
5277
-
5278
- /* Shouldn't have got a null, but be careful */
5279
- if (isnull [0 ])
5280
- elog (ERROR , "found unexpected null value in index \"%s\"" ,
5281
- RelationGetRelationName (indexRel ));
5282
-
5283
- /* Copy the index column value out to caller's context */
5284
- MemoryContextSwitchTo (oldcontext );
5285
- * min = datumCopy (values [0 ], typByVal , typLen );
5286
- MemoryContextSwitchTo (tmpcontext );
5287
- }
5288
- else
5289
- have_data = false;
5290
-
5291
- index_endscan (index_scan );
5234
+ have_data = get_actual_variable_endpoint (heapRel ,
5235
+ indexRel ,
5236
+ indexscandir ,
5237
+ scankeys ,
5238
+ typLen ,
5239
+ typByVal ,
5240
+ slot ,
5241
+ oldcontext ,
5242
+ min );
5243
+ }
5244
+ else
5245
+ {
5246
+ /* If min not requested, assume index is nonempty */
5247
+ have_data = true;
5292
5248
}
5293
5249
5294
5250
/* If max is requested, and we didn't find the index is empty */
5295
5251
if (max && have_data )
5296
5252
{
5297
- index_scan = index_beginscan (heapRel , indexRel ,
5298
- & SnapshotNonVacuumable ,
5299
- 1 , 0 );
5300
- index_rescan (index_scan , scankeys , 1 , NULL , 0 );
5301
-
5302
- /* Fetch first tuple in reverse direction */
5303
- if (index_getnext_slot (index_scan , - indexscandir , slot ))
5304
- {
5305
- /* Extract the index column values from the slot */
5306
- FormIndexDatum (indexInfo , slot , estate ,
5307
- values , isnull );
5308
-
5309
- /* Shouldn't have got a null, but be careful */
5310
- if (isnull [0 ])
5311
- elog (ERROR , "found unexpected null value in index \"%s\"" ,
5312
- RelationGetRelationName (indexRel ));
5313
-
5314
- /* Copy the index column value out to caller's context */
5315
- MemoryContextSwitchTo (oldcontext );
5316
- * max = datumCopy (values [0 ], typByVal , typLen );
5317
- MemoryContextSwitchTo (tmpcontext );
5318
- }
5319
- else
5320
- have_data = false;
5321
-
5322
- index_endscan (index_scan );
5253
+ /* scan in the opposite direction; all else is the same */
5254
+ have_data = get_actual_variable_endpoint (heapRel ,
5255
+ indexRel ,
5256
+ - indexscandir ,
5257
+ scankeys ,
5258
+ typLen ,
5259
+ typByVal ,
5260
+ slot ,
5261
+ oldcontext ,
5262
+ max );
5323
5263
}
5324
5264
5325
5265
/* Clean everything up */
@@ -5329,7 +5269,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5329
5269
table_close (heapRel , NoLock );
5330
5270
5331
5271
MemoryContextSwitchTo (oldcontext );
5332
- FreeExecutorState ( estate );
5272
+ MemoryContextDelete ( tmpcontext );
5333
5273
5334
5274
/* And we're done */
5335
5275
break ;
@@ -5339,6 +5279,139 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5339
5279
return have_data ;
5340
5280
}
5341
5281
5282
+ /*
5283
+ * Get one endpoint datum (min or max depending on indexscandir) from the
5284
+ * specified index. Return true if successful, false if index is empty.
5285
+ * On success, endpoint value is stored to *endpointDatum (and copied into
5286
+ * outercontext).
5287
+ *
5288
+ * scankeys is a 1-element scankey array set up to reject nulls.
5289
+ * typLen/typByVal describe the datatype of the index's first column.
5290
+ * tableslot is a slot suitable to hold table tuples, in case we need
5291
+ * to probe the heap.
5292
+ * (We could compute these values locally, but that would mean computing them
5293
+ * twice when get_actual_variable_range needs both the min and the max.)
5294
+ */
5295
+ static bool
5296
+ get_actual_variable_endpoint (Relation heapRel ,
5297
+ Relation indexRel ,
5298
+ ScanDirection indexscandir ,
5299
+ ScanKey scankeys ,
5300
+ int16 typLen ,
5301
+ bool typByVal ,
5302
+ TupleTableSlot * tableslot ,
5303
+ MemoryContext outercontext ,
5304
+ Datum * endpointDatum )
5305
+ {
5306
+ bool have_data = false;
5307
+ SnapshotData SnapshotNonVacuumable ;
5308
+ IndexScanDesc index_scan ;
5309
+ Buffer vmbuffer = InvalidBuffer ;
5310
+ ItemPointer tid ;
5311
+ Datum values [INDEX_MAX_KEYS ];
5312
+ bool isnull [INDEX_MAX_KEYS ];
5313
+ MemoryContext oldcontext ;
5314
+
5315
+ /*
5316
+ * We use the index-only-scan machinery for this. With mostly-static
5317
+ * tables that's a win because it avoids a heap visit. It's also a win
5318
+ * for dynamic data, but the reason is less obvious; read on for details.
5319
+ *
5320
+ * In principle, we should scan the index with our current active
5321
+ * snapshot, which is the best approximation we've got to what the query
5322
+ * will see when executed. But that won't be exact if a new snap is taken
5323
+ * before running the query, and it can be very expensive if a lot of
5324
+ * recently-dead or uncommitted rows exist at the beginning or end of the
5325
+ * index (because we'll laboriously fetch each one and reject it).
5326
+ * Instead, we use SnapshotNonVacuumable. That will accept recently-dead
5327
+ * and uncommitted rows as well as normal visible rows. On the other
5328
+ * hand, it will reject known-dead rows, and thus not give a bogus answer
5329
+ * when the extreme value has been deleted (unless the deletion was quite
5330
+ * recent); that case motivates not using SnapshotAny here.
5331
+ *
5332
+ * A crucial point here is that SnapshotNonVacuumable, with
5333
+ * RecentGlobalXmin as horizon, yields the inverse of the condition that
5334
+ * the indexscan will use to decide that index entries are killable (see
5335
+ * heap_hot_search_buffer()). Therefore, if the snapshot rejects a tuple
5336
+ * (or more precisely, all tuples of a HOT chain) and we have to continue
5337
+ * scanning past it, we know that the indexscan will mark that index entry
5338
+ * killed. That means that the next get_actual_variable_endpoint() call
5339
+ * will not have to re-consider that index entry. In this way we avoid
5340
+ * repetitive work when this function is used a lot during planning.
5341
+ *
5342
+ * But using SnapshotNonVacuumable creates a hazard of its own. In a
5343
+ * recently-created index, some index entries may point at "broken" HOT
5344
+ * chains in which not all the tuple versions contain data matching the
5345
+ * index entry. The live tuple version(s) certainly do match the index,
5346
+ * but SnapshotNonVacuumable can accept recently-dead tuple versions that
5347
+ * don't match. Hence, if we took data from the selected heap tuple, we
5348
+ * might get a bogus answer that's not close to the index extremal value,
5349
+ * or could even be NULL. We avoid this hazard because we take the data
5350
+ * from the index entry not the heap.
5351
+ */
5352
+ InitNonVacuumableSnapshot (SnapshotNonVacuumable , RecentGlobalXmin );
5353
+
5354
+ index_scan = index_beginscan (heapRel , indexRel ,
5355
+ & SnapshotNonVacuumable ,
5356
+ 1 , 0 );
5357
+ /* Set it up for index-only scan */
5358
+ index_scan -> xs_want_itup = true;
5359
+ index_rescan (index_scan , scankeys , 1 , NULL , 0 );
5360
+
5361
+ /* Fetch first/next tuple in specified direction */
5362
+ while ((tid = index_getnext_tid (index_scan , indexscandir )) != NULL )
5363
+ {
5364
+ if (!VM_ALL_VISIBLE (heapRel ,
5365
+ ItemPointerGetBlockNumber (tid ),
5366
+ & vmbuffer ))
5367
+ {
5368
+ /* Rats, we have to visit the heap to check visibility */
5369
+ if (!index_fetch_heap (index_scan , tableslot ))
5370
+ continue ; /* no visible tuple, try next index entry */
5371
+
5372
+ /* We don't actually need the heap tuple for anything */
5373
+ ExecClearTuple (tableslot );
5374
+
5375
+ /*
5376
+ * We don't care whether there's more than one visible tuple in
5377
+ * the HOT chain; if any are visible, that's good enough.
5378
+ */
5379
+ }
5380
+
5381
+ /*
5382
+ * We expect that btree will return data in IndexTuple not HeapTuple
5383
+ * format. It's not lossy either.
5384
+ */
5385
+ if (!index_scan -> xs_itup )
5386
+ elog (ERROR , "no data returned for index-only scan" );
5387
+ if (index_scan -> xs_recheck )
5388
+ elog (ERROR , "unexpected recheck indication from btree" );
5389
+
5390
+ /* OK to deconstruct the index tuple */
5391
+ index_deform_tuple (index_scan -> xs_itup ,
5392
+ index_scan -> xs_itupdesc ,
5393
+ values , isnull );
5394
+
5395
+ /* Shouldn't have got a null, but be careful */
5396
+ if (isnull [0 ])
5397
+ elog (ERROR , "found unexpected null value in index \"%s\"" ,
5398
+ RelationGetRelationName (indexRel ));
5399
+
5400
+ /* Copy the index column value out to caller's context */
5401
+ oldcontext = MemoryContextSwitchTo (outercontext );
5402
+ * endpointDatum = datumCopy (values [0 ], typByVal , typLen );
5403
+ MemoryContextSwitchTo (oldcontext );
5404
+ have_data = true;
5405
+ break ;
5406
+ }
5407
+
5408
+ if (vmbuffer != InvalidBuffer )
5409
+ ReleaseBuffer (vmbuffer );
5410
+ index_endscan (index_scan );
5411
+
5412
+ return have_data ;
5413
+ }
5414
+
5342
5415
/*
5343
5416
* find_join_input_rel
5344
5417
* Look up the input relation for a join.
0 commit comments