Skip to content

Commit a63378a

Browse files
committed
Use column collation for extended statistics
The current extended statistics code was a bit confused about which collation to use. When building the statistics, the collations defined as default for the data types were used (since commit 5e09280). The MCV code, however, was using the column collations for MCV serialization, and then DEFAULT_COLLATION_OID when computing estimates. So overall the code was using all three possible options, inconsistently. This uses the column collation everywhere - this makes it consistent with what 5e09280 did for regular stats. We do not, however, track the collations in a catalog, because we can derive them from column-level information. This may need to change in the future, e.g. after allowing statistics on expressions. Reviewed-by: Tom Lane Discussion: https://postgr.es/m/8736jdhbhc.fsf%40ansel.ydns.eu Backpatch-to: 12
1 parent e38a55b commit a63378a

File tree

4 files changed

+17
-6
lines changed

4 files changed

+17
-6
lines changed

src/backend/commands/statscmds.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,10 @@ RemoveStatisticsById(Oid statsOid)
485485
*
486486
* For MCV lists that's not the case, as those statistics store the datums
487487
* internally. In this case we simply reset the statistics value to NULL.
488+
*
489+
* Note that "type change" includes collation change, which means we can rely
490+
* on the MCV list being consistent with the collation info in pg_attribute
491+
* during estimation.
488492
*/
489493
void
490494
UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,

src/backend/statistics/dependencies.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
273273
colstat->attrtypid);
274274

275275
/* prepare the sort function for this dimension */
276-
multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation);
276+
multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
277277
}
278278

279279
/*

src/backend/statistics/mcv.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ build_mss(VacAttrStats **stats, int numattrs)
366366
elog(ERROR, "cache lookup failed for ordering operator for type %u",
367367
colstat->attrtypid);
368368

369-
multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation);
369+
multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
370370
}
371371

372372
return mss;
@@ -686,7 +686,7 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
686686

687687
/* sort and deduplicate the data */
688688
ssup[dim].ssup_cxt = CurrentMemoryContext;
689-
ssup[dim].ssup_collation = DEFAULT_COLLATION_OID;
689+
ssup[dim].ssup_collation = stats[dim]->attrcollid;
690690
ssup[dim].ssup_nulls_first = false;
691691

692692
PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
@@ -1630,15 +1630,22 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
16301630
* First check whether the constant is below the lower
16311631
* boundary (in that case we can skip the bucket, because
16321632
* there's no overlap).
1633+
*
1634+
* We don't store collations used to build the statistics,
1635+
* but we can use the collation for the attribute itself,
1636+
* as stored in varcollid. We do reset the statistics after
1637+
* a type change (including collation change), so this is
1638+
* OK. We may need to relax this after allowing extended
1639+
* statistics on expressions.
16331640
*/
16341641
if (varonleft)
16351642
match = DatumGetBool(FunctionCall2Coll(&opproc,
1636-
DEFAULT_COLLATION_OID,
1643+
var->varcollid,
16371644
item->values[idx],
16381645
cst->constvalue));
16391646
else
16401647
match = DatumGetBool(FunctionCall2Coll(&opproc,
1641-
DEFAULT_COLLATION_OID,
1648+
var->varcollid,
16421649
cst->constvalue,
16431650
item->values[idx]));
16441651

src/backend/statistics/mvdistinct.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
477477
colstat->attrtypid);
478478

479479
/* prepare the sort function for this dimension */
480-
multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation);
480+
multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
481481

482482
/* accumulate all the data for this dimension into the arrays */
483483
for (j = 0; j < numrows; j++)

0 commit comments

Comments
 (0)