@@ -170,7 +170,7 @@ typedef struct pgssEntry
 	pgssHashKey key;			/* hash key of entry - MUST BE FIRST */
 	Counters	counters;		/* the statistics for this query */
 	Size		query_offset;	/* query text offset in external file */
-	int			query_len;		/* # of valid bytes in query string */
+	int			query_len;		/* # of valid bytes in query string, or -1 */
 	int			encoding;		/* query text encoding */
 	slock_t		mutex;			/* protects the counters only */
 } pgssEntry;
@@ -1705,7 +1705,8 @@ entry_cmp(const void *lhs, const void *rhs)
 }

 /*
- * Deallocate least used entries.
+ * Deallocate least-used entries.
+ *
  * Caller must hold an exclusive lock on pgss->lock.
  */
 static void
@@ -1716,17 +1717,27 @@ entry_dealloc(void)
 	pgssEntry  *entry;
 	int			nvictims;
 	int			i;
-	Size		totlen = 0;
+	Size		tottextlen;
+	int			nvalidtexts;

 	/*
 	 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
 	 * While we're scanning the table, apply the decay factor to the usage
-	 * values.
+	 * values, and update the mean query length.
+	 *
+	 * Note that the mean query length is almost immediately obsolete, since
+	 * we compute it before not after discarding the least-used entries.
+	 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
+	 * making two passes to get a more current result.  Likewise, the new
+	 * cur_median_usage includes the entries we're about to zap.
 	 */

 	entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));

 	i = 0;
+	tottextlen = 0;
+	nvalidtexts = 0;
+
 	hash_seq_init(&hash_seq, pgss_hash);
 	while ((entry = hash_seq_search(&hash_seq)) != NULL)
 	{
@@ -1736,20 +1747,27 @@ entry_dealloc(void)
 			entry->counters.usage *= STICKY_DECREASE_FACTOR;
 		else
 			entry->counters.usage *= USAGE_DECREASE_FACTOR;
-		/* Accumulate total size, too. */
-		totlen += entry->query_len + 1;
+		/* In the mean length computation, ignore dropped texts. */
+		if (entry->query_len >= 0)
+		{
+			tottextlen += entry->query_len + 1;
+			nvalidtexts++;
+		}
 	}

+	/* Sort into increasing order by usage */
 	qsort(entries, i, sizeof(pgssEntry *), entry_cmp);

+	/* Record the (approximate) median usage */
 	if (i > 0)
-	{
-		/* Record the (approximate) median usage */
 		pgss->cur_median_usage = entries[i / 2]->counters.usage;
-		/* Record the mean query length */
-		pgss->mean_query_len = totlen / i;
-	}
+	/* Record the mean query length */
+	if (nvalidtexts > 0)
+		pgss->mean_query_len = tottextlen / nvalidtexts;
+	else
+		pgss->mean_query_len = ASSUMED_LENGTH_INIT;

+	/* Now zap an appropriate fraction of lowest-usage entries */
 	nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
 	nvictims = Min(nvictims, i);

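The mean-length bookkeeping in the two hunks above can be read in isolation as roughly the sketch below. This is illustrative only, not code from the module: the function name, the flat array input, and EXAMPLE_ASSUMED_LENGTH (standing in for ASSUMED_LENGTH_INIT) are invented for the example. The point is that dropped texts, marked with query_len == -1, are excluded from both the numerator and the denominator, and a fixed assumption is used when no valid texts remain.

/* Illustrative sketch only; names are assumptions, not pg_stat_statements code. */
#include <stddef.h>

#define EXAMPLE_ASSUMED_LENGTH 1024		/* assumed stand-in for ASSUMED_LENGTH_INIT */

static size_t
example_mean_text_len(const int *query_lens, int n)
{
	size_t		tottextlen = 0;
	int			nvalidtexts = 0;
	int			i;

	for (i = 0; i < n; i++)
	{
		/* ignore dropped texts, which are marked with -1 */
		if (query_lens[i] >= 0)
		{
			tottextlen += query_lens[i] + 1;	/* +1 for the trailing NUL */
			nvalidtexts++;
		}
	}

	/* with no valid texts, fall back to a fixed assumed length */
	return (nvalidtexts > 0) ? tottextlen / nvalidtexts : EXAMPLE_ASSUMED_LENGTH;
}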
@@ -1892,15 +1910,17 @@ qtext_load_file(Size *buffer_size)
 	}

 	/* Allocate buffer; beware that off_t might be wider than size_t */
-	if (stat.st_size <= MaxAllocSize)
+	if (stat.st_size <= MaxAllocHugeSize)
 		buf = (char *) malloc(stat.st_size);
 	else
 		buf = NULL;
 	if (buf == NULL)
 	{
 		ereport(LOG,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
-				 errmsg("out of memory")));
+				 errmsg("out of memory"),
+				 errdetail("Could not allocate enough memory to read pg_stat_statement file \"%s\".",
+						   PGSS_TEXT_FILE)));
 		CloseTransientFile(fd);
 		return NULL;
 	}
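The pattern in the hunk above, outside of PostgreSQL's infrastructure, is simply "cap a whole-file malloc() at an explicit limit and fail cleanly past it". A rough standalone sketch follows; it is not code from the commit, and example_load_whole_file and EXAMPLE_MAX_BYTES are invented names (the latter standing in for MaxAllocHugeSize).

/* Rough sketch under assumed names; not pg_stat_statements code. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

#define EXAMPLE_MAX_BYTES ((size_t) 1 << 30)	/* assumed 1 GB cap */

static char *
example_load_whole_file(const char *path, size_t *size)
{
	struct stat st;
	FILE	   *fp;
	char	   *buf = NULL;

	if (stat(path, &st) != 0)
		return NULL;

	/* beware that off_t might be wider (and signed) compared to size_t */
	if (st.st_size >= 0 && (unsigned long long) st.st_size <= EXAMPLE_MAX_BYTES)
		buf = malloc((size_t) st.st_size);
	if (buf == NULL)
		return NULL;			/* file too large, or out of memory */

	fp = fopen(path, "rb");
	if (fp == NULL ||
		fread(buf, 1, (size_t) st.st_size, fp) != (size_t) st.st_size)
	{
		if (fp)
			fclose(fp);
		free(buf);
		return NULL;
	}
	fclose(fp);

	*size = (size_t) st.st_size;
	return buf;
}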
@@ -2002,13 +2022,17 @@ need_gc_qtexts(void)
  * occur in the foreseeable future.
  *
  * The caller must hold an exclusive lock on pgss->lock.
+ *
+ * At the first sign of trouble we unlink the query text file to get a clean
+ * slate (although existing statistics are retained), rather than risk
+ * thrashing by allowing the same problem case to recur indefinitely.
  */
 static void
 gc_qtexts(void)
 {
 	char	   *qbuffer;
 	Size		qbuffer_size;
-	FILE	   *qfile;
+	FILE	   *qfile = NULL;
 	HASH_SEQ_STATUS hash_seq;
 	pgssEntry  *entry;
 	Size		extent;
@@ -2023,12 +2047,15 @@ gc_qtexts(void)
 		return;

 	/*
-	 * Load the old texts file.  If we fail (out of memory, for instance) just
-	 * skip the garbage collection.
+	 * Load the old texts file.  If we fail (out of memory, for instance),
+	 * invalidate query texts.  Hopefully this is rare.  It might seem better
+	 * to leave things alone on an OOM failure, but the problem is that the
+	 * file is only going to get bigger; hoping for a future non-OOM result is
+	 * risky and can easily lead to complete denial of service.
 	 */
 	qbuffer = qtext_load_file(&qbuffer_size);
 	if (qbuffer == NULL)
-		return;
+		goto gc_fail;

 	/*
 	 * We overwrite the query texts file in place, so as to reduce the risk of
@@ -2063,6 +2090,7 @@ gc_qtexts(void)
 			/* Trouble ... drop the text */
 			entry->query_offset = 0;
 			entry->query_len = -1;
+			/* entry will not be counted in mean query length computation */
 			continue;
 		}

@@ -2147,7 +2175,36 @@ gc_qtexts(void)
 		entry->query_len = -1;
 	}

-	/* Seems like a good idea to bump the GC count even though we failed */
+	/*
+	 * Destroy the query text file and create a new, empty one
+	 */
+	(void) unlink(PGSS_TEXT_FILE);
+	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
+	if (qfile == NULL)
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not write new pg_stat_statement file \"%s\": %m",
+						PGSS_TEXT_FILE)));
+	else
+		FreeFile(qfile);
+
+	/* Reset the shared extent pointer */
+	pgss->extent = 0;
+
+	/* Reset mean_query_len to match the new state */
+	pgss->mean_query_len = ASSUMED_LENGTH_INIT;
+
+	/*
+	 * Bump the GC count even though we failed.
+	 *
+	 * This is needed to make concurrent readers of file without any lock on
+	 * pgss->lock notice existence of new version of file.  Once readers
+	 * subsequently observe a change in GC count with pgss->lock held, that
+	 * forces a safe reopen of file.  Writers also require that we bump here,
+	 * of course.  (As required by locking protocol, readers and writers don't
+	 * trust earlier file contents until gc_count is found unchanged after
+	 * pgss->lock acquired in shared or exclusive mode respectively.)
+	 */
 	record_gc_qtexts();
 }

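The comment in the final hunk describes a validation protocol around gc_count: readers sample the counter under the lock, read the text file without the lock, then re-check the counter under the lock and discard the buffer if it changed. The sketch below illustrates that idea only; it is not pg_stat_statements code, and ExampleShared, example_read_text_file, and the pthread mutex are assumptions standing in for the module's shared state, file reader, and LWLock.

/* Illustrative reader-side sketch of the gc_count check; assumed names throughout. */
#include <pthread.h>
#include <stdlib.h>

typedef struct ExampleShared
{
	pthread_mutex_t lock;		/* stand-in for pgss->lock */
	unsigned long	gc_count;	/* bumped by every GC attempt, successful or not */
} ExampleShared;

extern char *example_read_text_file(void);	/* assumed helper: reads the external file */

static char *
example_read_validated(ExampleShared *shared)
{
	char	   *buf;
	unsigned long observed;

	for (;;)
	{
		/* Sample the GC counter while holding the lock. */
		pthread_mutex_lock(&shared->lock);
		observed = shared->gc_count;
		pthread_mutex_unlock(&shared->lock);

		/* Read the file without holding the lock (may take a while). */
		buf = example_read_text_file();
		if (buf == NULL)
			return NULL;

		/* Re-check under the lock; an unchanged count means the buffer is trustworthy. */
		pthread_mutex_lock(&shared->lock);
		if (shared->gc_count == observed)
		{
			pthread_mutex_unlock(&shared->lock);
			return buf;
		}
		pthread_mutex_unlock(&shared->lock);

		/* A GC (or a reset after a failed GC) intervened; discard and retry. */
		free(buf);
	}
}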