Improve performance of dumpSequenceData().

nathan-bossart · nathan-bossart · commit bd15b7db489d · 2024-07-31T10:12:42.000-05:00
As one might guess, this function dumps the sequence data. It is called once per sequence, and each such call executes a query to retrieve the relevant data for a single sequence. This can cause pg_dump to take significantly longer, especially when there are many sequences. This commit improves the performance of this function by gathering all the sequence data with a single query at the beginning of pg_dump. This information is stored in a sorted array that dumpSequenceData() can bsearch() for what it needs. This follows a similar approach as previous commits that introduced sorted arrays for role information, pg_class information, and sequence metadata. As with those commits, this patch will cause pg_dump to use more memory, but that isn't expected to be too egregious. Note that we use the brand new function pg_sequence_read_tuple() in the query that gathers all sequence data, so we must continue to use the preexisting query-per-sequence approach for versions older than 18. Reviewed-by: Euler Taveira, Michael Paquier, Tom Lane Discussion: https://postgr.es/m/20240503025140.GA1227404%40nathanxps13
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
@@ -132,6 +132,8 @@ typedef struct
 	int64		startv;			/* start value */
 	int64		incby;			/* increment value */
 	int64		cache;			/* cache size */
+	int64		last_value;		/* last value of sequence */
+	bool		is_called;		/* whether nextval advances before returning */
 } SequenceItem;
 
 typedef enum OidOptions
@@ -17330,16 +17332,30 @@ collectSequences(Archive *fout)
 	 * Before Postgres 10, sequence metadata is in the sequence itself.  With
 	 * some extra effort, we might be able to use the sorted table for those
 	 * versions, but for now it seems unlikely to be worth it.
+	 *
+	 * Since version 18, we can gather the sequence data in this query with
+	 * pg_sequence_read_tuple(), but we only do so for non-schema-only dumps.
 	 */
 	if (fout->remoteVersion < 100000)
 		return;
-	else
+	else if (fout->remoteVersion < 180000 ||
+			 (fout->dopt->schemaOnly && !fout->dopt->sequence_data))
 		query = "SELECT seqrelid, format_type(seqtypid, NULL), "
 			"seqstart, seqincrement, "
 			"seqmax, seqmin, "
-			"seqcache, seqcycle "
+			"seqcache, seqcycle, "
+			"NULL, 'f' "
 			"FROM pg_catalog.pg_sequence "
 			"ORDER BY seqrelid";
+	else
+		query = "SELECT seqrelid, format_type(seqtypid, NULL), "
+			"seqstart, seqincrement, "
+			"seqmax, seqmin, "
+			"seqcache, seqcycle, "
+			"last_value, is_called "
+			"FROM pg_catalog.pg_sequence, "
+			"pg_sequence_read_tuple(seqrelid) "
+			"ORDER BY seqrelid;";
 
 	res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK);
 
@@ -17356,6 +17372,8 @@ collectSequences(Archive *fout)
 		sequences[i].minv = strtoi64(PQgetvalue(res, i, 5), NULL, 10);
 		sequences[i].cache = strtoi64(PQgetvalue(res, i, 6), NULL, 10);
 		sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0);
+		sequences[i].last_value = strtoi64(PQgetvalue(res, i, 8), NULL, 10);
+		sequences[i].is_called = (strcmp(PQgetvalue(res, i, 9), "t") == 0);
 	}
 
 	PQclear(res);
@@ -17622,30 +17640,59 @@ static void
 dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
 {
 	TableInfo  *tbinfo = tdinfo->tdtable;
-	PGresult   *res;
-	char	   *last;
+	int64		last;
 	bool		called;
 	PQExpBuffer query = createPQExpBuffer();
 
-	appendPQExpBuffer(query,
-					  "SELECT last_value, is_called FROM %s",
-					  fmtQualifiedDumpable(tbinfo));
+	/*
+	 * For versions >= 18, the sequence information is gathered in the sorted
+	 * array before any calls to dumpSequenceData().  See collectSequences()
+	 * for more information.
+	 *
+	 * For older versions, we have to query the sequence relations
+	 * individually.
+	 */
+	if (fout->remoteVersion < 180000)
+	{
+		PGresult   *res;
 
-	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+		appendPQExpBuffer(query,
+						  "SELECT last_value, is_called FROM %s",
+						  fmtQualifiedDumpable(tbinfo));
 
-	if (PQntuples(res) != 1)
-		pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
-						  "query to get data of sequence \"%s\" returned %d rows (expected 1)",
-						  PQntuples(res)),
-				 tbinfo->dobj.name, PQntuples(res));
+		res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
 
-	last = PQgetvalue(res, 0, 0);
-	called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
+		if (PQntuples(res) != 1)
+			pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
+							  "query to get data of sequence \"%s\" returned %d rows (expected 1)",
+							  PQntuples(res)),
+					 tbinfo->dobj.name, PQntuples(res));
+
+		last = strtoi64(PQgetvalue(res, 0, 0), NULL, 10);
+		called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
+
+		PQclear(res);
+	}
+	else
+	{
+		SequenceItem key = {0};
+		SequenceItem *entry;
+
+		Assert(sequences);
+		Assert(tbinfo->dobj.catId.oid);
+
+		key.oid = tbinfo->dobj.catId.oid;
+		entry = bsearch(&key, sequences, nsequences,
+						sizeof(SequenceItem), SequenceItemCmp);
+
+		last = entry->last_value;
+		called = entry->is_called;
+	}
 
 	resetPQExpBuffer(query);
 	appendPQExpBufferStr(query, "SELECT pg_catalog.setval(");
 	appendStringLiteralAH(query, fmtQualifiedDumpable(tbinfo), fout);
-	appendPQExpBuffer(query, ", %s, %s);\n",
+	appendPQExpBuffer(query, ", " INT64_FORMAT ", %s);\n",
 					  last, (called ? "true" : "false"));
 
 	if (tdinfo->dobj.dump & DUMP_COMPONENT_DATA)
@@ -17659,8 +17706,6 @@ dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
 								  .deps = &(tbinfo->dobj.dumpId),
 								  .nDeps = 1));
 
-	PQclear(res);
-
 	destroyPQExpBuffer(query);
 }