Skip to content

Commit 65c6cab

Browse files
committed
Avoid O(N^2) behavior in SyncPostCheckpoint().
As in commits 6301c3a and e9d9ba2, avoid doing repetitive list_delete_first() operations, since that would be expensive when there are many files waiting to be unlinked. This is a slightly larger change than in those cases. We have to keep the list state valid for calls to AbsorbSyncRequests(), so it's necessary to invent a "canceled" field instead of immediately deleting PendingUnlinkEntry entries. Also, because we might not be able to process all the entries, we need a new list primitive list_delete_first_n(). list_delete_first_n() is almost list_copy_tail(), but it modifies the input List instead of making a new copy. I found a couple of existing uses of the latter that could profitably use the new function. (There might be more, but the other callers look like they probably shouldn't overwrite the input List.) As before, back-patch to v13. Discussion: https://postgr.es/m/CD2F0E7F-9822-45EC-A411-AE56F14DEA9F@amazon.com
1 parent d8dba4d commit 65c6cab

File tree

5 files changed

+103
-14
lines changed

5 files changed

+103
-14
lines changed

src/backend/nodes/list.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,72 @@ list_delete_last(List *list)
906906
return list_truncate(list, list_length(list) - 1);
907907
}
908908

909+
/*
910+
* Delete the first N cells of the list.
911+
*
912+
* The List is pfree'd if the request causes all cells to be deleted.
913+
*/
914+
List *
915+
list_delete_first_n(List *list, int n)
916+
{
917+
check_list_invariants(list);
918+
919+
/* No-op request? */
920+
if (n <= 0)
921+
return list;
922+
923+
/* Delete whole list? */
924+
if (n >= list_length(list))
925+
{
926+
list_free(list);
927+
return NIL;
928+
}
929+
930+
/*
931+
* Otherwise, we normally just collapse out the removed elements. But for
932+
* debugging purposes, move the whole list contents someplace else.
933+
*
934+
* (Note that we *must* keep the contents in the same memory context.)
935+
*/
936+
#ifndef DEBUG_LIST_MEMORY_USAGE
937+
memmove(&list->elements[0], &list->elements[n],
938+
(list->length - n) * sizeof(ListCell));
939+
list->length -= n;
940+
#else
941+
{
942+
ListCell *newelems;
943+
int newmaxlen = list->length - n;
944+
945+
newelems = (ListCell *)
946+
MemoryContextAlloc(GetMemoryChunkContext(list),
947+
newmaxlen * sizeof(ListCell));
948+
memcpy(newelems, &list->elements[n], newmaxlen * sizeof(ListCell));
949+
if (list->elements != list->initial_elements)
950+
pfree(list->elements);
951+
else
952+
{
953+
/*
954+
* As in enlarge_list(), clear the initial_elements[] space and/or
955+
* mark it inaccessible.
956+
*/
957+
#ifdef CLOBBER_FREED_MEMORY
958+
wipe_mem(list->initial_elements,
959+
list->max_length * sizeof(ListCell));
960+
#else
961+
VALGRIND_MAKE_MEM_NOACCESS(list->initial_elements,
962+
list->max_length * sizeof(ListCell));
963+
#endif
964+
}
965+
list->elements = newelems;
966+
list->max_length = newmaxlen;
967+
list->length = newmaxlen;
968+
check_list_invariants(list);
969+
}
970+
#endif
971+
972+
return list;
973+
}
974+
909975
/*
910976
* Generate the union of two lists. This is calculated by copying
911977
* list1 via list_copy(), then adding to it all the members of list2

src/backend/optimizer/util/clauses.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4139,7 +4139,7 @@ add_function_defaults(List *args, int pronargs, HeapTuple func_tuple)
41394139
if (ndelete < 0)
41404140
elog(ERROR, "not enough default arguments");
41414141
if (ndelete > 0)
4142-
defaults = list_copy_tail(defaults, ndelete);
4142+
defaults = list_delete_first_n(defaults, ndelete);
41434143

41444144
/* And form the combined argument list, not modifying the input list */
41454145
return list_concat_copy(args, defaults);

src/backend/parser/parse_func.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1691,7 +1691,7 @@ func_get_detail(List *funcname,
16911691

16921692
ndelete = list_length(defaults) - best_candidate->ndargs;
16931693
if (ndelete > 0)
1694-
defaults = list_copy_tail(defaults, ndelete);
1694+
defaults = list_delete_first_n(defaults, ndelete);
16951695
*argdefaults = defaults;
16961696
}
16971697
}

src/backend/storage/sync/sync.c

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ typedef struct
6969
{
7070
FileTag tag; /* identifies handler and file */
7171
CycleCtr cycle_ctr; /* checkpoint_cycle_ctr when request was made */
72+
bool canceled; /* true if request has been canceled */
7273
} PendingUnlinkEntry;
7374

7475
static HTAB *pendingOps = NULL;
@@ -195,13 +196,18 @@ void
195196
SyncPostCheckpoint(void)
196197
{
197198
int absorb_counter;
199+
ListCell *lc;
198200

199201
absorb_counter = UNLINKS_PER_ABSORB;
200-
while (pendingUnlinks != NIL)
202+
foreach(lc, pendingUnlinks)
201203
{
202-
PendingUnlinkEntry *entry = (PendingUnlinkEntry *) linitial(pendingUnlinks);
204+
PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(lc);
203205
char path[MAXPGPATH];
204206

207+
/* Skip over any canceled entries */
208+
if (entry->canceled)
209+
continue;
210+
205211
/*
206212
* New entries are appended to the end, so if the entry is new we've
207213
* reached the end of old entries.
@@ -231,22 +237,40 @@ SyncPostCheckpoint(void)
231237
errmsg("could not remove file \"%s\": %m", path)));
232238
}
233239

234-
/* And remove the list entry */
235-
pendingUnlinks = list_delete_first(pendingUnlinks);
236-
pfree(entry);
240+
/* Mark the list entry as canceled, just in case */
241+
entry->canceled = true;
237242

238243
/*
239244
* As in ProcessSyncRequests, we don't want to stop absorbing fsync
240245
* requests for a long time when there are many deletions to be done.
241-
* We can safely call AbsorbSyncRequests() at this point in the loop
242-
* (note it might try to delete list entries).
246+
* We can safely call AbsorbSyncRequests() at this point in the loop.
243247
*/
244248
if (--absorb_counter <= 0)
245249
{
246250
AbsorbSyncRequests();
247251
absorb_counter = UNLINKS_PER_ABSORB;
248252
}
249253
}
254+
255+
/*
256+
* If we reached the end of the list, we can just remove the whole list
257+
* (remembering to pfree all the PendingUnlinkEntry objects). Otherwise,
258+
* we must keep the entries at or after "lc".
259+
*/
260+
if (lc == NULL)
261+
{
262+
list_free_deep(pendingUnlinks);
263+
pendingUnlinks = NIL;
264+
}
265+
else
266+
{
267+
int ntodelete = list_cell_number(pendingUnlinks, lc);
268+
269+
for (int i = 0; i < ntodelete; i++)
270+
pfree(list_nth(pendingUnlinks, i));
271+
272+
pendingUnlinks = list_delete_first_n(pendingUnlinks, ntodelete);
273+
}
250274
}
251275

252276
/*
@@ -486,17 +510,14 @@ RememberSyncRequest(const FileTag *ftag, SyncRequestType type)
486510
entry->canceled = true;
487511
}
488512

489-
/* Remove matching unlink requests */
513+
/* Cancel matching unlink requests */
490514
foreach(cell, pendingUnlinks)
491515
{
492516
PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell);
493517

494518
if (entry->tag.handler == ftag->handler &&
495519
syncsw[ftag->handler].sync_filetagmatches(ftag, &entry->tag))
496-
{
497-
pendingUnlinks = foreach_delete_current(pendingUnlinks, cell);
498-
pfree(entry);
499-
}
520+
entry->canceled = true;
500521
}
501522
}
502523
else if (type == SYNC_UNLINK_REQUEST)
@@ -508,6 +529,7 @@ RememberSyncRequest(const FileTag *ftag, SyncRequestType type)
508529
entry = palloc(sizeof(PendingUnlinkEntry));
509530
entry->tag = *ftag;
510531
entry->cycle_ctr = checkpoint_cycle_ctr;
532+
entry->canceled = false;
511533

512534
pendingUnlinks = lappend(pendingUnlinks, entry);
513535

src/include/nodes/pg_list.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,7 @@ extern pg_nodiscard List *list_delete_int(List *list, int datum);
564564
extern pg_nodiscard List *list_delete_oid(List *list, Oid datum);
565565
extern pg_nodiscard List *list_delete_first(List *list);
566566
extern pg_nodiscard List *list_delete_last(List *list);
567+
extern pg_nodiscard List *list_delete_first_n(List *list, int n);
567568
extern pg_nodiscard List *list_delete_nth_cell(List *list, int n);
568569
extern pg_nodiscard List *list_delete_cell(List *list, ListCell *cell);
569570

0 commit comments

Comments
 (0)