Skip to content

Commit a6c5674

Browse files
committed
Fix a performance problem in pg_dump's dump order selection logic.
findDependencyLoops() was not bright about cases where there are multiple dependency paths between the same two dumpable objects. In most scenarios this did not hurt us too badly; but since the introduction of section boundary pseudo-objects in commit a1ef01f, it was possible for this code to take unreasonable amounts of time (tens of seconds on a database with a couple thousand objects), as reported in bug #11033 from Joe Van Dyk. Joe's particular problem scenario involved "pg_dump -a" mode with long chains of foreign key constraints, but I think that similar problems could arise with other situations as long as there were enough objects. To fix, add a flag array that lets us notice when we arrive at the same object again while searching from a given start object. This simple change seems to be enough to eliminate the performance problem. Back-patch to 9.1, like the patch that introduced section boundary objects.
1 parent 18470e5 commit a6c5674

File tree

1 file changed

+45
-9
lines changed

1 file changed

+45
-9
lines changed

src/bin/pg_dump/pg_dump_sort.c

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ static void findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs);
132132
static int findLoop(DumpableObject *obj,
133133
DumpId startPoint,
134134
bool *processed,
135+
DumpId *searchFailed,
135136
DumpableObject **workspace,
136137
int depth);
137138
static void repairDependencyLoop(DumpableObject **loop,
@@ -535,21 +536,35 @@ static void
535536
findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
536537
{
537538
/*
538-
* We use two data structures here. One is a bool array processed[],
539-
* which is indexed by dump ID and marks the objects already processed
540-
* during this invocation of findDependencyLoops(). The other is a
541-
* workspace[] array of DumpableObject pointers, in which we try to build
542-
* lists of objects constituting loops. We make workspace[] large enough
543-
* to hold all the objects, which is huge overkill in most cases but could
544-
* theoretically be necessary if there is a single dependency chain
545-
* linking all the objects.
539+
* We use three data structures here:
540+
*
541+
* processed[] is a bool array indexed by dump ID, marking the objects
542+
* already processed during this invocation of findDependencyLoops().
543+
*
544+
* searchFailed[] is another array indexed by dump ID. searchFailed[j] is
545+
* set to dump ID k if we have proven that there is no dependency path
546+
* leading from object j back to start point k. This allows us to skip
547+
* useless searching when there are multiple dependency paths from k to j,
548+
* which is a common situation. We could use a simple bool array for
549+
* this, but then we'd need to re-zero it for each start point, resulting
550+
* in O(N^2) zeroing work. Using the start point's dump ID as the "true"
551+
* value lets us skip clearing the array before we consider the next start
552+
* point.
553+
*
554+
* workspace[] is an array of DumpableObject pointers, in which we try to
555+
* build lists of objects constituting loops. We make workspace[] large
556+
* enough to hold all the objects in TopoSort's output, which is huge
557+
* overkill in most cases but could theoretically be necessary if there is
558+
* a single dependency chain linking all the objects.
546559
*/
547560
bool *processed;
561+
DumpId *searchFailed;
548562
DumpableObject **workspace;
549563
bool fixedloop;
550564
int i;
551565

552566
processed = (bool *) pg_calloc(getMaxDumpId() + 1, sizeof(bool));
567+
searchFailed = (DumpId *) pg_calloc(getMaxDumpId() + 1, sizeof(DumpId));
553568
workspace = (DumpableObject **) pg_malloc(totObjs * sizeof(DumpableObject *));
554569
fixedloop = false;
555570

@@ -559,7 +574,12 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
559574
int looplen;
560575
int j;
561576

562-
looplen = findLoop(obj, obj->dumpId, processed, workspace, 0);
577+
looplen = findLoop(obj,
578+
obj->dumpId,
579+
processed,
580+
searchFailed,
581+
workspace,
582+
0);
563583

564584
if (looplen > 0)
565585
{
@@ -587,6 +607,7 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
587607
exit_horribly(modulename, "could not identify dependency loop\n");
588608

589609
free(workspace);
610+
free(searchFailed);
590611
free(processed);
591612
}
592613

@@ -597,6 +618,7 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
597618
* obj: object we are examining now
598619
* startPoint: dumpId of starting object for the hoped-for circular loop
599620
* processed[]: flag array marking already-processed objects
621+
* searchFailed[]: flag array marking already-unsuccessfully-visited objects
600622
* workspace[]: work array in which we are building list of loop members
601623
* depth: number of valid entries in workspace[] at call
602624
*
@@ -610,6 +632,7 @@ static int
610632
findLoop(DumpableObject *obj,
611633
DumpId startPoint,
612634
bool *processed,
635+
DumpId *searchFailed,
613636
DumpableObject **workspace,
614637
int depth)
615638
{
@@ -622,6 +645,13 @@ findLoop(DumpableObject *obj,
622645
if (processed[obj->dumpId])
623646
return 0;
624647

648+
/*
649+
* If we've already proven there is no path from this object back to the
650+
* startPoint, forget it.
651+
*/
652+
if (searchFailed[obj->dumpId] == startPoint)
653+
return 0;
654+
625655
/*
626656
* Reject if obj is already present in workspace. This test prevents us
627657
* from going into infinite recursion if we are given a startPoint object
@@ -661,12 +691,18 @@ findLoop(DumpableObject *obj,
661691
newDepth = findLoop(nextobj,
662692
startPoint,
663693
processed,
694+
searchFailed,
664695
workspace,
665696
depth);
666697
if (newDepth > 0)
667698
return newDepth;
668699
}
669700

701+
/*
702+
* Remember there is no path from here back to startPoint
703+
*/
704+
searchFailed[obj->dumpId] = startPoint;
705+
670706
return 0;
671707
}
672708

0 commit comments

Comments (0)