Skip to content

Commit dba5150

Browse files
committed
Fix join-order scoring for recursive predicates
The evaluator log summary JSON needs to be read in two passes, to avoid trying to calculate scores for a recursive layer before all the events for it have been processed. Doing it in one pass meant that, for predicates associated with an IN_LAYER event that came after the corresponding COMPUTE_RECURSIVE event, we were missing their delta sizes and were not even attempting to their join-order scores. I had to remove some of the generic machinery for registering log processors, but the JoinOrderScanner was the only processor being registered anyway.
1 parent a155289 commit dba5150

File tree

8 files changed

+100
-183
lines changed

8 files changed

+100
-183
lines changed

extensions/ql-vscode/src/compare-performance/compare-performance-view.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ import { withProgress } from "../common/vscode/progress";
1616
import { telemetryListener } from "../common/vscode/telemetry";
1717
import type { HistoryItemLabelProvider } from "../query-history/history-item-label-provider";
1818
import { PerformanceOverviewScanner } from "../log-insights/performance-comparison";
19-
import { scanLog } from "../log-insights/log-scanner";
2019
import type { ResultsView } from "../local-queries";
20+
import { readJsonlFile } from "../common/jsonl-reader";
21+
import type { SummaryEvent } from "../log-insights/log-summary";
2122

2223
export class ComparePerformanceView extends AbstractWebview<
2324
ToComparePerformanceViewMessage,
@@ -46,8 +47,20 @@ export class ComparePerformanceView extends AbstractWebview<
4647
function scanLogWithProgress(log: string, logDescription: string) {
4748
const bytes = statSync(log).size;
4849
return withProgress(
49-
async (progress) =>
50-
scanLog(log, new PerformanceOverviewScanner(), progress),
50+
async (progress) => {
51+
progress?.({
52+
// all scans have step 1 - the backing progress tracker allows increments instead of
53+
// steps - but for now we are happy with a tiny UI that says what is happening
54+
message: `Scanning ...`,
55+
step: 1,
56+
maxStep: 2,
57+
});
58+
const scanner = new PerformanceOverviewScanner();
59+
await readJsonlFile<SummaryEvent>(log, async (obj) => {
60+
scanner.onEvent(obj);
61+
});
62+
return scanner;
63+
},
5164

5265
{
5366
title: `Scanning evaluator log ${logDescription} (${(bytes / 1024 / 1024).toFixed(1)} MB)`,

extensions/ql-vscode/src/extension.ts

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import {
2828
CliConfigListener,
2929
DistributionConfigListener,
3030
GitHubDatabaseConfigListener,
31-
joinOrderWarningThreshold,
3231
QueryHistoryConfigListener,
3332
QueryServerConfigListener,
3433
VariantAnalysisConfigListener,
@@ -102,7 +101,6 @@ import { getPackagingCommands } from "./packaging";
102101
import { HistoryItemLabelProvider } from "./query-history/history-item-label-provider";
103102
import { EvalLogViewer } from "./query-evaluation-logging";
104103
import { SummaryLanguageSupport } from "./log-insights/summary-language-support";
105-
import { JoinOrderScannerProvider } from "./log-insights/join-order";
106104
import { LogScannerService } from "./log-insights/log-scanner-service";
107105
import { VariantAnalysisView } from "./variant-analysis/variant-analysis-view";
108106
import { VariantAnalysisViewSerializer } from "./variant-analysis/variant-analysis-view-serializer";
@@ -941,11 +939,6 @@ async function activateWithInstalledDistribution(
941939
void extLogger.log("Initializing evaluation log scanners.");
942940
const logScannerService = new LogScannerService(qhm);
943941
ctx.subscriptions.push(logScannerService);
944-
ctx.subscriptions.push(
945-
logScannerService.scanners.registerLogScannerProvider(
946-
new JoinOrderScannerProvider(() => joinOrderWarningThreshold()),
947-
),
948-
);
949942

950943
void extLogger.log("Initializing compare view.");
951944
const compareView = new CompareView(

extensions/ql-vscode/src/log-insights/join-order.ts

Lines changed: 40 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
import type {
2-
EvaluationLogProblemReporter,
3-
EvaluationLogScanner,
4-
EvaluationLogScannerProvider,
5-
} from "./log-scanner";
1+
import { readJsonlFile } from "../common/jsonl-reader";
2+
import type { EvaluationLogProblemReporter } from "./log-scanner";
63
import type {
74
InLayer,
85
ComputeRecursive,
@@ -129,36 +126,18 @@ interface Bucket {
129126
dependentPredicateSizes: Map<string, number>;
130127
}
131128

132-
class JoinOrderScanner implements EvaluationLogScanner {
129+
class PredicateSizeScanner {
133130
// Map a predicate hash to its result size
134-
private readonly predicateSizes = new Map<string, number>();
135-
private readonly layerEvents = new Map<
136-
string,
137-
Array<ComputeRecursive | InLayer>
138-
>();
131+
readonly predicateSizes = new Map<string, number>();
132+
readonly layerEvents = new Map<string, Array<ComputeRecursive | InLayer>>();
139133

140-
constructor(
141-
private readonly problemReporter: EvaluationLogProblemReporter,
142-
private readonly warningThreshold: number,
143-
) {}
144-
145-
public onEvent(event: SummaryEvent): void {
134+
onEvent(event: SummaryEvent): void {
146135
if (
147136
event.completionType !== undefined &&
148137
event.completionType !== "SUCCESS"
149138
) {
150139
return; // Skip any evaluation that wasn't successful
151140
}
152-
153-
this.recordPredicateSizes(event);
154-
this.computeBadnessMetric(event);
155-
}
156-
157-
public onDone(): void {
158-
void this;
159-
}
160-
161-
private recordPredicateSizes(event: SummaryEvent): void {
162141
switch (event.evaluationStrategy) {
163142
case "EXTENSIONAL":
164143
case "COMPUTED_EXTENSIONAL":
@@ -185,8 +164,20 @@ class JoinOrderScanner implements EvaluationLogScanner {
185164
}
186165
}
187166
}
167+
}
188168

189-
private computeBadnessMetric(event: SummaryEvent): void {
169+
class JoinOrderScanner {
170+
constructor(
171+
private readonly predicateSizes: Map<string, number>,
172+
private readonly layerEvents: Map<
173+
string,
174+
Array<ComputeRecursive | InLayer>
175+
>,
176+
private readonly problemReporter: EvaluationLogProblemReporter,
177+
private readonly warningThreshold: number,
178+
) {}
179+
180+
public onEvent(event: SummaryEvent): void {
190181
if (
191182
event.completionType !== undefined &&
192183
event.completionType !== "SUCCESS"
@@ -475,13 +466,26 @@ class JoinOrderScanner implements EvaluationLogScanner {
475466
}
476467
}
477468

478-
export class JoinOrderScannerProvider implements EvaluationLogScannerProvider {
479-
constructor(private readonly getThreshdold: () => number) {}
469+
export async function scanAndReportJoinOrderProblems(
470+
jsonSummaryLocation: string,
471+
problemReporter: EvaluationLogProblemReporter,
472+
warningThreshold: number,
473+
) {
474+
// Do two passes over the summary JSON. The first pass collects the sizes of predicates, along
475+
// with collecting layer events for each recursive SCC.
476+
const predicateSizeScanner = new PredicateSizeScanner();
477+
await readJsonlFile<SummaryEvent>(jsonSummaryLocation, async (obj) => {
478+
predicateSizeScanner.onEvent(obj);
479+
});
480480

481-
public createScanner(
482-
problemReporter: EvaluationLogProblemReporter,
483-
): EvaluationLogScanner {
484-
const threshold = this.getThreshdold();
485-
return new JoinOrderScanner(problemReporter, threshold);
486-
}
481+
// The second pass takes the information from the first pass, computes join order scores, and reports those that exceed the threshold.
482+
const joinOrderScanner = new JoinOrderScanner(
483+
predicateSizeScanner.predicateSizes,
484+
predicateSizeScanner.layerEvents,
485+
problemReporter,
486+
warningThreshold,
487+
);
488+
await readJsonlFile<SummaryEvent>(jsonSummaryLocation, async (obj) => {
489+
joinOrderScanner.onEvent(obj);
490+
});
487491
}

extensions/ql-vscode/src/log-insights/log-scanner-service.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@ import { Diagnostic, DiagnosticSeverity, languages, Range, Uri } from "vscode";
22
import { DisposableObject } from "../common/disposable-object";
33
import type { QueryHistoryInfo } from "../query-history/query-history-info";
44
import type { EvaluationLogProblemReporter } from "./log-scanner";
5-
import { EvaluationLogScannerSet } from "./log-scanner";
65
import type { PipelineInfo, SummarySymbols } from "./summary-parser";
76
import { readFile } from "fs-extra";
87
import { extLogger } from "../common/logging/vscode";
98
import type { QueryHistoryManager } from "../query-history/query-history-manager";
9+
import { scanAndReportJoinOrderProblems } from "./join-order";
10+
import { joinOrderWarningThreshold } from "../config";
1011

1112
/**
1213
* Compute the key used to find a predicate in the summary symbols.
@@ -83,7 +84,6 @@ class ProblemReporter implements EvaluationLogProblemReporter {
8384
}
8485

8586
export class LogScannerService extends DisposableObject {
86-
public readonly scanners = new EvaluationLogScannerSet();
8787
private readonly diagnosticCollection = this.push(
8888
languages.createDiagnosticCollection("ql-eval-log"),
8989
);
@@ -151,9 +151,11 @@ export class LogScannerService extends DisposableObject {
151151
);
152152
}
153153
const problemReporter = new ProblemReporter(symbols);
154-
155-
await this.scanners.scanLog(jsonSummaryLocation, problemReporter);
156-
154+
await scanAndReportJoinOrderProblems(
155+
jsonSummaryLocation,
156+
problemReporter,
157+
joinOrderWarningThreshold(),
158+
);
157159
return problemReporter.diagnostics;
158160
}
159161
}
Lines changed: 0 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
import type { Disposable } from "../common/disposable-object";
2-
import { readJsonlFile } from "../common/jsonl-reader";
3-
import type { ProgressCallback } from "../common/vscode/progress";
4-
import type { SummaryEvent } from "./log-summary";
5-
61
/**
72
* Callback interface used to report diagnostics from a log scanner.
83
*/
@@ -41,111 +36,3 @@ export interface EvaluationLogProblemReporter {
4136
*/
4237
log(message: string): void;
4338
}
44-
45-
/**
46-
* Interface implemented by a log scanner. Instances are created via
47-
* `EvaluationLogScannerProvider.createScanner()`.
48-
*/
49-
export interface EvaluationLogScanner {
50-
/**
51-
* Called for each event in the log summary, in order. The implementation can report problems via
52-
* the `EvaluationLogProblemReporter` interface that was supplied to `createScanner()`.
53-
* @param event The log summary event.
54-
*/
55-
onEvent(event: SummaryEvent): void;
56-
/**
57-
* Called after all events in the log summary have been processed. The implementation can report
58-
* problems via the `EvaluationLogProblemReporter` interface that was supplied to
59-
* `createScanner()`.
60-
*/
61-
onDone(): void;
62-
}
63-
64-
/**
65-
* A factory for log scanners. When a log is to be scanned, all registered
66-
* `EvaluationLogScannerProviders` will be asked to create a new instance of `EvaluationLogScanner`
67-
* to do the scanning.
68-
*/
69-
export interface EvaluationLogScannerProvider {
70-
/**
71-
* Create a new instance of `EvaluationLogScanner` to scan a single summary log.
72-
* @param problemReporter Callback interface for reporting any problems discovered.
73-
*/
74-
createScanner(
75-
problemReporter: EvaluationLogProblemReporter,
76-
): EvaluationLogScanner;
77-
}
78-
79-
export class EvaluationLogScannerSet {
80-
private readonly scannerProviders = new Map<
81-
number,
82-
EvaluationLogScannerProvider
83-
>();
84-
private nextScannerProviderId = 0;
85-
86-
/**
87-
* Register a provider that can create instances of `EvaluationLogScanner` to scan evaluation logs
88-
* for problems.
89-
* @param provider The provider.
90-
* @returns A `Disposable` that, when disposed, will unregister the provider.
91-
*/
92-
public registerLogScannerProvider(
93-
provider: EvaluationLogScannerProvider,
94-
): Disposable {
95-
const id = this.nextScannerProviderId;
96-
this.nextScannerProviderId++;
97-
98-
this.scannerProviders.set(id, provider);
99-
return {
100-
dispose: () => {
101-
this.scannerProviders.delete(id);
102-
},
103-
};
104-
}
105-
106-
/**
107-
* Scan the evaluator summary log for problems, using the scanners for all registered providers.
108-
* @param jsonSummaryLocation The file path of the JSON summary log.
109-
* @param problemReporter Callback interface for reporting any problems discovered.
110-
*/
111-
public async scanLog(
112-
jsonSummaryLocation: string,
113-
problemReporter: EvaluationLogProblemReporter,
114-
): Promise<void> {
115-
const scanners = [...this.scannerProviders.values()].map((p) =>
116-
p.createScanner(problemReporter),
117-
);
118-
119-
await readJsonlFile<SummaryEvent>(jsonSummaryLocation, async (obj) => {
120-
scanners.forEach((scanner) => {
121-
scanner.onEvent(obj);
122-
});
123-
});
124-
125-
scanners.forEach((scanner) => scanner.onDone());
126-
}
127-
}
128-
129-
/**
130-
* Scan the evaluator summary log using the given scanner. For convenience, returns the scanner.
131-
*
132-
* @param jsonSummaryLocation The file path of the JSON summary log.
133-
* @param scanner The scanner to process events from the log
134-
*/
135-
export async function scanLog<T extends EvaluationLogScanner>(
136-
jsonSummaryLocation: string,
137-
scanner: T,
138-
progress?: ProgressCallback,
139-
): Promise<T> {
140-
progress?.({
141-
// all scans have step 1 - the backing progress tracker allows increments instead of steps - but for now we are happy with a tiny UI that says what is happening
142-
message: `Scanning ...`,
143-
step: 1,
144-
maxStep: 2,
145-
});
146-
await readJsonlFile<SummaryEvent>(jsonSummaryLocation, async (obj) => {
147-
scanner.onEvent(obj);
148-
});
149-
scanner.onDone();
150-
return scanner;
151-
}

extensions/ql-vscode/src/log-insights/performance-comparison.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import type { EvaluationLogScanner } from "./log-scanner";
21
import type { SummaryEvent } from "./log-summary";
32

43
export interface PipelineSummary {
@@ -55,7 +54,7 @@ export interface PerformanceComparisonDataFromLog {
5554
pipelineSummaryList: Array<Record<string, PipelineSummary>>;
5655
}
5756

58-
export class PerformanceOverviewScanner implements EvaluationLogScanner {
57+
export class PerformanceOverviewScanner {
5958
private readonly nameToIndex = new Map<string, number>();
6059
private readonly data: PerformanceComparisonDataFromLog = {
6160
names: [],
@@ -178,6 +177,4 @@ export class PerformanceOverviewScanner implements EvaluationLogScanner {
178177
}
179178
}
180179
}
181-
182-
onDone(): void {}
183180
}

extensions/ql-vscode/test/unit-tests/data/evaluator-log-summaries/bad-join-order.jsonl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11458,7 +11458,7 @@
1145811458
"duplicationPercentages" : [ 0, -1, 1, 0, -1, 0, -1, 0, 0, -1, 1, 0, 1, -1, 1, 0 ]
1145911459
}, {
1146011460
"raReference" : "order_500000",
11461-
"counts" : [ 0, -1, 0, -1, 3138, -1, 3138, 3138, -1, 0, -1, 0, 0, -1, 0, -1, 133, 133, -1, 0, 0, -1, 133, 133, 3271, 3271, 3271 ],
11461+
"counts" : [ 0, -1, 0, -1, 31380000, -1, 3138, 3138, -1, 0, -1, 0, 0, -1, 0, -1, 133, 133, -1, 0, 0, -1, 133, 133, 3271, 3271, 3271 ],
1146211462
"duplicationPercentages" : [ 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0, 1 ]
1146311463
}, {
1146411464
"raReference" : "order_500000",

0 commit comments

Comments
 (0)