Skip to content

Commit 3ea6e48

Browse files
Switch to topological sorting for MATCH patterns in new query planner
1 parent 759fd02 commit 3ea6e48

File tree

2 files changed

+207
-8
lines changed

2 files changed

+207
-8
lines changed

core/src/main/java/com/orientechnologies/orient/core/sql/executor/OMatchExecutionPlanner.java

Lines changed: 205 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ public OInternalExecutionPlan createExecutionPlan(OCommandContext context, boole
7575
}
7676
result.chain(step);
7777
} else {
78-
OInternalExecutionPlan plan = createPlanForPattern(pattern, context, estimatedRootEntries, aliasesToPrefetch, enableProfiling);
78+
OInternalExecutionPlan plan = createPlanForPattern(pattern, context, estimatedRootEntries, aliasesToPrefetch,
79+
enableProfiling);
7980
for (OExecutionStep step : plan.getSteps()) {
8081
result.chain((OExecutionStepInternal) step);
8182
}
@@ -126,7 +127,7 @@ private void addReturnStep(OSelectExecutionPlan result, OCommandContext context,
126127
private OInternalExecutionPlan createPlanForPattern(Pattern pattern, OCommandContext context,
127128
Map<String, Long> estimatedRootEntries, Set<String> prefetchedAliases, boolean profilingEnabled) {
128129
OSelectExecutionPlan plan = new OSelectExecutionPlan(context);
129-
List<EdgeTraversal> sortedEdges = sortEdges(estimatedRootEntries, pattern, context);
130+
List<EdgeTraversal> sortedEdges = getTopologicalSortedSchedule(estimatedRootEntries, pattern);
130131

131132
boolean first = true;
132133
if (sortedEdges.size() > 0) {
@@ -154,6 +155,200 @@ private OInternalExecutionPlan createPlanForPattern(Pattern pattern, OCommandCon
154155
return plan;
155156
}
156157

158+
/**
159+
* sort edges in the order they will be matched
160+
*/
161+
private List<EdgeTraversal> getTopologicalSortedSchedule(Map<String, Long> estimatedRootEntries,
162+
Pattern pattern) {
163+
List<EdgeTraversal> resultingSchedule = new ArrayList<>();
164+
Map<String, Set<String>> remainingDependencies = getDependencies(pattern);
165+
Set<PatternNode> visitedNodes = new HashSet<>();
166+
Set<PatternEdge> visitedEdges = new HashSet<>();
167+
168+
// Sort the possible root vertices in order of estimated size, since we want to start with a small vertex set.
169+
List<OPair<Long, String>> rootWeights = new ArrayList<>();
170+
for (Map.Entry<String, Long> root : estimatedRootEntries.entrySet()) {
171+
rootWeights.add(new OPair<>(root.getValue(), root.getKey()));
172+
}
173+
Collections.sort(rootWeights);
174+
175+
// Add the starting vertices, in the correct order, to an ordered set.
176+
Set<String> remainingStarts = new LinkedHashSet<String>();
177+
for (OPair<Long, String> item : rootWeights) {
178+
remainingStarts.add(item.getValue());
179+
}
180+
// Add all the remaining aliases after all the suggested start points.
181+
for (String alias : pattern.aliasToNode.keySet()) {
182+
if (!remainingStarts.contains(alias)) {
183+
remainingStarts.add(alias);
184+
}
185+
}
186+
187+
while (resultingSchedule.size() < pattern.numOfEdges) {
188+
// Start a new depth-first pass, adding all nodes with satisfied dependencies.
189+
// 1. Find a starting vertex for the depth-first pass.
190+
PatternNode startingNode = null;
191+
List<String> startsToRemove = new ArrayList<String>();
192+
for (String currentAlias : remainingStarts) {
193+
PatternNode currentNode = pattern.aliasToNode.get(currentAlias);
194+
195+
if (visitedNodes.contains(currentNode)) {
196+
// If a previous traversal already visited this alias, remove it from further consideration.
197+
startsToRemove.add(currentAlias);
198+
} else if (remainingDependencies.get(currentAlias).isEmpty()) {
199+
// If it hasn't been visited, and has all dependencies satisfied, visit it.
200+
startsToRemove.add(currentAlias);
201+
startingNode = currentNode;
202+
break;
203+
}
204+
}
205+
remainingStarts.removeAll(startsToRemove);
206+
207+
if (startingNode == null) {
208+
// We didn't manage to find a valid root, and yet we haven't constructed a complete schedule.
209+
// This means there must be a cycle in our dependency graph, or all dependency-free nodes are optional.
210+
// Therefore, the query is invalid.
211+
throw new OCommandExecutionException("This query contains MATCH conditions that cannot be evaluated, "
212+
+ "like an undefined alias or a circular dependency on a $matched condition.");
213+
}
214+
215+
// 2. Having found a starting vertex, traverse its neighbors depth-first,
216+
// adding any non-visited ones with satisfied dependencies to our schedule.
217+
updateScheduleStartingAt(startingNode, visitedNodes, visitedEdges, remainingDependencies, resultingSchedule);
218+
}
219+
220+
if (resultingSchedule.size() != pattern.numOfEdges) {
221+
throw new AssertionError("Incorrect number of edges: " + resultingSchedule.size() + " vs " + pattern.numOfEdges);
222+
}
223+
224+
return resultingSchedule;
225+
}
226+
227+
/**
228+
* Start a depth-first traversal from the starting node, adding all viable unscheduled edges and vertices.
229+
*
230+
* @param startNode the node from which to start the depth-first traversal
231+
* @param visitedNodes set of nodes that are already visited (mutated in this function)
232+
* @param visitedEdges set of edges that are already visited and therefore don't need to be scheduled (mutated in this
233+
* function)
234+
* @param remainingDependencies dependency map including only the dependencies that haven't yet been satisfied (mutated in this
235+
* function)
236+
* @param resultingSchedule the schedule being computed i.e. appended to (mutated in this function)
237+
*/
238+
private void updateScheduleStartingAt(PatternNode startNode, Set<PatternNode> visitedNodes, Set<PatternEdge> visitedEdges,
239+
Map<String, Set<String>> remainingDependencies, List<EdgeTraversal> resultingSchedule) {
240+
// OrientDB requires the schedule to contain all edges present in the query, which is a stronger condition
241+
// than simply visiting all nodes in the query. Consider the following example query:
242+
// MATCH {
243+
// class: A,
244+
// as: foo
245+
// }.in() {
246+
// as: bar
247+
// }, {
248+
// class: B,
249+
// as: bar
250+
// }.out() {
251+
// as: foo
252+
// } RETURN $matches
253+
// The schedule for the above query must have two edges, even though there are only two nodes and they can both
254+
// be visited with the traversal of a single edge.
255+
//
256+
// To satisfy it, we obey the following for each non-optional node:
257+
// - ignore edges to neighboring nodes which have unsatisfied dependencies;
258+
// - for visited neighboring nodes, add their edge if it wasn't already present in the schedule, but do not
259+
// recurse into the neighboring node;
260+
// - for unvisited neighboring nodes with satisfied dependencies, add their edge and recurse into them.
261+
visitedNodes.add(startNode);
262+
for (Set<String> dependencies : remainingDependencies.values()) {
263+
dependencies.remove(startNode.alias);
264+
}
265+
266+
Map<PatternEdge, Boolean> edges = new LinkedHashMap<PatternEdge, Boolean>();
267+
for (PatternEdge outEdge : startNode.out) {
268+
edges.put(outEdge, true);
269+
}
270+
for (PatternEdge inEdge : startNode.in) {
271+
edges.put(inEdge, false);
272+
}
273+
274+
for (Map.Entry<PatternEdge, Boolean> edgeData : edges.entrySet()) {
275+
PatternEdge edge = edgeData.getKey();
276+
boolean isOutbound = edgeData.getValue();
277+
PatternNode neighboringNode = isOutbound ? edge.in : edge.out;
278+
279+
if (!remainingDependencies.get(neighboringNode.alias).isEmpty()) {
280+
// Unsatisfied dependencies, ignore this neighboring node.
281+
continue;
282+
}
283+
284+
if (visitedNodes.contains(neighboringNode)) {
285+
if (!visitedEdges.contains(edge)) {
286+
// If we are executing in this block, we are in the following situation:
287+
// - the startNode has not been visited yet;
288+
// - it has a neighboringNode that has already been visited;
289+
// - the edge between the startNode and the neighboringNode has not been scheduled yet.
290+
//
291+
// The isOutbound value shows us whether the edge is outbound from the point of view of the startNode.
292+
// However, if there are edges to the startNode, we must visit the startNode from an already-visited
293+
// neighbor, to preserve the validity of the traversal. Therefore, we negate the value of isOutbound
294+
// to ensure that the edge is always scheduled in the direction from the already-visited neighbor
295+
// toward the startNode. Notably, this is also the case when evaluating "optional" nodes -- we always
296+
// visit the optional node from its non-optional and already-visited neighbor.
297+
//
298+
// The only exception to the above is when we have edges with "while" conditions. We are not allowed
299+
// to flip their directionality, so we leave them as-is.
300+
boolean traversalDirection;
301+
if (startNode.optional || edge.item.isBidirectional()) {
302+
traversalDirection = !isOutbound;
303+
} else {
304+
traversalDirection = isOutbound;
305+
}
306+
307+
visitedEdges.add(edge);
308+
resultingSchedule.add(new EdgeTraversal(edge, traversalDirection));
309+
}
310+
} else if (!startNode.optional) {
311+
// If the neighboring node wasn't visited, we don't expand the optional node into it, hence the above check.
312+
// Instead, we'll allow the neighboring node to add the edge we failed to visit, via the above block.
313+
if (visitedEdges.contains(edge)) {
314+
// Should never happen.
315+
throw new AssertionError("The edge was visited, but the neighboring vertex was not: " + edge + " " + neighboringNode);
316+
}
317+
318+
visitedEdges.add(edge);
319+
resultingSchedule.add(new EdgeTraversal(edge, isOutbound));
320+
updateScheduleStartingAt(neighboringNode, visitedNodes, visitedEdges, remainingDependencies, resultingSchedule);
321+
}
322+
}
323+
}
324+
325+
/**
326+
* Calculate the set of dependency aliases for each alias in the pattern.
327+
*
328+
* @param pattern
329+
*
330+
* @return map of alias to the set of aliases it depends on
331+
*/
332+
private Map<String, Set<String>> getDependencies(Pattern pattern) {
333+
Map<String, Set<String>> result = new HashMap<String, Set<String>>();
334+
335+
for (PatternNode node : pattern.aliasToNode.values()) {
336+
Set<String> currentDependencies = new HashSet<String>();
337+
338+
OWhereClause filter = aliasFilters.get(node.alias);
339+
if (filter != null && filter.getBaseExpression() != null) {
340+
List<String> involvedAliases = filter.getBaseExpression().getMatchPatternInvolvedAliases();
341+
if (involvedAliases != null) {
342+
currentDependencies.addAll(involvedAliases);
343+
}
344+
}
345+
346+
result.put(node.alias, currentDependencies);
347+
}
348+
349+
return result;
350+
}
351+
157352
private void splitDisjointPatterns(OCommandContext context) {
158353
if (this.subPatterns != null) {
159354
return;
@@ -162,7 +357,8 @@ private void splitDisjointPatterns(OCommandContext context) {
162357
this.subPatterns = pattern.getDisjointPatterns();
163358
}
164359

165-
private void addStepsFor(OSelectExecutionPlan plan, EdgeTraversal edge, OCommandContext context, boolean first, boolean profilingEnabled) {
360+
private void addStepsFor(OSelectExecutionPlan plan, EdgeTraversal edge, OCommandContext context, boolean first,
361+
boolean profilingEnabled) {
166362
if (first) {
167363
PatternNode patternNode = edge.out ? edge.edge.out : edge.edge.in;
168364
String clazz = this.aliasClasses.get(patternNode.alias);
@@ -174,7 +370,8 @@ private void addStepsFor(OSelectExecutionPlan plan, EdgeTraversal edge, OCommand
174370
select.setWhereClause(where == null ? null : where.copy());
175371
OBasicCommandContext subContxt = new OBasicCommandContext();
176372
subContxt.setParentWithoutOverridingChild(context);
177-
plan.chain(new MatchFirstStep(context, patternNode, select.createExecutionPlan(subContxt, profilingEnabled), profilingEnabled));
373+
plan.chain(
374+
new MatchFirstStep(context, patternNode, select.createExecutionPlan(subContxt, profilingEnabled), profilingEnabled));
178375
}
179376
if (edge.edge.in.isOptionalNode()) {
180377
foundOptional = true;
@@ -184,13 +381,15 @@ private void addStepsFor(OSelectExecutionPlan plan, EdgeTraversal edge, OCommand
184381
}
185382
}
186383

187-
private void addPrefetchSteps(OSelectExecutionPlan result, Set<String> aliasesToPrefetch, OCommandContext context, boolean profilingEnabled) {
384+
private void addPrefetchSteps(OSelectExecutionPlan result, Set<String> aliasesToPrefetch, OCommandContext context,
385+
boolean profilingEnabled) {
188386
for (String alias : aliasesToPrefetch) {
189387
String targetClass = aliasClasses.get(alias);
190388
OWhereClause filter = aliasFilters.get(alias);
191389
OSelectStatement prefetchStm = createSelectStatement(targetClass, filter);
192390

193-
MatchPrefetchStep step = new MatchPrefetchStep(context, prefetchStm.createExecutionPlan(context, profilingEnabled), alias, profilingEnabled);
391+
MatchPrefetchStep step = new MatchPrefetchStep(context, prefetchStm.createExecutionPlan(context, profilingEnabled), alias,
392+
profilingEnabled);
194393
result.chain(step);
195394
}
196395
}

core/src/main/java/com/orientechnologies/orient/core/sql/parser/Pattern.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
* Created by luigidellaquila on 28/07/15.
1212
*/
1313
public class Pattern {
14-
Map<String, PatternNode> aliasToNode = new LinkedHashMap<String, PatternNode>();
15-
int numOfEdges = 0;
14+
public Map<String, PatternNode> aliasToNode = new LinkedHashMap<String, PatternNode>();
15+
public int numOfEdges = 0;
1616

1717
public void addExpression(OMatchExpression expression) {
1818
PatternNode originNode = getOrCreateNode(expression.origin);

0 commit comments

Comments
 (0)