@@ -75,7 +75,8 @@ public OInternalExecutionPlan createExecutionPlan(OCommandContext context, boole
75
75
}
76
76
result .chain (step );
77
77
} else {
78
- OInternalExecutionPlan plan = createPlanForPattern (pattern , context , estimatedRootEntries , aliasesToPrefetch , enableProfiling );
78
+ OInternalExecutionPlan plan = createPlanForPattern (pattern , context , estimatedRootEntries , aliasesToPrefetch ,
79
+ enableProfiling );
79
80
for (OExecutionStep step : plan .getSteps ()) {
80
81
result .chain ((OExecutionStepInternal ) step );
81
82
}
@@ -126,7 +127,7 @@ private void addReturnStep(OSelectExecutionPlan result, OCommandContext context,
126
127
private OInternalExecutionPlan createPlanForPattern (Pattern pattern , OCommandContext context ,
127
128
Map <String , Long > estimatedRootEntries , Set <String > prefetchedAliases , boolean profilingEnabled ) {
128
129
OSelectExecutionPlan plan = new OSelectExecutionPlan (context );
129
- List <EdgeTraversal > sortedEdges = sortEdges (estimatedRootEntries , pattern , context );
130
+ List <EdgeTraversal > sortedEdges = getTopologicalSortedSchedule (estimatedRootEntries , pattern );
130
131
131
132
boolean first = true ;
132
133
if (sortedEdges .size () > 0 ) {
@@ -154,6 +155,200 @@ private OInternalExecutionPlan createPlanForPattern(Pattern pattern, OCommandCon
154
155
return plan ;
155
156
}
156
157
158
+ /**
159
+ * sort edges in the order they will be matched
160
+ */
161
+ private List <EdgeTraversal > getTopologicalSortedSchedule (Map <String , Long > estimatedRootEntries ,
162
+ Pattern pattern ) {
163
+ List <EdgeTraversal > resultingSchedule = new ArrayList <>();
164
+ Map <String , Set <String >> remainingDependencies = getDependencies (pattern );
165
+ Set <PatternNode > visitedNodes = new HashSet <>();
166
+ Set <PatternEdge > visitedEdges = new HashSet <>();
167
+
168
+ // Sort the possible root vertices in order of estimated size, since we want to start with a small vertex set.
169
+ List <OPair <Long , String >> rootWeights = new ArrayList <>();
170
+ for (Map .Entry <String , Long > root : estimatedRootEntries .entrySet ()) {
171
+ rootWeights .add (new OPair <>(root .getValue (), root .getKey ()));
172
+ }
173
+ Collections .sort (rootWeights );
174
+
175
+ // Add the starting vertices, in the correct order, to an ordered set.
176
+ Set <String > remainingStarts = new LinkedHashSet <String >();
177
+ for (OPair <Long , String > item : rootWeights ) {
178
+ remainingStarts .add (item .getValue ());
179
+ }
180
+ // Add all the remaining aliases after all the suggested start points.
181
+ for (String alias : pattern .aliasToNode .keySet ()) {
182
+ if (!remainingStarts .contains (alias )) {
183
+ remainingStarts .add (alias );
184
+ }
185
+ }
186
+
187
+ while (resultingSchedule .size () < pattern .numOfEdges ) {
188
+ // Start a new depth-first pass, adding all nodes with satisfied dependencies.
189
+ // 1. Find a starting vertex for the depth-first pass.
190
+ PatternNode startingNode = null ;
191
+ List <String > startsToRemove = new ArrayList <String >();
192
+ for (String currentAlias : remainingStarts ) {
193
+ PatternNode currentNode = pattern .aliasToNode .get (currentAlias );
194
+
195
+ if (visitedNodes .contains (currentNode )) {
196
+ // If a previous traversal already visited this alias, remove it from further consideration.
197
+ startsToRemove .add (currentAlias );
198
+ } else if (remainingDependencies .get (currentAlias ).isEmpty ()) {
199
+ // If it hasn't been visited, and has all dependencies satisfied, visit it.
200
+ startsToRemove .add (currentAlias );
201
+ startingNode = currentNode ;
202
+ break ;
203
+ }
204
+ }
205
+ remainingStarts .removeAll (startsToRemove );
206
+
207
+ if (startingNode == null ) {
208
+ // We didn't manage to find a valid root, and yet we haven't constructed a complete schedule.
209
+ // This means there must be a cycle in our dependency graph, or all dependency-free nodes are optional.
210
+ // Therefore, the query is invalid.
211
+ throw new OCommandExecutionException ("This query contains MATCH conditions that cannot be evaluated, "
212
+ + "like an undefined alias or a circular dependency on a $matched condition." );
213
+ }
214
+
215
+ // 2. Having found a starting vertex, traverse its neighbors depth-first,
216
+ // adding any non-visited ones with satisfied dependencies to our schedule.
217
+ updateScheduleStartingAt (startingNode , visitedNodes , visitedEdges , remainingDependencies , resultingSchedule );
218
+ }
219
+
220
+ if (resultingSchedule .size () != pattern .numOfEdges ) {
221
+ throw new AssertionError ("Incorrect number of edges: " + resultingSchedule .size () + " vs " + pattern .numOfEdges );
222
+ }
223
+
224
+ return resultingSchedule ;
225
+ }
226
+
227
+ /**
228
+ * Start a depth-first traversal from the starting node, adding all viable unscheduled edges and vertices.
229
+ *
230
+ * @param startNode the node from which to start the depth-first traversal
231
+ * @param visitedNodes set of nodes that are already visited (mutated in this function)
232
+ * @param visitedEdges set of edges that are already visited and therefore don't need to be scheduled (mutated in this
233
+ * function)
234
+ * @param remainingDependencies dependency map including only the dependencies that haven't yet been satisfied (mutated in this
235
+ * function)
236
+ * @param resultingSchedule the schedule being computed i.e. appended to (mutated in this function)
237
+ */
238
+ private void updateScheduleStartingAt (PatternNode startNode , Set <PatternNode > visitedNodes , Set <PatternEdge > visitedEdges ,
239
+ Map <String , Set <String >> remainingDependencies , List <EdgeTraversal > resultingSchedule ) {
240
+ // OrientDB requires the schedule to contain all edges present in the query, which is a stronger condition
241
+ // than simply visiting all nodes in the query. Consider the following example query:
242
+ // MATCH {
243
+ // class: A,
244
+ // as: foo
245
+ // }.in() {
246
+ // as: bar
247
+ // }, {
248
+ // class: B,
249
+ // as: bar
250
+ // }.out() {
251
+ // as: foo
252
+ // } RETURN $matches
253
+ // The schedule for the above query must have two edges, even though there are only two nodes and they can both
254
+ // be visited with the traversal of a single edge.
255
+ //
256
+ // To satisfy it, we obey the following for each non-optional node:
257
+ // - ignore edges to neighboring nodes which have unsatisfied dependencies;
258
+ // - for visited neighboring nodes, add their edge if it wasn't already present in the schedule, but do not
259
+ // recurse into the neighboring node;
260
+ // - for unvisited neighboring nodes with satisfied dependencies, add their edge and recurse into them.
261
+ visitedNodes .add (startNode );
262
+ for (Set <String > dependencies : remainingDependencies .values ()) {
263
+ dependencies .remove (startNode .alias );
264
+ }
265
+
266
+ Map <PatternEdge , Boolean > edges = new LinkedHashMap <PatternEdge , Boolean >();
267
+ for (PatternEdge outEdge : startNode .out ) {
268
+ edges .put (outEdge , true );
269
+ }
270
+ for (PatternEdge inEdge : startNode .in ) {
271
+ edges .put (inEdge , false );
272
+ }
273
+
274
+ for (Map .Entry <PatternEdge , Boolean > edgeData : edges .entrySet ()) {
275
+ PatternEdge edge = edgeData .getKey ();
276
+ boolean isOutbound = edgeData .getValue ();
277
+ PatternNode neighboringNode = isOutbound ? edge .in : edge .out ;
278
+
279
+ if (!remainingDependencies .get (neighboringNode .alias ).isEmpty ()) {
280
+ // Unsatisfied dependencies, ignore this neighboring node.
281
+ continue ;
282
+ }
283
+
284
+ if (visitedNodes .contains (neighboringNode )) {
285
+ if (!visitedEdges .contains (edge )) {
286
+ // If we are executing in this block, we are in the following situation:
287
+ // - the startNode has not been visited yet;
288
+ // - it has a neighboringNode that has already been visited;
289
+ // - the edge between the startNode and the neighboringNode has not been scheduled yet.
290
+ //
291
+ // The isOutbound value shows us whether the edge is outbound from the point of view of the startNode.
292
+ // However, if there are edges to the startNode, we must visit the startNode from an already-visited
293
+ // neighbor, to preserve the validity of the traversal. Therefore, we negate the value of isOutbound
294
+ // to ensure that the edge is always scheduled in the direction from the already-visited neighbor
295
+ // toward the startNode. Notably, this is also the case when evaluating "optional" nodes -- we always
296
+ // visit the optional node from its non-optional and already-visited neighbor.
297
+ //
298
+ // The only exception to the above is when we have edges with "while" conditions. We are not allowed
299
+ // to flip their directionality, so we leave them as-is.
300
+ boolean traversalDirection ;
301
+ if (startNode .optional || edge .item .isBidirectional ()) {
302
+ traversalDirection = !isOutbound ;
303
+ } else {
304
+ traversalDirection = isOutbound ;
305
+ }
306
+
307
+ visitedEdges .add (edge );
308
+ resultingSchedule .add (new EdgeTraversal (edge , traversalDirection ));
309
+ }
310
+ } else if (!startNode .optional ) {
311
+ // If the neighboring node wasn't visited, we don't expand the optional node into it, hence the above check.
312
+ // Instead, we'll allow the neighboring node to add the edge we failed to visit, via the above block.
313
+ if (visitedEdges .contains (edge )) {
314
+ // Should never happen.
315
+ throw new AssertionError ("The edge was visited, but the neighboring vertex was not: " + edge + " " + neighboringNode );
316
+ }
317
+
318
+ visitedEdges .add (edge );
319
+ resultingSchedule .add (new EdgeTraversal (edge , isOutbound ));
320
+ updateScheduleStartingAt (neighboringNode , visitedNodes , visitedEdges , remainingDependencies , resultingSchedule );
321
+ }
322
+ }
323
+ }
324
+
325
+ /**
326
+ * Calculate the set of dependency aliases for each alias in the pattern.
327
+ *
328
+ * @param pattern
329
+ *
330
+ * @return map of alias to the set of aliases it depends on
331
+ */
332
+ private Map <String , Set <String >> getDependencies (Pattern pattern ) {
333
+ Map <String , Set <String >> result = new HashMap <String , Set <String >>();
334
+
335
+ for (PatternNode node : pattern .aliasToNode .values ()) {
336
+ Set <String > currentDependencies = new HashSet <String >();
337
+
338
+ OWhereClause filter = aliasFilters .get (node .alias );
339
+ if (filter != null && filter .getBaseExpression () != null ) {
340
+ List <String > involvedAliases = filter .getBaseExpression ().getMatchPatternInvolvedAliases ();
341
+ if (involvedAliases != null ) {
342
+ currentDependencies .addAll (involvedAliases );
343
+ }
344
+ }
345
+
346
+ result .put (node .alias , currentDependencies );
347
+ }
348
+
349
+ return result ;
350
+ }
351
+
157
352
private void splitDisjointPatterns (OCommandContext context ) {
158
353
if (this .subPatterns != null ) {
159
354
return ;
@@ -162,7 +357,8 @@ private void splitDisjointPatterns(OCommandContext context) {
162
357
this .subPatterns = pattern .getDisjointPatterns ();
163
358
}
164
359
165
- private void addStepsFor (OSelectExecutionPlan plan , EdgeTraversal edge , OCommandContext context , boolean first , boolean profilingEnabled ) {
360
+ private void addStepsFor (OSelectExecutionPlan plan , EdgeTraversal edge , OCommandContext context , boolean first ,
361
+ boolean profilingEnabled ) {
166
362
if (first ) {
167
363
PatternNode patternNode = edge .out ? edge .edge .out : edge .edge .in ;
168
364
String clazz = this .aliasClasses .get (patternNode .alias );
@@ -174,7 +370,8 @@ private void addStepsFor(OSelectExecutionPlan plan, EdgeTraversal edge, OCommand
174
370
select .setWhereClause (where == null ? null : where .copy ());
175
371
OBasicCommandContext subContxt = new OBasicCommandContext ();
176
372
subContxt .setParentWithoutOverridingChild (context );
177
- plan .chain (new MatchFirstStep (context , patternNode , select .createExecutionPlan (subContxt , profilingEnabled ), profilingEnabled ));
373
+ plan .chain (
374
+ new MatchFirstStep (context , patternNode , select .createExecutionPlan (subContxt , profilingEnabled ), profilingEnabled ));
178
375
}
179
376
if (edge .edge .in .isOptionalNode ()) {
180
377
foundOptional = true ;
@@ -184,13 +381,15 @@ private void addStepsFor(OSelectExecutionPlan plan, EdgeTraversal edge, OCommand
184
381
}
185
382
}
186
383
187
- private void addPrefetchSteps (OSelectExecutionPlan result , Set <String > aliasesToPrefetch , OCommandContext context , boolean profilingEnabled ) {
384
+ private void addPrefetchSteps (OSelectExecutionPlan result , Set <String > aliasesToPrefetch , OCommandContext context ,
385
+ boolean profilingEnabled ) {
188
386
for (String alias : aliasesToPrefetch ) {
189
387
String targetClass = aliasClasses .get (alias );
190
388
OWhereClause filter = aliasFilters .get (alias );
191
389
OSelectStatement prefetchStm = createSelectStatement (targetClass , filter );
192
390
193
- MatchPrefetchStep step = new MatchPrefetchStep (context , prefetchStm .createExecutionPlan (context , profilingEnabled ), alias , profilingEnabled );
391
+ MatchPrefetchStep step = new MatchPrefetchStep (context , prefetchStm .createExecutionPlan (context , profilingEnabled ), alias ,
392
+ profilingEnabled );
194
393
result .chain (step );
195
394
}
196
395
}
0 commit comments