Skip to content

Misc: Add stage overlap script #19156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 3, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions misc/scripts/stageoverlap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3

import sys
import os
import re

# The DIL file to analyse is taken from the first command-line argument.
if len(sys.argv) < 2:
    # Usage errors go to stderr so they never mix with the report on stdout.
    print("Usage: stageoverlap.py <dil>", file=sys.stderr)
    sys.exit(1)

dilfile = sys.argv[1]

# State shared by every call to process_stage():
# keys (space-joined cached predicate names) of the stages already reported
seen_stages = set()
# predicate name -> number of the stage in which it was first seen
computed_predicates = {}
# number of the most recently reported stage; the first reported stage is 1
stage_number = 0

def process_stage(stage, cached):
    """Print a report for one stage and flag predicates seen in earlier stages.

    A stage is identified by the space-joined names of its cached predicates.
    A stage whose key was already recorded is skipped entirely; stages whose
    cached predicates include '#select' are never recorded, so they are
    always reported.

    For every reported stage this bumps the global ``stage_number``, prints a
    header with the cached predicates, and then prints one
    ``Recompute from <n>: <predicate>`` line for each predicate that was
    already registered in ``computed_predicates``. Predicates seen for the
    first time are registered with the current stage number.

    Args:
        stage: iterable of predicate names found in the stage.
        cached: list of the names of the stage's cached (query) predicates.
    """
    global stage_number
    stage_key = ' '.join(cached)
    # skip repeated stages (in case we're looking at DIL for several queries, e.g. from a .qls)
    if stage_key in seen_stages:
        return
    # don't count the query-stage as seen, since we don't want to skip those
    if '#select' not in cached:
        seen_stages.add(stage_key)
    stage_number += 1
    print('STAGE ' + str(stage_number) + ':')
    print(str(len(cached)) + ' cached predicate(s)')
    print(stage_key)
    for predicate in stage:
        # strip a trailing '#' followed by b/f characters, i.e. disregard magic
        predicate = re.sub(r'#[bf]+$', '', predicate)
        # TODO: maybe also strip the hash?
        # predicate = re.sub('#[a-f0-9]+$', '', predicate)
        if predicate not in computed_predicates:
            # first sighting: remember which stage it belongs to
            computed_predicates[predicate] = stage_number
            continue
        # skip db-relations and some generated predicates
        if predicate.startswith(('@', 'project#')):
            continue
        prior_stage = computed_predicates[predicate]
        print('Recompute from ' + str(prior_stage) + ': ' + predicate)
    print()

# Scan the DIL file line by line, collecting the predicate names of the
# current stage and handing them to process_stage() at each end-of-stage marker.
with open(dilfile, 'r') as f:
    stage = []     # predicate names found in the current stage
    cached = []    # names of the current stage's query (aka cached) predicates
    query = False  # True right after a bare 'query' line: the name comes next
    for line in f:
        # skip lines starting with a space, i.e. predicate bodies
        if line.startswith(' '):
            continue
        # Drop the line terminator once, up front. Otherwise a predicate name
        # that is the last token on its line would keep a trailing '\n' (and
        # e.g. never compare equal to '#select'), and the exact-match checks
        # below would miss a final line that has no newline.
        line = line.rstrip('\n')
        # get the part of the line containing no spaces occurring before the first '('
        # this is the predicate name
        parenpos = line.find('(')
        if parenpos != -1:
            start = line.rfind(' ', 0, parenpos)
            predicate = line[start + 1:parenpos]
            if predicate.startswith('`'):
                # remove the leading and trailing backticks
                predicate = predicate[1:-1]
            stage.append(predicate)
            continue
        # query predicates, aka cached predicates, are written either as
        # 'query <predicatename> = ...' on one line, or split across 2+ lines
        if line.startswith('query '):
            cached.append(line.split(' ')[1])
            continue
        if line == 'query':
            query = True
            continue
        if query:
            # continuation of a bare 'query' line: the name is the first token
            cached.append(line.split(' ')[0])
            query = False
            continue
        if line == '/* ---------- END STAGE ---------- */':
            process_stage(stage, cached)
            stage = []
            cached = []