Skip to content

Commit 770712e

Browse files
authored
ci: Add script for fetching past test stats from CI (#7086)
Refs: #6677, #6675
1 parent 531fd23 commit 770712e

File tree

2 files changed

+156
-0
lines changed

2 files changed

+156
-0
lines changed

scripts/ci-report/README.md

+4
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,7 @@ This program generates a CI report from the `gotests.json` generated by `go test
55
## Limitations
66

77
We won't generate any report/stats for tests that weren't run. To find all existing tests, we could use: `go test ./... -list=. -json`, but the time it takes is probably not worth it. Usually most tests will run, even if there are errors and we're using `-failfast`.
8+
9+
## Misc
10+
11+
The script `fetch_stats_from_ci.sh` can be used to fetch historical stats from CI, e.g. for development or analysis.
+152
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env bash
set -euo pipefail

# Usage: ./fetch_stats_from_ci.sh
#
# This script is for fetching historic test stats from GitHub Actions CI.
#
# Everything is stored in the ci-stats directory next to this script: the
# run list, the job list of each run, the job logs and the extracted stats.
# Files that already exist are not fetched or generated again, so the script
# can be re-run to continue where it left off.
#
# Requires gh with credentials, and jq.
#
# https://github.com/cli/cli/blob/trunk/pkg/cmd/run/view/view.go#L434

dir="$(dirname "$0")"/ci-stats
mkdir -p "${dir}"

pushd "${dir}" >/dev/null

# Stats step name, used for filtering log.
job_step_name="Print test stats"

# Split one line of jq @tsv output ($1) on tabs into the global array "parts".
split_tsv() {
  mapfile -d $'\t' -t parts <<<"$1"
  # The here-string appends a newline, which ends up in the last field.
  parts[-1]="${parts[-1]%$'\n'}"
}

# Fetch the list of workflow runs once; remove the file on failure so that a
# partial download is never mistaken for a complete list.
if [[ ! -f list-ci.yaml.json ]]; then
  gh run list -w ci.yaml -L 1000 --json conclusion,createdAt,databaseId,displayTitle,event,headBranch,headSha,name,number,startedAt,status,updatedAt,url,workflowDatabaseId,workflowName \
    >list-ci.yaml.json || {
    rm -f list-ci.yaml.json
    exit 1
  }
fi

# One tab-separated line per completed run that either succeeded or failed.
runs="$(
  jq -r '.[] | select(.status == "completed") | select(.conclusion == "success" or .conclusion == "failure") | [.databaseId, .event, .displayTitle, .headBranch, .headSha, .url] | @tsv' \
    <list-ci.yaml.json
)"

# IFS= keeps read from trimming leading/trailing tabs (i.e. empty fields).
while IFS= read -r run; do
  # A here-string always yields one line, even when no run matched. Skip
  # the empty line, otherwise the field lookups below abort under "set -u".
  if [[ -z ${run} ]]; then
    continue
  fi
  split_tsv "${run}"

  database_id="${parts[0]}"
  event="${parts[1]}"
  display_title="${parts[2]}"
  head_branch="${parts[3]}"
  head_sha="${parts[4]}"
  run_url="${parts[5]}"

  # Check if this run predates the stats PR, if yes, skip it:
  # https://github.com/coder/coder/issues/6676
  if [[ ${database_id} -le 4595490577 ]]; then
    echo "Skipping ${database_id} (${display_title}), too old..."
    continue
  fi

  run_jobs_file=run-"${database_id}"-"${event}"-jobs.json
  if [[ ! -f "${run_jobs_file}" ]]; then
    echo "Fetching jobs for run: ${display_title} (${database_id}, ${event}, ${head_branch})"
    gh run view "${database_id}" --json jobs >"${run_jobs_file}" || {
      rm -f "${run_jobs_file}"
      exit 1
    }
  fi

  # One tab-separated line per completed test-go* job that either succeeded
  # or failed. This is empty when the run has no such job.
  jobs="$(
    jq -r '.jobs[] | select(.name | startswith("test-go")) | select(.status == "completed") | select(.conclusion == "success" or .conclusion == "failure") | [.databaseId, .startedAt, .completedAt, .name, .url] | @tsv' \
      <"${run_jobs_file}"
  )"

  while IFS= read -r job; do
    # Runs without a matching job produce a single empty line, skip it.
    if [[ -z ${job} ]]; then
      continue
    fi
    split_tsv "${job}"

    job_database_id="${parts[0]}"
    job_started_at="${parts[1]}"
    job_completed_at="${parts[2]}"
    job_name="${parts[3]}"
    job_url="${parts[4]}"

    # Stats were already extracted by a previous invocation, there is no
    # need to fetch or parse the log again.
    job_stats_file=run-"${database_id}"-job-"${job_database_id}"-"${job_name}"-stats.json
    if [[ -f "${job_stats_file}" ]]; then
      continue
    fi

    job_log=run-"${database_id}"-job-"${job_database_id}"-"${job_name}".log
    if [[ ! -f "${job_log}" ]]; then
      echo "Fetching log for: ${job_name} (${job_database_id}, ${job_url})"
      # Example log (partial).
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:18.4063489Z ##[group]Run # Artifacts are not available after rerunning a job,
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:18.4063872Z # Artifacts are not available after rerunning a job,
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:18.4064188Z # so we need to print the test stats to the log.
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:18.4064642Z go run ./scripts/ci-report/main.go gotests.json | tee gotests_stats.json
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:18.4110112Z shell: /usr/bin/bash -e {0}
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:18.4110364Z ##[endgroup]
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:19.3440469Z {
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:19.3441078Z "packages": [
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:19.3441448Z {
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:19.3442927Z "name": "agent",
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:19.3443311Z "time": 17.538
      # test-go (ubuntu-latest) Print test stats 2023-04-11T03:02:19.3444048Z },
      # ...
      gh run view --job "${job_database_id}" --log >"${job_log}" || {
        # Sometimes gh fails to extract ZIP, etc. :'(
        rm -f "${job_log}"
        echo "Failed to fetch log for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
        continue
      }
      log_lines="$(wc -l "${job_log}" | awk '{print $1}')"
      if [[ ${log_lines} -lt 2 ]]; then
        # Sometimes gh returns nothing and gives no error :'(
        rm -f "${job_log}"
        echo "Log is empty for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
        continue
      fi
    fi

    # Extract the stats job output (JSON) from the job log,
    # discarding the timestamp and non-JSON header.
    #
    # Example variable values:
    #   job_name="test-go (ubuntu-latest)"
    #   job_step_name="Print test stats"
    #
    # With pipefail, a grep that matches nothing fails the whole pipeline,
    # which is how a log without stats output is detected.
    if ! job_stats="$(
      grep "${job_name}.*${job_step_name}" "${job_log}" |
        sed -E 's/.*[0-9-]{10}T[0-9:]{8}\.[0-9]*Z //' |
        grep -E "^[{}\ ].*"
    )"; then
      echo "Failed to find stats in job log: ${job_name} (${job_database_id}, ${job_url}), skipping..."
      continue
    fi

    if ! jq -e . >/dev/null 2>&1 <<<"${job_stats}"; then
      # Sometimes actions logs are partial when fetched via CLI :'(
      echo "Failed to parse stats for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
      continue
    fi

    # Wrap the extracted stats together with run and job metadata. Remove
    # the output on failure so that no truncated stats file is left behind.
    jq \
      --argjson run_id "${database_id}" \
      --arg run_url "${run_url}" \
      --arg event "${event}" \
      --arg branch "${head_branch}" \
      --arg sha "${head_sha}" \
      --arg started_at "${job_started_at}" \
      --arg completed_at "${job_completed_at}" \
      --arg display_title "${display_title}" \
      --argjson job_id "${job_database_id}" \
      --arg job "${job_name}" \
      --arg job_url "${job_url}" \
      '{run_id: $run_id, run_url: $run_url, event: $event, branch: $branch, sha: $sha, started_at: $started_at, completed_at: $completed_at, display_title: $display_title, job_id: $job_id, job: $job, job_url: $job_url, stats: .}' \
      <<<"${job_stats}" \
      >"${job_stats_file}" || {
      echo "Failed to write stats for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
      rm -f "${job_stats_file}"
      exit 1
    }
  done <<<"${jobs}"
done <<<"${runs}"

0 commit comments

Comments
 (0)