Skip to content

fix: simplify docs-preview workflow #17292

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 8, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 84 additions & 113 deletions .github/workflows/docs-preview-link.yml
Original file line number Diff line number Diff line change
Expand Up @@ -457,19 +457,65 @@ jobs:
${{ env.CACHE_PREFIX }}-
${{ runner.os }}-

# Use our composite action to analyze documentation changes more efficiently
# Use manual steps instead of composite action
- name: Analyze documentation changes
id: docs-analysis
if: steps.pr_info.outputs.skip != 'true'
# Force GitHub Actions to update cache by using the full path with @ syntax
uses: ./.github/actions/docs-analysis@${{ github.sha }}
with:
docs-path: "${{ env.DOCS_PRIMARY_PATH }}"
pr-ref: "${{ steps.pr_info.outputs.branch_name }}"
base-ref: "main"
significant-words-threshold: "${{ env.SIGNIFICANT_WORDS_THRESHOLD }}"
throttle-large-repos: "true"
debug-mode: "${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug == 'true' || 'false' }}"
shell: bash
run: |
# Work out which documentation files this PR touches and publish the result
# as step outputs (appended to the file named by $GITHUB_OUTPUT).
#
# Outputs written: docs_changed, docs_files_count, words_added, words_removed,
# changed_docs_files, docs_dir_files (only when non-empty), images_added,
# images_modified, images_deleted, images_total, manifest_changed, format_only,
# significant_change, image_focused, has_non_docs_changes.

# Start from "changed"; the key is written again below with the final value.
echo "docs_changed=true" >> "$GITHUB_OUTPUT"

# Both values are filled in by the workflow expression syntax before bash runs.
BRANCH_NAME="${{ steps.pr_info.outputs.branch_name }}"
DOCS_PRIMARY_PATH="${{ env.DOCS_PRIMARY_PATH }}"

# BRANCH_NAME is only logged; the comparison below is origin/main against HEAD.
echo "Looking for changes in branch: $BRANCH_NAME"

# Every file changed relative to origin/main. A failing git command is treated
# the same as "nothing changed" (best-effort, as before).
ALL_CHANGED_FILES=$(git diff --name-only origin/main..HEAD || true)

# Documentation subset: anything under the docs path, or any markdown file.
# grep exits non-zero when nothing matches, hence the `|| true`.
CHANGED_FILES=$(printf '%s\n' "$ALL_CHANGED_FILES" | grep -E "^$DOCS_PRIMARY_PATH|^.*\.md$" || true)

if [[ -z "$CHANGED_FILES" ]]; then
  echo "No documentation files changed in this PR."
  echo "docs_changed=false" >> "$GITHUB_OUTPUT"
  exit 0
fi

echo "Found changed documentation files, proceeding with analysis."
echo "docs_changed=true" >> "$GITHUB_OUTPUT"

# Count the documentation files (one path per line).
DOCS_FILES_COUNT=$(printf '%s\n' "$CHANGED_FILES" | wc -l | tr -d ' ')
{
  echo "docs_files_count=$DOCS_FILES_COUNT"
  # NOTE(review): fixed placeholder values, not measured from the diff.
  echo "words_added=100"
  echo "words_removed=50"

  # Multi-line output: all documentation files, for further processing.
  echo "changed_docs_files<<EOF"
  echo "$CHANGED_FILES"
  echo "EOF"
} >> "$GITHUB_OUTPUT"

# Files under the docs directory only, used for the preview link.
DOCS_DIR_FILES=$(printf '%s\n' "$CHANGED_FILES" | grep "^$DOCS_PRIMARY_PATH" || true)
if [[ -n "$DOCS_DIR_FILES" ]]; then
  {
    echo "docs_dir_files<<EOF"
    echo "$DOCS_DIR_FILES"
    echo "EOF"
  } >> "$GITHUB_OUTPUT"
fi

# Default values for the remaining outputs; none of these are computed here.
{
  echo "images_added=0"
  echo "images_modified=0"
  echo "images_deleted=0"
  echo "images_total=0"
  echo "manifest_changed=false"
  echo "format_only=false"
  echo "significant_change=true"
  echo "image_focused=false"
  echo "has_non_docs_changes=false"
} >> "$GITHUB_OUTPUT"

# Summary for the job log. The total is counted from the unfiltered file list;
# counting the docs-only list here would always report "N out of N".
TOTAL_FILES_COUNT=$(printf '%s\n' "$ALL_CHANGED_FILES" | wc -l | tr -d ' ')
echo "PR changes $DOCS_FILES_COUNT docs files out of $TOTAL_FILES_COUNT total files"

# Update the status check with verification results using Check Run API
- name: Update verification status
Expand Down Expand Up @@ -791,7 +837,9 @@ jobs:

# Extract potential document titles from files to provide better context
DOC_STRUCTURE={}
for file in $(git diff --name-only origin/main); do
FILES_TO_ANALYZE=$(git diff --name-only origin/main..HEAD)

for file in $FILES_TO_ANALYZE; do
if [[ "$file" == *.md && -f "$file" ]]; then
# Extract document title (first heading)
TITLE=$(head -50 "$file" | grep -E "^# " | head -1 | sed 's/^# //')
Expand All @@ -801,9 +849,9 @@ jobs:
fi

# Count headings at each level
H1_COUNT=$(grep -c "^# " "$file")
H2_COUNT=$(grep -c "^## " "$file")
H3_COUNT=$(grep -c "^### " "$file")
# Count headings at each level in "$file".
# `grep -c` always prints the count, and exits 1 when that count is 0.
# Falling back to `echo "0"` would append a second "0" to the captured value
# ("0<newline>0"), so only the exit status is neutralised here.
H1_COUNT=$(grep -c "^# " "$file" || true)
H2_COUNT=$(grep -c "^## " "$file" || true)
H3_COUNT=$(grep -c "^### " "$file" || true)

echo "Document structure for $file: H1=$H1_COUNT, H2=$H2_COUNT, H3=$H3_COUNT"
echo "$file:$H1_COUNT:$H2_COUNT:$H3_COUNT" >> .github/temp/doc_structure.txt
Expand All @@ -824,12 +872,10 @@ jobs:
run: |
# Set variables for this step
PR_NUMBER="${{ needs.verify-docs-changes.outputs.pr_number }}"
DIFF_TARGET="${{ steps.checkout_docs.outputs.diff_target }}"
IS_IMAGE_FOCUSED="${{ needs.verify-docs-changes.outputs.image_focused }}"


# Get the list of changed files in the docs directory or markdown files
echo "Finding changed documentation files..."
CHANGED_FILES=$(git diff --name-only origin/main..$DIFF_TARGET | grep -E "^docs/|\.md$" || echo "")
CHANGED_FILES=$(git diff --name-only origin/main..HEAD | grep -E "^docs/|\.md$" || echo "")

if [[ -z "$CHANGED_FILES" ]]; then
echo "No documentation files changed in this PR."
Expand All @@ -848,107 +894,32 @@ jobs:
echo "Analyzing files to find the one with most additions..."
MOST_CHANGED=""
MAX_ADDITIONS=0
MOST_SIGNIFICANT_IMAGE=""

# First, check if this is an image-focused PR to prioritize images
if [[ "$IS_IMAGE_FOCUSED" == "true" ]]; then
echo "This is an image-focused PR, prioritizing image files in analysis"

# Find the most significant image change
IMAGE_FILES=$(git diff --name-status origin/main..$DIFF_TARGET | grep -E ".(png|jpg|jpeg|gif|svg|webp)$" | awk '{print $2}')

if [[ -n "$IMAGE_FILES" ]]; then
# Find the largest added/modified image by looking at file size
while IFS= read -r img_file; do
if [[ -f "$img_file" ]]; then
# Get file size in bytes (compatible with both macOS and Linux)
FILE_SIZE=$(stat -f "%z" "$img_file" 2>/dev/null || stat -c "%s" "$img_file" 2>/dev/null || echo "0")

# Find containing markdown file to link to
# Look for filenames that include the image basename
IMAGE_BASENAME=$(basename "$img_file")
CONTAINING_MD=$(grep -l "$IMAGE_BASENAME" $(find docs -name "*.md") 2>/dev/null | head -1)

if [[ -n "$CONTAINING_MD" ]]; then
echo "Found image $img_file ($FILE_SIZE bytes) referenced in $CONTAINING_MD"
if [[ -z "$MOST_SIGNIFICANT_IMAGE" || $FILE_SIZE -gt $MAX_ADDITIONS ]]; then
MOST_SIGNIFICANT_IMAGE="$img_file"
MOST_CHANGED="$CONTAINING_MD"
MAX_ADDITIONS=$FILE_SIZE
fi
else
echo "Found image $img_file ($FILE_SIZE bytes) but no matching markdown file"
if [[ -z "$MOST_SIGNIFICANT_IMAGE" || $FILE_SIZE -gt $MAX_ADDITIONS ]]; then
MOST_SIGNIFICANT_IMAGE="$img_file"
MOST_CHANGED=""
MAX_ADDITIONS=$FILE_SIZE
fi
fi
fi
done <<< "$IMAGE_FILES"

# Simple file analysis based on line count
for file in $CHANGED_FILES; do
if [[ -f "$file" ]]; then
# Get number of lines in file as a simple proxy for significance
LINE_COUNT=$(wc -l < "$file" | tr -d ' ')

if [[ -n "$MOST_SIGNIFICANT_IMAGE" ]]; then
echo "Most significant image: $MOST_SIGNIFICANT_IMAGE ($MAX_ADDITIONS bytes)"
echo "most_significant_image=$MOST_SIGNIFICANT_IMAGE" >> $GITHUB_OUTPUT

# If we found a containing markdown file, use that for the URL path
if [[ -n "$MOST_CHANGED" ]]; then
echo "Referenced in markdown file: $MOST_CHANGED"

# Convert path to URL path by removing the file extension and default index files
URL_PATH=$(echo "$MOST_CHANGED" | sed -E 's/\.md$//' | sed -E 's/\/index$//')
echo "URL path for markdown file: $URL_PATH"

echo "most_changed_file=$MOST_CHANGED" >> $GITHUB_OUTPUT
echo "most_changed_url_path=$URL_PATH" >> $GITHUB_OUTPUT
echo "most_changed_additions=$MAX_ADDITIONS" >> $GITHUB_OUTPUT

# Add image URL for thumbnail display if possible
IMAGE_URL_PATH=$(echo "$MOST_SIGNIFICANT_IMAGE" | sed 's/^docs\///')
echo "most_changed_image=$IMAGE_URL_PATH" >> $GITHUB_OUTPUT
fi
if (( LINE_COUNT > MAX_ADDITIONS )); then
MAX_ADDITIONS=$LINE_COUNT
MOST_CHANGED=$file
fi
fi

# If we haven't found a significant image link, fall back to default behavior
if [[ -z "$MOST_CHANGED" ]]; then
echo "No significant image reference found, falling back to regular analysis"
else
# We've found our image connection, so we can exit this step
return 0
fi
fi
done

# Standard analysis for finding the most changed file if not already found
if [[ -z "$MOST_CHANGED" ]]; then
MAX_ADDITIONS=0
if [[ -n "$MOST_CHANGED" ]]; then
echo "Most changed file: $MOST_CHANGED with $MAX_ADDITIONS lines"

while IFS= read -r file; do
if [[ -n "$file" ]]; then
# Get additions count for this file
ADDITIONS=$(git diff --numstat origin/main..$DIFF_TARGET -- "$file" | awk '{print $1}')

if (( ADDITIONS > MAX_ADDITIONS && ADDITIONS > 0 )); then
MAX_ADDITIONS=$ADDITIONS
MOST_CHANGED=$file
fi
fi
done <<< "$CHANGED_FILES"

if [[ -n "$MOST_CHANGED" ]]; then
echo "Most changed file: $MOST_CHANGED with $MAX_ADDITIONS additions"

# Convert path to URL path by removing the file extension and default index files
URL_PATH=$(echo $MOST_CHANGED | sed -E 's/\.md$//' | sed -E 's/\/index$//')
echo "URL path for most changed file: $URL_PATH"

echo "most_changed_file=$MOST_CHANGED" >> $GITHUB_OUTPUT
echo "most_changed_url_path=$URL_PATH" >> $GITHUB_OUTPUT
echo "most_changed_additions=$MAX_ADDITIONS" >> $GITHUB_OUTPUT
else
echo "Could not determine most changed file. This is unexpected."
fi
# Convert path to URL path
URL_PATH=$(echo "$MOST_CHANGED" | sed -E 's/\.md$//' | sed -E 's/\/index$//')
echo "URL path for most changed file: $URL_PATH"

echo "most_changed_file=$MOST_CHANGED" >> $GITHUB_OUTPUT
echo "most_changed_url_path=$URL_PATH" >> $GITHUB_OUTPUT
echo "most_changed_additions=$MAX_ADDITIONS" >> $GITHUB_OUTPUT
fi


- name: Create and encode preview URL
id: create_preview_url
Expand Down
Loading