Skip to content

Use git history to find PRs in a tag instead of time #619

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions lib/github_changelog_generator/generator/generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,21 @@ def initialize(options = {})
#
# @return [String] Generated changelog file
def compound_changelog
options.load_custom_ruby_files
@options.load_custom_ruby_files
fetch_and_filter_tags
fetch_issues_and_pr

log = ""
log += options[:frontmatter] if options[:frontmatter]
log += @options[:frontmatter] if @options[:frontmatter]
log += "#{options[:header]}\n\n"

log += if options[:unreleased_only]
generate_entry_between_tags(filtered_tags[0], nil)
log += if @options[:unreleased_only]
generate_entry_between_tags(@filtered_tags[0], nil)
else
generate_entries_for_all_tags
end

log += File.read(options[:base]) if File.file?(options[:base])
log += File.read(@options[:base]) if File.file?(@options[:base])

credit_line = "\n\n\\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*"
log.gsub!(credit_line, "") # Remove old credit lines
Expand Down Expand Up @@ -84,7 +84,7 @@ def generate_entry_between_tags(older_tag, newer_tag)
# the SHA for the first commit.
older_tag_name =
if older_tag.nil?
@fetcher.commits_before(newer_tag_time).last["sha"]
@fetcher.oldest_commit["sha"]
else
older_tag["name"]
end
Expand All @@ -97,7 +97,7 @@ def generate_entry_between_tags(older_tag, newer_tag)
#
# @return [Array] filtered issues and pull requests
def filter_issues_for_tags(newer_tag, older_tag)
filtered_pull_requests = delete_by_time(@pull_requests, "merged_at", older_tag, newer_tag)
filtered_pull_requests = filter_by_tag(@pull_requests, newer_tag)
filtered_issues = delete_by_time(@issues, "closed_at", older_tag, newer_tag)

newer_tag_name = newer_tag.nil? ? nil : newer_tag["name"]
Expand Down Expand Up @@ -128,13 +128,16 @@ def generate_entries_for_all_tags
# Builds the changelog text for changes that are not part of any tag yet.
#
# Nothing is generated unless the :unreleased option is set. The section
# starts at the newest filtered tag, falling back to the last entry of
# @sorted_tags when no filtered tag is available.
#
# @return [String] The unreleased section, or "" when it is disabled or empty.
def generate_unreleased_entry
  entry = ""
  return entry unless options[:unreleased]

  # Single assignment: the earlier duplicate of this line was a dead store
  # that was overwritten immediately.
  start_tag = @filtered_tags[0] || @sorted_tags.last
  unreleased_entry = generate_entry_between_tags(start_tag, nil)
  entry += unreleased_entry if unreleased_entry
  entry
end

# Fetches @pull_requests and @issues and filters them based on options.
#
# @return [Nil] No return.
def fetch_issues_and_pr
issues, pull_requests = @fetcher.fetch_closed_issues_and_pr

Expand All @@ -144,6 +147,8 @@ def fetch_issues_and_pr

fetch_events_for_issues_and_pr
detect_actual_closed_dates(@issues + @pull_requests)
add_first_occurring_tag_to_prs(@sorted_tags, @pull_requests)
nil
end
end
end
130 changes: 130 additions & 0 deletions lib/github_changelog_generator/generator/generator_fetcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,122 @@ def detect_actual_closed_dates(issues)
puts "Fetching closed dates for issues: Done!" if options[:verbose]
end

# Adds a key "first_occurring_tag" to each PR with a value of the oldest
# tag that a PR's merge commit occurs in within the git history. This should
# indicate the release of each PR by git's history regardless of dates and
# divergent branches.
#
# Association is attempted in three passes, each one only looking at the PRs
# the previous pass could not place: tag history, release branch, and finally
# "rebased commit" comments.
#
# @param [Array] tags The tags sorted by time, newest to oldest.
# @param [Array] prs The PRs to discover the tags of.
# @return [Nil] No return; PRs are updated in-place and @pull_requests is
#   reduced by the PRs that could not be associated.
def add_first_occurring_tag_to_prs(tags, prs)
  total = prs.count

  prs_left = associate_tagged_prs(tags, prs, total)
  prs_left = associate_release_branch_prs(prs_left, total)
  prs_left = associate_rebase_comment_prs(tags, prs_left, total) if prs_left.any?
  # PRs in prs_left will be untagged, not in release branch, and not
  # rebased. They should not be included in the changelog as they probably
  # have been merged to a branch other than the release branch.
  @pull_requests -= prs_left
  Helper.log.info "Associating PRs with tags: #{total}/#{total}"
  # Return nil explicitly so the logger's return value does not leak out.
  nil
end

# Associate merged PRs by the merge SHA contained in each tag. If the
# merge commit is not found in any tag's history, the PR is left
# unassociated and handed back to the caller.
#
# PRs that have no "merged" event at all are passed to
# associate_rebase_comment_prs; if that cannot place them either, an error
# is raised.
#
# @param [Array] tags The tags sorted by time, newest to oldest.
# @param [Array] prs The PRs to associate.
# @param [Integer] total The total number of PRs to associate; used for verbose printing.
# @return [Array] PRs without their merge commit in a tag.
# @raise [StandardError] When a PR has no merged event and no usable rebase comment.
def associate_tagged_prs(tags, prs, total)
  @fetcher.fetch_tag_shas_async(tags)

  # The oldest-to-newest ordering is the same for every PR, so build it once
  # instead of reversing the tag list inside the loop.
  oldest_first_tags = tags.reverse
  i = 0
  prs.reject do |pr|
    # XXX Wish I could use merge_commit_sha, but gcg doesn't currently
    # fetch that. See
    # https://developer.github.com/v3/pulls/#get-a-single-pull-request vs.
    # https://developer.github.com/v3/pulls/#list-pull-requests
    merged_event = pr["events"] && pr["events"].find { |e| e["event"] == "merged" }
    if merged_event
      # First hit in oldest-first order is the first tag containing the PR.
      oldest_tag = oldest_first_tags.find { |tag| tag["shas_in_tag"].include?(merged_event["commit_id"]) }
      next false unless oldest_tag

      pr["first_occurring_tag"] = oldest_tag["name"]
    else
      # Either there were no events or no merged event. Github's api can be
      # weird like that apparently. Check for a rebased comment before erroring.
      no_events_pr = associate_rebase_comment_prs(tags, [pr], total)
      raise StandardError, "No merge sha found for PR #{pr['number']} via the GitHub API" unless no_events_pr.empty?
    end

    i += 1
    print("Associating PRs with tags: #{i}/#{total}\r") if @options[:verbose]
    true
  end
end

# Associate merged PRs with the release branch itself: a PR counts as
# associated when the commit of its "merged" event is part of the release
# branch history (see sha_in_release_branch). Such PRs get no
# "first_occurring_tag"; they are only removed from the leftover list.
#
# @param [Array] prs_left PRs not associated with any tag.
# @param [Integer] total The total number of PRs to associate; used for verbose printing.
# @return [Array] PRs whose merge commit is not in the branch (the input
#   array itself when it is empty).
def associate_release_branch_prs(prs_left, total)
  return prs_left unless prs_left.any?

  # Progress continues from the PRs that earlier passes already placed.
  associated = total - prs_left.count
  prs_left.reject do |pr|
    merge_event = pr["events"] && pr["events"].find { |e| e["event"] == "merged" }
    next false unless merge_event && sha_in_release_branch(merge_event["commit_id"])

    associated += 1
    print("Associating PRs with tags: #{associated}/#{total}\r") if @options[:verbose]
    true
  end
end

# Associate merged PRs by the SHA detected in github comments of the form
# "rebased commit: <sha>". For use when the merge_commit_sha is not in the
# actual git history due to rebase.
#
# The most recent matching comment wins. A PR is associated when that SHA is
# in a tag (the oldest such tag is stored as "first_occurring_tag") or in
# the release branch. PRs without such a comment only produce a warning and
# are returned to the caller.
#
# @param [Array] tags The tags sorted by time, newest to oldest.
# @param [Array] prs_left The PRs not yet associated with any tag or branch.
# @param [Integer] total The total number of PRs to associate; used for verbose printing.
# @return [Array] PRs without rebase comments.
# @raise [StandardError] When a rebase comment names a SHA that is in neither
#   a tag nor the release branch.
def associate_rebase_comment_prs(tags, prs_left, total)
  i = total - prs_left.count
  # Any remaining PRs were not found in the list of tags by their merge
  # commit and not found in any specified release branch. Fallback to
  # rebased commit comment.
  @fetcher.fetch_comments_async(prs_left)

  rebase_pattern = %r{rebased commit: ([0-9a-f]{40})}i
  # Oldest-to-newest ordering is the same for every PR; build it once.
  oldest_first_tags = tags.reverse

  prs_left.reject do |pr|
    # Walk comments newest-first and stop at the first match, matching each
    # body only once. `to_s` keeps a comment without a body from raising.
    rebase_match = nil
    (pr["comments"] || []).reverse_each do |comment|
      rebase_match = rebase_pattern.match(comment["body"].to_s)
      break if rebase_match
    end

    unless rebase_match
      puts "Warning: PR #{pr['number']} merge commit was not found in the release branch or tagged git history and no rebased SHA comment was found"
      next false
    end

    rebased_sha = rebase_match[1]
    if (oldest_tag = oldest_first_tags.find { |tag| tag["shas_in_tag"].include?(rebased_sha) })
      pr["first_occurring_tag"] = oldest_tag["name"]
    elsif !sha_in_release_branch(rebased_sha)
      raise StandardError, "PR #{pr['number']} has a rebased SHA comment but that SHA was not found in the release branch or any tags"
    end

    i += 1
    print("Associating PRs with tags: #{i}/#{total}\r") if @options[:verbose]
    true
  end
end

# Fill :actual_date parameter of specified issue by closed date of the commit, if it was closed by commit.
# @param [Hash] issue
def find_closed_date_by_commit(issue)
Expand Down Expand Up @@ -84,5 +200,19 @@ def set_date_from_event(event, issue)
end
end
end

private

# Tell whether a commit SHA is part of the release branch history. The
# branch is @options[:release_branch] when set, otherwise the fetcher's
# default branch; its history is taken from a comparison that starts at the
# fetcher's oldest commit.
#
# @param [String] sha SHA to check.
# @return [Boolean] True if SHA is in the branch git history.
def sha_in_release_branch(sha)
  release_ref = @options[:release_branch] || @fetcher.default_branch
  root_sha = @fetcher.oldest_commit["sha"]
  comparison = @fetcher.fetch_compare(root_sha, release_ref)
  comparison["commits"].any? { |commit| commit["sha"] == sha }
end
end
end
15 changes: 13 additions & 2 deletions lib/github_changelog_generator/generator/generator_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,22 @@ def remove_issues_in_milestones(filtered_issues)
end
end

# Select the items whose "first_occurring_tag" equals the name of the given
# tag. With no tag given, the items that have no "first_occurring_tag" are
# selected instead.
#
# @param [Array] issues Items to filter.
# @param [Hash, Nil] newer_tag Tag to find PRs of. May be nil for unreleased section
# @return [Array] The matching items.
def filter_by_tag(issues, newer_tag = nil)
  wanted_tag_name = newer_tag.nil? ? nil : newer_tag["name"]
  issues.select { |issue| issue["first_occurring_tag"] == wanted_tag_name }
end

# Filters issues, keeping only those that belong to the specified tag range
# @param [Array] issues issues to filter
# @param [Symbol] hash_key key of date value default is :actual_date
# @param [String] older_tag all issues before this tag date will be excluded. May be nil, if it's first tag
# @param [String] newer_tag all issue after this tag will be excluded. May be nil for unreleased section
# @param [Hash, Nil] older_tag all issues before this tag date will be excluded. May be nil, if it's first tag
# @param [Hash, Nil] newer_tag all issue after this tag will be excluded. May be nil for unreleased section
# @return [Array] filtered issues
def delete_by_time(issues, hash_key = "actual_date", older_tag = nil, newer_tag = nil)
# in case if not tags specified - return unchanged array
Expand Down
117 changes: 102 additions & 15 deletions lib/github_changelog_generator/octo_fetcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def initialize(options = {})
@http_cache = @options[:http_cache]
@cache_file = nil
@cache_log = nil
@commits = []
@compares = {}
prepare_cache
configure_octokit_ssl
@client = Octokit::Client.new(github_options)
Expand Down Expand Up @@ -162,10 +164,6 @@ def fetch_closed_pull_requests
pull_requests = []
options = { state: "closed" }

unless @options[:release_branch].nil?
options[:base] = @options[:release_branch]
end

page_i = 0
count_pages = calculate_pages(@client, "pull_requests", options)

Expand Down Expand Up @@ -211,6 +209,29 @@ def fetch_events_async(issues)
Helper.log.info "Fetching events for issues and PR: #{i}"
end

# Download the issue comments of every PR and store them, with stringified
# keys, under the PR's "comments" key. PRs are processed in batches of
# MAX_THREAD_NUMBER, one thread per PR; each batch is joined before the
# next one starts.
#
# @param [Array] prs The array of PRs.
# @return [Void] No return; PRs are updated in-place.
def fetch_comments_async(prs)
  prs.each_slice(MAX_THREAD_NUMBER) do |batch|
    workers = batch.map do |pr|
      Thread.new do
        pr["comments"] = []
        iterate_pages(@client, "issue_comments", pr["number"]) do |page|
          pr["comments"].concat(page)
        end
        pr["comments"] = pr["comments"].map do |comment|
          stringify_keys_deep(comment.to_hash)
        end
      end
    end
    workers.each(&:join)
  end
  nil
end

# Fetch tag time from repo
#
# @param [Hash] tag GitHub data item about a Tag
Expand All @@ -223,26 +244,92 @@ def fetch_date_of_tag(tag)
commit_data["commit"]["committer"]["date"]
end

# Compare two github refs and memoize the answer in @compares, keyed by
# "older...newer", so each pair is requested at most once. When newer is
# nil the comparison is made against "HEAD".
#
# @param [String] older The older sha/tag/branch.
# @param [String] newer The newer sha/tag/branch.
# @return [Hash] Github api response for comparison, with stringified keys.
# @raise [StandardError] When the response reports the two refs as diverged.
def fetch_compare(older, newer)
  cache_key = "#{older}...#{newer}"
  return @compares[cache_key] if @compares[cache_key]

  comparison = check_github_response { @client.compare(user_project, older, newer || "HEAD") }
  if comparison["status"] == "diverged"
    raise StandardError, "Sha #{older} and sha #{newer} are not related; please file a github-changelog-generator issues and describe how to replicate this issue."
  end

  @compares[cache_key] = stringify_keys_deep(comparison.to_hash)
end

# Fetch commit for specified event
#
# @return [Hash]
def fetch_commit(event)
check_github_response do
commit = @client.commit(user_project, event["commit_id"])
commit = stringify_keys_deep(commit.to_hash)
commit
found = commits.find do |commit|
commit["sha"] == event["commit_id"]
end
if found
stringify_keys_deep(found.to_hash)
else
# cache miss; don't add to @commits because unsure of order.
check_github_response do
commit = @client.commit(user_project, event["commit_id"])
commit = stringify_keys_deep(commit.to_hash)
commit
end
end
end

# Fetch all commits before certain point
# Fetch all commits
#
# @return [String]
def commits_before(start_time)
commits = []
iterate_pages(@client, "commits_before", start_time.to_datetime.to_s) do |new_commits|
commits.concat(new_commits)
# @return [Array] Commits in a repo.
def commits
if @commits.empty?
iterate_pages(@client, "commits") do |new_commits|
@commits.concat(new_commits)
end
end
@commits
end

# Return the final entry of the repo's commit list.
# NOTE(review): this is the oldest commit only if the list is ordered
# newest-first, as the GitHub commits listing presumably is - confirm.
#
# @return [Hash] Oldest commit in the github git history (nil when the
#   commit list is empty).
def oldest_commit
  history = commits
  history[-1]
end

# Look up the repo's default branch and memoize it, so the repository
# endpoint is queried at most once. The request goes through
# check_github_response, like the other GitHub API calls in this fetcher.
#
# @return [String] Default branch of the repo
def default_branch
  @default_branch ||= check_github_response { @client.repository(user_project)[:default_branch] }
end

# Fetch all SHAs occurring in or before a given tag and add them to
# "shas_in_tag". Tags are processed in batches of MAX_THREAD_NUMBER, one
# thread per tag; each batch is joined before the next one starts.
#
# @param [Array] tags The array of tags.
# @return [Nil] No return; tags are updated in-place.
def fetch_tag_shas_async(tags)
  i = 0
  print_in_same_line("Fetching SHAs for tags: #{i}/#{tags.count}\r") if @options[:verbose]

  # Use oldest commit because comparing two arbitrary tags may be diverged.
  # It is resolved once, on the calling thread: oldest_commit loads the
  # commit list lazily, and doing that from several workers at once could
  # trigger concurrent loads into the same array.
  base_sha = oldest_commit["sha"] unless tags.empty?
  # Guards the progress counter, which every worker thread updates.
  progress_lock = Mutex.new

  tags.each_slice(MAX_THREAD_NUMBER) do |tags_slice|
    threads = tags_slice.map do |tag|
      Thread.new do
        commits_in_tag = fetch_compare(base_sha, tag["name"])
        tag["shas_in_tag"] = commits_in_tag["commits"].map { |commit| commit["sha"] }
        progress_lock.synchronize do
          i += 1
          print_in_same_line("Fetching SHAs for tags: #{i}/#{tags.count}") if @options[:verbose]
        end
      end
    end
    threads.each(&:join)
  end

  # to clear line from prev print
  print_empty_line

  Helper.log.info "Fetching SHAs for tags: #{i}"
  nil
end

private
Expand Down
Loading