Skip to content

allows diffing from history, files, strings, and mixed sections #1638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 7, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 117 additions & 99 deletions winpython/diff.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
# -*- coding: utf-8 -*-
#
# WinPython diff.py script
# WinPython diff.py script (streamlined, with historical and flexible modes)
# Copyright © 2013 Pierre Raybaut
# Copyright © 2014-2025+ The Winpython development team https://github.com/winpython/
# Licensed under the terms of the MIT License
# (see winpython/__init__.py for details)

import os
from pathlib import Path
import re
import sys
import shutil
from pathlib import Path
from packaging import version
import sys

from . import utils

CHANGELOGS_DIR = Path(__file__).parent.parent / "changelogs"
Expand All @@ -22,12 +20,10 @@ class Package:
r"\[([\w\-\:\/\.\_]+)\]\(([^)]+)\) \| ([^\|]*) \| ([^\|]*)", # SourceForge
r"\[([\w\-\:\/\.\_]+) ([^\]\ ]+)\] \| ([^\|]*) \| ([^\|]*)" # Google Code
]

def __init__(self, text=None):
self.name = self.url = self.version = self.description = None
if text:
self.from_text(text)

def from_text(self, text):
for pattern in self.PATTERNS:
match = re.match(pattern, text)
Expand All @@ -36,31 +32,13 @@ def from_text(self, text):
return
raise ValueError(f"Unrecognized package line format: {text}")

def to_wiki(self):
return f" * [{self.name}]({self.url}) {self.version} ({self.description})\n"

def upgrade_wiki(self, other):
return f" * [{self.name}]({self.url}) {other.version} → {self.version} ({self.description})\n"

class PackageIndex:
HEADERS = {"tools": "### Tools", "python": "### Python packages", "wheelhouse": "### WheelHouse packages"}
BLANKS = ["Name | Version | Description", "-----|---------|------------", "", "<details>", "</details>"]

def __init__(self, version, searchdir=None, flavor="", architecture=64):
    """Record which changelog to read and load its package tables.

    Args:
        version: Version string used to build the changelog file name.
        searchdir: Directory holding the changelogs; a falsy value selects
            ``CHANGELOGS_DIR``.
        flavor: Flavor string inserted right after ``WinPython`` in the
            file name.
        architecture: Number placed before ``bit`` in the file name.
    """
    if searchdir:
        changelog_dir = Path(searchdir)
    else:
        changelog_dir = CHANGELOGS_DIR
    self.version, self.flavor = version, flavor
    self.searchdir = changelog_dir
    self.architecture = architecture
    # One empty name -> Package table per known section.
    self.packages = {section: {} for section in ("tools", "python", "wheelhouse")}
    self._load_index()

def _load_index(self):
filename = self.searchdir / f"WinPython{self.flavor}-{self.architecture}bit-{self.version}.md"
if not filename.exists():
raise FileNotFoundError(f"Changelog not found: {filename}")

with open(filename, "r", encoding=utils.guess_encoding(filename)[0]) as f:
self._parse_index(f.read())
def __init__(self, content):
self.packages = {k: {} for k in self.HEADERS}
self._parse_index(content)

def _parse_index(self, text):
current = None
Expand All @@ -71,123 +49,163 @@ def _parse_index(self, text):
if line.strip() in self.BLANKS:
continue
if current:
pkg = Package(line)
self.packages[current][pkg.name] = pkg
try:
pkg = Package(line)
self.packages[current][pkg.name] = pkg
except Exception:
continue

def compare_packages(old, new):
    """Return a text report of the differences between two package tables.

    Keys are compared case-insensitively with '-' treated as '_', because
    wheels replace '-' by '_' in their names.

    Args:
        old: Mapping of package name to an object exposing ``name``,
            ``version`` and ``description`` (the older side).
        new: Same kind of mapping for the newer side.

    Returns:
        A string made of the non-empty sections among "New packages",
        "Upgraded packages" and "Removed packages", or a
        "No differences found." message when both tables agree.
    """
    def normalize(packages):
        return {key.replace("-", "_").lower(): pkg for key, pkg in packages.items()}

    old, new = normalize(old), normalize(new)

    added = [pkg for key, pkg in new.items() if key not in old]
    # Keep each (previous, current) pair together. The previous package must
    # be found through the normalized key: the raw ``name`` attribute may
    # differ from it in case or in '-' vs '_', and looking it up by ``name``
    # silently dropped such upgrades from the report.
    upgraded = [
        (old[key], pkg)
        for key, pkg in new.items()
        if key in old and pkg.version != old[key].version
    ]
    removed = [pkg for key, pkg in old.items() if key not in new]

    sections = []
    if added:
        sections.append(
            "\nNew packages:\n"
            + "".join(f" * {p.name} {p.version} ({p.description})\n" for p in added)
        )
    if upgraded:
        sections.append(
            "\nUpgraded packages:\n"
            + "".join(
                f" * {cur.name} {prev.version} → {cur.version} ({cur.description})\n"
                for prev, cur in upgraded
            )
        )
    if removed:
        sections.append(
            "\nRemoved packages:\n"
            + "".join(f" * {p.name} {p.version} ({p.description})\n" for p in removed)
        )
    return "".join(sections) or "\nNo differences found.\n"

def compare_markdown_sections(md1, md2, header1="python", header2="python", label1="Input1", label2="Input2"):
    """Diff one section of ``md1`` against one section of ``md2``.

    Args:
        md1: Markdown text of the first changelog.
        md2: Markdown text of the second changelog.
        header1: Section key to read from ``md1``.
        header2: Section key to read from ``md2``.
        label1: Name shown for the first input in the generated title.
        label2: Name shown for the second input in the generated title.

    Returns:
        A title line, a blank line, then the ``compare_packages`` report.
    """
    index_a, index_b = PackageIndex(md1), PackageIndex(md2)
    report = compare_packages(index_a.packages[header1], index_b.packages[header2])

    # Same known section on both sides: keep the historical section header.
    same_known_section = header1 == header2 and header1 in PackageIndex.HEADERS
    if same_known_section:
        heading = PackageIndex.HEADERS[header1]
    else:
        heading = f"## {label1} [{header1}] vs {label2} [{header2}]"
    return heading + "\n\n" + report

def compare_markdown_section_pairs(md1, md2, header_pairs, label1="Input1", label2="Input2"):
    """Diff several (section of md1, section of md2) pairs in one report.

    Args:
        md1: Markdown text of the first changelog.
        md2: Markdown text of the second changelog.
        header_pairs: Iterable of ``(header1, header2)`` section keys.
        label1: Name shown for the first input.
        label2: Name shown for the second input.

    Returns:
        A report starting with a top-level title, followed by one
        sub-section per pair that actually differs.
    """
    pkgs1 = PackageIndex(md1).packages
    pkgs2 = PackageIndex(md2).packages
    parts = [f"# {label1} vs {label2} section-pairs comparison\n"]
    for h1, h2 in header_pairs:
        diff = compare_packages(pkgs1[h1], pkgs2[h2])
        # compare_packages wraps its "no differences" message in newlines, so
        # compare the stripped text; matching against the unpadded literal
        # never succeeded and unchanged pairs were reported anyway.
        summary = diff.strip()
        if summary and summary != "No differences found.":
            parts.append(f"\n## {label1} [{h1}] vs {label2} [{h2}]\n\n{diff}\n")
    return "".join(parts)

def compare_files(file1, file2, mode="full", header1=None, header2=None, header_pairs=None):
    """Compare two markdown changelog files.

    Args:
        file1: Path of the first markdown file.
        file2: Path of the second markdown file.
        mode: ``"full"`` compares every section in ``PackageIndex.HEADERS``,
            ``"section"`` compares ``header1`` with ``header2``, ``"pairs"``
            compares each pair in ``header_pairs``.
        header1: Section key of ``file1`` (``"section"`` mode).
        header2: Section key of ``file2`` (``"section"`` mode).
        header_pairs: Iterable of section-key pairs (``"pairs"`` mode).

    Returns:
        The comparison report as text.

    Raises:
        ValueError: If ``mode`` is not one of the three known modes.
    """
    with open(file1, encoding=utils.guess_encoding(file1)[0]) as first, \
            open(file2, encoding=utils.guess_encoding(file2)[0]) as second:
        md1, md2 = first.read(), second.read()

    if mode == "section":
        return compare_markdown_sections(md1, md2, header1, header2, file1, file2)
    if mode == "pairs":
        return compare_markdown_section_pairs(md1, md2, header_pairs, file1, file2)
    if mode != "full":
        raise ValueError("Unknown mode.")
    return "".join(
        compare_markdown_sections(md1, md2, key, key, file1, file2) + "\n"
        for key in PackageIndex.HEADERS
    )

# --- ORIGINAL/HISTORICAL VERSION-TO-VERSION COMPARISON ---

def find_previous_version(target_version, searchdir=None, flavor="", architecture=64):
    """Find the newest changelog version strictly older than `target_version`.

    Args:
        target_version: Version string to look below.
        searchdir: Directory to scan; a falsy value selects ``CHANGELOGS_DIR``.
        flavor: Flavor string expected right after ``WinPython`` in file names.
        architecture: Number expected before ``bit`` in file names.

    Returns:
        The highest matching version lower than ``target_version``, or
        ``target_version`` itself when no older changelog is found.
    """
    search_dir = Path(searchdir) if searchdir else CHANGELOGS_DIR
    # The flavor is used literally when changelog file names are built, so
    # match it literally here as well instead of as a regex fragment.
    pattern = re.compile(
        rf"WinPython{re.escape(flavor)}-{architecture}bit-([0-9\.]+)\.(txt|md)"
    )
    target = version.parse(target_version)  # parsed once, not per candidate
    matches = (pattern.match(fname) for fname in os.listdir(search_dir))
    older = [m.group(1) for m in matches if m and version.parse(m.group(1)) < target]
    return max(older, key=version.parse, default=target_version)

def load_version_markdown(version, searchdir, flavor="", architecture=64):
    """Return the text of the markdown changelog for one WinPython version.

    Args:
        version: Version string used in the file name.
        searchdir: Directory containing the changelog.
        flavor: Flavor string inserted right after ``WinPython``.
        architecture: Number placed before ``bit`` in the file name.

    Raises:
        FileNotFoundError: If the changelog file does not exist.
    """
    changelog = Path(searchdir) / f"WinPython{flavor}-{architecture}bit-{version}.md"
    if changelog.exists():
        detected = utils.guess_encoding(changelog)[0]
        with open(changelog, "r", encoding=detected) as stream:
            return stream.read()
    raise FileNotFoundError(f"Changelog not found: {changelog}")

def compare_package_indexes(version2, version1=None, searchdir=None, flavor="", flavor1=None, architecture=64):
    """Compare two WinPython versions using the changelogs of a directory.

    Args:
        version2: Newer version to report on.
        version1: Older version; when falsy, the closest earlier version
            found by ``find_previous_version`` is used.
        searchdir: Changelog directory; a falsy value selects
            ``CHANGELOGS_DIR``.
        flavor: Flavor of ``version2``.
        flavor1: Flavor of ``version1``; defaults to ``flavor``.
        architecture: Number placed before ``bit`` in changelog file names.

    Returns:
        The markdown history text, with the per-section differences wrapped
        in a ``<details>`` block.

    Raises:
        SystemExit: If the changelog directory does not exist.
    """
    # Resolve the directory first. Rejecting a missing ``searchdir`` up front
    # made the documented ``searchdir=None`` default always exit, although
    # the fallback to CHANGELOGS_DIR exists precisely for that case.
    searchdir = Path(searchdir) if searchdir else CHANGELOGS_DIR
    if not searchdir.is_dir():
        print(f"Error: changelogs directory {searchdir} does not exist.")
        sys.exit(1)
    version1 = version1 or find_previous_version(version2, searchdir, flavor, architecture)
    flavor1 = flavor1 or flavor

    md1 = load_version_markdown(version1, searchdir, flavor1, architecture)
    md2 = load_version_markdown(version2, searchdir, flavor, architecture)
    header = (
        f"## History of changes for WinPython-{architecture}bit {version2 + flavor}\r\n\r\n"
        f"The following changes were made to WinPython-{architecture}bit distribution since version {version1 + flavor1}.\n\n\n"
        "<details>\n\n"
    )
    sections = "".join(
        compare_markdown_sections(md1, md2, k, k, version1, version2) + "\n"
        for k in PackageIndex.HEADERS
    )
    return header + sections + "\n</details>\n\n* * *\n"

def copy_changelogs(version, searchdir, flavor="", architecture=64, basedir=None):
    """Copy all changelogs for a major.minor version into basedir.

    Args:
        version: Version whose first two dot-separated components select the
            changelogs to copy.
        searchdir: Directory scanned for changelog files.
        flavor: Flavor string expected right after ``WinPython``.
        architecture: Number expected before ``bit`` in file names.
        basedir: Destination directory for the copies.
    """
    basever = ".".join(str(version).split(".")[:2])
    # Escape the interpolated parts so the dots of the version (and any
    # special character in the flavor) match literally, not as regex syntax.
    pattern = re.compile(
        rf"WinPython{re.escape(flavor)}-{architecture}bit-{re.escape(basever)}[0-9\.]*\.(txt|md)"
    )
    source = Path(searchdir)
    dest = Path(basedir)
    for fname in os.listdir(source):
        if pattern.match(fname):
            shutil.copyfile(source / fname, dest / fname)

def write_changelog(version2, version1=None, searchdir=None, flavor="", architecture=64, basedir=None):
    """Write the history of changes between version1 and version2 to a file.

    The history is written next to the changelogs as
    ``WinPython<flavor>-<architecture>bit-<version2>_History.md``. When
    ``basedir`` is given, the matching changelogs are copied there first and
    the history file is copied there afterwards.

    Args:
        version2: Newer version.
        version1: Older version, forwarded to ``compare_package_indexes``.
        searchdir: Changelog directory; a falsy value selects
            ``CHANGELOGS_DIR``.
        flavor: Flavor string used in file names.
        architecture: Number placed before ``bit`` in file names.
        basedir: Optional directory receiving copies of the files.
    """
    changelog_dir = Path(searchdir) if searchdir else CHANGELOGS_DIR
    if basedir:
        copy_changelogs(version2, changelog_dir, flavor, architecture, basedir)

    print("comparing_package_indexes", version2, changelog_dir, flavor, architecture)
    history = compare_package_indexes(version2, version1, changelog_dir, flavor, architecture=architecture)

    history_name = f"WinPython{flavor}-{architecture}bit-{version2}_History.md"
    history_path = changelog_dir / history_name
    history_path.write_text(history, encoding="utf-8")

    # Mirror the freshly written history file into basedir as well.
    if basedir:
        shutil.copyfile(history_path, Path(basedir) / history_path.name)

def compare_two_markdown_files(file1, file2):
    """Compare two arbitrary markdown files with WinPython changelog format.

    Args:
        file1: Path of the older markdown file.
        file2: Path of the newer markdown file.

    Returns:
        Markdown text listing, per section, the differences found, wrapped
        in a ``<details>`` block.
    """

    class DummyPackageIndex(PackageIndex):
        # Index filled from an explicit file path rather than a version lookup.
        def __init__(self, filename):
            self.packages = {section: {} for section in ("tools", "python", "wheelhouse")}
            self._load_index(filename)

        def _load_index(self, filename):
            detected = utils.guess_encoding(filename)[0]
            with open(filename, "r", encoding=detected) as stream:
                self._parse_index(stream.read())

    older = DummyPackageIndex(Path(file1)).packages
    newer = DummyPackageIndex(Path(file2)).packages

    chunks = [f"## Differences between {file1} and {file2}\n\n<details>\n\n"]
    for key, heading in PackageIndex.HEADERS.items():
        diff = compare_packages(older[key], newer[key])
        if diff:
            chunks.append(f"\n{heading}\n\n{diff}")
    chunks.append("\n</details>\n\n* * *\n")
    return "".join(chunks)

def print_usage():
    """Print the command-line help for every supported invocation form."""
    usage_lines = (
        "Usage:",
        " python diff.py file1.md file2.md",
        " - Compare all sections of two markdown files.",
        " python diff.py file1.md file2.md --section header1 header2",
        " - Compare section 'header1' of file1 with section 'header2' of file2.",
        " python diff.py file1.md file2.md --pairs header1a header2a [header1b header2b ...]",
        " - Compare pairs of sections. Example: python diff.py f1.md f2.md --pairs python wheelhouse tools tools",
        " python diff.py <version2> <version1> [searchdir] [flavor] [architecture]",
        " - Compare WinPython markdown changelogs by version (historical mode).",
        " python diff.py --write-changelog <version2> <version1> [searchdir] [flavor] [architecture] [basedir]",
        " - Write changelog between version1 and version2 to file (and optionally copy to basedir).",
    )
    print("\n".join(usage_lines))

if __name__ == "__main__":
    # Command-line dispatch: two-file mode, changelog-writing mode, or the
    # historical version-to-version mode; anything else prints the usage.
    args = sys.argv
    argc = len(args)
    two_markdown_inputs = argc >= 3 and all(arg.lower().endswith('.md') for arg in args[1:3])

    if two_markdown_inputs:
        file1, file2 = args[1], args[2]
        if argc == 3:
            print(compare_files(file1, file2))
        elif args[3] == "--section" and argc >= 6:
            print(compare_files(file1, file2, mode="section", header1=args[4], header2=args[5]))
        elif args[3] == "--pairs" and argc > 4 and (argc - 4) % 2 == 0:
            # Remaining arguments alternate: section of file1, section of file2.
            pairs = list(zip(args[4::2], args[5::2]))
            print(compare_files(file1, file2, mode="pairs", header_pairs=pairs))
        else:
            print_usage()
    elif argc >= 2 and args[1] == "--write-changelog":
        # Usage: --write-changelog <version2> <version1> [searchdir] [flavor] [architecture] [basedir]
        if argc < 4:
            print_usage()
            sys.exit(1)
        version2, version1 = args[2], args[3]
        searchdir = args[4] if argc > 4 else CHANGELOGS_DIR
        flavor = args[5] if argc > 5 else ""
        architecture = int(args[6]) if argc > 6 else 64
        basedir = args[7] if argc > 7 else None
        write_changelog(version2, version1, searchdir, flavor, architecture, basedir)
        print(f"Changelog written for {version2} vs {version1}.")
    elif argc >= 3:
        # Historical mode: <version2> <version1> [searchdir] [flavor] [architecture]
        version2 = args[1]
        version1 = None if args[2].endswith('.md') else args[2]
        searchdir = args[3] if argc > 3 else CHANGELOGS_DIR
        flavor = args[4] if argc > 4 else ""
        architecture = int(args[5]) if argc > 5 else 64
        print(compare_package_indexes(version2, version1, searchdir, flavor, architecture=architecture))
    else:
        print_usage()