Skip to content

Commit 6395528

Browse files
authored
Merge pull request #1638 from stonebig/master
allows diffing from history, files, strings, and mixed sections
2 parents af3d4da + 81dabf7 commit 6395528

File tree

1 file changed

+117
-99
lines changed

1 file changed

+117
-99
lines changed

winpython/diff.py

Lines changed: 117 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,16 @@
11
# -*- coding: utf-8 -*-
22
#
3-
# WinPython diff.py script
3+
# WinPython diff.py script (streamlined, with historical and flexible modes)
44
# Copyright © 2013 Pierre Raybaut
55
# Copyright © 2014-2025+ The Winpython development team https://github.com/winpython/
66
# Licensed under the terms of the MIT License
7-
# (see winpython/__init__.py for details)
87

98
import os
10-
from pathlib import Path
119
import re
10+
import sys
1211
import shutil
12+
from pathlib import Path
1313
from packaging import version
14-
import sys
15-
1614
from . import utils
1715

1816
CHANGELOGS_DIR = Path(__file__).parent.parent / "changelogs"
@@ -22,12 +20,10 @@ class Package:
2220
r"\[([\w\-\:\/\.\_]+)\]\(([^)]+)\) \| ([^\|]*) \| ([^\|]*)", # SourceForge
2321
r"\[([\w\-\:\/\.\_]+) ([^\]\ ]+)\] \| ([^\|]*) \| ([^\|]*)" # Google Code
2422
]
25-
2623
def __init__(self, text=None):
2724
self.name = self.url = self.version = self.description = None
2825
if text:
2926
self.from_text(text)
30-
3127
def from_text(self, text):
3228
for pattern in self.PATTERNS:
3329
match = re.match(pattern, text)
@@ -36,31 +32,13 @@ def from_text(self, text):
3632
return
3733
raise ValueError(f"Unrecognized package line format: {text}")
3834

39-
def to_wiki(self):
40-
return f" * [{self.name}]({self.url}) {self.version} ({self.description})\n"
41-
42-
def upgrade_wiki(self, other):
43-
return f" * [{self.name}]({self.url}) {other.version}{self.version} ({self.description})\n"
44-
4535
class PackageIndex:
4636
HEADERS = {"tools": "### Tools", "python": "### Python packages", "wheelhouse": "### WheelHouse packages"}
4737
BLANKS = ["Name | Version | Description", "-----|---------|------------", "", "<details>", "</details>"]
4838

49-
def __init__(self, version, searchdir=None, flavor="", architecture=64):
50-
self.version = version
51-
self.flavor = flavor
52-
self.searchdir = Path(searchdir) if searchdir else CHANGELOGS_DIR
53-
self.architecture = architecture
54-
self.packages = {"tools": {}, "python": {}, "wheelhouse": {}}
55-
self._load_index()
56-
57-
def _load_index(self):
58-
filename = self.searchdir / f"WinPython{self.flavor}-{self.architecture}bit-{self.version}.md"
59-
if not filename.exists():
60-
raise FileNotFoundError(f"Changelog not found: {filename}")
61-
62-
with open(filename, "r", encoding=utils.guess_encoding(filename)[0]) as f:
63-
self._parse_index(f.read())
39+
def __init__(self, content):
40+
self.packages = {k: {} for k in self.HEADERS}
41+
self._parse_index(content)
6442

6543
def _parse_index(self, text):
6644
current = None
@@ -71,123 +49,163 @@ def _parse_index(self, text):
7149
if line.strip() in self.BLANKS:
7250
continue
7351
if current:
74-
pkg = Package(line)
75-
self.packages[current][pkg.name] = pkg
52+
try:
53+
pkg = Package(line)
54+
self.packages[current][pkg.name] = pkg
55+
except Exception:
56+
continue
7657

7758
def compare_packages(old, new):
    """Return a text summary of the differences between two package mappings.

    ``old`` and ``new`` map package names to objects exposing ``name``,
    ``version`` and ``description`` attributes. Names are matched
    case-insensitively with ``-`` treated as ``_`` (wheel names replace
    ``-`` with ``_``).

    The summary lists new, upgraded and removed packages; when the two
    mappings agree it is the single line "No differences found.".
    """
    def normalize(packages):
        return {name.replace("-", "_").lower(): pkg for name, pkg in packages.items()}

    def entry(pkg):
        return f" * {pkg.name} {pkg.version} ({pkg.description})\n"

    old, new = normalize(old), normalize(new)
    added = [pkg for key, pkg in new.items() if key not in old]
    # Pair every upgraded package with its predecessor through the normalized
    # key: the display name (pkg.name) may differ in case or in '-' vs '_',
    # so it must not be used to index the normalized mapping.
    upgraded = [
        (old[key], pkg)
        for key, pkg in new.items()
        if key in old and pkg.version != old[key].version
    ]
    removed = [pkg for key, pkg in old.items() if key not in new]

    output = ""
    if added:
        output += "\nNew packages:\n" + "".join(entry(pkg) for pkg in added)
    if upgraded:
        # An explicit arrow keeps the two version numbers from running together.
        output += "\nUpgraded packages:\n" + "".join(
            f" * {pkg.name} {previous.version} → {pkg.version} ({pkg.description})\n"
            for previous, pkg in upgraded
        )
    if removed:
        output += "\nRemoved packages:\n" + "".join(entry(pkg) for pkg in removed)
    return output or "\nNo differences found.\n"
72+
73+
def compare_markdown_sections(md1, md2, header1="python", header2="python", label1="Input1", label2="Input2"):
    """Diff one section of ``md1`` against one section of ``md2``.

    Both markdown texts are parsed with ``PackageIndex``; the packages found
    under ``header1`` in the first text are compared with those under
    ``header2`` in the second one.

    Returns the diff text preceded by a title: the historical section header
    from ``PackageIndex.HEADERS`` when both sides name the same known section,
    otherwise a "## label1 [header1] vs label2 [header2]" line.
    """
    first_index, second_index = PackageIndex(md1), PackageIndex(md2)
    body = compare_packages(first_index.packages[header1], second_index.packages[header2])

    # Same section on both sides: keep the header used by historical changelogs.
    same_known_section = header1 == header2 and header1 in PackageIndex.HEADERS
    if same_known_section:
        heading = PackageIndex.HEADERS[header1]
    else:
        heading = f"## {label1} [{header1}] vs {label2} [{header2}]"
    return heading + "\n\n" + body
83+
84+
def compare_markdown_section_pairs(md1, md2, header_pairs, label1="Input1", label2="Input2"):
    """Diff several (section of ``md1``, section of ``md2``) pairs in one report.

    ``header_pairs`` is an iterable of ``(header1, header2)`` section keys.
    Both markdown texts are parsed once with ``PackageIndex``. The report
    starts with a "# label1 vs label2 section-pairs comparison" title and
    contains one "##" sub-section per pair that actually differs; pairs with
    no differences are left out.
    """
    pkgs1 = PackageIndex(md1).packages
    pkgs2 = PackageIndex(md2).packages
    parts = [f"# {label1} vs {label2} section-pairs comparison\n"]
    for h1, h2 in header_pairs:
        diff = compare_packages(pkgs1[h1], pkgs2[h2])
        # compare_packages surrounds its "No differences found." marker with
        # newlines, so the marker has to be recognised on the stripped text.
        summary = diff.strip()
        if summary and summary != "No differences found.":
            parts.append(f"\n## {label1} [{h1}] vs {label2} [{h2}]\n\n{diff}\n")
    return "".join(parts)
93+
94+
def compare_files(file1, file2, mode="full", header1=None, header2=None, header_pairs=None):
    """Compare two markdown changelog files.

    Each file is read with the encoding reported by ``utils.guess_encoding``.

    ``mode`` selects the comparison:
      * "full"    - every section listed in ``PackageIndex.HEADERS``;
      * "section" - section ``header1`` of file1 against ``header2`` of file2;
      * "pairs"   - every ``(header1, header2)`` pair in ``header_pairs``.

    Returns the comparison text. Raises ``ValueError`` for any other mode.
    """
    def read_markdown(path):
        with open(path, encoding=utils.guess_encoding(path)[0]) as handle:
            return handle.read()

    md1 = read_markdown(file1)
    md2 = read_markdown(file2)

    if mode == "section":
        return compare_markdown_sections(md1, md2, header1, header2, file1, file2)
    if mode == "pairs":
        return compare_markdown_section_pairs(md1, md2, header_pairs, file1, file2)
    if mode != "full":
        raise ValueError("Unknown mode.")
    return "".join(
        compare_markdown_sections(md1, md2, key, key, file1, file2) + "\n"
        for key in PackageIndex.HEADERS
    )
109+
110+
# --- ORIGINAL/HISTORICAL VERSION-TO-VERSION COMPARISON ---
96111

97112
def find_previous_version(target_version, searchdir=None, flavor="", architecture=64):
    """Find the changelog version closest to, and strictly below, `target_version`.

    Changelog files named ``WinPython{flavor}-{architecture}bit-<version>.txt``
    or ``.md`` are looked up in ``searchdir`` (``CHANGELOGS_DIR`` when it is
    empty or None). Returns `target_version` itself when no older changelog
    is found.
    """
    search_dir = Path(searchdir) if searchdir else CHANGELOGS_DIR
    # The flavor is literal text, so escape it before embedding it in the regex.
    pattern = re.compile(rf"WinPython{re.escape(flavor)}-{architecture}bit-([0-9\.]+)\.(txt|md)")
    # Match each filename once and keep only the captured version strings.
    found = [m.group(1) for m in map(pattern.match, os.listdir(search_dir)) if m]
    if not found:
        return target_version
    target = version.parse(target_version)
    older = [v for v in found if version.parse(v) < target]
    return max(older, key=version.parse, default=target_version)
104118

119+
def load_version_markdown(version, searchdir, flavor="", architecture=64):
    """Return the text of the markdown changelog of one WinPython version.

    The file ``WinPython{flavor}-{architecture}bit-{version}.md`` is looked up
    in ``searchdir`` and decoded with the encoding reported by
    ``utils.guess_encoding``.

    Raises ``FileNotFoundError`` when that file does not exist.
    """
    changelog_name = f"WinPython{flavor}-{architecture}bit-{version}.md"
    changelog_path = Path(searchdir) / changelog_name
    if changelog_path.exists():
        detected_encoding = utils.guess_encoding(changelog_path)[0]
        with open(changelog_path, "r", encoding=detected_encoding) as handle:
            return handle.read()
    raise FileNotFoundError(f"Changelog not found: {changelog_path}")
125+
105126
def compare_package_indexes(version2, version1=None, searchdir=None, flavor="", flavor1=None, architecture=64):
    """Build the history text between two WinPython versions from their changelogs.

    The changelogs are read from ``searchdir`` (``CHANGELOGS_DIR`` when it is
    empty or None). When ``version1`` is not given it is chosen with
    ``find_previous_version``; ``flavor1`` falls back to ``flavor``.

    Returns a markdown text made of a "History of changes" header, one
    comparison per section of ``PackageIndex.HEADERS`` wrapped in a
    ``<details>`` element, and a closing rule. ``load_version_markdown``
    raises ``FileNotFoundError`` when one of the two changelogs is missing.
    """
    searchdir = Path(searchdir) if searchdir else CHANGELOGS_DIR
    version1 = version1 or find_previous_version(version2, searchdir, flavor, architecture)
    flavor1 = flavor1 or flavor
    md1 = load_version_markdown(version1, searchdir, flavor1, architecture)
    md2 = load_version_markdown(version2, searchdir, flavor, architecture)
    header = (
        f"## History of changes for WinPython-{architecture}bit {version2 + flavor}\r\n\r\n"
        f"The following changes were made to WinPython-{architecture}bit distribution since version {version1 + flavor1}.\n\n\n"
        "<details>\n\n"
    )
    sections = "".join(
        compare_markdown_sections(md1, md2, key, key, version1, version2) + "\n"
        for key in PackageIndex.HEADERS
    )
    return header + sections + "\n</details>\n\n* * *\n"
128141

129142
def copy_changelogs(version, searchdir, flavor="", architecture=64, basedir=None):
    """Copy all changelogs for a major.minor version into basedir.

    Every file of ``searchdir`` named
    ``WinPython{flavor}-{architecture}bit-<major.minor>[.<rest>].txt`` or
    ``.md`` is copied under the same name into ``basedir``, where
    ``<major.minor>`` is taken from the first two dot-separated components
    of ``version``. ``basedir`` must be a real path: it is passed straight
    to ``Path``.
    """
    basever = ".".join(str(version).split(".")[:2])
    # flavor and basever are literal text: escape them so that '.' only
    # matches a dot, and require a '.' right after the major.minor part so
    # that "3.1" does not also select "3.13.x" changelogs.
    pattern = re.compile(
        rf"WinPython{re.escape(flavor)}-{architecture}bit-{re.escape(basever)}(?:\.[0-9\.]*)?\.(txt|md)"
    )
    source = Path(searchdir)
    dest = Path(basedir)
    for fname in os.listdir(searchdir):
        if pattern.match(fname):
            shutil.copyfile(source / fname, dest / fname)
136150

137151
def write_changelog(version2, version1=None, searchdir=None, flavor="", architecture=64, basedir=None):
    """Write the history of changes between version1 and version2 of WinPython.

    The text produced by ``compare_package_indexes`` is written, UTF-8
    encoded, to ``WinPython{flavor}-{architecture}bit-{version2}_History.md``
    in ``searchdir`` (``CHANGELOGS_DIR`` when it is empty or None).

    When ``basedir`` is given, ``copy_changelogs`` is called with it before
    the comparison, and the history file is copied into it afterwards.
    """
    changelog_dir = Path(searchdir) if searchdir else CHANGELOGS_DIR
    if basedir:
        copy_changelogs(version2, changelog_dir, flavor, architecture, basedir)

    history_text = compare_package_indexes(
        version2, version1, changelog_dir, flavor, architecture=architecture
    )
    history_name = f"WinPython{flavor}-{architecture}bit-{version2}_History.md"
    history_path = changelog_dir / history_name
    with open(history_path, "w", encoding="utf-8") as history_file:
        history_file.write(history_text)

    if basedir:
        shutil.copyfile(history_path, Path(basedir) / history_path.name)
149162

150-
def compare_two_markdown_files(file1, file2):
151-
"""Compare two arbitrary markdown files with WinPython changelog format."""
152-
class DummyPackageIndex(PackageIndex):
153-
def __init__(self, filename):
154-
self.packages = {"tools": {}, "python": {}, "wheelhouse": {}}
155-
self._load_index(filename)
156-
157-
def _load_index(self, filename):
158-
with open(filename, "r", encoding=utils.guess_encoding(filename)[0]) as f:
159-
self._parse_index(f.read())
160-
161-
pi1 = DummyPackageIndex(Path(file1))
162-
pi2 = DummyPackageIndex(Path(file2))
163-
164-
text = f"## Differences between {file1} and {file2}\n\n<details>\n\n"
165-
for key in PackageIndex.HEADERS:
166-
diff = compare_packages(pi1.packages[key], pi2.packages[key])
167-
if diff:
168-
text += f"\n{PackageIndex.HEADERS[key]}\n\n{diff}"
169-
return text + "\n</details>\n\n* * *\n"
170-
171163
def print_usage():
    """Print the command-line synopsis of every supported invocation."""
    usage_lines = (
        "Usage:",
        " python diff.py file1.md file2.md",
        " - Compare all sections of two markdown files.",
        " python diff.py file1.md file2.md --section header1 header2",
        " - Compare section 'header1' of file1 with section 'header2' of file2.",
        " python diff.py file1.md file2.md --pairs header1a header2a [header1b header2b ...]",
        " - Compare pairs of sections. Example: python diff.py f1.md f2.md --pairs python wheelhouse tools tools",
        " python diff.py <version2> <version1> [searchdir] [flavor] [architecture]",
        " - Compare WinPython markdown changelogs by version (historical mode).",
        " python diff.py --write-changelog <version2> <version1> [searchdir] [flavor] [architecture] [basedir]",
        " - Write changelog between version1 and version2 to file (and optionally copy to basedir).",
    )
    for usage_line in usage_lines:
        print(usage_line)
177175

178176
if __name__ == "__main__":
    # Dispatch on the shape of the command line; anything that is not
    # recognized prints the usage text.
    argv = sys.argv
    argc = len(argv)

    def optional(position, default):
        """Return argv[position] when it was supplied, else `default`."""
        return argv[position] if argc > position else default

    if argc >= 3 and all(name.lower().endswith('.md') for name in argv[1:3]):
        # File mode: the first two arguments are markdown files.
        first_md, second_md = argv[1], argv[2]
        if argc == 3:
            print(compare_files(first_md, second_md))
        elif argv[3] == "--section" and argc >= 6:
            print(compare_files(first_md, second_md, mode="section", header1=argv[4], header2=argv[5]))
        elif argv[3] == "--pairs" and argc > 4 and len(argv[4:]) % 2 == 0:
            # Consecutive arguments after --pairs form (header1, header2) couples.
            header_pairs = list(zip(argv[4::2], argv[5::2]))
            print(compare_files(first_md, second_md, mode="pairs", header_pairs=header_pairs))
        else:
            print_usage()
    elif argc >= 2 and argv[1] == "--write-changelog":
        # Usage: --write-changelog <version2> <version1> [searchdir] [flavor] [architecture] [basedir]
        if argc < 4:
            print_usage()
            sys.exit(1)
        newer, older = argv[2], argv[3]
        changelog_dir = optional(4, CHANGELOGS_DIR)
        flavor_name = optional(5, "")
        bits = int(optional(6, 64))
        copy_dir = optional(7, None)
        write_changelog(newer, older, changelog_dir, flavor_name, bits, copy_dir)
        print(f"Changelog written for {newer} vs {older}.")
    elif argc >= 3:
        # Historical mode: compare changelogs by version number.
        newer = argv[1]
        older = None if argv[2].endswith('.md') else argv[2]
        changelog_dir = optional(3, CHANGELOGS_DIR)
        flavor_name = optional(4, "")
        bits = int(optional(5, 64))
        print(compare_package_indexes(newer, older, changelog_dir, flavor_name, architecture=bits))
    else:
        print_usage()

0 commit comments

Comments
 (0)