From 5f5547769b0598afc0dc6e1a406cb7fd66e6aab0 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 24 Sep 2022 11:51:20 -0500 Subject: [PATCH 1/7] feat(str): import_loader from werkzeug --- src/libvcs/_internal/module_loading.py | 113 +++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 src/libvcs/_internal/module_loading.py diff --git a/src/libvcs/_internal/module_loading.py b/src/libvcs/_internal/module_loading.py new file mode 100644 index 00000000..464419b0 --- /dev/null +++ b/src/libvcs/_internal/module_loading.py @@ -0,0 +1,113 @@ +import sys +import typing as t + + +class ImportStringError(ImportError): + """ + Provides information about a failed :func:`import_string` attempt. + + Notes + ----- + This is from werkzeug.utils d36aaf1 on August 20 2022, LICENSE BSD. + https://github.com/pallets/werkzeug + + Changes: + - Deferred load import import_string + - Format with black + """ + + #: String in dotted notation that failed to be imported. + import_name: str + #: Wrapped exception. + exception: BaseException + + def __init__(self, import_name: str, exception: BaseException) -> None: + self.import_name = import_name + self.exception = exception + msg = import_name + name = "" + tracked = [] + for part in import_name.replace(":", ".").split("."): + name = f"{name}.{part}" if name else part + imported = import_string(name, silent=True) + if imported: + tracked.append((name, getattr(imported, "__file__", None))) + else: + track = [f"- {n!r} found in {i!r}." for n, i in tracked] + track.append(f"- {name!r} not found.") + track_str = "\n".join(track) + msg = ( + f"import_string() failed for {import_name!r}. Possible reasons" + f" are:\n\n" + "- missing __init__.py in a package;\n" + "- package or module path not included in sys.path;\n" + "- duplicated package or module name taking precedence in" + " sys.path;\n" + "- missing module, class, function or variable;\n\n" + f"Debugged import:\n\n{track_str}\n\n" + f"Original exception:\n\n{type(exception).__name__}: {exception}" + ) + break + + super().__init__(msg) + + def __repr__(self) -> str: + return f"<{type(self).__name__}({self.import_name!r}, {self.exception!r})>" + + +def import_string(import_name: str, silent: bool = False) -> t.Any: + """Imports an object based on a string. + + This is useful if you want to use import paths as endpoints or + something similar. An import path can be specified either in dotted + notation (``xml.sax.saxutils.escape``) or with a colon as object + delimiter (``xml.sax.saxutils:escape``). + + If `silent` is True the return value will be `None` if the import fails. + + Parameters + ---------- + import_name : string + the dotted name for the object to import. + silent : bool + if set to `True` import errors are ignored and `None` is returned instead. + + Returns + ------- + imported object + + Raises + ------ + ImportStringError (ImportError, libvcs.exc.libvcsException) + + Notes + ----- + This is from werkzeug.utils d36aaf1 on May 23, 2022, LICENSE BSD. + https://github.com/pallets/werkzeug + + Changes: + - Exception raised is ImportStringError + - Format with black + """ + import_name = import_name.replace(":", ".") + try: + try: + __import__(import_name) + except ImportError: + if "." not in import_name: + raise + else: + return sys.modules[import_name] + + module_name, obj_name = import_name.rsplit(".", 1) + module = __import__(module_name, globals(), locals(), [obj_name]) + try: + return getattr(module, obj_name) + except AttributeError as e: + raise ImportError(e) from None + except ImportError as e: + if not silent: + raise ImportStringError(import_name, e).with_traceback( + sys.exc_info()[2] + ) from None + return None From d747513973a7141f418bf94c15005ac45275988c Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 22 Sep 2022 21:52:01 -0500 Subject: [PATCH 2/7] feat(registry): Add VCS detection --- src/libvcs/url/registry.py | 49 +++++++++++++++++++ tests/url/test_registry.py | 97 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 src/libvcs/url/registry.py create mode 100644 tests/url/test_registry.py diff --git a/src/libvcs/url/registry.py b/src/libvcs/url/registry.py new file mode 100644 index 00000000..a4e5cc5a --- /dev/null +++ b/src/libvcs/url/registry.py @@ -0,0 +1,49 @@ +import typing as t + +from libvcs._internal.module_loading import import_string + +from .base import URLProtocol + +if t.TYPE_CHECKING: + from typing_extensions import TypeAlias + + ParserLazyMap: TypeAlias = t.Dict[str, t.Union[t.Type[URLProtocol], str]] + ParserMap: TypeAlias = t.Dict[str, t.Type[URLProtocol]] + +DEFAULT_PARSERS: "ParserLazyMap" = { + "git": "libvcs.url.git.GitURL", + "hg": "libvcs.url.hg.HgURL", + "svn": "libvcs.url.svn.SvnURL", +} + + +class ParserMatch(t.NamedTuple): + vcs: str + """VCS system matched""" + match: URLProtocol + """Matcher vcs detected with""" + + +class VCSRegistry: + """Index of parsers""" + + parser_map: t.ClassVar["ParserMap"] = {} + + def __init__(self, parsers: "ParserLazyMap"): + for k, v in parsers.items(): + if isinstance(v, str): + v = import_string(v) + assert callable(v) + self.parser_map[k] = v + + def match( + self, url: str, is_explicit: t.Optional[bool] = None + ) -> t.List["ParserMatch"]: + matches: t.List[ParserMatch] = [] + for vcs, parser in self.parser_map.items(): + if parser.is_valid(url=url, is_explicit=is_explicit): + matches.append(ParserMatch(vcs=vcs, match=parser(url))) + return matches + + +registry = VCSRegistry(parsers=DEFAULT_PARSERS) diff --git a/tests/url/test_registry.py b/tests/url/test_registry.py new file mode 100644 index 00000000..7a83408a --- /dev/null +++ b/tests/url/test_registry.py @@ -0,0 +1,97 @@ +import typing as t + +import pytest + +from libvcs.url import registry +from libvcs.url.git import GitURL +from libvcs.url.hg import HgURL +from libvcs.url.svn import SvnURL + +if t.TYPE_CHECKING: + from typing_extensions import TypeAlias + + ParserMatchLazy: TypeAlias = t.Callable[[str], registry.ParserMatch] + DetectVCSFixtureExpectedMatch: TypeAlias = t.Union[ + registry.ParserMatch, ParserMatchLazy + ] + + +class DetectVCSFixture(t.NamedTuple): + url: str + expected_matches_lazy: t.List["DetectVCSFixtureExpectedMatch"] + is_explicit: bool + + +TEST_FIXTURES: list[DetectVCSFixture] = [ + *[ + DetectVCSFixture( + url=url, + expected_matches_lazy=[ + lambda url: registry.ParserMatch(vcs="git", match=GitURL(url)) + ], + is_explicit=True, + ) + for url in [ + "git+https://github.com/vcs-python/libvcs", + "git+https://github.com/vcs-python/libvcs.git", + "git+https://github.com:vcs-python/libvcs.git", + "git+ssh://git@github.com:vcs-python/libvcs.git", + "git+ssh://git@github.com:vcs-python/libvcs", + "git+ssh://git@github.com/tony/ScreenToGif.git", + "git+https://github.com/nltk/nltk.git", + "git+https://github.com/nltk/nltk", + ] + ], + *[ + DetectVCSFixture( + url=url, + expected_matches_lazy=[ + lambda url: registry.ParserMatch(vcs="hg", match=HgURL(url)) + ], + is_explicit=True, + ) + for url in [ + "hg+http://hg.example.com/MyProject@da39a3ee5e6b", + "hg+ssh://hg.example.com:MyProject@da39a3ee5e6b", + "hg+https://hg.mozilla.org/mozilla-central/", + ] + ], + *[ + DetectVCSFixture( + url=url, + expected_matches_lazy=[ + lambda url: registry.ParserMatch(vcs="svn", match=SvnURL(url)) + ], + is_explicit=True, + ) + for url in [ + "svn+http://svn.example.com/MyProject@da39a3ee5e6b", + "svn+ssh://svn.example.com:MyProject@da39a3ee5e6b", + "svn+ssh://svn.example.com:MyProject@da39a3ee5e6b", + ] + ], +] + + +@pytest.mark.parametrize( + list(DetectVCSFixture._fields), + TEST_FIXTURES, +) +def test_registry( + url: str, + expected_matches_lazy: t.List["DetectVCSFixtureExpectedMatch"], + is_explicit: bool, +) -> None: + assert url + assert registry.registry + + matches = registry.registry.match(url, is_explicit=is_explicit) + + # Just add water + expected_matches: t.List["DetectVCSFixtureExpectedMatch"] = [] + for idx, expected_match in enumerate(expected_matches_lazy): + if callable(expected_match): + assert callable(expected_match) + expected_matches.append(expected_match(url)) + + assert matches == expected_matches From ed735aaa5976e00ecb4dfcbe5ea03e8a2174c338 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 24 Sep 2022 21:23:11 -0500 Subject: [PATCH 3/7] docs(CHANGES): Note registry --- CHANGES | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES b/CHANGES index c26fa081..b237ac4f 100644 --- a/CHANGES +++ b/CHANGES @@ -13,6 +13,10 @@ $ pip install --user --upgrade --pre libvcs - _Add your latest changes from PRs here_ +### New features + +- URLs: Added `registry`, match find which VCS a URL matches with (#420) + ### Breaking changes URL renamings (#417): From e4bdfad2716e9cb428c682ababa8c61247ef8132 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 24 Sep 2022 21:25:59 -0500 Subject: [PATCH 4/7] docs(url): Add registry --- docs/url/index.md | 1 + docs/url/registry.md | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 docs/url/registry.md diff --git a/docs/url/index.md b/docs/url/index.md index e8708c1e..67b412f1 100644 --- a/docs/url/index.md +++ b/docs/url/index.md @@ -159,4 +159,5 @@ git svn hg base +registry ``` diff --git a/docs/url/registry.md b/docs/url/registry.md new file mode 100644 index 00000000..18304953 --- /dev/null +++ b/docs/url/registry.md @@ -0,0 +1,29 @@ +# VCS Detection - `libvcs.url.registry` + +Detect VCS from `git`, `hg`, and `svn` URLs. + +```python +>>> from libvcs.url.registry import registry, ParserMatch +>>> from libvcs.url.git import GitURL + +>>> registry.match('git@invent.kde.org:plasma/plasma-sdk.git') +[ParserMatch(vcs='git', match=GitURL(...))] + +>>> registry.match('git@invent.kde.org:plasma/plasma-sdk.git', is_explicit=True) +[] + +>>> registry.match('git+ssh://git@invent.kde.org:plasma/plasma-sdk.git') +[ParserMatch(vcs='git', match=GitURL(...))] + +>>> registry.match('git+ssh://git@invent.kde.org:plasma/plasma-sdk.git', is_explicit=False) +[] + +>>> registry.match('git+ssh://git@invent.kde.org:plasma/plasma-sdk.git', is_explicit=True) +[ParserMatch(vcs='git', match=GitURL(...))] +``` + +```{eval-rst} +.. automodule:: libvcs.url.registry + :members: + :undoc-members: +``` From 9325a7e9ec21f7cb40bd1dd05d5087c9f92bb875 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 25 Sep 2022 08:17:31 -0500 Subject: [PATCH 5/7] feat(create_project): Guess VCS from URL --- src/libvcs/_internal/shortcuts.py | 34 +++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/libvcs/_internal/shortcuts.py b/src/libvcs/_internal/shortcuts.py index f24460f8..ab1f3779 100644 --- a/src/libvcs/_internal/shortcuts.py +++ b/src/libvcs/_internal/shortcuts.py @@ -10,7 +10,11 @@ from libvcs import GitSync, HgSync, SvnSync from libvcs._internal.run import ProgressCallbackProtocol from libvcs._internal.types import StrPath, VCSLiteral -from libvcs.exc import InvalidVCS +from libvcs.exc import InvalidVCS, LibVCSException +from libvcs.url import registry as url_tools + +if t.TYPE_CHECKING: + from typing_extensions import TypeGuard @t.overload @@ -20,7 +24,7 @@ def create_project( dir: StrPath, vcs: t.Literal["git"], progress_callback: t.Optional[ProgressCallbackProtocol] = None, - **kwargs: dict[t.Any, t.Any] + **kwargs: dict[t.Any, t.Any], ) -> GitSync: ... @@ -32,7 +36,7 @@ def create_project( dir: StrPath, vcs: t.Literal["svn"], progress_callback: t.Optional[ProgressCallbackProtocol] = None, - **kwargs: dict[t.Any, t.Any] + **kwargs: dict[t.Any, t.Any], ) -> SvnSync: ... @@ -44,7 +48,7 @@ def create_project( dir: StrPath, vcs: t.Literal["hg"], progress_callback: t.Optional[ProgressCallbackProtocol] = ..., - **kwargs: dict[t.Any, t.Any] + **kwargs: dict[t.Any, t.Any], ) -> HgSync: ... @@ -53,9 +57,9 @@ def create_project( *, url: str, dir: StrPath, - vcs: VCSLiteral, + vcs: t.Optional[VCSLiteral] = None, progress_callback: t.Optional[ProgressCallbackProtocol] = None, - **kwargs: dict[t.Any, t.Any] + **kwargs: dict[t.Any, t.Any], ) -> Union[GitSync, HgSync, SvnSync]: r"""Return an object representation of a VCS repository. @@ -71,6 +75,24 @@ def create_project( >>> isinstance(r, GitSync) True """ + if vcs is None: + vcs_matches = url_tools.registry.match(url=url, is_explicit=True) + + if len(vcs_matches) == 0: + raise LibVCSException(f"No vcs found for {url}") + if len(vcs_matches) > 1: + raise LibVCSException(f"No exact matches for {url}") + + assert vcs_matches[0].vcs is not None + + def is_vcs(val: t.Any) -> "TypeGuard[VCSLiteral]": + return isinstance(val, str) and val in ["git", "hg", "svn"] + + if is_vcs(vcs_matches[0].vcs): + vcs = vcs_matches[0].vcs + else: + raise InvalidVCS(f"{url} does not have supported vcs: {vcs}") + if vcs == "git": return GitSync(url=url, dir=dir, progress_callback=progress_callback, **kwargs) elif vcs == "hg": From 135e5066e7bdd24ecee0f6fb2bc21186a77830ef Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 25 Sep 2022 08:21:01 -0500 Subject: [PATCH 6/7] docs(CHANGES): Note create_project VCS guessing --- CHANGES | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES b/CHANGES index b237ac4f..3242cd54 100644 --- a/CHANGES +++ b/CHANGES @@ -16,6 +16,7 @@ $ pip install --user --upgrade --pre libvcs ### New features - URLs: Added `registry`, match find which VCS a URL matches with (#420) +- `create_project`: Learn to guess VCS from URL, if none provided (#420) ### Breaking changes From 79d9b010edfacde45451ea278457995fd831fc6a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 25 Sep 2022 08:22:22 -0500 Subject: [PATCH 7/7] tests(create_project): Add example of URL guessing --- src/libvcs/_internal/shortcuts.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/libvcs/_internal/shortcuts.py b/src/libvcs/_internal/shortcuts.py index ab1f3779..98491cff 100644 --- a/src/libvcs/_internal/shortcuts.py +++ b/src/libvcs/_internal/shortcuts.py @@ -72,6 +72,17 @@ def create_project( ... dir=tmp_path ... ) + >>> isinstance(r, GitSync) + True + + create_project can also guess VCS for certain URLs: + + >>> r = create_project( + ... # Note the git+ before the URL + ... url=f'git+file://{create_git_remote_repo()}', + ... dir=tmp_path + ... ) + >>> isinstance(r, GitSync) True """