Skip to content

vcs-registry: All vcspull compatible URLs must have URLs detected #420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ $ pip install --user --upgrade --pre libvcs

- _Add your latest changes from PRs here_

### New features

- URLs: Added `registry`, which finds the VCS a URL matches (#420)
- `create_project`: Learn to guess VCS from URL, if none provided (#420)

### Breaking changes

URL renamings (#417):
Expand Down
1 change: 1 addition & 0 deletions docs/url/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,5 @@ git
svn
hg
base
registry
```
29 changes: 29 additions & 0 deletions docs/url/registry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# VCS Detection - `libvcs.url.registry`

Detect VCS from `git`, `hg`, and `svn` URLs.

```python
>>> from libvcs.url.registry import registry, ParserMatch
>>> from libvcs.url.git import GitURL

>>> registry.match('git@invent.kde.org:plasma/plasma-sdk.git')
[ParserMatch(vcs='git', match=GitURL(...))]

>>> registry.match('git@invent.kde.org:plasma/plasma-sdk.git', is_explicit=True)
[]

>>> registry.match('git+ssh://git@invent.kde.org:plasma/plasma-sdk.git')
[ParserMatch(vcs='git', match=GitURL(...))]

>>> registry.match('git+ssh://git@invent.kde.org:plasma/plasma-sdk.git', is_explicit=False)
[]

>>> registry.match('git+ssh://git@invent.kde.org:plasma/plasma-sdk.git', is_explicit=True)
[ParserMatch(vcs='git', match=GitURL(...))]
```

```{eval-rst}
.. automodule:: libvcs.url.registry
:members:
:undoc-members:
```
113 changes: 113 additions & 0 deletions src/libvcs/_internal/module_loading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import sys
import typing as t


class ImportStringError(ImportError):
    """Provide information about a failed :func:`import_string` attempt.

    The message is built by importing ``import_name`` one dotted component at
    a time and listing which prefixes could be imported and which prefix was
    the first to fail.

    Parameters
    ----------
    import_name : str
        Dotted (or colon-delimited) name that failed to be imported.
    exception : BaseException
        Exception raised by the failed import; kept on :attr:`exception`.

    Notes
    -----
    This is from werkzeug.utils d36aaf1 on August 20 2022, LICENSE BSD.
    https://github.com/pallets/werkzeug

    Changes:
    - Deferred load import import_string
    - Format with black
    - A prefix counts as found when its import returns anything but ``None``
    """

    #: String in dotted notation that failed to be imported.
    import_name: str
    #: Wrapped exception.
    exception: BaseException

    def __init__(self, import_name: str, exception: BaseException) -> None:
        self.import_name = import_name
        self.exception = exception
        # Fallback message, kept when every prefix turns out to be importable.
        msg = import_name
        name = ""
        tracked: t.List[t.Tuple[str, t.Optional[str]]] = []
        for part in import_name.replace(":", ".").split("."):
            name = f"{name}.{part}" if name else part
            imported = import_string(name, silent=True)
            # ``silent=True`` reports failure as ``None``. Compare against it
            # explicitly so a falsy object (``0``, ``""``, ``[]``) that was
            # resolved successfully is not reported as missing.
            if imported is not None:
                tracked.append((name, getattr(imported, "__file__", None)))
                continue

            # First prefix that cannot be imported: build the report and stop.
            track = [f"- {n!r} found in {i!r}." for n, i in tracked]
            track.append(f"- {name!r} not found.")
            track_str = "\n".join(track)
            msg = (
                f"import_string() failed for {import_name!r}. Possible reasons"
                f" are:\n\n"
                "- missing __init__.py in a package;\n"
                "- package or module path not included in sys.path;\n"
                "- duplicated package or module name taking precedence in"
                " sys.path;\n"
                "- missing module, class, function or variable;\n\n"
                f"Debugged import:\n\n{track_str}\n\n"
                f"Original exception:\n\n{type(exception).__name__}: {exception}"
            )
            break

        super().__init__(msg)

    def __repr__(self) -> str:
        return f"<{type(self).__name__}({self.import_name!r}, {self.exception!r})>"


def import_string(import_name: str, silent: bool = False) -> t.Any:
    """Import and return the object named by a string.

    The name may use dotted notation (``xml.sax.saxutils.escape``) or a colon
    as the object delimiter (``xml.sax.saxutils:escape``). It is first tried
    as a module; if that fails and the name contains a dot, the final
    component is looked up as an attribute of the module named by the rest.

    Parameters
    ----------
    import_name : str
        The dotted name of the object to import.
    silent : bool
        When ``True``, import failures are swallowed and ``None`` is returned.

    Returns
    -------
    imported object
        The module or attribute, or ``None`` on a silenced failure.

    Raises
    ------
    ImportStringError
        A subclass of :exc:`ImportError`, raised when the import fails and
        ``silent`` is false.

    Notes
    -----
    This is from werkzeug.utils d36aaf1 on May 23, 2022, LICENSE BSD.
    https://github.com/pallets/werkzeug

    Changes:
    - Exception raised is ImportStringError
    - Format with black
    """
    dotted = import_name.replace(":", ".")
    try:
        try:
            __import__(dotted)
        except ImportError:
            # A bare name has no "module.attribute" reading to fall back on.
            if "." not in dotted:
                raise
        else:
            return sys.modules[dotted]

        # Not importable as a module: treat the last component as an attribute.
        parent, _, attr = dotted.rpartition(".")
        module = __import__(parent, globals(), locals(), [attr])
        try:
            return getattr(module, attr)
        except AttributeError as missing:
            raise ImportError(missing) from None
    except ImportError as exc:
        if silent:
            return None
        failure = ImportStringError(dotted, exc)
        raise failure.with_traceback(exc.__traceback__) from None
45 changes: 39 additions & 6 deletions src/libvcs/_internal/shortcuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
from libvcs import GitSync, HgSync, SvnSync
from libvcs._internal.run import ProgressCallbackProtocol
from libvcs._internal.types import StrPath, VCSLiteral
from libvcs.exc import InvalidVCS
from libvcs.exc import InvalidVCS, LibVCSException
from libvcs.url import registry as url_tools

if t.TYPE_CHECKING:
from typing_extensions import TypeGuard


@t.overload
Expand All @@ -20,7 +24,7 @@ def create_project(
dir: StrPath,
vcs: t.Literal["git"],
progress_callback: t.Optional[ProgressCallbackProtocol] = None,
**kwargs: dict[t.Any, t.Any]
**kwargs: dict[t.Any, t.Any],
) -> GitSync:
...

Expand All @@ -32,7 +36,7 @@ def create_project(
dir: StrPath,
vcs: t.Literal["svn"],
progress_callback: t.Optional[ProgressCallbackProtocol] = None,
**kwargs: dict[t.Any, t.Any]
**kwargs: dict[t.Any, t.Any],
) -> SvnSync:
...

Expand All @@ -44,7 +48,7 @@ def create_project(
dir: StrPath,
vcs: t.Literal["hg"],
progress_callback: t.Optional[ProgressCallbackProtocol] = ...,
**kwargs: dict[t.Any, t.Any]
**kwargs: dict[t.Any, t.Any],
) -> HgSync:
...

Expand All @@ -53,9 +57,9 @@ def create_project(
*,
url: str,
dir: StrPath,
vcs: VCSLiteral,
vcs: t.Optional[VCSLiteral] = None,
progress_callback: t.Optional[ProgressCallbackProtocol] = None,
**kwargs: dict[t.Any, t.Any]
**kwargs: dict[t.Any, t.Any],
) -> Union[GitSync, HgSync, SvnSync]:
r"""Return an object representation of a VCS repository.

Expand All @@ -68,9 +72,38 @@ def create_project(
... dir=tmp_path
... )

>>> isinstance(r, GitSync)
True

create_project can also guess VCS for certain URLs:

>>> r = create_project(
... # Note the git+ before the URL
... url=f'git+file://{create_git_remote_repo()}',
... dir=tmp_path
... )

>>> isinstance(r, GitSync)
True
"""
if vcs is None:
vcs_matches = url_tools.registry.match(url=url, is_explicit=True)

if len(vcs_matches) == 0:
raise LibVCSException(f"No vcs found for {url}")
if len(vcs_matches) > 1:
raise LibVCSException(f"No exact matches for {url}")

assert vcs_matches[0].vcs is not None

def is_vcs(val: t.Any) -> "TypeGuard[VCSLiteral]":
return isinstance(val, str) and val in ["git", "hg", "svn"]

if is_vcs(vcs_matches[0].vcs):
vcs = vcs_matches[0].vcs
else:
raise InvalidVCS(f"{url} does not have supported vcs: {vcs}")

if vcs == "git":
return GitSync(url=url, dir=dir, progress_callback=progress_callback, **kwargs)
elif vcs == "hg":
Expand Down
49 changes: 49 additions & 0 deletions src/libvcs/url/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import typing as t

from libvcs._internal.module_loading import import_string

from .base import URLProtocol

# typing_extensions is imported only for static type checkers, so the import
# and the aliases built from it sit behind TYPE_CHECKING and do not exist at
# runtime. Annotations that use these aliases are written as strings.
if t.TYPE_CHECKING:
    from typing_extensions import TypeAlias

    # VCS name -> parser class, or a dotted import path string naming one.
    ParserLazyMap: TypeAlias = t.Dict[str, t.Union[t.Type[URLProtocol], str]]
    # VCS name -> parser class.
    ParserMap: TypeAlias = t.Dict[str, t.Type[URLProtocol]]

# Built-in parsers keyed by VCS name. Values are dotted import paths rather
# than classes; VCSRegistry resolves string entries through import_string().
DEFAULT_PARSERS: "ParserLazyMap" = {
    "git": "libvcs.url.git.GitURL",
    "hg": "libvcs.url.hg.HgURL",
    "svn": "libvcs.url.svn.SvnURL",
}


class ParserMatch(t.NamedTuple):
    """One hit produced by :meth:`VCSRegistry.match`."""

    # Key the accepting parser is registered under (e.g. "git").
    vcs: str
    """VCS system matched"""
    # Instance of the accepting parser class, constructed from the URL.
    match: URLProtocol
    """Matcher vcs detected with"""


class VCSRegistry:
    """Index of URL parsers, keyed by VCS name.

    Parameters
    ----------
    parsers : ParserLazyMap
        Mapping of VCS name to a parser class, or to a dotted import path
        string that :func:`import_string` resolves to one.

    Raises
    ------
    ImportStringError
        If a dotted import path cannot be imported.
    TypeError
        If an entry does not resolve to a callable.
    """

    #: Resolved parsers of this registry. Created per instance so separate
    #: registries never share (or overwrite) each other's entries.
    parser_map: "ParserMap"

    def __init__(self, parsers: "ParserLazyMap") -> None:
        self.parser_map = {}
        for vcs, parser in parsers.items():
            if isinstance(parser, str):
                # Lazy entry: the dotted path is resolved at registration.
                parser = import_string(parser)
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if not callable(parser):
                raise TypeError(
                    f"Parser for {vcs!r} must be callable, got {parser!r}"
                )
            self.parser_map[vcs] = parser

    def match(
        self, url: str, is_explicit: t.Optional[bool] = None
    ) -> t.List["ParserMatch"]:
        """Return a match for every registered parser that accepts ``url``.

        Parameters
        ----------
        url : str
            URL to test against each registered parser.
        is_explicit : bool, optional
            Forwarded unchanged to each parser's ``is_valid``.

        Returns
        -------
        list of ParserMatch
            One entry per accepting parser, in registration order; empty when
            no parser accepts the URL.
        """
        return [
            ParserMatch(vcs=vcs, match=parser(url))
            for vcs, parser in self.parser_map.items()
            if parser.is_valid(url=url, is_explicit=is_explicit)
        ]


# Module-level registry, built from DEFAULT_PARSERS when this module is imported.
registry = VCSRegistry(parsers=DEFAULT_PARSERS)
Loading