Skip to content

Commit 6ac7753

Browse files
authored
Refine errors for pretrained objects (huggingface#15261)
* Refine errors for pretrained objects * PoC to avoid using get_list_of_files * Adapt tests to use new errors * Quality + Fix PoC * Revert "PoC to avoid using get_list_of_files" This reverts commit cb93b7c. * Revert "Quality + Fix PoC" This reverts commit 3ba6d0d. * Fix doc * Revert PoC * Add feature extractors * More tests and PT model * Adapt error message * Feature extractor tests * TF model * Flax model and test * Merge flax auto tests * Add tokenization * Fix test
1 parent 80af104 commit 6ac7753

16 files changed

+603
-103
lines changed

src/transformers/configuration_utils.py

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,15 @@
2525

2626
from packaging import version
2727

28+
from requests import HTTPError
29+
2830
from . import __version__
2931
from .file_utils import (
3032
CONFIG_NAME,
33+
EntryNotFoundError,
3134
PushToHubMixin,
35+
RepositoryNotFoundError,
36+
RevisionNotFoundError,
3237
cached_path,
3338
copy_func,
3439
get_list_of_files,
@@ -520,8 +525,6 @@ def get_config_dict(
520525
From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
521526
[`PretrainedConfig`] using `from_dict`.
522527
523-
524-
525528
Parameters:
526529
pretrained_model_name_or_path (`str` or `os.PathLike`):
527530
The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.
@@ -578,30 +581,51 @@ def get_config_dict(
578581
use_auth_token=use_auth_token,
579582
user_agent=user_agent,
580583
)
581-
# Load config dict
582-
config_dict = cls._dict_from_json_file(resolved_config_file)
583584

585+
except RepositoryNotFoundError as err:
586+
logger.error(err)
587+
raise EnvironmentError(
588+
f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed on "
589+
"'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having "
590+
"permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass "
591+
"`use_auth_token=True`."
592+
)
593+
except RevisionNotFoundError as err:
594+
logger.error(err)
595+
raise EnvironmentError(
596+
f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this "
597+
f"model name. Check the model page at 'https://huggingface.co/{pretrained_model_name_or_path}' for "
598+
"available revisions."
599+
)
600+
except EntryNotFoundError as err:
601+
logger.error(err)
602+
raise EnvironmentError(
603+
f"{pretrained_model_name_or_path} does not appear to have a file named {configuration_file}."
604+
)
605+
except HTTPError as err:
606+
logger.error(err)
607+
raise EnvironmentError(
608+
"We couldn't connect to 'https://huggingface.co/' to load this model and it looks like "
609+
f"{pretrained_model_name_or_path} is not the path to a directory conaining a {configuration_file} "
610+
"file.\nCheckout your internet connection or see how to run the library in offline mode at "
611+
"'https://huggingface.co/docs/transformers/installation#offline-mode'."
612+
)
584613
except EnvironmentError as err:
585614
logger.error(err)
586-
msg = (
587-
f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n"
588-
f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n"
589-
f" (make sure '{pretrained_model_name_or_path}' is not a path to a local directory with something else, in that case)\n\n"
590-
f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n"
615+
raise EnvironmentError(
616+
f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from "
617+
"'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
618+
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
619+
f"containing a {configuration_file} file"
591620
)
592621

593-
if revision is not None:
594-
msg += f"- or '{revision}' is a valid git identifier (branch name, a tag name, or a commit id) that exists for this model name as listed on its model page on 'https://huggingface.co/models'\n\n"
595-
596-
raise EnvironmentError(msg)
597-
622+
try:
623+
# Load config dict
624+
config_dict = cls._dict_from_json_file(resolved_config_file)
598625
except (json.JSONDecodeError, UnicodeDecodeError):
599-
msg = (
600-
f"Couldn't reach server at '{config_file}' to download configuration file or "
601-
"configuration file is not a valid JSON file. "
602-
f"Please check network or file content here: {resolved_config_file}."
626+
raise EnvironmentError(
627+
f"It looks like the config file at '{resolved_config_file}' is not a valid JSON file."
603628
)
604-
raise EnvironmentError(msg)
605629

606630
if resolved_config_file == config_file:
607631
logger.info(f"loading configuration file {config_file}")
@@ -842,9 +866,13 @@ def get_configuration_file(
842866
`str`: The configuration file to use.
843867
"""
844868
# Inspect all files from the repo/folder.
845-
all_files = get_list_of_files(
846-
path_or_repo, revision=revision, use_auth_token=use_auth_token, local_files_only=local_files_only
847-
)
869+
try:
870+
all_files = get_list_of_files(
871+
path_or_repo, revision=revision, use_auth_token=use_auth_token, local_files_only=local_files_only
872+
)
873+
except Exception:
874+
return FULL_CONFIGURATION_FILE
875+
848876
configuration_files_map = {}
849877
for file_name in all_files:
850878
search = _re_configuration_file.search(file_name)

src/transformers/feature_extraction_utils.py

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,13 @@
2424

2525
import numpy as np
2626

27+
from requests import HTTPError
28+
2729
from .file_utils import (
2830
FEATURE_EXTRACTOR_NAME,
31+
EntryNotFoundError,
32+
RepositoryNotFoundError,
33+
RevisionNotFoundError,
2934
TensorType,
3035
_is_jax,
3136
_is_numpy,
@@ -374,28 +379,54 @@ def get_feature_extractor_dict(
374379
use_auth_token=use_auth_token,
375380
user_agent=user_agent,
376381
)
377-
# Load feature_extractor dict
378-
with open(resolved_feature_extractor_file, "r", encoding="utf-8") as reader:
379-
text = reader.read()
380-
feature_extractor_dict = json.loads(text)
381382

383+
except RepositoryNotFoundError as err:
384+
logger.error(err)
385+
raise EnvironmentError(
386+
f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed on "
387+
"'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having "
388+
"permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass "
389+
"`use_auth_token=True`."
390+
)
391+
except RevisionNotFoundError as err:
392+
logger.error(err)
393+
raise EnvironmentError(
394+
f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this "
395+
f"model name. Check the model page at 'https://huggingface.co/{pretrained_model_name_or_path}' for "
396+
"available revisions."
397+
)
398+
except EntryNotFoundError as err:
399+
logger.error(err)
400+
raise EnvironmentError(
401+
f"{pretrained_model_name_or_path} does not appear to have a file named {FEATURE_EXTRACTOR_NAME}."
402+
)
403+
except HTTPError as err:
404+
logger.error(err)
405+
raise EnvironmentError(
406+
"We couldn't connect to 'https://huggingface.co/' to load this model and it looks like "
407+
f"{pretrained_model_name_or_path} is not the path to a directory conaining a "
408+
f"{FEATURE_EXTRACTOR_NAME} file.\nCheckout your internet connection or see how to run the library in "
409+
"offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'."
410+
)
382411
except EnvironmentError as err:
383412
logger.error(err)
384-
msg = (
385-
f"Can't load feature extractor for '{pretrained_model_name_or_path}'. Make sure that:\n\n"
386-
f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n"
387-
f" (make sure '{pretrained_model_name_or_path}' is not a path to a local directory with something else, in that case)\n\n"
388-
f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {FEATURE_EXTRACTOR_NAME} file\n\n"
413+
raise EnvironmentError(
414+
f"Can't load feature extractor for '{pretrained_model_name_or_path}'. If you were trying to load it "
415+
"from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
416+
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
417+
f"containing a {FEATURE_EXTRACTOR_NAME} file"
389418
)
390-
raise EnvironmentError(msg)
419+
420+
try:
421+
# Load feature_extractor dict
422+
with open(resolved_feature_extractor_file, "r", encoding="utf-8") as reader:
423+
text = reader.read()
424+
feature_extractor_dict = json.loads(text)
391425

392426
except json.JSONDecodeError:
393-
msg = (
394-
f"Couldn't reach server at '{feature_extractor_file}' to download feature extractor configuration file or "
395-
"feature extractor configuration file is not a valid JSON file. "
396-
f"Please check network or file content here: {resolved_feature_extractor_file}."
427+
raise EnvironmentError(
428+
f"It looks like the config file at '{resolved_feature_extractor_file}' is not a valid JSON file."
397429
)
398-
raise EnvironmentError(msg)
399430

400431
if resolved_feature_extractor_file == feature_extractor_file:
401432
logger.info(f"loading feature extractor configuration file {feature_extractor_file}")

src/transformers/file_utils.py

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,6 +1900,37 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
19001900
return ua
19011901

19021902

1903+
class RepositoryNotFoundError(HTTPError):
    """
    Signals that a hf.co URL points at a repository that cannot be found: either the repository name is invalid, or
    the repository is private and the user has no access to it.
    """
1908+
1909+
1910+
class EntryNotFoundError(HTTPError):
    """
    Signals that a hf.co URL names a file that does not exist, although the repository and revision are both valid.
    """
1912+
1913+
1914+
class RevisionNotFoundError(HTTPError):
    """
    Signals that a hf.co URL names a revision that does not exist, although the repository itself is valid.
    """
1916+
1917+
1918+
def _raise_for_status(request):
    """
    Internal version of `request.raise_for_status()` that will refine a potential HTTPError.

    The `X-Error-Code` header of the response is looked at first; when it carries one of the codes handled below, the
    matching `HTTPError` subclass is raised. In every other case the call is delegated to
    `request.raise_for_status()`.

    Args:
        request:
            Despite its name, this is the response object returned by `requests.get`/`requests.head`. It must expose
            `headers`, `url` and `raise_for_status()`.

    Raises:
        RepositoryNotFoundError: If the `X-Error-Code` header is `"RepoNotFound"`.
        EntryNotFoundError: If the `X-Error-Code` header is `"EntryNotFound"`.
        RevisionNotFoundError: If the `X-Error-Code` header is `"RevisionNotFound"`.
        HTTPError: Whatever `request.raise_for_status()` raises for any other failing response.
    """
    error_code = request.headers.get("X-Error-Code")

    # The response is attached to the refined errors, as `raise_for_status()` does for a plain HTTPError, so that
    # handlers can still read `err.response` (status code, headers, ...).
    if error_code == "RepoNotFound":
        raise RepositoryNotFoundError(
            f"404 Client Error: Repository Not Found for url: {request.url}", response=request
        )
    if error_code == "EntryNotFound":
        raise EntryNotFoundError(f"404 Client Error: Entry Not Found for url: {request.url}", response=request)
    if error_code == "RevisionNotFound":
        raise RevisionNotFoundError(f"404 Client Error: Revision Not Found for url: {request.url}", response=request)

    # No header, or an error code that is not handled above: fall back to the stock behavior.
    request.raise_for_status()
1932+
1933+
19031934
def http_get(url: str, temp_file: BinaryIO, proxies=None, resume_size=0, headers: Optional[Dict[str, str]] = None):
19041935
"""
19051936
Download remote file. Do not gobble up errors.
@@ -1908,7 +1939,7 @@ def http_get(url: str, temp_file: BinaryIO, proxies=None, resume_size=0, headers
19081939
if resume_size > 0:
19091940
headers["Range"] = f"bytes={resume_size}-"
19101941
r = requests.get(url, stream=True, proxies=proxies, headers=headers)
1911-
r.raise_for_status()
1942+
_raise_for_status(r)
19121943
content_length = r.headers.get("Content-Length")
19131944
total = resume_size + int(content_length) if content_length is not None else None
19141945
# `tqdm` behavior is determined by `utils.logging.is_progress_bar_enabled()`
@@ -1970,7 +2001,7 @@ def get_from_cache(
19702001
if not local_files_only:
19712002
try:
19722003
r = requests.head(url, headers=headers, allow_redirects=False, proxies=proxies, timeout=etag_timeout)
1973-
r.raise_for_status()
2004+
_raise_for_status(r)
19742005
etag = r.headers.get("X-Linked-Etag") or r.headers.get("ETag")
19752006
# We favor a custom header indicating the etag of the linked resource, and
19762007
# we fallback to the regular etag header.
@@ -2081,6 +2112,56 @@ def _resumable_file_manager() -> "io.BufferedWriter":
20812112
return cache_path
20822113

20832114

2115+
def has_file(
    path_or_repo: Union[str, os.PathLike],
    filename: str,
    revision: Optional[str] = None,
    mirror: Optional[str] = None,
    proxies: Optional[Dict[str, str]] = None,
    use_auth_token: Optional[Union[bool, str]] = None,
):
    """
    Checks if a repo contains a given file without downloading it. Works for remote repos and local folders.

    <Tip warning={false}>

    This function will raise an error if the repository `path_or_repo` is not valid or if `revision` does not exist for
    this repo, but will return False for any other HTTP error (a missing file included).

    </Tip>

    Args:
        path_or_repo (`str` or `os.PathLike`):
            Either an existing local directory, or a value passed to `hf_bucket_url` to build the URL to query.
        filename (`str`):
            Name of the file to look for.
        revision (`str`, *optional*):
            Revision forwarded to `hf_bucket_url`.
        mirror (`str`, *optional*):
            Mirror forwarded to `hf_bucket_url`.
        proxies (`Dict[str, str]`, *optional*):
            Proxies forwarded to `requests.head`.
        use_auth_token (`str` or `bool`, *optional*):
            A string is sent as the bearer token. Any other truthy value makes the function use the token returned
            by `HfFolder.get_token()`.

    Returns:
        `bool`: Whether the file was found.

    Raises:
        EnvironmentError: If `use_auth_token` is truthy (and not a string) but no stored token is found, if the
            repository is not found, or if the revision is not found.
    """
    # Local folder: a plain filesystem check is enough, no request is sent.
    if os.path.isdir(path_or_repo):
        return os.path.isfile(os.path.join(path_or_repo, filename))

    url = hf_bucket_url(path_or_repo, filename=filename, revision=revision, mirror=mirror)

    headers = {"user-agent": http_user_agent()}
    if isinstance(use_auth_token, str):
        headers["authorization"] = f"Bearer {use_auth_token}"
    elif use_auth_token:
        token = HfFolder.get_token()
        if token is None:
            raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.")
        headers["authorization"] = f"Bearer {token}"

    # HEAD request: only the status and headers are needed, never the file content.
    # NOTE(review): this call sits outside the `try` below, so exceptions raised by `requests.head` itself propagate
    # to the caller instead of producing `False`.
    r = requests.head(url, headers=headers, allow_redirects=False, proxies=proxies, timeout=10)
    try:
        _raise_for_status(r)
        return True
    except RepositoryNotFoundError as e:
        logger.error(e)
        raise EnvironmentError(f"{path_or_repo} is not a local folder or a valid repository name on 'https://hf.co'.")
    except RevisionNotFoundError as e:
        logger.error(e)
        # Both fragments must be f-strings, otherwise `{path_or_repo}` is emitted literally in the message.
        raise EnvironmentError(
            f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this "
            f"model name. Check the model page at 'https://huggingface.co/{path_or_repo}' for available revisions."
        )
    except requests.HTTPError:
        # We return false for EntryNotFoundError (logical) as well as any other HTTP error.
        return False
2163+
2164+
20842165
def get_list_of_files(
20852166
path_or_repo: Union[str, os.PathLike],
20862167
revision: Optional[str] = None,

0 commit comments

Comments
 (0)