Skip to content

Commit 56dcb39

Browse files
authored
Fix references to config file in the docs & UX (explosion#9961)
* doc fixes around config file
* fix typo
* clarify default
1 parent 029a48e commit 56dcb39

File tree

6 files changed

+16
-16
lines changed

6 files changed

+16
-16
lines changed

spacy/cli/debug_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def debug_config_cli(
2525
show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
2626
# fmt: on
2727
):
28-
"""Debug a config.cfg file and show validation errors. The command will
28+
"""Debug a config file and show validation errors. The command will
2929
create all objects in the tree and validate them. Note that some config
3030
validation errors are blocking and will prevent the rest of the config from
3131
being resolved. This means that you may not see all validation errors at

spacy/cli/init_config.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class Optimizations(str, Enum):
2727
@init_cli.command("config")
2828
def init_config_cli(
2929
# fmt: off
30-
output_file: Path = Arg(..., help="File to save config.cfg to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
30+
output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
3131
lang: str = Opt("en", "--lang", "-l", help="Two-letter code of the language to use"),
3232
pipeline: str = Opt("tagger,parser,ner", "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
3333
optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
@@ -37,7 +37,7 @@ def init_config_cli(
3737
# fmt: on
3838
):
3939
"""
40-
Generate a starter config.cfg for training. Based on your requirements
40+
Generate a starter config file for training. Based on your requirements
4141
specified via the CLI arguments, this command generates a config with the
4242
optimal settings for your use case. This includes the choice of architecture,
4343
pretrained weights and related hyperparameters.
@@ -66,15 +66,15 @@ def init_config_cli(
6666
@init_cli.command("fill-config")
6767
def init_fill_config_cli(
6868
# fmt: off
69-
base_path: Path = Arg(..., help="Base config to fill", exists=True, dir_okay=False),
70-
output_file: Path = Arg("-", help="File to save config.cfg to (or - for stdout)", allow_dash=True),
69+
base_path: Path = Arg(..., help="Path to base config to fill", exists=True, dir_okay=False),
70+
output_file: Path = Arg("-", help="Path to output .cfg file (or - for stdout)", allow_dash=True),
7171
pretraining: bool = Opt(False, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
7272
diff: bool = Opt(False, "--diff", "-D", help="Print a visual diff highlighting the changes"),
7373
code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
7474
# fmt: on
7575
):
7676
"""
77-
Fill partial config.cfg with default values. Will add all missing settings
77+
Fill partial config file with default values. Will add all missing settings
7878
from the default config and will create all objects, check the registered
7979
functions for their default values and update the base config. This command
8080
can be used with a config generated via the training quickstart widget:

spacy/errors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ class Errors(metaclass=ErrorsWithCodes):
642642
E912 = ("Failed to initialize lemmatizer. Missing lemmatizer table(s) found "
643643
"for mode '{mode}'. Required tables: {tables}. Found: {found}.")
644644
E913 = ("Corpus path can't be None. Maybe you forgot to define it in your "
645-
"config.cfg or override it on the CLI?")
645+
".cfg file or override it on the CLI?")
646646
E914 = ("Executing {name} callback failed. Expected the function to "
647647
"return the nlp object but got: {value}. Maybe you forgot to return "
648648
"the modified object in your function?")

spacy/util.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
DEFAULT_OOV_PROB = -20
6464
LEXEME_NORM_LANGS = ["cs", "da", "de", "el", "en", "id", "lb", "mk", "pt", "ru", "sr", "ta", "th"]
6565

66-
# Default order of sections in the config.cfg. Not all sections needs to exist,
66+
# Default order of sections in the config file. Not all sections needs to exist,
6767
# and additional sections are added at the end, in alphabetical order.
6868
CONFIG_SECTION_ORDER = ["paths", "variables", "system", "nlp", "components", "corpora", "training", "pretraining", "initialize"]
6969
# fmt: on
@@ -465,7 +465,7 @@ def load_model_from_path(
465465
"""Load a model from a data directory path. Creates Language class with
466466
pipeline from config.cfg and then calls from_disk() with path.
467467
468-
model_path (Path): Mmodel path.
468+
model_path (Path): Model path.
469469
meta (Dict[str, Any]): Optional model meta.
470470
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
471471
a new Vocab object will be created.
@@ -642,8 +642,8 @@ def load_config(
642642
sys.stdin.read(), overrides=overrides, interpolate=interpolate
643643
)
644644
else:
645-
if not config_path or not config_path.exists() or not config_path.is_file():
646-
raise IOError(Errors.E053.format(path=config_path, name="config.cfg"))
645+
if not config_path or not config_path.is_file():
646+
raise IOError(Errors.E053.format(path=config_path, name="config file"))
647647
return config.from_disk(
648648
config_path, overrides=overrides, interpolate=interpolate
649649
)

website/docs/api/cli.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ $ python -m spacy init config [output_file] [--lang] [--pipeline] [--optimize] [
148148

149149
### init fill-config {#init-fill-config new="3"}
150150

151-
Auto-fill a partial [`config.cfg` file](/usage/training#config) file with **all
152-
default values**, e.g. a config generated with the
151+
Auto-fill a partial [.cfg file](/usage/training#config) with **all default
152+
values**, e.g. a config generated with the
153153
[quickstart widget](/usage/training#quickstart). Config files used for training
154154
should always be complete and not contain any hidden defaults or missing values,
155155
so this command helps you create your final training config. In order to find
@@ -175,7 +175,7 @@ $ python -m spacy init fill-config [base_path] [output_file] [--diff]
175175
| Name | Description |
176176
| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
177177
| `base_path` | Path to base config to fill, e.g. generated by the [quickstart widget](/usage/training#quickstart). ~~Path (positional)~~ |
178-
| `output_file` | Path to output `.cfg` file. If not set, the config is written to stdout so you can pipe it forward to a file. ~~Path (positional)~~ |
178+
| `output_file` | Path to output `.cfg` file or "-" to write to stdout so you can pipe it to a file. Defaults to "-" (stdout). ~~Path (positional)~~ |
179179
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
180180
| `--pretraining`, `-pt` | Include config for pretraining (with [`spacy pretrain`](/api/cli#pretrain)). Defaults to `False`. ~~bool (flag)~~ |
181181
| `--diff`, `-D` | Print a visual diff highlighting the changes. ~~bool (flag)~~ |
@@ -208,7 +208,7 @@ $ python -m spacy init vectors [lang] [vectors_loc] [output_dir] [--prune] [--tr
208208
| `output_dir` | Pipeline output directory. Will be created if it doesn't exist. ~~Path (positional)~~ |
209209
| `--truncate`, `-t` | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation. ~~int (option)~~ |
210210
| `--prune`, `-p` | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. ~~int (option)~~ |
211-
| `--mode`, `-m` | Vectors mode: `default` or [`floret`](https://github.com/explosion/floret). Defaults to `default`. ~~Optional[str] \(option)~~ |
211+
| `--mode`, `-m` | Vectors mode: `default` or [`floret`](https://github.com/explosion/floret). Defaults to `default`. ~~Optional[str] \(option)~~ |
212212
| `--name`, `-n` | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. ~~Optional[str] \(option)~~ |
213213
| `--verbose`, `-V` | Print additional information and explanations. ~~bool (flag)~~ |
214214
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |

website/docs/api/data-formats.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ As of spaCy v3.0, the `meta.json` **isn't** used to construct the language class
535535
and pipeline anymore and only contains meta information for reference and for
536536
creating a Python package with [`spacy package`](/api/cli#package). How to set
537537
up the `nlp` object is now defined in the
538-
[`config.cfg`](/api/data-formats#config), which includes detailed information
538+
[config file](/api/data-formats#config), which includes detailed information
539539
about the pipeline components and their model architectures, and all other
540540
settings and hyperparameters used to train the pipeline. It's the **single
541541
source of truth** used for loading a pipeline.

0 commit comments

Comments
 (0)