Skip to content

Commit 694c318

Browse files
committed
Address random results in slow readers tests (explosion#9544)
* Set random seed for dataset shuffling
* Use more dev examples for non-zero scores
1 parent 308b170 commit 694c318

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

spacy/tests/training/test_readers.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from typing import Dict, Iterable, Callable
22
import pytest
3-
from thinc.api import Config
3+
from thinc.api import Config, fix_random_seed
44
from spacy import Language
55
from spacy.util import load_model_from_config, registry, resolve_dot_names
66
from spacy.schemas import ConfigSchemaTraining
@@ -64,8 +64,8 @@ def reader(nlp: Language):
6464
@pytest.mark.parametrize(
6565
"reader,additional_config",
6666
[
67-
("ml_datasets.imdb_sentiment.v1", {"train_limit": 10, "dev_limit": 2}),
68-
("ml_datasets.dbpedia.v1", {"train_limit": 10, "dev_limit": 2}),
67+
("ml_datasets.imdb_sentiment.v1", {"train_limit": 10, "dev_limit": 10}),
68+
("ml_datasets.dbpedia.v1", {"train_limit": 10, "dev_limit": 10}),
6969
("ml_datasets.cmu_movies.v1", {"limit": 10, "freq_cutoff": 200, "split": 0.8}),
7070
],
7171
)
@@ -93,6 +93,7 @@ def test_cat_readers(reader, additional_config):
9393
factory = "textcat"
9494
"""
9595
config = Config().from_str(nlp_config_string)
96+
fix_random_seed(config["training"]["seed"])
9697
config["corpora"]["@readers"] = reader
9798
config["corpora"].update(additional_config)
9899
nlp = load_model_from_config(config, auto_fill=True)

0 commit comments

Comments
 (0)