1
1
from typing import Dict , Iterable , Callable
2
2
import pytest
3
- from thinc .api import Config
3
+ from thinc .api import Config , fix_random_seed
4
4
from spacy import Language
5
5
from spacy .util import load_model_from_config , registry , resolve_dot_names
6
6
from spacy .schemas import ConfigSchemaTraining
@@ -64,8 +64,8 @@ def reader(nlp: Language):
64
64
@pytest .mark .parametrize (
65
65
"reader,additional_config" ,
66
66
[
67
- ("ml_datasets.imdb_sentiment.v1" , {"train_limit" : 10 , "dev_limit" : 2 }),
68
- ("ml_datasets.dbpedia.v1" , {"train_limit" : 10 , "dev_limit" : 2 }),
67
+ ("ml_datasets.imdb_sentiment.v1" , {"train_limit" : 10 , "dev_limit" : 10 }),
68
+ ("ml_datasets.dbpedia.v1" , {"train_limit" : 10 , "dev_limit" : 10 }),
69
69
("ml_datasets.cmu_movies.v1" , {"limit" : 10 , "freq_cutoff" : 200 , "split" : 0.8 }),
70
70
],
71
71
)
@@ -82,17 +82,18 @@ def test_cat_readers(reader, additional_config):
82
82
83
83
[nlp]
84
84
lang = "en"
85
- pipeline = ["tok2vec", "textcat "]
85
+ pipeline = ["tok2vec", "textcat_multilabel "]
86
86
87
87
[components]
88
88
89
89
[components.tok2vec]
90
90
factory = "tok2vec"
91
91
92
- [components.textcat ]
93
- factory = "textcat "
92
+ [components.textcat_multilabel ]
93
+ factory = "textcat_multilabel "
94
94
"""
95
95
config = Config ().from_str (nlp_config_string )
96
+ fix_random_seed (config ["training" ]["seed" ])
96
97
config ["corpora" ]["@readers" ] = reader
97
98
config ["corpora" ].update (additional_config )
98
99
nlp = load_model_from_config (config , auto_fill = True )
0 commit comments