@@ -126,11 +126,11 @@ def __getitem__(self, key: Union[int, Iterable[int], torch.Tensor]) -> torch.Tensor:
     def __call__(self, key: Union[int, Iterable[int], torch.Tensor]) -> torch.Tensor:
         return self[key]

-    @property
+    @lazy_property
     def dim(self):
         return len(self[0])

-    @property
+    @lazy_property
     def unk_index(self):
         if self.unk is not None:
             return self.vocab[self.unk]
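The `lazy_property` decorator itself is not shown in this diff; presumably it memoizes the computed value on first access, which saves repeated work for attributes like `dim` and `unk_index` that index into the embedding table or vocab. A minimal sketch of such a caching decorator, assuming only the standard memoization idiom (SuPar's actual implementation may differ):

    import functools

    def lazy_property(func):
        # Cache the computed value on the instance under a private name,
        # so the wrapped function body runs at most once per object.
        name = '_lazy_' + func.__name__

        @property
        @functools.wraps(func)
        def wrapper(self):
            if not hasattr(self, name):
                setattr(self, name, func(self))
            return getattr(self, name)
        return wrapper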
@@ -181,8 +181,10 @@ class GloVeEmbedding(Embedding):
     and the resulting representations showcase interesting linear substructures of the word vector space.

     Args:
-        lang (str):
-            Language code. Default: ``en``.
+        src (str):
+            Size of the source data for training. Default: ``6B``.
+        dim (int):
+            Which dimension of the embeddings to use. Default: 100.
         reload (bool):
             If ``True``, forces a fresh download. Default: ``False``.
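With the new arguments, constructing a GloVe embedding would look roughly as follows, mirroring the doctest style of the Tencent examples below. This is a sketch based only on the `src` and `dim` arguments documented above; the repr output is omitted since it is not shown in this commit:

    >>> from supar.utils.embed import GloVeEmbedding
    >>> # `src` selects the training corpus size ('6B'), `dim` the vector size
    >>> embed = GloVeEmbedding(src='6B', dim=100)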
@@ -276,10 +278,17 @@ class TencentEmbedding(Embedding):
         dim (int):
             Which dimension of the embeddings to use. Currently 100 and 200 are available. Default: 100.
         large (bool):
-            If ``True``, uses the large version with a larger vocab size (12,287,936); 2,000,000 otherwise. Default: ``False``.
+            If ``True``, uses the large version with a larger vocab size (12,287,933); 2,000,000 otherwise. Default: ``False``.
         reload (bool):
             If ``True``, forces a fresh download. Default: ``False``.

+    Examples:
+        >>> from supar.utils.embed import Embedding
+        >>> Embedding.load('tencent-100')
+        TencentEmbedding(n_tokens=2000000, dim=100, skip_first=True, cache=True)
+        >>> Embedding.load('tencent-100-large')
+        TencentEmbedding(n_tokens=12287933, dim=100, skip_first=True, cache=True)
+
     .. _Tencent:
         https://ai.tencent.com/ailab/nlp/zh/download.html
     """
@@ -319,7 +328,7 @@ def __init__(self, dim: int = 100, large: bool = False, reload=False, *args, **kwargs):
     'fasttext-ru': {'_target_': FasttextEmbedding, 'lang': 'ru'},
     'giga-100': {'_target_': GigaEmbedding},
     'tencent-100': {'_target_': TencentEmbedding, 'dim': 100},
-    'tencent-100-b': {'_target_': TencentEmbedding, 'dim': 100, 'large': True},
+    'tencent-100-large': {'_target_': TencentEmbedding, 'dim': 100, 'large': True},
     'tencent-200': {'_target_': TencentEmbedding, 'dim': 200},
-    'tencent-200-b': {'_target_': TencentEmbedding, 'dim': 200, 'large': True},
+    'tencent-200-large': {'_target_': TencentEmbedding, 'dim': 200, 'large': True},
 }
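The registry above maps each pretrained name to a `'_target_'` class plus its keyword arguments, so renaming the keys is all that is needed to expose the large Tencent variants under clearer names. A minimal sketch of how such a table can be dispatched, assuming the dict is bound to a name like `PRETRAINED` (the `load` helper here is illustrative, not necessarily SuPar's exact `Embedding.load`):

    def load(name, **kwargs):
        # Copy the registered entry, split off the target class, and
        # instantiate it with the registered plus caller-supplied kwargs.
        cfg = dict(PRETRAINED[name])
        target = cfg.pop('_target_')
        return target(**cfg, **kwargs)

    >>> load('tencent-100-large')  # -> TencentEmbedding(dim=100, large=True)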