Skip to content

Commit 1034c6a

Browse files
committed
Support traditional Chinese tok, pos, ner, dep, con, srl
1 parent 3df5475 commit 1034c6a

File tree

6 files changed

+16 -11 lines changed

6 files changed

+16 -11 lines changed

hanlp/pretrained/dep.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,9 @@
1313
PMT1_DEP_ELECTRA_SMALL = HANLP_URL + 'dep/pmt_dep_electra_small_20220218_134518.zip'
1414
'Electra small encoder (:cite:`clark2020electra`) with Biaffine decoder (:cite:`dozat:17a`) trained on PKU ' \
1515
'Multi-view Chinese Treebank (PMT) 1.0 (:cite:`qiu-etal-2014-multi`). Performance is UAS=87.68% LAS=83.54%.'
16+
CTB9_UDC_ELECTRA_SMALL = HANLP_URL + 'dep/udc_dep_electra_small_20220218_095452.zip'
17+
'Electra small encoder (:cite:`clark2020electra`) with Biaffine decoder (:cite:`dozat:17a`) trained on CTB9-UD420. ' \
18+
'Performance is UAS=85.92% LAS=81.13% .'
1619

1720
PTB_BIAFFINE_DEP_EN = HANLP_URL + 'dep/ptb_dep_biaffine_20200101_174624.zip'
1821
'Biaffine LSTM model (:cite:`dozat:17a`) trained on PTB.'

hanlp/pretrained/pos.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,10 @@
1616
CTB9_POS_RADICAL_ELECTRA_SMALL = HANLP_URL + 'pos/pos_ctb_radical_electra_small_20220215_111932.zip'
1717
'Electra small model (:cite:`clark2020electra`) with radical embeddings (:cite:`he2018dual`) trained on CTB9 (:cite:`https://doi.org/10.35111/gvd0-xk91`). Accuracy = `96.14`.'
1818

19-
C863_POS_ELECTRA_SMALL = HANLP_URL + 'pos/pos_863_electra_small_20210808_124848.zip'
20-
'Electra small model (:cite:`clark2020electra`) trained on Chinese 863 corpus. Accuracy = `95.22`.'
19+
C863_POS_ELECTRA_SMALL = HANLP_URL + 'pos/pos_863_electra_small_20220217_101958.zip'
20+
'Electra small model (:cite:`clark2020electra`) trained on Chinese 863 corpus. Accuracy = `95.19`.'
21+
PKU_POS_ELECTRA_SMALL = HANLP_URL + 'pos/pos_pku_electra_small_20220217_142436.zip'
22+
'Electra small model (:cite:`clark2020electra`) trained on Chinese PKU corpus. Accuracy = `97.55`.'
2123

2224
PKU98_POS_ELECTRA_SMALL = HANLP_URL + 'pos/pos_pku_electra_small_20210808_125158.zip'
2325
'Electra small model (:cite:`clark2020electra`) trained on CTB9 (:cite:`https://doi.org/10.35111/gvd0-xk91`). Accuracy = `97.60`.'

hanlp/pretrained/sdp.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
SEMEVAL16_TEXT_BIAFFINE_ZH = HANLP_URL + 'sdp/semeval16-text-biaffine_20200101_002257.zip'
99
'Biaffine SDP (:cite:`he-choi-2019`) trained on SemEval16 text data.'
1010

11-
SEMEVAL16_ALL_ELECTRA_SMALL_ZH = HANLP_URL + 'sdp/semeval16_sdp_electra_small_20220208_122026.zip'
12-
'Biaffine SDP (:cite:`he-choi-2019`) trained on SemEval16 text and news data.'
11+
SEMEVAL16_ALL_ELECTRA_SMALL_ZH = HANLP_URL + 'sdp/semeval16_sdp_electra_small_20220218_140116.zip'
12+
'Biaffine SDP (:cite:`he-choi-2019`) trained on SemEval16 text and news data. UF=80.82% LF=70.02%.'
1313

1414
SEMEVAL15_PAS_BIAFFINE_EN = HANLP_URL + 'sdp/semeval15_biaffine_pas_20200103_152405.zip'
1515
'Biaffine SDP (:cite:`he-choi-2019`) trained on SemEval15 PAS data.'

hanlp/pretrained/srl.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
# Date: 2021-08-07 19:07
44
from hanlp_common.constant import HANLP_URL
55

6-
CPB3_SRL_ELECTRA_SMALL = HANLP_URL + 'srl/cpb3_electra_small_crf_20210807_190553.zip'
7-
'Electra small model (:cite:`clark2020electra`) trained on CPB3. F1 = `76.53`.'
6+
CPB3_SRL_ELECTRA_SMALL = HANLP_URL + 'srl/cpb3_electra_small_crf_has_transform_20220218_135910.zip'
7+
'Electra small model (:cite:`clark2020electra`) trained on CPB3. P=75.87% R=76.24% F1=76.05%.'
88

99
ALL = {}

hanlp/pretrained/tok.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,11 @@
1515
'ALBERT model (:cite:`Lan2020ALBERT:`) trained on the largest CWS dataset in the world.'
1616
SIGHAN2005_PKU_BERT_BASE_ZH = HANLP_URL + 'tok/sighan2005_pku_bert_base_zh_20201231_141130.zip'
1717
'BERT model (:cite:`devlin-etal-2019-bert`) trained on sighan2005 pku dataset.'
18-
COARSE_ELECTRA_SMALL_ZH = HANLP_URL + 'tok/coarse_electra_small_zh_20210603_112321.zip'
19-
'Electra (:cite:`clark2020electra`) small model trained on coarse-grained CWS corpora. Its performance is P=97.08% R=96.94% F1=97.01% which is ' \
18+
COARSE_ELECTRA_SMALL_ZH = HANLP_URL + 'tok/coarse_electra_small_20220218_100557.zip'
19+
'Electra (:cite:`clark2020electra`) small model trained on coarse-grained CWS corpora. Its performance is P=96.97% R=96.87% F1=96.92% which is ' \
2020
'much higher than that of MTL model '
21-
FINE_ELECTRA_SMALL_ZH = HANLP_URL + 'tok/fine_tok_electra_small_20220205_185926.zip'
22-
'Electra (:cite:`clark2020electra`) small model trained on fine-grained CWS corpora. Its performance is P=97.72% R=97.67% F1=97.69% which is ' \
21+
FINE_ELECTRA_SMALL_ZH = HANLP_URL + 'tok/fine_electra_small_20220217_190117.zip'
22+
'Electra (:cite:`clark2020electra`) small model trained on fine-grained CWS corpora. Its performance is P=97.44% R=97.40% F1=97.42% which is ' \
2323
'much higher than that of MTL model '
2424
CTB9_TOK_ELECTRA_SMALL = HANLP_URL + 'tok/ctb9_electra_small_20220215_205427.zip'
2525
'Electra (:cite:`clark2020electra`) small model trained on CTB9. Its performance is P=97.15% R=97.36% F1=97.26% which is ' \

hanlp/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
# Author: hankcs
33
# Date: 2019-12-28 19:26
44

5-
__version__ = '2.1.0-beta.15'
5+
__version__ = '2.1.0-beta.16'
66
"""HanLP version"""
77

88

0 commit comments

Comments
 (0)