Skip to content

Commit dd561bd

Browse files
committed
开放 CRFNERecognizer.tagSet,补充CRF自动机名称识别案例 https://bbs.hankcs.com/t/crf/1155
1 parent 9577651 commit dd561bd

File tree

3 files changed

+57
-1
lines changed

3 files changed

+57
-1
lines changed

src/main/java/com/hankcs/hanlp/model/crf/CRFNERecognizer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
*/
3434
public class CRFNERecognizer extends CRFTagger implements NERecognizer
3535
{
36-
private NERTagSet tagSet;
36+
public NERTagSet tagSet;
3737
/**
3838
* 复用感知机的解码模块
3939
*/

src/test/java/com/hankcs/book/ch08/DemoCRFNER.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public static NERecognizer train(String corpus, String model) throws IOException
4343
return new CRFNERecognizer(model + ".txt");
4444
CRFNERecognizer recognizer = new CRFNERecognizer(null); // 空白
4545
recognizer.train(corpus, model);
46+
recognizer = new CRFNERecognizer(model + ".txt");
4647
return recognizer;
4748
}
4849
}
src/test/java/com/hankcs/book/ch08/DemoCRFNERPlane.java

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* <author>Han He</author>
3+
* <email>me@hankcs.com</email>
4+
* <create-date>2018-07-29 4:18 PM</create-date>
5+
*
6+
* <copyright file="DemoCRFNERPlane.java">
7+
* Copyright (c) 2018, Han He. All Rights Reserved, http://www.hankcs.com/
8+
* This source is subject to Han He. Please contact Han He for more information.
9+
* </copyright>
10+
*/
11+
package com.hankcs.book.ch08;
12+
13+
import com.hankcs.hanlp.corpus.io.IOUtil;
14+
import com.hankcs.hanlp.model.crf.CRFNERecognizer;
15+
import com.hankcs.hanlp.tokenizer.lexical.NERecognizer;
16+
17+
import java.io.IOException;
18+
19+
import static com.hankcs.book.ch08.DemoPlane.PLANE_CORPUS;
20+
import static com.hankcs.book.ch08.DemoPlane.PLANE_MODEL;
21+
22+
23+
/**
24+
* 《自然语言处理入门》8.6.2 训练领域模型 (书本之外的补充试验)
25+
* 配套书籍:http://nlp.hankcs.com/book.php
26+
* 讨论答疑:https://bbs.hankcs.com/
27+
*
28+
* @author hankcs
29+
* @see <a href="http://nlp.hankcs.com/book.php">《自然语言处理入门》</a>
30+
* @see <a href="https://bbs.hankcs.com/">讨论答疑</a>
31+
*/
32+
public class DemoCRFNERPlane
33+
{
34+
/**
 * Demo entry point: obtains a recognizer from {@link #train(String, String)} using the
 * statically imported PLANE_CORPUS / PLANE_MODEL constants, tags one hard-coded sentence,
 * and prints one line per token containing the word, its part-of-speech tag and its NER tag.
 *
 * @param args command-line arguments (not read)
 * @throws IOException propagated from {@link #train(String, String)}
 */
public static void main(String[] args) throws IOException
35+
{
36+
NERecognizer recognizer = train(PLANE_CORPUS, PLANE_MODEL);
37+
String[] wordArray = {"歼", "-", "7", "战斗机", "正是", "仿照", "米格", "-", "21", "而", "制", "。"}; // build the word sequence
38+
String[] posArray = {"v", "w", "w", "n", "d", "v", "nr", "w", "m", "c", "v", "w"}; // build the part-of-speech sequence (one tag per word)
39+
String[] nerTagArray = recognizer.recognize(wordArray, posArray); // sequence labeling
40+
// Print word (left-justified, minimum width 4), POS tag and NER tag, tab-separated.
for (int i = 0; i < wordArray.length; i++)
41+
System.out.printf("%-4s\t%s\t%s\t\n", wordArray[i], posArray[i], nerTagArray[i]);
42+
}
43+
44+
/**
 * Returns a CRF-based recognizer for the given model, training it first when the
 * file {@code model + ".txt"} does not exist yet.
 *
 * @param corpus training corpus handed to {@code CRFNERecognizer.train} (presumably a file path -- confirm)
 * @param model  model path prefix; the file checked for and loaded is {@code model + ".txt"}
 * @return a recognizer constructed from {@code model + ".txt"}
 * @throws IOException declared by this method; presumably raised by training or model loading -- confirm
 */
public static NERecognizer train(String corpus, String model) throws IOException
45+
{
46+
if (IOUtil.isFileExisted(model + ".txt")) // if a CRF++ training result already exists, load it directly
47+
return new CRFNERecognizer(model + ".txt");
48+
CRFNERecognizer recognizer = new CRFNERecognizer(null); // blank recognizer (null model path)
49+
recognizer.tagSet.nerLabels.clear(); // do not recognize nr, ns, nt
50+
recognizer.tagSet.nerLabels.add("np"); // the goal is to recognize np
51+
recognizer.train(corpus, model);
52+
// Re-create the recognizer from the model file; presumably so the returned instance
// is backed by the freshly trained model -- TODO confirm.
recognizer = new CRFNERecognizer(model + ".txt");
53+
return recognizer;
54+
}
55+
}

0 commit comments

Comments
 (0)