Skip to content

Commit 95f8956

Browse files
committed
Release RESTful keyphrase_extraction APIs
1 parent 9b3a786 commit 95f8956

File tree

8 files changed

+70
-4
lines changed

8 files changed

+70
-4
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ the [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) licens
6969
<dependency>
7070
<groupId>com.hankcs.hanlp.restful</groupId>
7171
<artifactId>hanlp-restful</artifactId>
72-
<version>0.0.8</version>
72+
<version>0.0.9</version>
7373
</dependency>
7474
```
7575

docs/api/restful_java.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Add the following dependency into the `pom.xml` file of your project.
66
<dependency>
77
<groupId>com.hankcs.hanlp.restful</groupId>
88
<artifactId>hanlp-restful</artifactId>
9-
<version>0.0.8</version>
9+
<version>0.0.9</version>
1010
</dependency>
1111
```
1212

plugins/hanlp_restful/hanlp_restful/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,3 +332,26 @@ def abstract_meaning_representation(self,
332332
'language': language or self._language,
333333
'visualization': visualization,
334334
})
335+
336+
def keyphrase_extraction(
337+
self,
338+
text: str,
339+
topk: int = 10,
340+
language: str = None,
341+
) -> Dict[str, float]:
342+
""" Keyphrase extraction aims to identify keywords or phrases reflecting the main topics of a document.
343+
344+
Args:
345+
text: The text content of the document. Preferably the concatenation of the title and the content.
346+
topk: The number of top-ranked keywords or keyphrases to return.
347+
language: The language of the input text. ``None`` to use the default language on the server.
348+
349+
Returns:
350+
A dictionary mapping each keyword or keyphrase to its ranking score :math:`s \in [0, 1]`.
351+
"""
352+
assert text, 'Text has to be specified.'
353+
return self._send_post_json(self._url + '/keyphrase_extraction', {
354+
'text': text,
355+
'language': language or self._language,
356+
'topk': topk,
357+
})

plugins/hanlp_restful/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
setup(
1212
name='hanlp_restful',
13-
version='0.0.12',
13+
version='0.0.15',
1414
description='HanLP: Han Language Processing',
1515
long_description=long_description,
1616
long_description_content_type="text/markdown",

plugins/hanlp_restful/tests/test_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ def test_abstract_meaning_representation(self):
5454
print(self.HanLP.abstract_meaning_representation(tokens=[['男孩', '希望', '女孩', '相信', '他', '。']]))
5555
print(self.HanLP.abstract_meaning_representation('The boy wants the girl to believe him.', language='en'))
5656

57+
def test_keyphrase_extraction(self):
58+
print(self.HanLP.keyphrase_extraction(
59+
'自然语言处理是一门博大精深的学科,掌握理论才能发挥出HanLP的全部性能。 '
60+
'《自然语言处理入门》是一本配套HanLP的NLP入门书,助你零起点上手自然语言处理。', topk=3))
61+
5762

5863
if __name__ == '__main__':
5964
unittest.main()

plugins/hanlp_restful_java/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>com.hankcs.hanlp.restful</groupId>
88
<artifactId>hanlp-restful</artifactId>
9-
<version>0.0.8</version>
9+
<version>0.0.9</version>
1010

1111
<name>HanLP RESTful Client in Java</name>
1212
<url>https://github.com/hankcs/HanLP</url>

plugins/hanlp_restful_java/src/main/java/com/hankcs/hanlp/restful/HanLPClient.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,36 @@ public MeaningRepresentation[] abstractMeaningRepresentation(String[][] tokens)
358358
return mapper.readValue(post("/abstract_meaning_representation", input), MeaningRepresentation[].class);
359359
}
360360

361+
/**
362+
* Keyphrase extraction aims to identify keywords or phrases reflecting the main topics of a document.
363+
*
364+
* @param text The text content of the document. Preferably the concatenation of the title and the content.
365+
* @param topk The number of top-K ranked keywords or keyphrases.
366+
* @return A map from each keyphrase to its ranking score, a value between 0 and 1.
367+
* @throws IOException HTTP errors.
368+
*/
369+
public Map<String, Double> keyphraseExtraction(String text, int topk) throws IOException
370+
{
371+
Map<String, Object> input = new HashMap<>();
372+
input.put("text", text);
373+
input.put("topk", topk);
374+
input.put("language", language);
375+
//noinspection unchecked
376+
return mapper.readValue(post("/keyphrase_extraction", input), LinkedHashMap.class);
377+
}
378+
379+
/**
380+
* Keyphrase extraction aims to identify keywords or phrases reflecting the main topics of a document.
381+
*
382+
* @param text The text content of the document. Preferably the concatenation of the title and the content.
383+
* @return A map from the top 10 ranked keyphrases to their ranking scores, each a value between 0 and 1.
384+
* @throws IOException HTTP errors.
385+
*/
386+
public Map<String, Double> keyphraseExtraction(String text) throws IOException
387+
{
388+
return keyphraseExtraction(text, 10);
389+
}
390+
361391
private String post(String api, Object input_) throws IOException
362392
{
363393
URL url = new URL(this.url + api);

plugins/hanlp_restful_java/src/test/java/com/hankcs/hanlp/restful/HanLPClientTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ void coreferenceResolutionTokensWithSpeakers() throws IOException
108108
prettyPrint(clusters);
109109
}
110110

111+
@Test
112+
void keyphraseExtraction() throws IOException
113+
{
114+
prettyPrint(client.keyphraseExtraction(
115+
"自然语言处理是一门博大精深的学科,掌握理论才能发挥出HanLP的全部性能。" +
116+
"《自然语言处理入门》是一本配套HanLP的NLP入门书,助你零起点上手自然语言处理。", 3));
117+
}
118+
111119
@Test
112120
void abstractMeaningRepresentationText() throws IOException
113121
{

0 commit comments

Comments
 (0)