Skip to content

Commit b62db0d

Browse files
committed
进一步改进原子切分 fix hankcs#1421 (comment)
1 parent e8a920c commit b62db0d

File tree

3 files changed

+3
-2
lines changed

3 files changed

+3
-2
lines changed

src/main/java/com/hankcs/hanlp/seg/Segment.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ protected static List<AtomNode> quickAtomSegment(char[] charArray, int start, in
161161
int offsetAtom = start;
162162
int preType = CharType.get(charArray[offsetAtom]);
163163
int curType;
164-
while (++offsetAtom < end || (atomNodeList.isEmpty() && offsetAtom < charArray.length))
164+
while (++offsetAtom < end)
165165
{
166166
curType = CharType.get(charArray[offsetAtom]);
167167
if (curType != preType)

src/main/java/com/hankcs/hanlp/seg/WordBasedSegment.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -414,7 +414,7 @@ public void hit(int begin, int end, CoreDictionary.Attribute value)
414414
int j = i + 1;
415415
for (; j < vertexes.length - 1; ++j)
416416
{
417-
if (!vertexes[j].isEmpty()) break;
417+
if (!vertexes[j].isEmpty() && CharType.get(charArray[j - 1]) != CharType.CT_CNUM) break;
418418
}
419419
wordNetStorage.add(i, quickAtomSegment(charArray, i - 1, j - 1));
420420
i = j;

src/test/java/com/hankcs/hanlp/seg/SegmentTest.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -539,6 +539,7 @@ public void testIssue1172()
539539

540540
public void testIssue1421()
541541
{
542+
System.out.println(HanLP.segment("android十一中国版本"));
542543
System.out.println(HanLP.segment("十一中国放假吗"));
543544
System.out.println(HanLP.segment("十一发展计划"));
544545
System.out.println(HanLP.segment("十一通过山海关吗"));

0 commit comments

Comments
 (0)