Skip to content

Commit 5792499

Browse files
author
cclauss
authored
print() function and define xrange() for Python 3
1 parent c9f7222 commit 5792499

File tree

1 file changed

+18
-13
lines changed

1 file changed

+18
-13
lines changed

exts/smallseg.py

+18 -13
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
# -*- coding: utf-8 -*-
2+
from __future__ import print_function
23
import re
34
import os
45
import sys
6+
7+
try:
8+
xrange # Python 2
9+
except NameError:
10+
xrange = range # Python 3
11+
512
class SEG(object):
613
def __init__(self):
714
_localDir=os.path.dirname(__file__)
815
_curpath=os.path.normpath(os.path.join(os.getcwd(),_localDir))
916
curpath=_curpath
1017
self.d = {}
11-
print >> sys.stderr,"loading dict..."
18+
print("loading dict...", file=sys.stderr)
1219
self.set([x.rstrip() for x in file(os.path.join(curpath,"main.dic")) ])
1320
self.specialwords= set([x.rstrip().decode('utf-8') for x in file(os.path.join(curpath,"suffix.dic"))])
14-
print >> sys.stderr,'dict ok.'
21+
print('dict ok.', file=sys.stderr)
1522
#set dictionary(a list)
1623
def set(self,keywords):
1724
p = self.d
@@ -33,8 +40,6 @@ def set(self,keywords):
3340
q = p
3441
k = char
3542
p = p[char]
36-
37-
pass
3843

3944
def _binary_seg(self,s):
4045
ln = len(s)
@@ -47,7 +52,7 @@ def _binary_seg(self,s):
4752
return R
4853

4954
def _pro_unreg(self,piece):
50-
#print piece
55+
#print(piece)
5156
R = []
5257
tmp = re.sub(u"。|,|,|!|…|!|《|》|<|>|\"|'|:|:|?|\?|、|\||“|”|‘|’|;|—|(|)|·|\(|\)| "," ",piece).split()
5358
ln1 = len(tmp)
@@ -77,7 +82,7 @@ def cut(self,text):
7782
mem2 = None
7883
while i-j>0:
7984
t = text[i-j-1].lower()
80-
#print i,j,t,mem
85+
#print(i,j,t,mem)
8186
if not (t in p):
8287
if (mem!=None) or (mem2!=None):
8388
if mem!=None:
@@ -88,7 +93,7 @@ def cut(self,text):
8893
if delta>=1:
8994
if (delta<5) and (re.search(u"[\w\u2E80-\u9FFF]",t)!=None):
9095
pre = text[i-j]
91-
#print pre
96+
#print(pre)
9297
if not (pre in self.specialwords):
9398
i,j,z,q = mem2
9499
del recognised[q:]
@@ -99,7 +104,7 @@ def cut(self,text):
99104
unreg_tmp = self._pro_unreg(text[i:z])
100105
recognised.extend(unreg_tmp)
101106
recognised.append(text[i-j:i])
102-
#print text[i-j:i],mem2
107+
#print(text[i-j:i],mem2)
103108
i = i-j
104109
z = i
105110
j = 0
@@ -113,18 +118,18 @@ def cut(self,text):
113118
if chr(11) in p:
114119
if j<=2:
115120
mem = i,j,z
116-
#print text[i-1]
121+
#print(text[i-1])
117122
if (z-i<2) and (text[i-1] in self.specialwords) and ((mem2==None) or ((mem2!=None and mem2[0]-i>1))):
118-
#print text[i-1]
123+
#print(text[i-1])
119124
mem = None
120125
mem2 = i,j,z,len(recognised)
121126
p = self.d
122127
i -= 1
123128
j = 0
124129
continue
125-
#print mem
130+
#print(mem)
126131
p = self.d
127-
#print i,j,z,text[i:z]
132+
#print(i,j,z,text[i:z])
128133
if((i<ln) and (i<z)):
129134
unreg_tmp = self._pro_unreg(text[i:z])
130135
recognised.extend(unreg_tmp)
@@ -134,7 +139,7 @@ def cut(self,text):
134139
j = 0
135140
mem = None
136141
mem2 = None
137-
#print mem
142+
#print(mem)
138143
if mem!=None:
139144
i,j,z = mem
140145
recognised.extend(self._pro_unreg(text[i:z]))

0 commit comments

Comments
 (0)