@@ -38,15 +38,13 @@ class HumanName(object):
38
38
39
39
"""
40
40
41
- def __init__ (self , full_name = "" , constants = constants , regexes = regexes ,
42
- encoding = ENCODING , string_format = None ):
41
+ def __init__ (self , full_name = "" , constants = constants , encoding = ENCODING ,
42
+ string_format = None ):
43
43
if constants :
44
44
self .C = constants
45
- self .RE = regexes or Regexes ()
46
45
self .has_own_config = False
47
46
else :
48
47
self .C = Constants ()
49
- self .RE = Regexes ()
50
48
self .has_own_config = True
51
49
self .ENCODING = encoding
52
50
self .string_format = string_format
@@ -197,7 +195,7 @@ def is_rootname(self, piece):
197
195
and not self .is_an_initial (piece )
198
196
199
197
def is_an_initial(self, value):
    """
    Test ``value`` against the ``initial`` regex of the active constants.

    The pattern is read from ``self.C.RE``; the instance no longer keeps
    its own ``RE`` attribute.

    Returns the match object when ``value`` matches, otherwise ``False``
    (never ``None``).
    """
    return self.C.RE.initial.match(value) or False
201
199
202
200
# def is_a_roman_numeral(value):
203
201
# return re_roman_numeral.match(value) or False
@@ -215,6 +213,21 @@ def full_name(self, value):
215
213
self .parse_full_name ()
216
214
217
215
216
def pre_process(self):
    """
    This happens at the beginning of the parse_full_name() before
    any other processing of the string aside from unicode normalization.

    Currently it only extracts nicknames via parse_nicknames().
    """
    self.parse_nicknames()
222
+
223
+
224
def post_process(self):
    """
    This happens at the end of the parse_full_name() after
    all other processing has taken place.

    Currently it only applies the title/first-name fix-up in
    handle_firstnames().
    """
    self.handle_firstnames()
230
+
218
231
def parse_nicknames (self ):
219
232
"""
220
233
Handling Nicknames
@@ -226,11 +239,23 @@ def parse_nicknames(self):
226
239
227
240
https://code.google.com/p/python-nameparser/issues/detail?id=33
228
241
"""
229
- re_nickname = self .RE .nickname
242
+ re_nickname = self .C . RE .nickname
230
243
if re_nickname .search (self ._full_name ):
231
244
self .nickname_list = re_nickname .findall (self ._full_name )
232
245
self ._full_name = re_nickname .sub ('' , self ._full_name )
233
246
247
def handle_firstnames(self):
    """
    If there are only two parts and one is a title, assume it's a last name
    instead of a first name. e.g. Mr. Johnson. Unless it's a special title
    like "Sir", then when it's followed by a single name that name is always
    a first name.

    Swaps ``self.first`` and ``self.last`` in place; returns nothing.
    """
    # NOTE(review): len(self) presumably counts the populated name parts --
    # confirm against the class's __len__.
    if (
        self.title
        and len(self) == 2
        and lc(self.title) not in self.C.first_name_titles
    ):
        self.last, self.first = self.first, self.last
258
+
234
259
def parse_full_name (self ):
235
260
"""
236
261
Parse full name into the buckets
@@ -247,10 +272,10 @@ def parse_full_name(self):
247
272
if not isinstance (self ._full_name , text_type ):
248
273
self ._full_name = u (self ._full_name , self .ENCODING )
249
274
250
- self .parse_nicknames ()
275
+ self .pre_process ()
251
276
252
277
# collapse multiple spaces
253
- self ._full_name = self .RE .spaces .sub (" " , self ._full_name .strip ())
278
+ self ._full_name = self .C . RE .spaces .sub (" " , self ._full_name .strip ())
254
279
255
280
# break up full_name by commas
256
281
parts = [x .strip () for x in self ._full_name .split ("," )]
@@ -350,11 +375,13 @@ def parse_full_name(self):
350
375
351
376
def _parse_pieces (self , parts , additional_parts_count = 0 ):
352
377
"""
353
- Split parts on spaces and remove commas, join on conjunctions and lastname prefixes.
378
+ Split parts on spaces and remove commas, join on conjunctions and
379
+ lastname prefixes.
354
380
355
381
additional_parts_count: if the comma format contains other parts, we need to know
356
382
how many there are to decide if things should be considered a conjunction.
357
383
"""
384
+
358
385
ps = []
359
386
for part in parts :
360
387
ps += [x .strip (' ,' ) for x in part .split (' ' )]
@@ -451,15 +478,6 @@ def find_p(p):
451
478
log .debug ("pieces: {0}" .format (pieces ))
452
479
return pieces
453
480
454
- def post_process (self ):
455
- # if there are only two parts and one is a title,
456
- # assume it's a last name instead of a first name.
457
- # e.g. Mr. Johnson.
458
- if self .title \
459
- and len (self ) == 2 \
460
- and not lc (self .title ) in self .C .first_name_titles :
461
- self .last , self .first = self .first , self .last
462
-
463
481
464
482
### Capitalization Support
465
483
@@ -469,19 +487,19 @@ def cap_word(self, word):
469
487
exceptions = dict (self .C .capitalization_exceptions )
470
488
if word in exceptions :
471
489
return exceptions [word ]
472
- mac_match = self .RE .mac .match (word )
490
+ mac_match = self .C . RE .mac .match (word )
473
491
if mac_match :
474
492
def cap_after_mac (m ):
475
493
return m .group (1 ).capitalize () + m .group (2 ).capitalize ()
476
- return self .RE .mac .sub (cap_after_mac , word )
494
+ return self .C . RE .mac .sub (cap_after_mac , word )
477
495
else :
478
496
return word .capitalize ()
479
497
480
498
def cap_piece(self, piece):
    """
    Capitalize every word found in ``piece``.

    Each match of the ``word`` regex from the active constants
    (``self.C.RE``) is replaced by the result of ``self.cap_word`` on the
    matched text. A falsy ``piece`` (empty string, ``None``) yields ``""``.
    """
    if not piece:
        return ""

    def replacement(m):
        # Hand the whole matched word to cap_word.
        return self.cap_word(m.group(0))

    return self.C.RE.word.sub(replacement, piece)
485
503
486
504
def capitalize (self ):
487
505
"""
0 commit comments