7
7
import re
8
8
import sys
9
9
from encodings .aliases import aliases as ALIASES
10
- from functools import reduce , wraps
10
+ from functools import reduce , update_wrapper , wraps
11
11
from importlib import import_module
12
12
from inspect import currentframe
13
13
from itertools import chain , product
39
39
"remove" , "reset" , "s2i" , "search" , "stopfunc" , "BytesIO" , "_input" , "_stripl" , "CodecMacro" ,
40
40
"DARWIN" , "LANG" , "LINUX" , "MASKS" , "PY3" , "UNIX" , "WINDOWS" ]
41
41
CODECS_REGISTRY = None
# search functions whose name is already known to the original codecs lookup (filled by register(...)); lookup(...)
#  walks this list before the regular local registry
CODECS_OVERWRITTEN = []
CODECS_CATEGORIES = ["native", "custom"]
# two-letter lowercase language code taken from the current locale ; "" when the locale has no language set
try:
    LANG = getlocale()
except ValueError:
    # NOTE(review): getlocale() is believed to raise ValueError for locale names it cannot parse (CPython
    #  behaviour) -- confirm ; in this case, fall back to None rather than making the whole import fail
    LANG = None
if LANG:
    LANG = (LANG[0] or "")[:2].lower()
44
47
MASKS = {
45
48
'a' : printable ,
46
49
'b' : "" .join (chr (i ) for i in range (256 )),
@@ -142,6 +145,20 @@ def __repr__(self):
142
145
return "<codext.CodecMacro object for encoding %s at %#x>" % (self .name , id (self ))
143
146
144
147
148
+ # inspired by: https://stackoverflow.com/questions/10875442/possible-to-change-a-functions-repr-in-python
149
class Repr(object):
    """ Callable wrapper giving a function a custom representation.

    Calls are forwarded unchanged to the wrapped callable, whose metadata is copied onto the wrapper with
     functools.update_wrapper.

    :param name: name displayed in the representation
    :param func: callable to be wrapped
    """
    def __init__(self, name, func):
        # copy the metadata first: update_wrapper merges func.__dict__ into this instance's __dict__, so when func
        #  is itself a Repr instance, doing this last would overwrite the two private attributes set below
        update_wrapper(self, func)
        self.__name = name
        self.__func = func

    def __call__(self, *args, **kwargs):
        return self.__func(*args, **kwargs)

    def __repr__(self):
        return "<search-function %s at 0x%x>" % (self.__name, id(self))
160
+
161
+
145
162
def __stdin_pipe ():
146
163
""" Stdin pipe read function. """
147
164
try :
@@ -173,6 +190,12 @@ def _stripl(s, st_lines, st_crlf):
173
190
return s
174
191
175
192
193
def _with_repr(name):
    """ Decorator factory wrapping the decorated function into a Repr object displayed with the given name. """
    def _decorate(func):
        wrapped = Repr(name, func)
        return wrapped
    return _decorate
197
+
198
+
176
199
def add (ename , encode = None , decode = None , pattern = None , text = True , add_to_codecs = False , ** kwargs ):
177
200
""" This adds a new codec to the codecs module setting its encode and/or decode functions, eventually dynamically
178
201
naming the encoding with a pattern and with file handling.
@@ -195,6 +218,7 @@ def add(ename, encode=None, decode=None, pattern=None, text=True, add_to_codecs=
195
218
raise ValueError ("At least one en/decoding function must be defined" )
196
219
glob = currentframe ().f_back .f_globals
197
220
# search function for the new encoding
221
+ @_with_repr (ename )
198
222
def getregentry (encoding ):
199
223
if encoding != ename and not (pattern and re .match (pattern , encoding )):
200
224
return
@@ -304,6 +328,7 @@ class StreamReader(Codec, codecs.StreamReader):
304
328
getregentry .__aliases__ = list (map (lambda n : re .sub (r"[\s\-]" , "_" , n ), kwargs ['aliases' ]))
305
329
getregentry .__pattern__ = pattern
306
330
register (getregentry , add_to_codecs )
331
+ return getregentry
307
332
308
333
309
334
def add_macro (mname , * encodings ):
@@ -500,7 +525,7 @@ def __get_value(token, position, case_changed=False):
500
525
return __get_value (token_inv_case , position , True )
501
526
return error_func (token , position )
502
527
if isinstance (result , list ):
503
- result = random . choice ( result )
528
+ result = result [ 0 ]
504
529
return result + lsep
505
530
506
531
# if a separator is defined, rely on it by splitting the input text
@@ -567,7 +592,7 @@ def __get_value(token, position, case_changed=False):
567
592
kwargs ['repl_minlen_b' ] = max (1 , min (map (len , map (b , set (smapdict .values ()) - {'' }))))
568
593
except :
569
594
pass
570
- add (ename , __generic_code (), __generic_code (True ), ** kwargs )
595
+ return add (ename , __generic_code (), __generic_code (True ), ** kwargs )
571
596
codecs .add_map = add_map
572
597
573
598
@@ -651,17 +676,15 @@ def list_encodings(*categories):
651
676
if (len (categories ) == 0 or "native" in categories ) and "native" not in exclude :
652
677
for a in set (ALIASES .values ()):
653
678
try :
654
- __orig_lookup (a )
679
+ ci = __orig_lookup (a )
655
680
except LookupError :
656
681
continue
657
- enc .append (a )
658
- for search_function in __codecs_registry :
682
+ if lookup (a ) is ci :
683
+ enc .append (ci .name )
684
+ for search_function in CODECS_OVERWRITTEN + __codecs_registry :
659
685
name = search_function .__name__ .replace ("_" , "-" )
660
686
p = search_function .__pattern__
661
- if p is None :
662
- ci = search_function (name )
663
- else :
664
- ci = search_function (generate_string_from_regex (p ))
687
+ ci = search_function (name ) if p is None else search_function (generate_string_from_regex (p ))
665
688
c = "other" if ci is None else ci .parameters ['category' ]
666
689
if (len (categories ) == 0 or c in categories ) and c not in exclude :
667
690
enc .append (name )
@@ -834,8 +857,9 @@ def _handle_error(token, position, output=""):
834
857
__orig_register = _codecs .register
835
858
836
859
837
def __add(ename, encode=None, decode=None, pattern=None, text=True, **kwargs):
    # variant of add(...) meant to be attached to the codecs module: whatever the caller passes as add_to_codecs is
    #  discarded and replaced with True ; every other keyword argument is forwarded untouched
    kwargs['add_to_codecs'] = True
    return add(ename, encode=encode, decode=decode, pattern=pattern, text=text, **kwargs)
839
863
__add .__doc__ = add .__doc__
840
864
codecs .add = __add
841
865
@@ -862,19 +886,19 @@ def encode(obj, encoding='utf-8', errors='strict'):
862
886
def lookup (encoding , macro = True ):
863
887
""" Hooked lookup function for searching first for codecs in the local registry of this module. """
864
888
# first, try to match the given encoding with codecs' search functions
865
- for search_function in __codecs_registry :
889
+ for search_function in CODECS_OVERWRITTEN + __codecs_registry :
866
890
codecinfo = search_function (encoding )
867
891
if codecinfo is not None :
868
892
return codecinfo
869
893
# then, if a codec name was given, generate an encoding name from its pattern and get the CodecInfo
870
- for search_function in __codecs_registry :
894
+ for search_function in CODECS_OVERWRITTEN + __codecs_registry :
871
895
if search_function .__name__ .replace ("_" , "-" ) == encoding or \
872
896
encoding in getattr (search_function , "__aliases__" , []):
873
897
codecinfo = search_function (generate_string_from_regex (search_function .__pattern__ ))
874
898
if codecinfo is not None :
875
899
return codecinfo
900
+ # finally, get a CodecInfo with the original lookup function and refine it with a dictionary of parameters
876
901
try :
877
- # finally, get a CodecInfo with the original lookup function and refine it with a dictionary of parameters
878
902
ci = __orig_lookup (encoding )
879
903
ci .parameters = {'category' : "native" , 'module' : "codecs" , 'name' : ALIASES .get (ci .name , ci .name )}
880
904
return ci
@@ -898,14 +922,19 @@ def register(search_function, add_to_codecs=False):
898
922
to remove the codec later
899
923
"""
900
924
if search_function not in __codecs_registry :
901
- __codecs_registry .append (search_function )
925
+ try :
926
+ __orig_lookup (search_function .__name__ )
927
+ l = CODECS_OVERWRITTEN
928
+ except LookupError :
929
+ l = __codecs_registry
930
+ l .append (search_function )
902
931
if add_to_codecs :
903
932
__orig_register (search_function )
904
933
905
934
906
def __register(search_function):
    """ Same as register(...), but with add_to_codecs always set to True. """
    register(search_function, add_to_codecs=True)
909
938
codecs .register = __register
910
939
911
940
0 commit comments