Skip to content

Commit 49c83a7

Browse files
committed
Applied minor improvements to codebase
1 parent ed14c33 commit 49c83a7

File tree

3 files changed

+63
-26
lines changed

3 files changed

+63
-26
lines changed

codext/__common__.py

Lines changed: 48 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import re
88
import sys
99
from encodings.aliases import aliases as ALIASES
10-
from functools import reduce, wraps
10+
from functools import reduce, update_wrapper, wraps
1111
from importlib import import_module
1212
from inspect import currentframe
1313
from itertools import chain, product
@@ -39,8 +39,11 @@
3939
"remove", "reset", "s2i", "search", "stopfunc", "BytesIO", "_input", "_stripl", "CodecMacro",
4040
"DARWIN", "LANG", "LINUX", "MASKS", "PY3", "UNIX", "WINDOWS"]
4141
CODECS_REGISTRY = None
42+
CODECS_OVERWRITTEN = []
4243
CODECS_CATEGORIES = ["native", "custom"]
43-
LANG = getlocale()[0][:2].lower() if getlocale() else None
44+
LANG = getlocale()
45+
if LANG:
46+
LANG = (LANG[0] or "")[:2].lower()
4447
MASKS = {
4548
'a': printable,
4649
'b': "".join(chr(i) for i in range(256)),
@@ -142,6 +145,20 @@ def __repr__(self):
142145
return "<codext.CodecMacro object for encoding %s at %#x>" % (self.name, id(self))
143146

144147

148+
# inspired by: https://stackoverflow.com/questions/10875442/possible-to-change-a-functions-repr-in-python
149+
class Repr(object):
150+
def __init__(self, name, func):
151+
self.__name = name
152+
self.__func = func
153+
update_wrapper(self, func)
154+
155+
def __call__(self, *args, **kwargs):
156+
return self.__func(*args, **kwargs)
157+
158+
def __repr__(self):
159+
return "<search-function %s at 0x%x>" % (self.__name, id(self))
160+
161+
145162
def __stdin_pipe():
146163
""" Stdin pipe read function. """
147164
try:
@@ -173,6 +190,12 @@ def _stripl(s, st_lines, st_crlf):
173190
return s
174191

175192

193+
def _with_repr(name):
194+
def _wrapper(f):
195+
return Repr(name, f)
196+
return _wrapper
197+
198+
176199
def add(ename, encode=None, decode=None, pattern=None, text=True, add_to_codecs=False, **kwargs):
177200
""" This adds a new codec to the codecs module setting its encode and/or decode functions, eventually dynamically
178201
naming the encoding with a pattern and with file handling.
@@ -195,6 +218,7 @@ def add(ename, encode=None, decode=None, pattern=None, text=True, add_to_codecs=
195218
raise ValueError("At least one en/decoding function must be defined")
196219
glob = currentframe().f_back.f_globals
197220
# search function for the new encoding
221+
@_with_repr(ename)
198222
def getregentry(encoding):
199223
if encoding != ename and not (pattern and re.match(pattern, encoding)):
200224
return
@@ -304,6 +328,7 @@ class StreamReader(Codec, codecs.StreamReader):
304328
getregentry.__aliases__ = list(map(lambda n: re.sub(r"[\s\-]", "_", n), kwargs['aliases']))
305329
getregentry.__pattern__ = pattern
306330
register(getregentry, add_to_codecs)
331+
return getregentry
307332

308333

309334
def add_macro(mname, *encodings):
@@ -500,7 +525,7 @@ def __get_value(token, position, case_changed=False):
500525
return __get_value(token_inv_case, position, True)
501526
return error_func(token, position)
502527
if isinstance(result, list):
503-
result = random.choice(result)
528+
result = result[0]
504529
return result + lsep
505530

506531
# if a separator is defined, rely on it by splitting the input text
@@ -567,7 +592,7 @@ def __get_value(token, position, case_changed=False):
567592
kwargs['repl_minlen_b'] = max(1, min(map(len, map(b, set(smapdict.values()) - {''}))))
568593
except:
569594
pass
570-
add(ename, __generic_code(), __generic_code(True), **kwargs)
595+
return add(ename, __generic_code(), __generic_code(True), **kwargs)
571596
codecs.add_map = add_map
572597

573598

@@ -651,17 +676,15 @@ def list_encodings(*categories):
651676
if (len(categories) == 0 or "native" in categories) and "native" not in exclude:
652677
for a in set(ALIASES.values()):
653678
try:
654-
__orig_lookup(a)
679+
ci = __orig_lookup(a)
655680
except LookupError:
656681
continue
657-
enc.append(a)
658-
for search_function in __codecs_registry:
682+
if lookup(a) is ci:
683+
enc.append(ci.name)
684+
for search_function in CODECS_OVERWRITTEN + __codecs_registry:
659685
name = search_function.__name__.replace("_", "-")
660686
p = search_function.__pattern__
661-
if p is None:
662-
ci = search_function(name)
663-
else:
664-
ci = search_function(generate_string_from_regex(p))
687+
ci = search_function(name) if p is None else search_function(generate_string_from_regex(p))
665688
c = "other" if ci is None else ci.parameters['category']
666689
if (len(categories) == 0 or c in categories) and c not in exclude:
667690
enc.append(name)
@@ -834,8 +857,9 @@ def _handle_error(token, position, output=""):
834857
__orig_register = _codecs.register
835858

836859

837-
def __add(ename, encode=None, decode=None, pattern=None, text=True, add_to_codecs=True):
838-
add(ename, encode, decode, pattern, text, add_to_codecs)
860+
def __add(ename, encode=None, decode=None, pattern=None, text=True, **kwargs):
861+
kwargs.pop('add_to_codecs', None)
862+
return add(ename, encode, decode, pattern, text, True, **kwargs)
839863
__add.__doc__ = add.__doc__
840864
codecs.add = __add
841865

@@ -862,19 +886,19 @@ def encode(obj, encoding='utf-8', errors='strict'):
862886
def lookup(encoding, macro=True):
863887
""" Hooked lookup function for searching first for codecs in the local registry of this module. """
864888
# first, try to match the given encoding with codecs' search functions
865-
for search_function in __codecs_registry:
889+
for search_function in CODECS_OVERWRITTEN + __codecs_registry:
866890
codecinfo = search_function(encoding)
867891
if codecinfo is not None:
868892
return codecinfo
869893
# then, if a codec name was given, generate an encoding name from its pattern and get the CodecInfo
870-
for search_function in __codecs_registry:
894+
for search_function in CODECS_OVERWRITTEN + __codecs_registry:
871895
if search_function.__name__.replace("_", "-") == encoding or \
872896
encoding in getattr(search_function, "__aliases__", []):
873897
codecinfo = search_function(generate_string_from_regex(search_function.__pattern__))
874898
if codecinfo is not None:
875899
return codecinfo
900+
# finally, get a CodecInfo with the original lookup function and refine it with a dictionary of parameters
876901
try:
877-
# finally, get a CodecInfo with the original lookup function and refine it with a dictionary of parameters
878902
ci = __orig_lookup(encoding)
879903
ci.parameters = {'category': "native", 'module': "codecs", 'name': ALIASES.get(ci.name, ci.name)}
880904
return ci
@@ -898,14 +922,19 @@ def register(search_function, add_to_codecs=False):
898922
to remove the codec later
899923
"""
900924
if search_function not in __codecs_registry:
901-
__codecs_registry.append(search_function)
925+
try:
926+
__orig_lookup(search_function.__name__)
927+
l = CODECS_OVERWRITTEN
928+
except LookupError:
929+
l = __codecs_registry
930+
l.append(search_function)
902931
if add_to_codecs:
903932
__orig_register(search_function)
904933

905934

906-
def __register(search_function, add_to_codecs=True):
935+
def __register(search_function):
907936
""" Same as register(...), but with add_to_codecs set by default to True. """
908-
register(search_function, add_to_codecs)
937+
register(search_function, True)
909938
codecs.register = __register
910939

911940

codext/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
44
"""
55
from __future__ import print_function
6+
from _codecs import lookup as orig_lookup
67
from ast import literal_eval
78
from six import binary_type, text_type
89

@@ -26,6 +27,12 @@
2627
reset()
2728

2829

30+
# overwritten native codec
31+
add("uu", lambda i, e="strict": orig_lookup("uu").encode(b(i), e),
32+
lambda i, e="strict": orig_lookup("uu").decode(b(i), e),
33+
pattern=r"^uu(?:[-_]encode|codec)?$", add_to_codecs=True, category="native")
34+
35+
2936
def __literal_eval(o):
3037
""" Non-failing ast.literal_eval alias function. """
3138
try:

tests/test_common.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import json
99
import random
1010
import sys
11-
from codext.__common__ import PERS_MACROS, PERS_MACROS_FILE
11+
from codext.__common__ import CODECS_OVERWRITTEN, PERS_MACROS, PERS_MACROS_FILE
1212
from six import b, binary_type, text_type
1313
from unittest import TestCase
1414

@@ -56,25 +56,25 @@ def test_add_codec(self):
5656
self.assertRaises(ValueError, codext.add, "test")
5757
self.assertRaises(ValueError, codext.add, "test", "BAD")
5858
self.assertRaises(ValueError, codext.add, "test", lambda: None, "BAD")
59-
self.assertIsNone(codext.add("dummy", dummy_encode, dummy_decode))
59+
self.assertIsNotNone(codext.add("dummy", dummy_encode, dummy_decode))
6060
self.assertEqual(codext.encode("test", "dummy"), "test")
6161
ci = codext.lookup("dummy")
6262
for k in ["add_to_codecs", "category", "examples", "name", "pattern", "text"]:
6363
self.assertIn(k, ci.parameters.keys())
64-
self.assertIsNone(codext.add("dummy_errored", None, dummy_errored_decode, r"dummy_errored(\d+)$"))
64+
self.assertIsNotNone(codext.add("dummy_errored", None, dummy_errored_decode, r"dummy_errored(\d+)$"))
6565
self.assertRaises(AttributeError, codext.lookup, "dummy_errored1")
6666

6767
def test_add_map_codec(self):
6868
ENCMAP = [{'a': "A", 'b': "B", 'c': "C"}, {'d': "D", 'e': "E", 'f': "F"}, {'g': "G", 'h': "H", 'i': "I"}]
69-
self.assertIsNone(codext.add_map("dummy2", ENCMAP, pattern=r"^dummy2(?:[-_]?(\d))?$"))
69+
self.assertIsNotNone(codext.add_map("dummy2", ENCMAP, pattern=r"^dummy2(?:[-_]?(\d))?$"))
7070
self.assertRaises(ValueError, codext.add_map, "dummy2", "BAD_ENCMAP")
7171
self.assertEqual(codext.encode("abc", "dummy2"), "ABC")
7272
self.assertEqual(codext.encode("abc", "dummy2-1"), "ABC")
7373
self.assertEqual(codext.encode("def", "dummy2-2"), "DEF")
7474
self.assertEqual(codext.encode("ghi", "dummy2-3"), "GHI")
7575
self.assertRaises(LookupError, codext.encode, "test", "dummy2-4")
7676
ENCMAP = {'': {'a': "A", 'b': "B"}, r'bad': {'a': "B", 'b': "A"}}
77-
self.assertIsNone(codext.add_map("dummy3", ENCMAP, pattern=r"^dummy3([-_]inverted)?$"))
77+
self.assertIsNotNone(codext.add_map("dummy3", ENCMAP, pattern=r"^dummy3([-_]inverted)?$"))
7878
self.assertRaises(LookupError, codext.encode, "test", "dummy3_inverted")
7979
self.assertRaises(ValueError, codext.add_map, "dummy2", ENCMAP, ignore_case="BAD")
8080
self.assertRaises(ValueError, codext.add_map, "dummy2", ENCMAP, intype="BAD")
@@ -98,13 +98,13 @@ def test_list_codecs(self):
9898
self.assertFalse(codext.is_native("base64"))
9999

100100
def test_remove_codec(self):
101-
self.assertIsNone(codext.add("dummy", dummy_encode, dummy_decode))
101+
self.assertIsNotNone(codext.add("dummy", dummy_encode, dummy_decode))
102102
self.assertEqual(codext.encode("test", "dummy"), "test")
103103
self.assertIsNone(codext.remove("dummy"))
104104
self.assertRaises(LookupError, codext.encode, "test", "dummy")
105105
# special case, when adding a new codec also to the native codecs registry, then it won't be possible to remove
106106
# it afterwards
107-
self.assertIsNone(codecs.add("dummy2", dummy_encode, dummy_decode))
107+
self.assertIsNotNone(codecs.add("dummy2", dummy_encode, dummy_decode))
108108
self.assertEqual(codecs.encode("test", "dummy2"), "test")
109109
self.assertIsNone(codecs.remove("dummy2"))
110110
self.assertEqual(codecs.encode("test", "dummy2"), "test")
@@ -122,6 +122,7 @@ def test_reset_codecs(self):
122122
self.assertIsNone(codext.reset())
123123
self.assertIsNotNone(codext.encode("test", "morse"))
124124
self.assertRaises(LookupError, codext.encode, "test", "dummy")
125+
self.assertTrue(len(CODECS_OVERWRITTEN) > 0)
125126

126127
def test_search_codecs(self):
127128
self.assertIsNotNone(codext.search("morse"))

0 commit comments

Comments
 (0)