Skip to content

Commit 42bdbec

Browse files
committed
better support utf-8
1 parent 74f4d3f commit 42bdbec

File tree

3 files changed

+33
-7
lines changed

3 files changed

+33
-7
lines changed

winpython/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,6 @@
2828
OTHER DEALINGS IN THE SOFTWARE.
2929
"""
3030

31-
__version__ = '4.2.20210422'
31+
__version__ = '4.3.20210508'
3232
__license__ = __doc__
3333
__project_url__ = 'http://winpython.github.io/'

winpython/data/packages.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3082,3 +3082,7 @@ description = A GridStack template for Voila.
30823082
[deprecation]
30833083
description = A library to handle automated deprecations
30843084
3085+
[matplotlib-inline]
3086+
description = Inline Matplotlib backend for Jupyter
3087+
3088+

winpython/utils.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@
2424
import sys
2525
import stat
2626
import locale
27+
import io
2728

2829
# Local imports
2930
from winpython.py3compat import winreg
3031

3132
def get_python_executable(path = None):
3233
"""return the python executable"""
3334
my_path = sys.executable if path == None else path # default = current one
34-
my_path = path if osp.isdir(path) else osp.dirname(path)
35+
my_path = my_path if osp.isdir(my_path) else osp.dirname(my_path)
3536
exec_py = os.path.join(path, 'python.exe')
3637
exec_pypy = os.path.join(path, 'pypy3.exe') # PyPy !
3738
python_executable = exec_pypy if osp.isfile(exec_pypy) else exec_py
@@ -40,7 +41,7 @@ def get_python_executable(path = None):
4041
def get_site_packages_path(path = None):
4142
"""return the python site-packages"""
4243
my_path = sys.executable if path == None else path # default = current one
43-
my_path = path if osp.isdir(path) else osp.dirname(path)
44+
my_path = my_path if osp.isdir(my_path) else osp.dirname(my_path)
4445
site_py = os.path.join(path, 'Lib', 'site-packages')
4546
site_pypy = os.path.join(path, 'site-packages') # PyPy !!
4647
site_packages_path = site_pypy if osp.isfile(site_pypy) else site_py
@@ -501,6 +502,25 @@ def patch_shebang_line_py(
501502
print(line, end='')
502503

503504

505+
# =============================================================================
506+
# Guess encoding (should preferably be utf-8 by default)
507+
# =============================================================================
508+
def guess_encoding(csv_file):
    """Guess the text encoding of the given file.

    Returns a list of candidate encoding names, most likely first:

    * ``["utf-8-sig"]`` when the file starts with the UTF-8 byte order mark;
    * ``["utf-8"]`` when the beginning of the file decodes as UTF-8;
    * ``[locale.getdefaultlocale()[1], "utf-8"]`` otherwise.

    Only the first 222222 characters are test-decoded, so invalid bytes
    located further in a large file are not detected here.

    Raises ``OSError`` if the file cannot be opened.
    """
    # UTF-16 is deliberately not detected: Python behavior on UTF-16 is
    # not great on write, so we drop it.
    with io.open(csv_file, "rb") as f:
        # UTF-8 with a "BOM" (normally there is no BOM in utf-8)
        if f.read(3) == b"\xEF\xBB\xBF":
            return ["utf-8-sig"]
    # In Windows, blindly assuming utf-8 doesn't work, so we have to try.
    try:
        with io.open(csv_file, encoding="utf-8") as f:
            f.read(222222)  # result unused: we only care whether it decodes
    except UnicodeError:
        # Not valid UTF-8: propose the locale encoding first, utf-8 as a
        # last resort. Only decoding failures are caught, so interrupts and
        # unrelated errors are no longer silently swallowed.
        return [locale.getdefaultlocale()[1], "utf-8"]
    return ["utf-8"]
523+
504524
# =============================================================================
505525
# Patch sourcefile (instead of forking packages)
506526
# =============================================================================
@@ -511,7 +531,8 @@ def patch_sourcefile(
511531
import io
512532

513533
if osp.isfile(fname) and not in_text == out_text:
514-
with io.open(fname, 'r') as fh:
534+
the_encoding = guess_encoding(fname)[0]
535+
with io.open(fname, 'r', encoding=the_encoding) as fh:
515536
content = fh.read()
516537
new_content = content.replace(in_text, out_text)
517538
if not new_content == content:
@@ -524,7 +545,7 @@ def patch_sourcefile(
524545
"to",
525546
out_text,
526547
)
527-
with io.open(fname, 'wt') as fh:
548+
with io.open(fname, 'wt', encoding=the_encoding) as fh:
528549
fh.write(new_content)
529550

530551

@@ -543,7 +564,8 @@ def patch_sourcelines(
543564
import os.path as osp
544565

545566
if osp.isfile(fname):
546-
with io.open(fname, 'r') as fh:
567+
the_encoding = guess_encoding(fname)[0]
568+
with io.open(fname, 'r', encoding=the_encoding) as fh:
547569
contents = fh.readlines()
548570
content = "".join(contents)
549571
for l in range(len(contents)):
@@ -575,7 +597,7 @@ def patch_sourcelines(
575597
if not new_content == content:
576598
# if not silent_mode:
577599
# print("patching ", fname, "from", content, "to", new_content)
578-
with io.open(fname, 'wt') as fh:
600+
with io.open(fname, 'wt', encoding=the_encoding) as fh:
579601
try:
580602
fh.write(new_content)
581603
except:

0 commit comments

Comments
 (0)