Skip to content

better support utf-8 #979

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion winpython/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@
OTHER DEALINGS IN THE SOFTWARE.
"""

__version__ = '4.2.20210422'
__version__ = '4.3.20210508'
__license__ = __doc__
__project_url__ = 'http://winpython.github.io/'
4 changes: 4 additions & 0 deletions winpython/data/packages.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3082,3 +3082,7 @@ description = A GridStack template for Voila.
[deprecation]
description = A library to handle automated deprecations

[matplotlib-inline]
description = Inline Matplotlib backend for Jupyter


34 changes: 28 additions & 6 deletions winpython/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@
import sys
import stat
import locale
import io

# Local imports
from winpython.py3compat import winreg

def get_python_executable(path = None):
"""return the python executable"""
my_path = sys.executable if path == None else path # default = current one
my_path = path if osp.isdir(path) else osp.dirname(path)
my_path = my_path if osp.isdir(my_path) else osp.dirname(my_path)
exec_py = os.path.join(path, 'python.exe')
exec_pypy = os.path.join(path, 'pypy3.exe') # PyPy !
python_executable = exec_pypy if osp.isfile(exec_pypy) else exec_py
Expand All @@ -40,7 +41,7 @@ def get_python_executable(path = None):
def get_site_packages_path(path = None):
"""return the python site-packages"""
my_path = sys.executable if path == None else path # default = current one
my_path = path if osp.isdir(path) else osp.dirname(path)
my_path = my_path if osp.isdir(my_path) else osp.dirname(my_path)
site_py = os.path.join(path, 'Lib', 'site-packages')
site_pypy = os.path.join(path, 'site-packages') # PyPy !!
site_packages_path = site_pypy if osp.isfile(site_pypy) else site_py
Expand Down Expand Up @@ -501,6 +502,25 @@ def patch_shebang_line_py(
print(line, end='')


# =============================================================================
# Guess encoding (shall rather be utf-8 per default)
# =============================================================================
def guess_encoding(csv_file):
    """Guess the text encoding of the given file.

    Returns a list of candidate encoding names, most likely first:

    * ``["utf-8-sig"]`` when the file starts with the UTF-8 byte order mark;
    * ``["utf-8"]`` when the beginning of the file decodes cleanly as UTF-8;
    * ``[<locale preferred encoding>, "utf-8"]`` otherwise.

    Errors while opening or reading the file (e.g. a missing file) are
    propagated to the caller instead of being turned into a guess.
    """
    # UTF-16 is deliberately not detected: Python behavior on UTF-16 is not
    # great on write, so we drop it.
    with io.open(csv_file, "rb") as f:
        header = f.read(5)

    # UTF-8 with a "BOM" (normally there is no BOM in utf-8)
    if header.startswith(b"\xEF\xBB\xBF"):
        return ["utf-8-sig"]

    # On Windows, assuming utf-8 doesn't always work, so we have to try:
    # decode a large preview and see whether it raises.
    try:
        with io.open(csv_file, encoding="utf-8") as f:
            f.read(222222)
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to the locale encoding first.
        # getpreferredencoding() always returns a string, unlike the
        # deprecated getdefaultlocale() whose encoding part may be None.
        return [locale.getpreferredencoding(False), "utf-8"]
    return ["utf-8"]

# =============================================================================
# Patch sourcefile (instead of forking packages)
# =============================================================================
Expand All @@ -511,7 +531,8 @@ def patch_sourcefile(
import io

if osp.isfile(fname) and not in_text == out_text:
with io.open(fname, 'r') as fh:
the_encoding = guess_encoding(fname)[0]
with io.open(fname, 'r', encoding=the_encoding) as fh:
content = fh.read()
new_content = content.replace(in_text, out_text)
if not new_content == content:
Expand All @@ -524,7 +545,7 @@ def patch_sourcefile(
"to",
out_text,
)
with io.open(fname, 'wt') as fh:
with io.open(fname, 'wt', encoding=the_encoding) as fh:
fh.write(new_content)


Expand All @@ -543,7 +564,8 @@ def patch_sourcelines(
import os.path as osp

if osp.isfile(fname):
with io.open(fname, 'r') as fh:
the_encoding = guess_encoding(fname)[0]
with io.open(fname, 'r', encoding=the_encoding) as fh:
contents = fh.readlines()
content = "".join(contents)
for l in range(len(contents)):
Expand Down Expand Up @@ -575,7 +597,7 @@ def patch_sourcelines(
if not new_content == content:
# if not silent_mode:
# print("patching ", fname, "from", content, "to", new_content)
with io.open(fname, 'wt') as fh:
with io.open(fname, 'wt', encoding=the_encoding) as fh:
try:
fh.write(new_content)
except:
Expand Down