24
24
import sys
25
25
import stat
26
26
import locale
27
+ import io
27
28
28
29
# Local imports
29
30
from winpython .py3compat import winreg
30
31
31
32
def get_python_executable (path = None ):
32
33
"""return the python executable"""
33
34
my_path = sys .executable if path == None else path # default = current one
34
- my_path = path if osp .isdir (path ) else osp .dirname (path )
35
+ my_path = my_path if osp .isdir (my_path ) else osp .dirname (my_path )
35
36
exec_py = os .path .join (path , 'python.exe' )
36
37
exec_pypy = os .path .join (path , 'pypy3.exe' ) # PyPy !
37
38
python_executable = exec_pypy if osp .isfile (exec_pypy ) else exec_py
@@ -40,7 +41,7 @@ def get_python_executable(path = None):
40
41
def get_site_packages_path (path = None ):
41
42
"""return the python site-packages"""
42
43
my_path = sys .executable if path == None else path # default = current one
43
- my_path = path if osp .isdir (path ) else osp .dirname (path )
44
+ my_path = my_path if osp .isdir (my_path ) else osp .dirname (my_path )
44
45
site_py = os .path .join (path , 'Lib' , 'site-packages' )
45
46
site_pypy = os .path .join (path , 'site-packages' ) # PyPy !!
46
47
site_packages_path = site_pypy if osp .isfile (site_pypy ) else site_py
@@ -501,6 +502,25 @@ def patch_shebang_line_py(
501
502
print (line , end = '' )
502
503
503
504
505
+ # =============================================================================
506
+ # Guess file encoding (UTF-8 is the preferred default)
507
+ # =============================================================================
508
def guess_encoding(csv_file):
    """Guess the text encoding of the given file.

    Detection is attempted in this order:

    1. If the file starts with the UTF-8 byte-order mark, it is reported
       as ``"utf-8-sig"``.
    2. Otherwise the beginning of the file is decoded as UTF-8; if that
       succeeds, it is reported as ``"utf-8"``.
    3. Otherwise the locale's default encoding is proposed first, with
       ``"utf-8"`` as a second candidate.

    UTF-16 is deliberately not detected (writing it back is unreliable).

    Args:
        csv_file: Path of the file to inspect.

    Returns:
        A list of candidate encoding names, best guess first. Callers in
        this module use element ``[0]``. In the fallback case the first
        element is whatever ``locale.getdefaultlocale()[1]`` returns,
        which may be ``None``.

    Raises:
        OSError: If the file cannot be opened for the initial binary read.
    """
    # A few raw bytes are enough to spot a UTF-8 BOM (EF BB BF).
    with io.open(csv_file, "rb") as f:
        data = f.read(5)
    if data.startswith(b"\xef\xbb\xbf"):
        # UTF-8 with a BOM (unusual, but some Windows tools write it)
        return ["utf-8-sig"]

    # On Windows the default encoding is not UTF-8, so the only way to know
    # is to try: decode a sizeable preview and see whether it fails.
    try:
        with io.open(csv_file, encoding="utf-8") as f:
            f.read(222222)
    except UnicodeError:
        # Not valid UTF-8: propose the locale encoding first, UTF-8 second.
        return [locale.getdefaultlocale()[1], "utf-8"]
    return ["utf-8"]
523
+
504
524
# =============================================================================
505
525
# Patch sourcefile (instead of forking packages)
506
526
# =============================================================================
@@ -511,7 +531,8 @@ def patch_sourcefile(
511
531
import io
512
532
513
533
if osp .isfile (fname ) and not in_text == out_text :
514
- with io .open (fname , 'r' ) as fh :
534
+ the_encoding = guess_encoding (fname )[0 ]
535
+ with io .open (fname , 'r' , encoding = the_encoding ) as fh :
515
536
content = fh .read ()
516
537
new_content = content .replace (in_text , out_text )
517
538
if not new_content == content :
@@ -524,7 +545,7 @@ def patch_sourcefile(
524
545
"to" ,
525
546
out_text ,
526
547
)
527
- with io .open (fname , 'wt' ) as fh :
548
+ with io .open (fname , 'wt' , encoding = the_encoding ) as fh :
528
549
fh .write (new_content )
529
550
530
551
@@ -543,7 +564,8 @@ def patch_sourcelines(
543
564
import os .path as osp
544
565
545
566
if osp .isfile (fname ):
546
- with io .open (fname , 'r' ) as fh :
567
+ the_encoding = guess_encoding (fname )[0 ]
568
+ with io .open (fname , 'r' , encoding = the_encoding ) as fh :
547
569
contents = fh .readlines ()
548
570
content = "" .join (contents )
549
571
for l in range (len (contents )):
@@ -575,7 +597,7 @@ def patch_sourcelines(
575
597
if not new_content == content :
576
598
# if not silent_mode:
577
599
# print("patching ", fname, "from", content, "to", new_content)
578
- with io .open (fname , 'wt' ) as fh :
600
+ with io .open (fname , 'wt' , encoding = the_encoding ) as fh :
579
601
try :
580
602
fh .write (new_content )
581
603
except :
0 commit comments