Merge branch 'fix-625'

thomasballinger · thomasballinger · commit a60f28d1ab79 · 2016-09-02T17:50:32.000-04:00
diff --git a/bpython/inspection.py b/bpython/inspection.py
@@ -283,24 +283,33 @@ def is_callable(obj):
     return callable(obj)
 
 
-get_encoding_re = LazyReCompile(r'coding[:=]\s*([-\w.]+)')
+get_encoding_line_re = LazyReCompile(r'^.*coding[:=]\s*([-\w.]+).*$')
 
 
 def get_encoding(obj):
     """Try to obtain encoding information of the source of an object."""
     for line in inspect.findsource(obj)[0][:2]:
-        m = get_encoding_re.search(line)
+        m = get_encoding_line_re.search(line)
         if m:
             return m.group(1)
     return 'ascii'
 
 
+def get_encoding_comment(source):
+    """Returns encoding line without the newline, or None if not found"""
+    for line in source.splitlines()[:2]:  # only the first two lines count
+        m = get_encoding_line_re.search(line)
+        if m:
+            return m.group(0)  # the pattern is anchored, so the whole line
+    return None
+
+
 def get_encoding_file(fname):
     """Try to obtain encoding information from a Python source file."""
     with io.open(fname, 'rt', encoding='ascii', errors='ignore') as f:
         for unused in range(2):
             line = f.readline()
-            match = get_encoding_re.search(line)
+            match = get_encoding_line_re.search(line)
             if match:
                 return match.group(1)
     return 'ascii'
diff --git a/bpython/repl.py b/bpython/repl.py
@@ -85,7 +85,13 @@ def __init__(self, locals=None, encoding=None):
         necessarily must be with the current factoring) and then an exception
         callback can be added to the Interpreter instance afterwards - more
         specifically, this is so that autoindentation does not occur after a
-        traceback."""
+        traceback.
+
+        encoding is only used in Python 2, where it may be necessary to add an
+        encoding comment to a source bytestring before running it.
+        encoding must be a bytestring in Python 2 because it will be templated
+        into a bytestring source as part of an encoding comment.
+        """
 
         self.encoding = encoding or sys.getdefaultencoding()
         self.syntaxerror_callback = None
@@ -98,15 +104,55 @@ def reset_running_time(self):
         self.running_time = 0
 
     def runsource(self, source, filename=None, symbol='single',
-                  encode=True):
+                  encode='auto'):
         """Execute Python code.
 
         source, filename and symbol are passed on to
-        code.InteractiveInterpreter.runsource. If encode is True, the source
-        will be encoded. On Python 3.X, encode will be ignored."""
-        if not py3 and encode:
-            source = u'# coding: %s\n\n%s' % (self.encoding, source)
-            source = source.encode(self.encoding)
+        code.InteractiveInterpreter.runsource. If encode is True,
+        an encoding comment will be added to the source.
+        On Python 3.X, encode will be ignored.
+
+        encode should only be used for interactive interpreter input,
+        files should always already have an encoding comment or be ASCII.
+        By default an encoding line will be added if no filename is given.
+
+        In Python 3, source must be a unicode string
+        In Python 2, source may be latin-1 bytestring or unicode string,
+        following the interface of code.InteractiveInterpreter.
+
+        Because adding an encoding comment to a unicode string in Python 2
+        would cause a syntax error to be thrown which would reference code
+        the user did not write, setting encode to True when source is a
+        unicode string in Python 2 will raise a ValueError."""
+        # str means bytestring in Py2
+        if encode and not py3 and isinstance(source, unicode):
+            if encode != 'auto':
+                raise ValueError("can't add encoding line to unicode input")
+            encode = False
+        if encode and filename is not None:
+            # files have encoding comments or implicit encoding of ASCII
+            if encode != 'auto':
+                raise ValueError("shouldn't add encoding line to file contents")
+            encode = False
+
+        if encode and not py3 and isinstance(source, str):
+            # encoding makes sense for bytestrings, so long as there
+            # isn't already an encoding comment
+            comment = inspection.get_encoding_comment(source)
+            if comment:
+                # keep the existing encoding comment, but add two lines
+                # because this interp always adds 2 to stack trace line
+                # numbers in Python 2
+                source = source.replace(comment, b'%s\n\n' % comment, 1)
+            else:
+                source = b'# coding: %s\n\n%s' % (self.encoding, source)
+        elif not py3 and filename is None:
+            # 2 blank lines still need to be added
+            # because this interpreter always adds 2 to stack trace line
+            # numbers in Python 2 when the filename is "<input>"
+            newlines = u'\n\n' if isinstance(source, unicode) else b'\n\n'
+            source = newlines + source
+            # we know we're in Python 2 here, so ok to reference unicode
         if filename is None:
             filename = filename_for_console_input(source)
         with self.timer:
@@ -132,11 +178,11 @@ def showsyntaxerror(self, filename=None):
                 pass
             else:
                 # Stuff in the right filename and right lineno
-                if not py3:
-                    lineno -= 2
                 # strip linecache line number
                 if re.match(r'<bpython-input-\d+>', filename):
                     filename = '<input>'
+                if filename == '<input>' and not py3:
+                    lineno -= 2
                 value = SyntaxError(msg, (filename, lineno, offset, line))
                 sys.last_value = value
         list = traceback.format_exception_only(type, value)
@@ -160,8 +206,7 @@ def showtraceback(self):
                     fname = '<input>'
                     tblist[i] = (fname, lineno, module, something)
                 # Set the right lineno (encoding header adds an extra line)
-                if not py3:
-                    if fname == '<input>':
+                if fname == '<input>' and not py3:
                         tblist[i] = (fname, lineno - 2, module, something)
 
             l = traceback.format_list(tblist)
diff --git a/bpython/test/test_args.py b/bpython/test/test_args.py
@@ -1,15 +1,13 @@
+# encoding: utf-8
+
+import re
 import subprocess
 import sys
 import tempfile
 from textwrap import dedent
 
 from bpython import args
-from bpython.test import FixLanguageTestCase as TestCase
-
-try:
-    import unittest2 as unittest
-except ImportError:
-    import unittest
+from bpython.test import (FixLanguageTestCase as TestCase, unittest)
 
 try:
     from nose.plugins.attrib import attr
@@ -39,6 +37,42 @@ def test_exec_dunder_file(self):
 
             self.assertEquals(stderr.strip(), f.name)
 
+    def test_exec_nonascii_file(self):
+        with tempfile.NamedTemporaryFile(mode="w") as f:
+            f.write(dedent('''\
+                #!/usr/bin/env python2
+                # coding: utf-8
+                "你好 # nonascii"
+                '''))
+            f.flush()
+            try:
+                # sys.executable: test this interpreter, not PATH's 'python'
+                subprocess.check_call(
+                    [sys.executable, '-m', 'bpython.curtsies', f.name])
+            except subprocess.CalledProcessError:
+                self.fail('Error running module with nonascii characters')
+
+    def test_exec_nonascii_file_linenums(self):
+        with tempfile.NamedTemporaryFile(mode="w") as f:
+            f.write(dedent("""\
+                #!/usr/bin/env python2
+                # coding: utf-8
+                1/0
+                """))
+            f.flush()
+            p = subprocess.Popen(
+                [sys.executable, "-m", "bpython.curtsies",
+                    f.name],
+                stderr=subprocess.PIPE,
+                universal_newlines=True)
+            (_, stderr) = p.communicate()
+            # the failing 1/0 is line 3 of the file written above
+            self.assertIn('line 3', clean_colors(stderr))
+
+
+def clean_colors(s):  # drop each ESC-to-'m' run (terminal color codes)
+    return re.sub(r'\x1b[^m]*m', '', s)
+
 
 class TestParse(TestCase):
 
diff --git a/bpython/test/test_interpreter.py b/bpython/test/test_interpreter.py