fix Python 2 running encoded source files

thomasballinger · thomasballinger · commit dfb44cf97abd · 2016-07-28T21:53:23.000-04:00
diff --git a/bpython/inspection.py b/bpython/inspection.py
@@ -284,6 +284,7 @@ def is_callable(obj):
 
 
 get_encoding_re = LazyReCompile(r'coding[:=]\s*([-\w.]+)')
+get_encoding_line_re = LazyReCompile(r'^.*coding[:=]\s*[-\w.]+.*$')
 
 
 def get_encoding(obj):
@@ -295,6 +296,15 @@ def get_encoding(obj):
     return 'ascii'
 
 
+def get_encoding_comment(source):
+    """Returns encoding line without the newline, or None if not found"""
+    for line in source.splitlines()[:2]:
+        m = get_encoding_line_re.search(line)
+        if m:
+            return m.group(0)
+    return None
+
+
 def get_encoding_file(fname):
     """Try to obtain encoding information from a Python source file."""
     with io.open(fname, 'rt', encoding='ascii', errors='ignore') as f:
diff --git a/bpython/repl.py b/bpython/repl.py
@@ -109,10 +109,30 @@ def runsource(self, source, filename=None, symbol='single',
 
         source, filename and symbol are passed on to
         code.InteractiveInterpreter.runsource. If encode is True, the source
-        will be encoded. On Python 3.X, encode will be ignored."""
-        if not py3 and encode:
-            source = u'# coding: %s\n\n%s' % (self.encoding, source)
-            source = source.encode(self.encoding)
+        will be encoded. On Python 3.X, encode will be ignored.
+
+        encode doesn't encode the source; it just adds an encoding comment
+        that specifies the encoding of the source.
+        encode should only be used for interactive interpreter input;
+        files should always have an encoding comment or be ASCII.
+
+        In Python 3, source must be a unicode string.
+        In Python 2, source may be a latin-1 bytestring or a unicode string,
+        following the interface of code.InteractiveInterpreter."""
+        if encode and not py3:
+            if isinstance(source, str):
+                # encoding only makes sense for bytestrings
+                assert isinstance(source, str)
+                source = b'# coding: %s\n\n%s' % (self.encoding, source)
+            else:
+                # 2 blank lines still need to be added because this
+                # interpreter always adds 2 lines to stack trace line
+                # numbers in Python 2
+                comment = inspection.get_encoding_comment(source)
+                if comment:
+                    source = source.replace(comment, u'%s\n\n' % comment, 1)
+                else:
+                    source = u'\n\n' + source
         if filename is None:
             filename = filename_for_console_input(source)
         with self.timer:
@@ -138,11 +158,11 @@ def showsyntaxerror(self, filename=None):
                 pass
             else:
                 # Stuff in the right filename and right lineno
-                if not py3:
-                    lineno -= 2
                 # strip linecache line number
                 if re.match(r'<bpython-input-\d+>', filename):
                     filename = '<input>'
+                if filename == '<input>' and not py3:
+                    lineno -= 2
                 value = SyntaxError(msg, (filename, lineno, offset, line))
                 sys.last_value = value
         list = traceback.format_exception_only(type, value)
@@ -166,8 +186,7 @@ def showtraceback(self):
                     fname = '<input>'
                     tblist[i] = (fname, lineno, module, something)
                 # Set the right lineno (encoding header adds an extra line)
-                if not py3:
-                    if fname == '<input>':
+                if fname == '<input>' and not py3:
                         tblist[i] = (fname, lineno - 2, module, something)
 
             l = traceback.format_list(tblist)
diff --git a/bpython/test/test_interpreter.py b/bpython/test/test_interpreter.py
@@ -50,7 +50,7 @@ def append_to_a(message):
         i.write = append_to_a
 
         def f():
-            return 1/0
+            return 1 / 0
 
         def g():
             return f()
@@ -73,7 +73,7 @@ def g():
 
     @unittest.skipIf(py3, "runsource() accepts only unicode in Python 3")
     def test_runsource_bytes(self):
-        i = interpreter.Interp(encoding='latin-1')
+        i = interpreter.Interp(encoding=b'latin-1')
 
         i.runsource("a = b'\xfe'".encode('latin-1'), encode=False)
         self.assertIsInstance(i.locals['a'], str)
@@ -85,23 +85,23 @@ def test_runsource_bytes(self):
 
     @unittest.skipUnless(py3, "Only a syntax error in Python 3")
     def test_runsource_bytes_over_128_syntax_error_py3(self):
-        i = interpreter.Interp(encoding='latin-1')
+        i = interpreter.Interp(encoding=b'latin-1')
         i.showsyntaxerror = mock.Mock(return_value=None)
 
         i.runsource("a = b'\xfe'", encode=True)
         i.showsyntaxerror.assert_called_with(mock.ANY)
 
     @unittest.skipIf(py3, "encode is Python 2 only")
     def test_runsource_bytes_over_128_syntax_error_py2(self):
-        i = interpreter.Interp(encoding='latin-1')
+        i = interpreter.Interp(encoding=b'latin-1')
 
-        i.runsource("a = b'\xfe'", encode=True)
+        i.runsource(b"a = b'\xfe'", encode=True)
         self.assertIsInstance(i.locals['a'], type(b''))
         self.assertEqual(i.locals['a'], b"\xfe")
 
     @unittest.skipIf(py3, "encode is Python 2 only")
     def test_runsource_unicode(self):
-        i = interpreter.Interp(encoding='latin-1')
+        i = interpreter.Interp(encoding=b'latin-1')
 
         i.runsource("a = u'\xfe'", encode=True)
         self.assertIsInstance(i.locals['a'], type(u''))