add auto default to runsource

thomasballinger · thomasballinger · commit a2a934cc2b4f · 2016-08-04T08:58:59.000-04:00
diff --git a/bpython/repl.py b/bpython/repl.py
@@ -118,23 +118,41 @@ def runsource(self, source, filename=None, symbol='single',
 
         In Python 3, source must be a unicode string
         In Python 2, source may be latin-1 bytestring or unicode string,
-        following the interface of code.InteractiveInterpreter"""
-        if encode == 'auto':
-            encode = filename is None
-        if encode and not py3:
-            if isinstance(source, str):
-                # encoding only makes sense for bytestrings
-                assert isinstance(source, str)
-                source = b'# coding: %s\n\n%s' % (self.encoding, source)
-            else:
-                # 2 blank lines still need to be added because this
-                # interpreter always adds 2 lines to stack trace line
+        following the interface of code.InteractiveInterpreter.
+
+        Because adding an encoding comment to a unicode string in Python 2
+        would cause a syntax error to be thrown which would reference code
+        the user did not write, setting encode to True when source is a
+        unicode string in Python 2 will raise a ValueError."""
+        # str means bytestring in Py2
+        if encode and not py3 and isinstance(source, unicode):
+            if encode != 'auto':
+                raise ValueError("can't add encoding line to unicode input")
+            encode = False
+        if encode and filename is not None:
+            # files have encoding comments or implicit encoding of ASCII
+            if encode != 'auto':
+                raise ValueError("shouldn't add encoding line to file contents")
+            encode = False
+
+        if encode and not py3 and isinstance(source, str):
+            # encoding makes sense for bytestrings, so long as there
+            # isn't already an encoding comment
+            comment = inspection.get_encoding_comment(source)
+            if comment:
+                # keep the existing encoding comment, but add two lines
+                # because this interp always adds 2 to stack trace line
                 # numbers in Python 2
-                comment = inspection.get_encoding_comment(source)
-                if comment:
-                    source = source.replace(comment, u'%s\n\n' % comment, 1)
-                else:
-                    source = u'\n\n' + source
+                source = source.replace(comment, b'%s\n\n' % comment, 1)
+            else:
+                source = b'# coding: %s\n\n%s' % (self.encoding, source)
+        elif not py3 and filename is None:
+            # 2 blank lines still need to be added
+            # because this interpreter always adds 2 to stack trace line
+            # numbers in Python 2 when the filename is "<input>"
+            newlines = u'\n\n' if isinstance(source, unicode) else b'\n\n'
+            source = newlines + source
+            # (this branch only runs in Python 2, so referencing unicode is ok)
         if filename is None:
             filename = filename_for_console_input(source)
         with self.timer:
diff --git a/bpython/test/test_interpreter.py b/bpython/test/test_interpreter.py
@@ -3,6 +3,8 @@
 from __future__ import unicode_literals
 
 import sys
+import re
+from textwrap import dedent
 
 from curtsies.fmtfuncs import bold, green, magenta, cyan, red, plain
 
@@ -13,15 +15,32 @@
 pypy = 'PyPy' in sys.version
 
 
+def remove_ansi(s):
+    return re.sub(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]'.encode('ascii'), b'', s)
+
+
 class TestInterpreter(unittest.TestCase):
-    def test_syntaxerror(self):
+    def interp_errlog(self):
         i = interpreter.Interp()
         a = []
+        i.write = a.append
+        return i, a
+
+    def err_lineno(self, a):
+        """Return N from the last 'line N,' in the strings of a, or None."""
+        strings = [x.__unicode__() for x in a]
+        # scan backwards so the most recently written match wins
+        for line in reversed(strings):
+            clean_line = remove_ansi(line)
+            m = re.search(r'line (\d+)[,]', clean_line)
+            if m:
+                return int(m.group(1))
+        # nothing written to a contained a 'line N,' reference
+        return None
 
-        def append_to_a(message):
-            a.append(message)
+    def test_syntaxerror(self):
+        i, a = self.interp_errlog()
 
-        i.write = append_to_a
         i.runsource('1.1.1.1')
 
         if pypy:
@@ -41,13 +60,7 @@ def append_to_a(message):
         self.assertEquals(plain('').join(a), expected)
 
     def test_traceback(self):
-        i = interpreter.Interp()
-        a = []
-
-        def append_to_a(message):
-            a.append(message)
-
-        i.write = append_to_a
+        i, a = self.interp_errlog()
 
         def f():
             return 1 / 0
@@ -88,22 +101,22 @@ def test_runsource_bytes_over_128_syntax_error_py3(self):
         i = interpreter.Interp(encoding=b'latin-1')
         i.showsyntaxerror = mock.Mock(return_value=None)
 
-        i.runsource("a = b'\xfe'", encode=True)
+        i.runsource("a = b'\xfe'")
         i.showsyntaxerror.assert_called_with(mock.ANY)
 
     @unittest.skipIf(py3, "encode is Python 2 only")
     def test_runsource_bytes_over_128_syntax_error_py2(self):
         i = interpreter.Interp(encoding=b'latin-1')
 
-        i.runsource(b"a = b'\xfe'", encode=True)
+        i.runsource(b"a = b'\xfe'")
         self.assertIsInstance(i.locals['a'], type(b''))
         self.assertEqual(i.locals['a'], b"\xfe")
 
     @unittest.skipIf(py3, "encode is Python 2 only")
     def test_runsource_unicode(self):
         i = interpreter.Interp(encoding=b'latin-1')
 
-        i.runsource("a = u'\xfe'", encode=True)
+        i.runsource("a = u'\xfe'")
         self.assertIsInstance(i.locals['a'], type(u''))
         self.assertEqual(i.locals['a'], u"\xfe")
 
@@ -114,3 +127,119 @@ def test_getsource_works_on_interactively_defined_functions(self):
         import inspect
         inspected_source = inspect.getsource(i.locals['foo'])
         self.assertEquals(inspected_source, source)
+
+    @unittest.skipIf(py3, "encode only does anything in Python 2")
+    def test_runsource_unicode_autoencode_and_noencode(self):
+        """unicode source: error line numbers should match the user's code"""
+
+        # Since correct behavior for unicode is the same
+        # for auto and False, run the same tests
+        for encode in ['auto', False]:
+            i, a = self.interp_errlog()
+            i.runsource(u'[1 + 1,\nabc]', encode=encode)
+            self.assertEqual(self.err_lineno(a), 2)
+            # same source run as file contents
+            i, a = self.interp_errlog()
+            i.runsource(u'[1 + 1,\nabc]', filename='x.py', encode=encode)
+            self.assertEqual(self.err_lineno(a), 2)
+            # console input that starts with an encoding comment
+            i, a = self.interp_errlog()
+            i.runsource(u'#encoding: utf-8\nabc', encode=encode)
+            self.assertEqual(self.err_lineno(a), 2)
+            # as file contents the encoding comment should be a SyntaxError
+            i, a = self.interp_errlog()
+            i.runsource(u'#encoding: utf-8\nabc',
+                        filename='x.py', encode=encode)
+            self.assertIn('SyntaxError: encoding',
+                          ''.join(remove_ansi(x.__unicode__()) for x in a))
+
+    @unittest.skipIf(py3, "encode only does anything in Python 2")
+    def test_runsource_unicode_encode(self):
+        i, _ = self.interp_errlog()
+        with self.assertRaises(ValueError):
+            i.runsource(u'1 + 1', encode=True)
+
+        i, _ = self.interp_errlog()
+        with self.assertRaises(ValueError):
+            i.runsource(u'1 + 1', filename='x.py', encode=True)
+
+    @unittest.skipIf(py3, "encode only does anything in Python 2")
+    def test_runsource_bytestring_noencode(self):
+        i, a = self.interp_errlog()
+        i.runsource(b'[1 + 1,\nabc]', encode=False)
+        self.assertEqual(self.err_lineno(a), 2)
+
+        i, a = self.interp_errlog()
+        i.runsource(b'[1 + 1,\nabc]', filename='x.py', encode=False)
+        self.assertEqual(self.err_lineno(a), 2)
+
+        i, a = self.interp_errlog()
+        i.runsource(dedent(b'''\
+                    #encoding: utf-8
+
+                    ["%s",
+                    abc]''' % (u'åß∂ƒ'.encode('utf8'),)), encode=False)
+        self.assertEqual(self.err_lineno(a), 4)
+
+        i, a = self.interp_errlog()
+        i.runsource(dedent(b'''\
+                    #encoding: utf-8
+
+                    ["%s",
+                    abc]''' % (u'åß∂ƒ'.encode('utf8'),)),
+                    filename='x.py', encode=False)
+        self.assertEqual(self.err_lineno(a), 4)
+
+    @unittest.skipIf(py3, "encode only does anything in Python 2")
+    def test_runsource_bytestring_encode(self):
+        i, a = self.interp_errlog()
+        i.runsource(b'[1 + 1,\nabc]', encode=True)
+        self.assertEqual(self.err_lineno(a), 2)
+
+        i, a = self.interp_errlog()
+        with self.assertRaises(ValueError):
+            i.runsource(b'[1 + 1,\nabc]', filename='x.py', encode=True)
+
+        i, a = self.interp_errlog()
+        i.runsource(dedent(b'''\
+                    #encoding: utf-8
+
+                    [u"%s",
+                    abc]''' % (u'åß∂ƒ'.encode('utf8'),)), encode=True)
+        self.assertEqual(self.err_lineno(a), 4)
+
+        i, a = self.interp_errlog()
+        with self.assertRaises(ValueError):
+            i.runsource(dedent(b'''\
+                        #encoding: utf-8
+
+                        [u"%s",
+                        abc]''' % (u'åß∂ƒ'.encode('utf8'),)),
+                        filename='x.py',
+                        encode=True)
+
+    @unittest.skipIf(py3, "encode only does anything in Python 2")
+    def test_runsource_bytestring_autoencode(self):
+        i, a = self.interp_errlog()
+        i.runsource(b'[1 + 1,\n abc]')
+        self.assertEqual(self.err_lineno(a), 2)
+        # same kind of error, but run as file contents
+        i, a = self.interp_errlog()
+        i.runsource(b'[1 + 1,\nabc]', filename='x.py')
+        self.assertEqual(self.err_lineno(a), 2)
+        # console input that already carries an encoding comment
+        i, a = self.interp_errlog()
+        i.runsource(dedent(b'''\
+                    #encoding: utf-8
+
+                    [u"%s",
+                    abc]''' % (u'åß∂ƒ'.encode('utf8'),)))
+        self.assertEqual(self.err_lineno(a), 4)
+        # the same source run as file contents
+        i, a = self.interp_errlog()
+        i.runsource(dedent(b'''\
+                    #encoding: utf-8
+
+                    [u"%s",
+                    abc]''' % (u'åß∂ƒ'.encode('utf8'),)), filename='x.py')
+        self.assertEqual(self.err_lineno(a), 4)