From 805b2fdca1ff0f3ea9c5986b67e0e2492cdfd5de Mon Sep 17 00:00:00 2001 From: Corvin Date: Wed, 30 Aug 2023 05:06:21 -0400 Subject: [PATCH] [3.12] gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (GH-108657) Co-authored-by: Erlend E. Aasland . (cherry picked from commit 400a1cebc743515e40157ed7af86e48d654290ce) Co-authored-by: Corvin --- Lib/sqlite3/dump.py | 27 +++++++++++++++++-- Lib/test/test_sqlite3/test_dump.py | 15 +++++++++++ ...-08-29-22-53-48.gh-issue-108590.6k0pOl.rst | 1 + 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst diff --git a/Lib/sqlite3/dump.py b/Lib/sqlite3/dump.py index 1cf8759f8970f1..cd5fd79258555f 100644 --- a/Lib/sqlite3/dump.py +++ b/Lib/sqlite3/dump.py @@ -7,6 +7,28 @@ # future enhancements, you should normally quote any identifier that # is an English language word, even if you do not have to." + +from contextlib import contextmanager + + +def _force_decode(bs, *args, **kwargs): + # gh-108590: Don't fail if the database contains invalid Unicode data. + try: + return bs.decode(*args, **kwargs) + except UnicodeDecodeError: + return "".join([chr(c) for c in bs]) + + +@contextmanager +def _text_factory(con, factory): + saved_factory = con.text_factory + con.text_factory = factory + try: + yield + finally: + con.text_factory = saved_factory + + def _iterdump(connection): """ Returns an iterator to the dump of the database in an SQL text format. @@ -63,8 +85,9 @@ def _iterdump(connection): table_name_ident, ",".join("""'||quote("{0}")||'""".format(col.replace('"', '""')) for col in column_names)) query_res = cu.execute(q) - for row in query_res: - yield("{0};".format(row[0])) + with _text_factory(connection, bytes): + for row in query_res: + yield("{0};".format(_force_decode(row[0]))) # Now when the type is 'index', 'trigger', or 'view' q = """ diff --git a/Lib/test/test_sqlite3/test_dump.py b/Lib/test/test_sqlite3/test_dump.py index c3ed3aefef0445..3f6de925368e9e 100644 --- a/Lib/test/test_sqlite3/test_dump.py +++ b/Lib/test/test_sqlite3/test_dump.py @@ -137,6 +137,21 @@ def test_dump_virtual_tables(self): actual = list(self.cx.iterdump()) self.assertEqual(expected, actual) + def test_dump_unicode_invalid(self): + # gh-108590 + expected = [ + "BEGIN TRANSACTION;", + "CREATE TABLE foo (data TEXT);", + "INSERT INTO \"foo\" VALUES('a\x9f');", + "COMMIT;", + ] + self.cu.executescript(""" + CREATE TABLE foo (data TEXT); + INSERT INTO foo VALUES (CAST(X'619f' AS TEXT)); + """) + actual = list(self.cx.iterdump()) + self.assertEqual(expected, actual) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst new file mode 100644 index 00000000000000..50b41f2a94d9be --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst @@ -0,0 +1 @@ +Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes invalid Unicode sequences. Patch by Corvin McPherson