[3.13] gh-116622: Android logging fixes (GH-122698) (#122719)

miss-islington · mhsmith · web-flow · commit 0dd89a7f4054 · 2024-08-16T16:07:42.000+08:00
gh-116622: Android logging fixes (GH-122698) Modifies the handling of stdout/stderr redirection on Android to accomodate the rate and buffer size limits imposed by Android's logging infrastructure. (cherry picked from commit b0c48b8) Co-authored-by: Malcolm Smith <smith@chaquo.com>
diff --git a/Lib/_android_support.py b/Lib/_android_support.py
@@ -1,19 +1,20 @@
 import io
 import sys
-
+from threading import RLock
+from time import sleep, time
 
 # The maximum length of a log message in bytes, including the level marker and
-# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD in
-# platform/system/logging/liblog/include/log/log.h. As of API level 30, messages
-# longer than this will be be truncated by logcat. This limit has already been
-# reduced at least once in the history of Android (from 4076 to 4068 between API
-# level 23 and 26), so leave some headroom.
+# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at
+# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71.
+# Messages longer than this will be be truncated by logcat. This limit has already
+# been reduced at least once in the history of Android (from 4076 to 4068 between
+# API level 23 and 26), so leave some headroom.
 MAX_BYTES_PER_WRITE = 4000
 
 # UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this
-# size ensures that TextIOWrapper can always avoid exceeding MAX_BYTES_PER_WRITE.
+# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE.
 # However, if the actual number of bytes per character is smaller than that,
-# then TextIOWrapper may still join multiple consecutive text writes into binary
+# then we may still join multiple consecutive text writes into binary
 # writes containing a larger number of characters.
 MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4
 
@@ -26,18 +27,22 @@ def init_streams(android_log_write, stdout_prio, stderr_prio):
     if sys.executable:
         return  # Not embedded in an app.
 
+    global logcat
+    logcat = Logcat(android_log_write)
+
     sys.stdout = TextLogStream(
-        android_log_write, stdout_prio, "python.stdout", errors=sys.stdout.errors)
+        stdout_prio, "python.stdout", errors=sys.stdout.errors)
     sys.stderr = TextLogStream(
-        android_log_write, stderr_prio, "python.stderr", errors=sys.stderr.errors)
+        stderr_prio, "python.stderr", errors=sys.stderr.errors)
 
 
 class TextLogStream(io.TextIOWrapper):
-    def __init__(self, android_log_write, prio, tag, **kwargs):
+    def __init__(self, prio, tag, **kwargs):
         kwargs.setdefault("encoding", "UTF-8")
-        kwargs.setdefault("line_buffering", True)
-        super().__init__(BinaryLogStream(android_log_write, prio, tag), **kwargs)
-        self._CHUNK_SIZE = MAX_BYTES_PER_WRITE
+        super().__init__(BinaryLogStream(prio, tag), **kwargs)
+        self._lock = RLock()
+        self._pending_bytes = []
+        self._pending_bytes_count = 0
 
     def __repr__(self):
         return f"<TextLogStream {self.buffer.tag!r}>"
@@ -52,19 +57,48 @@ def write(self, s):
         s = str.__str__(s)
 
         # We want to emit one log message per line wherever possible, so split
-        # the string before sending it to the superclass. Note that
-        # "".splitlines() == [], so nothing will be logged for an empty string.
-        for line in s.splitlines(keepends=True):
-            while line:
-                super().write(line[:MAX_CHARS_PER_WRITE])
-                line = line[MAX_CHARS_PER_WRITE:]
+        # the string into lines first. Note that "".splitlines() == [], so
+        # nothing will be logged for an empty string.
+        with self._lock:
+            for line in s.splitlines(keepends=True):
+                while line:
+                    chunk = line[:MAX_CHARS_PER_WRITE]
+                    line = line[MAX_CHARS_PER_WRITE:]
+                    self._write_chunk(chunk)
 
         return len(s)
 
+    # The size and behavior of TextIOWrapper's buffer is not part of its public
+    # API, so we handle buffering ourselves to avoid truncation.
+    def _write_chunk(self, s):
+        b = s.encode(self.encoding, self.errors)
+        if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE:
+            self.flush()
+
+        self._pending_bytes.append(b)
+        self._pending_bytes_count += len(b)
+        if (
+            self.write_through
+            or b.endswith(b"\n")
+            or self._pending_bytes_count > MAX_BYTES_PER_WRITE
+        ):
+            self.flush()
+
+    def flush(self):
+        with self._lock:
+            self.buffer.write(b"".join(self._pending_bytes))
+            self._pending_bytes.clear()
+            self._pending_bytes_count = 0
+
+    # Since this is a line-based logging system, line buffering cannot be turned
+    # off, i.e. a newline always causes a flush.
+    @property
+    def line_buffering(self):
+        return True
+
 
 class BinaryLogStream(io.RawIOBase):
-    def __init__(self, android_log_write, prio, tag):
-        self.android_log_write = android_log_write
+    def __init__(self, prio, tag):
         self.prio = prio
         self.tag = tag
 
@@ -85,10 +119,48 @@ def write(self, b):
 
         # Writing an empty string to the stream should have no effect.
         if b:
-            # Encode null bytes using "modified UTF-8" to avoid truncating the
-            # message. This should not affect the return value, as the caller
-            # may be expecting it to match the length of the input.
-            self.android_log_write(self.prio, self.tag,
-                                   b.replace(b"\x00", b"\xc0\x80"))
-
+            logcat.write(self.prio, self.tag, b)
         return len(b)
+
+
+# When a large volume of data is written to logcat at once, e.g. when a test
+# module fails in --verbose3 mode, there's a risk of overflowing logcat's own
+# buffer and losing messages. We avoid this by imposing a rate limit using the
+# token bucket algorithm, based on a conservative estimate of how fast `adb
+# logcat` can consume data.
+MAX_BYTES_PER_SECOND = 1024 * 1024
+
+# The logcat buffer size of a device can be determined by running `logcat -g`.
+# We set the token bucket size to half of the buffer size of our current minimum
+# API level, because other things on the system will be producing messages as
+# well.
+BUCKET_SIZE = 128 * 1024
+
+# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
+PER_MESSAGE_OVERHEAD = 28
+
+
+class Logcat:
+    def __init__(self, android_log_write):
+        self.android_log_write = android_log_write
+        self._lock = RLock()
+        self._bucket_level = 0
+        self._prev_write_time = time()
+
+    def write(self, prio, tag, message):
+        # Encode null bytes using "modified UTF-8" to avoid them truncating the
+        # message.
+        message = message.replace(b"\x00", b"\xc0\x80")
+
+        with self._lock:
+            now = time()
+            self._bucket_level += (
+                (now - self._prev_write_time) * MAX_BYTES_PER_SECOND)
+            self._bucket_level = min(self._bucket_level, BUCKET_SIZE)
+            self._prev_write_time = now
+
+            self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message)
+            if self._bucket_level < 0:
+                sleep(-self._bucket_level / MAX_BYTES_PER_SECOND)
+
+            self.android_log_write(prio, tag, message)
diff --git a/Lib/test/test_android.py b/Lib/test/test_android.py
@@ -1,14 +1,17 @@
+import io
 import platform
 import queue
 import re
 import subprocess
 import sys
 import unittest
+from _android_support import TextLogStream
 from array import array
-from contextlib import contextmanager
+from contextlib import ExitStack, contextmanager
 from threading import Thread
 from test.support import LOOPBACK_TIMEOUT
-from time import time
+from time import sleep, time
+from unittest.mock import patch
 
 
 if sys.platform != "android":
@@ -81,18 +84,39 @@ def unbuffered(self, stream):
         finally:
             stream.reconfigure(write_through=False)
 
+    # In --verbose3 mode, sys.stdout and sys.stderr are captured, so we can't
+    # test them directly. Detect this mode and use some temporary streams with
+    # the same properties.
+    def stream_context(self, stream_name, level):
+        # https://developer.android.com/ndk/reference/group/logging
+        prio = {"I": 4, "W": 5}[level]
+
+        stack = ExitStack()
+        stack.enter_context(self.subTest(stream_name))
+        stream = getattr(sys, stream_name)
+        if isinstance(stream, io.StringIO):
+            stack.enter_context(
+                patch(
+                    f"sys.{stream_name}",
+                    TextLogStream(
+                        prio, f"python.{stream_name}", errors="backslashreplace"
+                    ),
+                )
+            )
+        return stack
+
     def test_str(self):
         for stream_name, level in [("stdout", "I"), ("stderr", "W")]:
-            with self.subTest(stream=stream_name):
+            with self.stream_context(stream_name, level):
                 stream = getattr(sys, stream_name)
                 tag = f"python.{stream_name}"
                 self.assertEqual(f"<TextLogStream '{tag}'>", repr(stream))
 
-                self.assertTrue(stream.writable())
-                self.assertFalse(stream.readable())
+                self.assertIs(stream.writable(), True)
+                self.assertIs(stream.readable(), False)
                 self.assertEqual("UTF-8", stream.encoding)
-                self.assertTrue(stream.line_buffering)
-                self.assertFalse(stream.write_through)
+                self.assertIs(stream.line_buffering, True)
+                self.assertIs(stream.write_through, False)
 
                 # stderr is backslashreplace by default; stdout is configured
                 # that way by libregrtest.main.
@@ -147,6 +171,13 @@ def write(s, lines=None, *, write_len=None):
                 write("f\n\ng", ["exxf", ""])
                 write("\n", ["g"])
 
+                # Since this is a line-based logging system, line buffering
+                # cannot be turned off, i.e. a newline always causes a flush.
+                stream.reconfigure(line_buffering=False)
+                self.assertIs(stream.line_buffering, True)
+
+                # However, buffering can be turned off completely if you want a
+                # flush after every write.
                 with self.unbuffered(stream):
                     write("\nx", ["", "x"])
                     write("\na\n", ["", "a"])
@@ -209,30 +240,30 @@ def __str__(self):
                 # (MAX_BYTES_PER_WRITE).
                 #
                 # ASCII (1 byte per character)
-                write(("foobar" * 700) + "\n",
-                      [("foobar" * 666) + "foob",  # 4000 bytes
-                       "ar" + ("foobar" * 33)])  # 200 bytes
+                write(("foobar" * 700) + "\n",  # 4200 bytes in
+                      [("foobar" * 666) + "foob",  # 4000 bytes out
+                       "ar" + ("foobar" * 33)])  # 200 bytes out
 
                 # "Full-width" digits 0-9 (3 bytes per character)
                 s = "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19"
-                write((s * 150) + "\n",
-                      [s * 100,  # 3000 bytes
-                       s * 50])  # 1500 bytes
+                write((s * 150) + "\n",  # 4500 bytes in
+                      [s * 100,  # 3000 bytes out
+                       s * 50])  # 1500 bytes out
 
                 s = "0123456789"
-                write(s * 200, [])
-                write(s * 150, [])
-                write(s * 51, [s * 350])  # 3500 bytes
-                write("\n", [s * 51])  # 510 bytes
+                write(s * 200, [])  # 2000 bytes in
+                write(s * 150, [])  # 1500 bytes in
+                write(s * 51, [s * 350])  # 510 bytes in, 3500 bytes out
+                write("\n", [s * 51])  # 0 bytes in, 510 bytes out
 
     def test_bytes(self):
         for stream_name, level in [("stdout", "I"), ("stderr", "W")]:
-            with self.subTest(stream=stream_name):
+            with self.stream_context(stream_name, level):
                 stream = getattr(sys, stream_name).buffer
                 tag = f"python.{stream_name}"
                 self.assertEqual(f"<BinaryLogStream '{tag}'>", repr(stream))
-                self.assertTrue(stream.writable())
-                self.assertFalse(stream.readable())
+                self.assertIs(stream.writable(), True)
+                self.assertIs(stream.readable(), False)
 
                 def write(b, lines=None, *, write_len=None):
                     if write_len is None:
@@ -330,3 +361,64 @@ def write(b, lines=None, *, write_len=None):
                             fr"{type(obj).__name__}"
                         ):
                             stream.write(obj)
+
+    def test_rate_limit(self):
+        # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
+        PER_MESSAGE_OVERHEAD = 28
+
+        # https://developer.android.com/ndk/reference/group/logging
+        ANDROID_LOG_DEBUG = 3
+
+        # To avoid flooding the test script output, use a different tag rather
+        # than stdout or stderr.
+        tag = "python.rate_limit"
+        stream = TextLogStream(ANDROID_LOG_DEBUG, tag)
+
+        # Make a test message which consumes 1 KB of the logcat buffer.
+        message = "Line {:03d} "
+        message += "." * (
+            1024 - PER_MESSAGE_OVERHEAD - len(tag) - len(message.format(0))
+        ) + "\n"
+
+        # See _android_support.py. The default values of these parameters work
+        # well across a wide range of devices, but we'll use smaller values to
+        # ensure a quick and reliable test that doesn't flood the log too much.
+        MAX_KB_PER_SECOND = 100
+        BUCKET_KB = 10
+        with (
+            patch("_android_support.MAX_BYTES_PER_SECOND", MAX_KB_PER_SECOND * 1024),
+            patch("_android_support.BUCKET_SIZE", BUCKET_KB * 1024),
+        ):
+            # Make sure the token bucket is full.
+            sleep(BUCKET_KB / MAX_KB_PER_SECOND)
+            line_num = 0
+
+            # Write BUCKET_KB messages, and return the rate at which they were
+            # accepted in KB per second.
+            def write_bucketful():
+                nonlocal line_num
+                start = time()
+                max_line_num = line_num + BUCKET_KB
+                while line_num < max_line_num:
+                    stream.write(message.format(line_num))
+                    line_num += 1
+                return BUCKET_KB / (time() - start)
+
+            # The first bucketful should be written with minimal delay. The
+            # factor of 2 here is not arbitrary: it verifies that the system can
+            # write fast enough to empty the bucket within two bucketfuls, which
+            # the next part of the test depends on.
+            self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2)
+
+            # Write another bucketful to empty the token bucket completely.
+            write_bucketful()
+
+            # The next bucketful should be written at the rate limit.
+            self.assertAlmostEqual(
+                write_bucketful(), MAX_KB_PER_SECOND,
+                delta=MAX_KB_PER_SECOND * 0.1
+            )
+
+            # Once the token bucket refills, we should go back to full speed.
+            sleep(BUCKET_KB / MAX_KB_PER_SECOND)
+            self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst b/Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst
@@ -0,0 +1 @@
+Fix Android stdout and stderr messages being truncated or lost.

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Fix Android stdout and stderr messages being truncated or lost.`