diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 5e2f35497cbe86..453c751a180251 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -398,9 +398,11 @@ following: .. index:: single: z; in string formatting -The ``'z'`` option coerces negative zero floating-point values to positive -zero after rounding to the format precision. This option is only valid for -floating-point presentation types. +For floating-point presentation types the ``'z'`` option coerces negative zero +floating-point values to positive zero after rounding to the format precision. +For integer types ``'b'``, ``'o'``, ``'x'`` and ``'X'`` it can be used to +interpret the integer value as two's complement. This option is invalid for +other presentation types. .. versionchanged:: 3.11 Added the ``'z'`` option (see also :pep:`682`). @@ -437,8 +439,18 @@ displayed after the decimal point for presentation types ``'f'`` and ``'F'``, or before and after the decimal point for presentation types ``'g'`` or ``'G'``. For string presentation types the field indicates the maximum field size - in other words, how many characters will be -used from the field content. The *precision* is not allowed for integer -presentation types. +used from the field content. + +For integer presentation types (excluding ``'c'``), the precision gives the +minimal number of digits to appear, expanded with an appropriate number of +leading zeros. If the ``'z'`` option is specified for non-decimal presentation +types, the integer value is interpreted as two's complement and the precision +gives its minimum size ``precision*k`` in bits, where ``k=1,3,4`` for ``'b'``, +``'o'`` and ``'x'``/``'X'`` types, respectively. A precision of ``0`` is +treated as equivalent to a precision of ``1`` here. + +.. versionchanged:: next + Precision specification allowed for integer presentation types. 
The *grouping* option after *width* and *precision* fields specifies a digit group separator for the integral and fractional parts diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index c7cc32e09490b2..10f2fb8c0c603e 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -620,9 +620,10 @@ def test_specifier_z_error(self): with self.assertRaisesRegex(ValueError, error_msg): f"{0:fz}" # wrong position - error_msg = re.escape("Negative zero coercion (z) not allowed") + error_msg = re.escape("'z' option not allowed with 'c', 'd' and 'n'") with self.assertRaisesRegex(ValueError, error_msg): f"{0:zd}" # can't apply to int presentation type + error_msg = re.escape("Negative zero coercion (z) not allowed") with self.assertRaisesRegex(ValueError, error_msg): f"{'x':zs}" # can't apply to string diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index f336d49fa4f008..9c700ded8b1598 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -675,6 +675,7 @@ def test__format__(self): self.assertEqual(format(123456789, 'd'), '123456789') self.assertEqual(format(123456789, ','), '123,456,789') self.assertEqual(format(123456789, '_'), '123_456_789') + self.assertEqual(format(3, '1.3'), '003') # sign and aligning are interdependent self.assertEqual(format(1, "-"), '1') @@ -706,6 +707,9 @@ def test__format__(self): self.assertRaises(ValueError, format, 1234567890, ',x') self.assertEqual(format(1234567890, '_x'), '4996_02d2') self.assertEqual(format(1234567890, '_X'), '4996_02D2') + self.assertEqual(format(8086, 'z#.8x'), '0x00001f96') + self.assertEqual(format(2048, 'z.3x'), '0800') + self.assertEqual(format(-2049, 'z.3x'), 'f7ff') # octal self.assertEqual(format(3, "o"), "3") @@ -720,6 +724,9 @@ def test__format__(self): self.assertEqual(format(-1234, "+o"), "-2322") self.assertRaises(ValueError, format, 1234567890, ',o') self.assertEqual(format(1234567890, '_o'), '111_4540_1322') + self.assertEqual(format(18, 'z#.3o'), '0o022') + 
self.assertEqual(format(256, 'z.3o'), '0400') + self.assertEqual(format(-257, 'z.3o'), '7377') # binary self.assertEqual(format(3, "b"), "11") @@ -734,9 +741,19 @@ def test__format__(self): self.assertEqual(format(-1234, "+b"), "-10011010010") self.assertRaises(ValueError, format, 1234567890, ',b') self.assertEqual(format(12345, '_b'), '11_0000_0011_1001') + self.assertEqual(format(-12, 'z.8b'), '11110100') + self.assertEqual(format(73, 'z.8b'), '01001001') + self.assertEqual(format(73, 'z#.8b'), '0b01001001') + self.assertEqual(format(300, 'z.8b'), '0100101100') + self.assertEqual(format(200, '.8b'), '11001000') + self.assertEqual(format(200, 'z.8b'), '011001000') + self.assertEqual(format(-200, 'z.8b'), '100111000') + self.assertEqual(format(128, 'z.8b'), '010000000') + self.assertEqual(format(-129, 'z.8b'), '101111111') + self.assertEqual(format(256, 'z.8b'), '0100000000') # make sure these are errors - self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed + self.assertRaises(ValueError, format, 3, "1.3c") # precision disallowed with 'c', self.assertRaises(ValueError, format, 3, "_c") # underscore, self.assertRaises(ValueError, format, 3, ",c") # comma, and self.assertRaises(ValueError, format, 3, "+c") # sign not allowed diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index f9e12931a63e4f..acba388789f5ec 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -383,8 +383,6 @@ def test(i, format_spec, result): # make sure these are errors - # precision disallowed - self.assertRaises(ValueError, 3 .__format__, "1.3") # sign not allowed with 'c' self.assertRaises(ValueError, 3 .__format__, "+c") # format spec must be string diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst new file mode 100644 index 00000000000000..8312b438780a18 --- /dev/null +++ 
b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst @@ -0,0 +1,3 @@ +Support precision field for integer formatting types. For binary, octal and +hexadecimal formatting types, two's complement is used to represent +signed values. Patch by Sergey B Kirpichev. diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 30807f428c7d71..26a54c394cdbe2 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -358,7 +358,8 @@ parse_internal_render_format_spec(PyObject *obj, } } - if (format->type == 'n' + if ((format->type == 'n' || format->type == 'd' || format->type == 'b' + || format->type == 'o' || format->type == 'x' || format->type == 'X') && format->frac_thousands_separator != LT_NO_LOCALE) { invalid_thousands_separator_type(format->frac_thousands_separator, @@ -979,17 +980,20 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, from a hard-code pseudo-locale */ LocaleInfo locale = LocaleInfo_STATIC_INIT; - /* no precision allowed on integers */ - if (format->precision != -1) { + /* no precision allowed on 'c' integer representation type */ + if (format->precision != -1 && format->type == 'c') { PyErr_SetString(PyExc_ValueError, - "Precision not allowed in integer format specifier"); + "Precision not allowed with 'c' integer format specifier"); goto done; } + /* no negative zero coercion on integers */ - if (format->no_neg_0) { + if (format->no_neg_0 && format->type != 'b' && format->type != 'o' + && format->type != 'x' && format->type != 'X') + { PyErr_SetString(PyExc_ValueError, - "Negative zero coercion (z) not allowed in integer" - " format specifier"); + "'z' option not allowed with 'c', 'd' and 'n' " + "integer format specifier"); goto done; } @@ -1063,6 +1067,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, if (format->sign != '+' && format->sign != ' ' && format->width == -1 + && format->precision == -1 && format->type != 'X' && 
format->type != 'n' && !format->thousands_separators && PyLong_CheckExact(value)) @@ -1077,9 +1082,119 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, n_prefix = leading_chars_to_skip; /* Do the hard part, converting to a string in a given base */ - tmp = _PyLong_Format(value, base); - if (tmp == NULL) - goto done; + if (format->precision != -1) { + int64_t precision = Py_MAX(1, format->precision); + + /* Use two's complement for 'b', 'o' and 'x' formatting types */ + if (format->no_neg_0 && (format->type == 'b' || format->type == 'x' + || format->type == 'o' + || format->type == 'X')) + { + int dbits = 1; + + if (format->type == 'x' || format->type == 'X') { + dbits = 4; + } + else if (format->type == 'o') { + dbits = 3; + } + + int64_t nbits = _PyLong_NumBits(value); + + if (nbits > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "int too large to format"); + goto done; + } + + Py_ssize_t shift = Py_MAX(precision, + ((Py_ssize_t)nbits + dbits - 1)/dbits); + + shift *= dbits; + shift--; + + /* expected value in range(-2**n, 2**n), where n=shift + or n=shift+dbits */ + PyObject *mod = _PyLong_Lshift(PyLong_FromLong(1), shift); + int incr = 1; + + if (mod == NULL) { + goto done; + } + if (PyLong_IsNegative(value)) { + Py_SETREF(mod, PyNumber_Negative(mod)); + if (mod == NULL) { + goto done; + } + if (PyObject_RichCompareBool(value, mod, Py_LT)) { + incr += dbits; + } + Py_SETREF(mod, _PyLong_Lshift(mod, incr)); + tmp = PyNumber_Subtract(value, mod); + Py_DECREF(mod); + if (tmp == NULL) { + goto done; + } + Py_SETREF(tmp, _PyLong_Format(tmp, base)); + } + else { + if (PyObject_RichCompareBool(value, mod, Py_GE)) { + incr += dbits; + } + Py_DECREF(mod); + tmp = _PyLong_Format(value, base); + } + shift += incr; + precision = shift/dbits; + } + else { + tmp = _PyLong_Format(value, base); + } + if (tmp == NULL) { + goto done; + } + + /* Prepend enough leading zeros (after sign and prefix) */ + + int sign = PyUnicode_READ_CHAR(tmp, 0) 
== '-'; + Py_ssize_t tmp2_len = precision + leading_chars_to_skip + sign; + Py_ssize_t tmp_len = PyUnicode_GET_LENGTH(tmp); + Py_ssize_t gap = tmp2_len - tmp_len; + + if (gap > 0) { + PyObject *tmp2 = PyUnicode_New(tmp2_len, 127); + Py_ssize_t value_start = leading_chars_to_skip + sign; + + if (PyUnicode_CopyCharacters(tmp2, value_start + gap, + tmp, value_start, + precision) == -1) { + Py_DECREF(tmp2); + goto done; + } + if (PyUnicode_Fill(tmp2, value_start, gap, '0') == -1) { + Py_DECREF(tmp2); + goto done; + } + if (sign && PyUnicode_WriteChar(tmp2, 0, '-') == -1) { + Py_DECREF(tmp2); + goto done; + } + if (leading_chars_to_skip + && PyUnicode_CopyCharacters(tmp2, sign, tmp, sign, + leading_chars_to_skip) == -1) + { + Py_DECREF(tmp2); + goto done; + } + Py_SETREF(tmp, tmp2); + } + } + else { + tmp = _PyLong_Format(value, base); + if (tmp == NULL) { + goto done; + } + } inumeric_chars = 0; n_digits = PyUnicode_GET_LENGTH(tmp);