Skip to content

gh-74756: support precision field for integer formatting types #131926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions Doc/library/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,11 @@ following:

.. index:: single: z; in string formatting

The ``'z'`` option coerces negative zero floating-point values to positive
zero after rounding to the format precision. This option is only valid for
floating-point presentation types.
For floating-point presentation types the ``'z'`` option coerces negative zero
floating-point values to positive zero after rounding to the format precision.
For integer presentation types ``'b'``, ``'o'``, ``'x'`` and ``'X'`` it can be
used to interpret the integer value as two's complement. This option is
invalid for other presentation types.

.. versionchanged:: 3.11
Added the ``'z'`` option (see also :pep:`682`).
Expand Down Expand Up @@ -437,8 +439,18 @@ displayed after the decimal point for presentation types
``'f'`` and ``'F'``, or before and after the decimal point for presentation
types ``'g'`` or ``'G'``. For string presentation types the field
indicates the maximum field size - in other words, how many characters will be
used from the field content. The *precision* is not allowed for integer
presentation types.
used from the field content.

For integer presentation types (excluding ``'c'``), the precision gives the
minimum number of digits to appear, padded with leading zeros as needed.
If the ``'z'`` option is specified for a non-decimal presentation type, the
integer value is interpreted as two's complement and the precision gives its
minimum size, ``precision*k`` bits, where ``k=1,3,4`` for ``'b'``, ``'o'``
and ``'x'``/``'X'`` types, respectively. A precision of ``0`` is treated as
equivalent to a precision of ``1`` here.

.. versionchanged:: next
A precision specification is now allowed for integer presentation types.

The *grouping* option after *width* and *precision* fields specifies
a digit group separator for the integral and fractional parts
Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,9 +620,10 @@ def test_specifier_z_error(self):
with self.assertRaisesRegex(ValueError, error_msg):
f"{0:fz}" # wrong position

error_msg = re.escape("Negative zero coercion (z) not allowed")
error_msg = re.escape("'z' option not allowed with 'c', 'd' and 'n'")
with self.assertRaisesRegex(ValueError, error_msg):
f"{0:zd}" # can't apply to int presentation type
error_msg = re.escape("Negative zero coercion (z) not allowed")
with self.assertRaisesRegex(ValueError, error_msg):
f"{'x':zs}" # can't apply to string

Expand Down
19 changes: 18 additions & 1 deletion Lib/test/test_long.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ def test__format__(self):
self.assertEqual(format(123456789, 'd'), '123456789')
self.assertEqual(format(123456789, ','), '123,456,789')
self.assertEqual(format(123456789, '_'), '123_456_789')
self.assertEqual(format(3, '1.3'), '003')

# sign and aligning are interdependent
self.assertEqual(format(1, "-"), '1')
Expand Down Expand Up @@ -706,6 +707,9 @@ def test__format__(self):
self.assertRaises(ValueError, format, 1234567890, ',x')
self.assertEqual(format(1234567890, '_x'), '4996_02d2')
self.assertEqual(format(1234567890, '_X'), '4996_02D2')
self.assertEqual(format(8086, 'z#.8x'), '0x00001f96')
self.assertEqual(format(2048, 'z.3x'), '0800')
self.assertEqual(format(-2049, 'z.3x'), 'f7ff')

# octal
self.assertEqual(format(3, "o"), "3")
Expand All @@ -720,6 +724,9 @@ def test__format__(self):
self.assertEqual(format(-1234, "+o"), "-2322")
self.assertRaises(ValueError, format, 1234567890, ',o')
self.assertEqual(format(1234567890, '_o'), '111_4540_1322')
self.assertEqual(format(18, 'z#.3o'), '0o022')
self.assertEqual(format(256, 'z.3o'), '0400')
self.assertEqual(format(-257, 'z.3o'), '7377')

# binary
self.assertEqual(format(3, "b"), "11")
Expand All @@ -734,9 +741,19 @@ def test__format__(self):
self.assertEqual(format(-1234, "+b"), "-10011010010")
self.assertRaises(ValueError, format, 1234567890, ',b')
self.assertEqual(format(12345, '_b'), '11_0000_0011_1001')
self.assertEqual(format(-12, 'z.8b'), '11110100')
self.assertEqual(format(73, 'z.8b'), '01001001')
self.assertEqual(format(73, 'z#.8b'), '0b01001001')
self.assertEqual(format(300, 'z.8b'), '0100101100')
self.assertEqual(format(200, '.8b'), '11001000')
self.assertEqual(format(200, 'z.8b'), '011001000')
self.assertEqual(format(-200, 'z.8b'), '100111000')
self.assertEqual(format(128, 'z.8b'), '010000000')
self.assertEqual(format(-129, 'z.8b'), '101111111')
self.assertEqual(format(256, 'z.8b'), '0100000000')

# make sure these are errors
self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed
self.assertRaises(ValueError, format, 3, "1.3c") # precision disallowed with 'c',
self.assertRaises(ValueError, format, 3, "_c") # underscore,
self.assertRaises(ValueError, format, 3, ",c") # comma, and
self.assertRaises(ValueError, format, 3, "+c") # sign not allowed
Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,6 @@ def test(i, format_spec, result):

# make sure these are errors

# precision disallowed
self.assertRaises(ValueError, 3 .__format__, "1.3")
# sign not allowed with 'c'
self.assertRaises(ValueError, 3 .__format__, "+c")
# format spec must be string
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Support the precision field for integer formatting types. For binary, octal
and hexadecimal formatting types, two's complement is used to represent
signed values when the ``'z'`` option is specified. Patch by Sergey B Kirpichev.
135 changes: 125 additions & 10 deletions Python/formatter_unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,8 @@
}
}

if (format->type == 'n'
if ((format->type == 'n' || format->type == 'd' || format->type == 'b'
|| format->type == 'o' || format->type == 'x' || format->type == 'X')
&& format->frac_thousands_separator != LT_NO_LOCALE)
{
invalid_thousands_separator_type(format->frac_thousands_separator,
Expand Down Expand Up @@ -979,17 +980,20 @@
from a hard-code pseudo-locale */
LocaleInfo locale = LocaleInfo_STATIC_INIT;

/* no precision allowed on integers */
if (format->precision != -1) {
/* no precision allowed on 'c' integer representation type */
if (format->precision != -1 && format->type == 'c') {
PyErr_SetString(PyExc_ValueError,
"Precision not allowed in integer format specifier");
"Precision not allowed with 'c' integer format specifier");
goto done;
}

/* no negative zero coercion on integers */
if (format->no_neg_0) {
if (format->no_neg_0 && format->type != 'b' && format->type != 'o'
&& format->type != 'x' && format->type != 'X')
{
PyErr_SetString(PyExc_ValueError,
"Negative zero coercion (z) not allowed in integer"
" format specifier");
"'z' option not allowed with 'c', 'd' and 'n' "
"integer format specifier");
goto done;
}

Expand Down Expand Up @@ -1063,6 +1067,7 @@

if (format->sign != '+' && format->sign != ' '
&& format->width == -1
&& format->precision == -1
&& format->type != 'X' && format->type != 'n'
&& !format->thousands_separators
&& PyLong_CheckExact(value))
Expand All @@ -1077,9 +1082,119 @@
n_prefix = leading_chars_to_skip;

/* Do the hard part, converting to a string in a given base */
tmp = _PyLong_Format(value, base);
if (tmp == NULL)
goto done;
if (format->precision != -1) {
int64_t precision = Py_MAX(1, format->precision);

/* Use two's complement for 'b', 'o' and 'x' formatting types */
if (format->no_neg_0 && (format->type == 'b' || format->type == 'x'
|| format->type == 'o'
|| format->type == 'X'))
{
int dbits = 1;

if (format->type == 'x' || format->type == 'X') {
dbits = 4;
}
else if (format->type == 'o') {
dbits = 3;
}

int64_t nbits = _PyLong_NumBits(value);

if (nbits > PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"int too large to format");
goto done;
}

Py_ssize_t shift = Py_MAX(precision,

Check warning on line 1110 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (arm64)

'initializing': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [C:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1110 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (arm64)

'initializing': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [C:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1110 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (x64)

'initializing': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [D:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1110 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (x64)

'initializing': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [D:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]
((Py_ssize_t)nbits + dbits - 1)/dbits);

shift *= dbits;
shift--;

/* expected value in range(-2**n, 2**n), where n=shift
or n=shift+dbits */
PyObject *mod = _PyLong_Lshift(PyLong_FromLong(1), shift);
int incr = 1;

if (mod == NULL) {
goto done;
}
if (PyLong_IsNegative(value)) {
Py_SETREF(mod, PyNumber_Negative(mod));
if (mod == NULL) {
goto done;
}
if (PyObject_RichCompareBool(value, mod, Py_LT)) {
incr += dbits;
}
Py_SETREF(mod, _PyLong_Lshift(mod, incr));
tmp = PyNumber_Subtract(value, mod);
Py_DECREF(mod);
if (tmp == NULL) {
goto done;
}
Py_SETREF(tmp, _PyLong_Format(tmp, base));
}
else {
if (PyObject_RichCompareBool(value, mod, Py_GE)) {
incr += dbits;
}
Py_DECREF(mod);
tmp = _PyLong_Format(value, base);
}
shift += incr;
precision = shift/dbits;
}
else {
tmp = _PyLong_Format(value, base);
}
if (tmp == NULL) {
goto done;
}

/* Prepend enough leading zeros (after sign and prefix) */

int sign = PyUnicode_READ_CHAR(tmp, 0) == '-';
Py_ssize_t tmp2_len = precision + leading_chars_to_skip + sign;

Check warning on line 1160 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (arm64)

'initializing': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [C:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1160 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (arm64)

'initializing': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [C:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1160 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (x64)

'initializing': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [D:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1160 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (x64)

'initializing': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [D:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]
Py_ssize_t tmp_len = PyUnicode_GET_LENGTH(tmp);
Py_ssize_t gap = tmp2_len - tmp_len;

if (gap > 0) {
PyObject *tmp2 = PyUnicode_New(tmp2_len, 127);
Py_ssize_t value_start = leading_chars_to_skip + sign;

if (PyUnicode_CopyCharacters(tmp2, value_start + gap,
tmp, value_start,
precision) == -1) {

Check warning on line 1170 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (arm64)

'function': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [C:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1170 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (arm64)

'function': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [C:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1170 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (x64)

'function': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [D:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]

Check warning on line 1170 in Python/formatter_unicode.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (x64)

'function': conversion from 'int64_t' to 'Py_ssize_t', possible loss of data [D:\a\cpython\cpython\PCbuild\_freeze_module.vcxproj]
Py_DECREF(tmp2);
goto done;
}
if (PyUnicode_Fill(tmp2, value_start, gap, '0') == -1) {
Py_DECREF(tmp2);
goto done;
}
if (sign && PyUnicode_WriteChar(tmp2, 0, '-') == -1) {
Py_DECREF(tmp2);
goto done;
}
if (leading_chars_to_skip
&& PyUnicode_CopyCharacters(tmp2, sign, tmp, sign,
leading_chars_to_skip) == -1)
{
Py_DECREF(tmp2);
goto done;
}
Py_SETREF(tmp, tmp2);
}
}
else {
tmp = _PyLong_Format(value, base);
if (tmp == NULL) {
goto done;
}
}

inumeric_chars = 0;
n_digits = PyUnicode_GET_LENGTH(tmp);
Expand Down
Loading