Skip to content

gh-87790: support thousands separators for formatting fractional part of floats #125304

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions Doc/library/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -319,14 +319,19 @@ non-empty format specification typically modifies the result.
The general form of a *standard format specifier* is:

.. productionlist:: format-spec
format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision`][`type`]
format_spec: [`options`][`width_and_precision`][`type`]
options: [[`fill`]`align`][`sign`]["z"]["#"]["0"]
fill: <any character>
align: "<" | ">" | "=" | "^"
sign: "+" | "-" | " "
width_and_precision: [`width_with_grouping`][`precision_with_grouping`]
width_with_grouping: [`width`][`grouping_option`]
precision_with_grouping: "." `precision`[`grouping_option`] | "." `grouping_option`
width: `~python-grammar:digit`+
grouping_option: "_" | ","
precision: `~python-grammar:digit`+
type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"
type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g"
: | "G" | "n" | "o" | "s" | "x" | "X" | "%"

If a valid *align* value is specified, it can be preceded by a *fill*
character that can be any character and defaults to a space if omitted.
Expand Down Expand Up @@ -458,6 +463,13 @@ indicates the maximum field size - in other words, how many characters will be
used from the field content. The *precision* is not allowed for integer
presentation types.

The ``'_'`` or ``','`` option after *precision* inserts an underscore or a
comma, respectively, as a thousands separator in the fractional part for
floating-point presentation types; digits are grouped in threes starting from
the decimal point.

.. versionchanged:: 3.14
Support thousands separators for the fractional part.

Finally, the *type* determines how the data should be presented.

The available string presentation types are:
Expand Down Expand Up @@ -704,10 +716,18 @@ Replacing ``%x`` and ``%o`` and converting the value to different bases::
>>> "int: {0:d}; hex: {0:#x}; oct: {0:#o}; bin: {0:#b}".format(42)
'int: 42; hex: 0x2a; oct: 0o52; bin: 0b101010'

Using the comma as a thousands separator::
Using the comma or the underscore as a thousands separator::

>>> '{:,}'.format(1234567890)
'1,234,567,890'
>>> '{:_}'.format(1234567890)
'1_234_567_890'
>>> '{:_}'.format(123456789.123456789)
'123_456_789.12345679'
>>> '{:._}'.format(123456789.123456789)
'123456789.123_456_79'
>>> '{:_._}'.format(123456789.123456789)
'123_456_789.123_456_79'

Expressing a percentage::

Expand Down
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,11 @@ Other language changes
making it a :term:`generic type`.
(Contributed by Brian Schubert in :gh:`126012`.)

* Support underscore and comma as thousands separators in the fractional part
for floating-point presentation types of the new-style string formatting
(with :func:`format` or :ref:`f-strings`).
(Contributed by Sergey B Kirpichev in :gh:`87790`.)

* ``\B`` in :mod:`regular expression <re>` now matches empty input string.
Now it is always the opposite of ``\b``.
(Contributed by Serhiy Storchaka in :gh:`124130`.)
Expand Down
3 changes: 2 additions & 1 deletion Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
Py_ssize_t min_width,
const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar);
Py_UCS4 *maxchar,
int forward);

/* --- Misc functions ----------------------------------------------------- */

Expand Down
22 changes: 22 additions & 0 deletions Lib/test/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,28 @@ def test_format(self):
self.assertEqual(format(INF, 'f'), 'inf')
self.assertEqual(format(INF, 'F'), 'INF')

# thousands separators
x = 123_456.123_456
self.assertEqual(format(x, '_f'), '123_456.123456')
self.assertEqual(format(x, ',f'), '123,456.123456')
self.assertEqual(format(x, '._f'), '123456.123_456')
self.assertEqual(format(x, '.,f'), '123456.123,456')
self.assertEqual(format(x, '_._f'), '123_456.123_456')
self.assertEqual(format(x, ',.,f'), '123,456.123,456')
self.assertEqual(format(x, '.10_f'), '123456.123_456_000_0')
self.assertEqual(format(x, '.10,f'), '123456.123,456,000,0')
self.assertEqual(format(x, '>21._f'), ' 123456.123_456')
self.assertEqual(format(x, '<21._f'), '123456.123_456 ')
self.assertEqual(format(x, '+.11_e'), '+1.234_561_234_56e+05')
self.assertEqual(format(x, '+.11,e'), '+1.234,561,234,56e+05')

self.assertRaises(ValueError, format, x, '._6f')
self.assertRaises(ValueError, format, x, '.,_f')
self.assertRaises(ValueError, format, x, '.6,_f')
self.assertRaises(ValueError, format, x, '.6_,f')
self.assertRaises(ValueError, format, x, '.6_n')
self.assertRaises(ValueError, format, x, '.6,n')

@support.requires_IEEE_754
def test_format_testfile(self):
with open(format_testfile, encoding="utf-8") as testfile:
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,11 +515,15 @@ def test_with_a_commas_and_an_underscore_in_format_specifier(self):
error_msg = re.escape("Cannot specify both ',' and '_'.")
with self.assertRaisesRegex(ValueError, error_msg):
'{:,_}'.format(1)
with self.assertRaisesRegex(ValueError, error_msg):
'{:.,_f}'.format(1.1)

def test_with_an_underscore_and_a_comma_in_format_specifier(self):
error_msg = re.escape("Cannot specify both ',' and '_'.")
with self.assertRaisesRegex(ValueError, error_msg):
'{:_,}'.format(1)
with self.assertRaisesRegex(ValueError, error_msg):
'{:._,f}'.format(1.1)

def test_better_error_message_format(self):
# https://bugs.python.org/issue20524
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Support underscore and comma as thousands separators in the fractional part for
floating-point presentation types of the new-style string formatting (with
:func:`format` or :ref:`f-strings`). Patch by Sergey B Kirpichev.
27 changes: 21 additions & 6 deletions Objects/stringlib/localeutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
PyObject *digits, Py_ssize_t *digits_pos,
Py_ssize_t n_chars, Py_ssize_t n_zeros,
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
Py_UCS4 *maxchar)
Py_UCS4 *maxchar, int forward)
{
if (!writer) {
/* if maxchar > 127, maxchar is already set */
Expand All @@ -59,24 +59,39 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
}

if (thousands_sep) {
*buffer_pos -= thousands_sep_len;

if (!forward) {
*buffer_pos -= thousands_sep_len;
}
/* Copy the thousands_sep chars into the buffer. */
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
thousands_sep, 0,
thousands_sep_len);
if (forward) {
*buffer_pos += thousands_sep_len;
}
}

*buffer_pos -= n_chars;
*digits_pos -= n_chars;
if (!forward) {
*buffer_pos -= n_chars;
*digits_pos -= n_chars;
}
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
digits, *digits_pos,
n_chars);
if (forward) {
*buffer_pos += n_chars;
*digits_pos += n_chars;
}

if (n_zeros) {
*buffer_pos -= n_zeros;
if (!forward) {
*buffer_pos -= n_zeros;
}
int kind = PyUnicode_KIND(writer->buffer);
void *data = PyUnicode_DATA(writer->buffer);
unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
if (forward) {
*buffer_pos += n_zeros;
}
}
}
13 changes: 7 additions & 6 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -9764,7 +9764,8 @@ _PyUnicode_InsertThousandsGrouping(
Py_ssize_t min_width,
const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar)
Py_UCS4 *maxchar,
int forward)
{
min_width = Py_MAX(0, min_width);
if (writer) {
Expand Down Expand Up @@ -9801,14 +9802,14 @@ _PyUnicode_InsertThousandsGrouping(
should be an empty string */
assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));

digits_pos = d_pos + n_digits;
digits_pos = d_pos + (forward ? 0 : n_digits);
if (writer) {
buffer_pos = writer->pos + n_buffer;
buffer_pos = writer->pos + (forward ? 0 : n_buffer);
assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
}
else {
buffer_pos = n_buffer;
buffer_pos = forward ? 0 : n_buffer;
}

if (!writer) {
Expand All @@ -9830,7 +9831,7 @@ _PyUnicode_InsertThousandsGrouping(
digits, &digits_pos,
n_chars, n_zeros,
use_separator ? thousands_sep : NULL,
thousands_sep_len, maxchar);
thousands_sep_len, maxchar, forward);

/* Use a separator next time. */
use_separator = 1;
Expand Down Expand Up @@ -9859,7 +9860,7 @@ _PyUnicode_InsertThousandsGrouping(
digits, &digits_pos,
n_chars, n_zeros,
use_separator ? thousands_sep : NULL,
thousands_sep_len, maxchar);
thousands_sep_len, maxchar, forward);
}
return count;
}
Expand Down
Loading
Loading