From 74e3ea676590c5d55ae5c31d6d457784ffecb7ea Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Fri, 10 Mar 2017 03:31:45 +0100 Subject: [PATCH 01/10] bpo-29882: Add an efficient popcount method for integers --- Doc/library/stdtypes.rst | 18 +++++ Lib/test/test_doctest.py | 3 +- Lib/test/test_long.py | 11 ++++ .../2019-06-02-11-29-15.bpo-29882.AkRzjb.rst | 2 + Objects/clinic/longobject.c.h | 25 ++++++- Objects/longobject.c | 66 +++++++++++++++++++ 6 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-06-02-11-29-15.bpo-29882.AkRzjb.rst diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index fcb0da74e158b0..f054f92b181a89 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -479,6 +479,24 @@ class`. In addition, it provides a few more methods: .. versionadded:: 3.1 +.. method:: int.bit_count() + + Return the number of ones in the binary representation of the integer, + excluding the sign bit:: + + >>> n = -19 + >>> bin(n) + '-0b10011' + >>> n.bit_count() + 3 + + Equivalent to:: + + def bit_count(self): + return bin(self).count("1") + + .. versionadded:: 3.8 + .. method:: int.to_bytes(length, byteorder, \*, signed=False) Return an array of bytes representing an integer. diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index 5ea18f52c4fcf2..52f539fa1f54f3 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -665,7 +665,7 @@ def non_Python_modules(): r""" True >>> real_tests = [t for t in tests if len(t.examples) > 0] >>> len(real_tests) # objects that actually have doctests - 12 + 13 >>> for t in real_tests: ... print('{} {}'.format(len(t.examples), t.name)) ... @@ -678,6 +678,7 @@ def non_Python_modules(): r""" 1 builtins.hex 1 builtins.int 3 builtins.int.as_integer_ratio + 2 builtins.int.bit_count 2 builtins.int.bit_length 5 builtins.memoryview.hex 1 builtins.oct diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 53101b3badb36d..f1616cf122f13d 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -1008,6 +1008,17 @@ def test_bit_length(self): self.assertEqual((a+1).bit_length(), i+1) self.assertEqual((-a-1).bit_length(), i+1) + def test_bit_count(self): + for a in range(-1000, 1000): + self.assertEqual(a.bit_count(), bin(a).count("1")) + + for exp in [10, 17, 63, 64, 65, 1009, 70234, 1234567]: + a = 2**exp + self.assertEqual(a.bit_count(), 1) + self.assertEqual((a - 1).bit_count(), exp) + self.assertEqual((a ^ 63).bit_count(), 7) + self.assertEqual(((a - 1) ^ 510).bit_count(), exp - 8) + def test_round(self): # check round-half-even algorithm. For round to nearest ten; # rounding map is invariant under adding multiples of 20 diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-06-02-11-29-15.bpo-29882.AkRzjb.rst b/Misc/NEWS.d/next/Core and Builtins/2019-06-02-11-29-15.bpo-29882.AkRzjb.rst new file mode 100644 index 00000000000000..240b5680b36a2a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-06-02-11-29-15.bpo-29882.AkRzjb.rst @@ -0,0 +1,2 @@ +Add :meth:`int.bit_count()`, counting the number of ones in the binary +representation of an integer. Patch by Niklas Fiekas. diff --git a/Objects/clinic/longobject.c.h b/Objects/clinic/longobject.c.h index 453edba481dfb5..5a6d7c5b3fea0a 100644 --- a/Objects/clinic/longobject.c.h +++ b/Objects/clinic/longobject.c.h @@ -138,6 +138,29 @@ int_bit_length(PyObject *self, PyObject *Py_UNUSED(ignored)) return int_bit_length_impl(self); } +PyDoc_STRVAR(int_bit_count__doc__, +"bit_count($self, /)\n" +"--\n" +"\n" +"Number of ones in the binary representation of self.\n" +"\n" +">>> bin(13)\n" +"\'0b1101\'\n" +">>> (13).bit_count()\n" +"3"); + +#define INT_BIT_COUNT_METHODDEF \ + {"bit_count", (PyCFunction)int_bit_count, METH_NOARGS, int_bit_count__doc__}, + +static PyObject * +int_bit_count_impl(PyObject *self); + +static PyObject * +int_bit_count(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return int_bit_count_impl(self); +} + PyDoc_STRVAR(int_as_integer_ratio__doc__, "as_integer_ratio($self, /)\n" "--\n" @@ -313,4 +336,4 @@ int_from_bytes(PyTypeObject *type, PyObject *const *args, Py_ssize_t nargs, PyOb exit: return return_value; } -/*[clinic end generated code: output=709503897c55bca1 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=75f3e3d6b2ef4839 input=a9049054013a1b77]*/ diff --git a/Objects/longobject.c b/Objects/longobject.c index 5d2b595621f3d8..00747e14f6e8f7 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -807,6 +807,17 @@ bits_in_digit(digit d) return d_bits; } +static int +popcount_digit(digit d) +{ + /* 32bit SWAR popcount. */ + uint32_t u = d; + u -= (u >> 1) & 0x55555555; + u = (u & 0x33333333) + ((u >> 2) & 0x33333333); + u = (u + (u >> 4)) & 0x0f0f0f0f; + return (u * 0x01010101) >> 24; +} + size_t _PyLong_NumBits(PyObject *vv) { @@ -5358,6 +5369,60 @@ int_bit_length_impl(PyObject *self) return NULL; } +/*[clinic input] +int.bit_count + +Number of ones in the binary representation of self. + +>>> bin(13) +'0b1101' +>>> (13).bit_count() +3 +[clinic start generated code]*/ + +static PyObject * +int_bit_count_impl(PyObject *self) +/*[clinic end generated code: output=2e571970daf1e5c3 input=a428900d3e39a606]*/ +{ + Py_ssize_t ndigits, i, bit_count = 0; + PyLongObject *result, *x, *y; + + assert(self != NULL); + assert(PyLong_Check(self)); + + ndigits = Py_ABS(Py_SIZE(self)); + + for (i = 0; i < ndigits && i < PY_SSIZE_T_MAX/PyLong_SHIFT; i++) { + bit_count += popcount_digit(((PyLongObject *)self)->ob_digit[i]); + } + + result = (PyLongObject *)PyLong_FromSsize_t(bit_count); + if (result == NULL) { + return NULL; + } + + /* Use Python integers if bit_count would overflow. */ + for (; i < ndigits; i++) { + x = (PyLongObject *)PyLong_FromLong(popcount_digit(((PyLongObject *)self)->ob_digit[i])); + if (x == NULL) { + goto error; + } + y = (PyLongObject *)long_add(result, x); + Py_DECREF(x); + if (y == NULL) { + goto error; + } + Py_DECREF(result); + result = y; + } + + return (PyObject *)result; + + error: + Py_DECREF(result); + return NULL; +} + #if 0 static PyObject * long_is_finite(PyObject *v) @@ -5522,6 +5587,7 @@ static PyMethodDef long_methods[] = { {"conjugate", long_long_meth, METH_NOARGS, "Returns self, the complex conjugate of any int."}, INT_BIT_LENGTH_METHODDEF + INT_BIT_COUNT_METHODDEF #if 0 {"is_finite", (PyCFunction)long_is_finite, METH_NOARGS, "Returns always True."}, From 19da190d38ef0753ac6f6ff075e7544def9b0bc7 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Tue, 26 May 2020 10:06:18 +0100 Subject: [PATCH 02/10] Update 'sign bit' and versionadded in docs --- Doc/library/stdtypes.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 066fb575ea085e..e31f7ab1be44f6 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -480,8 +480,8 @@ class`. In addition, it provides a few more methods: .. method:: int.bit_count() - Return the number of ones in the binary representation of the integer, - excluding the sign bit:: + Return the number of ones in the binary representation of the absolute + value of the integer:: >>> n = -19 >>> bin(n) @@ -494,7 +494,7 @@ class`. In addition, it provides a few more methods: def bit_count(self): return bin(self).count("1") - .. versionadded:: 3.8 + .. versionadded:: 3.10 .. method:: int.to_bytes(length, byteorder, \*, signed=False) From 070ac455e2c6fed94fea6cf89cd13f9b5fb12bda Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Tue, 26 May 2020 10:16:37 +0100 Subject: [PATCH 03/10] Add entry to whatsnew document --- Doc/whatsnew/3.10.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 98a231f80aaf20..cf06782bc36633 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -75,6 +75,9 @@ New Features Other Language Changes ====================== +* The :class:`int` type has a new method :meth:`int.bit_count`, returning the + number of ones in the binary expansion of a given integer, also known + as the population count. (Contributed by Niklas Fiekas in :issue:`29882`.) New Modules From b87f1621aea6152743be0aff4fb68bcb707b7932 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Wed, 27 May 2020 10:01:41 +0100 Subject: [PATCH 04/10] Doc: use positive example, mention population count --- Doc/library/stdtypes.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index e31f7ab1be44f6..2082b849fd05b0 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -481,13 +481,16 @@ class`. In addition, it provides a few more methods: .. method:: int.bit_count() Return the number of ones in the binary representation of the absolute - value of the integer:: + value of the integer. This is also known as the population count. + Example:: - >>> n = -19 + >>> n = 19 >>> bin(n) - '-0b10011' + '0b10011' >>> n.bit_count() 3 + >>> (-n).bit_count() + 3 Equivalent to:: From 48084e75790a44f36bc61ab3488e0d118e52c476 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Wed, 27 May 2020 10:21:27 +0100 Subject: [PATCH 05/10] Minor cleanups of the core code --- Objects/longobject.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 2abf02172bec0e..53dfeb5b9f17b1 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -5447,30 +5447,33 @@ static PyObject * int_bit_count_impl(PyObject *self) /*[clinic end generated code: output=2e571970daf1e5c3 input=a428900d3e39a606]*/ { - Py_ssize_t ndigits, i, bit_count = 0; - PyLongObject *result, *x, *y; - assert(self != NULL); assert(PyLong_Check(self)); - ndigits = Py_ABS(Py_SIZE(self)); + PyLongObject *z = (PyLongObject *)self; + Py_ssize_t ndigits = Py_ABS(Py_SIZE(self)); + Py_ssize_t bit_count = 0; - for (i = 0; i < ndigits && i < PY_SSIZE_T_MAX/PyLong_SHIFT; i++) { - bit_count += popcount_digit(((PyLongObject *)self)->ob_digit[i]); + /* Each digit has up to PyLong_SHIFT ones, so the accumulated bit count + from the first PY_SSIZE_T_MAX/PyLong_SHIFT digits can't overflow a + Py_ssize_t. */ + Py_ssize_t ndigits_fast = Py_MIN(ndigits, PY_SSIZE_T_MAX/PyLong_SHIFT); + for (Py_ssize_t i = 0; i < ndigits_fast; i++) { + bit_count += popcount_digit(z->ob_digit[i]); } - result = (PyLongObject *)PyLong_FromSsize_t(bit_count); + PyObject *result = PyLong_FromSsize_t(bit_count); if (result == NULL) { return NULL; } /* Use Python integers if bit_count would overflow. */ - for (; i < ndigits; i++) { - x = (PyLongObject *)PyLong_FromLong(popcount_digit(((PyLongObject *)self)->ob_digit[i])); + for (Py_ssize_t i = ndigits_fast; i < ndigits; i++) { + PyObject *x = PyLong_FromLong(popcount_digit(z->ob_digit[i])); if (x == NULL) { goto error; } - y = (PyLongObject *)long_add(result, x); + PyObject *y = long_add((PyLongObject *)result, (PyLongObject *)x); Py_DECREF(x); if (y == NULL) { goto error; @@ -5479,7 +5482,7 @@ int_bit_count_impl(PyObject *self) result = y; } - return (PyObject *)result; + return result; error: Py_DECREF(result); From 729737cc83d09ed9384693a8dccb8bf6969a621c Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Wed, 27 May 2020 10:33:59 +0100 Subject: [PATCH 06/10] Move popcount_digit closer to where it's used --- Objects/longobject.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index a805e421677f33..a1ede75757be2f 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -694,17 +694,6 @@ _PyLong_Sign(PyObject *vv) return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1); } -static int -popcount_digit(digit d) -{ - /* 32bit SWAR popcount. */ - uint32_t u = d; - u -= (u >> 1) & 0x55555555; - u = (u & 0x33333333) + ((u >> 2) & 0x33333333); - u = (u + (u >> 4)) & 0x0f0f0f0f; - return (u * 0x01010101) >> 24; -} - size_t _PyLong_NumBits(PyObject *vv) { @@ -5315,6 +5304,17 @@ int_bit_length_impl(PyObject *self) return NULL; } +static int +popcount_digit(digit d) +{ + /* 32bit SWAR popcount. */ + uint32_t u = d; + u -= (u >> 1) & 0x55555555; + u = (u & 0x33333333) + ((u >> 2) & 0x33333333); + u = (u + (u >> 4)) & 0x0f0f0f0f; + return (u * 0x01010101) >> 24; +} + /*[clinic input] int.bit_count From 79f4d165f8c412cf486ed629eae548bd293742bf Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Wed, 27 May 2020 10:37:18 +0100 Subject: [PATCH 07/10] Use z instead of self after conversion --- Objects/longobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index a1ede75757be2f..9bd6a46a829d63 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -5334,7 +5334,7 @@ int_bit_count_impl(PyObject *self) assert(PyLong_Check(self)); PyLongObject *z = (PyLongObject *)self; - Py_ssize_t ndigits = Py_ABS(Py_SIZE(self)); + Py_ssize_t ndigits = Py_ABS(Py_SIZE(z)); Py_ssize_t bit_count = 0; /* Each digit has up to PyLong_SHIFT ones, so the accumulated bit count From e3228cf8ae46a4d0dcde0725ff4032f77e0d0967 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Wed, 27 May 2020 10:40:22 +0100 Subject: [PATCH 08/10] Add 'absolute value' and 'population count' to docstring --- Objects/longobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 9bd6a46a829d63..1ed0e08523f9c8 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -5318,7 +5318,8 @@ popcount_digit(digit d) /*[clinic input] int.bit_count -Number of ones in the binary representation of self. +Number of ones in the binary representation of the absolute value of self. +Also known as the population count. >>> bin(13) '0b1101' From a0fed82ba1d7dc8cb4c96d78c7930f1f573bf340 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Wed, 27 May 2020 11:05:37 +0100 Subject: [PATCH 09/10] Fix clinic error about missing summary line --- Objects/clinic/longobject.c.h | 6 ++++-- Objects/longobject.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Objects/clinic/longobject.c.h b/Objects/clinic/longobject.c.h index b76093ef65a71f..5f70b3f8254e80 100644 --- a/Objects/clinic/longobject.c.h +++ b/Objects/clinic/longobject.c.h @@ -142,7 +142,9 @@ PyDoc_STRVAR(int_bit_count__doc__, "bit_count($self, /)\n" "--\n" "\n" -"Number of ones in the binary representation of self.\n" +"Number of ones in the binary representation of the absolute value of self.\n" +"\n" +"Also known as the population count.\n" "\n" ">>> bin(13)\n" "\'0b1101\'\n" @@ -331,4 +333,4 @@ int_from_bytes(PyTypeObject *type, PyObject *const *args, Py_ssize_t nargs, PyOb exit: return return_value; } -/*[clinic end generated code: output=37049157360926ab input=a9049054013a1b77]*/ +/*[clinic end generated code: output=59dfa71aad39d5e6 input=a9049054013a1b77]*/ diff --git a/Objects/longobject.c b/Objects/longobject.c index 1ed0e08523f9c8..d3e355d976b6d6 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -5319,6 +5319,7 @@ popcount_digit(digit d) int.bit_count Number of ones in the binary representation of the absolute value of self. + Also known as the population count. >>> bin(13) @@ -5329,7 +5330,7 @@ Also known as the population count. static PyObject * int_bit_count_impl(PyObject *self) -/*[clinic end generated code: output=2e571970daf1e5c3 input=a428900d3e39a606]*/ +/*[clinic end generated code: output=2e571970daf1e5c3 input=7e0adef8e8ccdf2e]*/ { assert(self != NULL); assert(PyLong_Check(self)); From 3e8422bb6c9fd0cdc4381815fca613e6975ee582 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Fri, 29 May 2020 12:49:27 +0100 Subject: [PATCH 10/10] Ensure popcount_digit is portable with 64-bit ints --- Objects/longobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 0871a948446942..0b209a403c4b76 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -5309,10 +5309,10 @@ popcount_digit(digit d) { /* 32bit SWAR popcount. */ uint32_t u = d; - u -= (u >> 1) & 0x55555555; - u = (u & 0x33333333) + ((u >> 2) & 0x33333333); - u = (u + (u >> 4)) & 0x0f0f0f0f; - return (u * 0x01010101) >> 24; + u -= (u >> 1) & 0x55555555U; + u = (u & 0x33333333U) + ((u >> 2) & 0x33333333U); + u = (u + (u >> 4)) & 0x0f0f0f0fU; + return (uint32_t)(u * 0x01010101U) >> 24; } /*[clinic input]