From 057390d71d3005ff19ea28a11681343c9445db42 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 16 Oct 2022 20:03:59 -0500 Subject: [PATCH 01/23] First draft --- Modules/clinic/itertoolsmodule.c.h | 81 +++++++++++++- Modules/itertoolsmodule.c | 169 ++++++++++++++++++++++++++++- 2 files changed, 248 insertions(+), 2 deletions(-) diff --git a/Modules/clinic/itertoolsmodule.c.h b/Modules/clinic/itertoolsmodule.c.h index 8806606d85befe..0c20483bc51089 100644 --- a/Modules/clinic/itertoolsmodule.c.h +++ b/Modules/clinic/itertoolsmodule.c.h @@ -8,6 +8,85 @@ preserve #endif +PyDoc_STRVAR(batched_new__doc__, +"batched(iterable, n)\n" +"--\n" +"\n" +"Batch data into lists of length n. The last batch may be shorter.\n" +"\n" +"Loops over the input iterable and accumulates data into lists\n" +"upto size n. The input is consumed lazily, just enough to\n" +"fill a list. The result is yielded as soon as a list is full\n" +"or when the input iterable is exhausted.\n" +"\n" +" >>> for batch in batched(\'ABCDEFG\', 3):\n" +" ... 
print(batch)\n" +" ...\n" +" [\'A\', \'B\', \'C\']\n" +" [\'D\', \'E\', \'F\']\n" +" [\'G\']"); + +static PyObject * +batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n); + +static PyObject * +batched_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(iterable), &_Py_ID(n), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"iterable", "n", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "batched", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + PyObject *iterable; + Py_ssize_t n; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 2, 2, 0, argsbuf); + if (!fastargs) { + goto exit; + } + iterable = fastargs[0]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(fastargs[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + n = ival; + } + return_value = batched_new_impl(type, iterable, n); + +exit: + return return_value; +} + PyDoc_STRVAR(pairwise_new__doc__, "pairwise(iterable, /)\n" "--\n" @@ -834,4 +913,4 @@ itertools_count(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=b1056d63f68a9059 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3c5876fbb1c45b4b input=a9049054013a1b77]*/ diff --git 
a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 4a7a95730395e6..e701a3fe8ff91c 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -16,6 +16,7 @@ class itertools.groupby "groupbyobject *" "&groupby_type" class itertools._grouper "_grouperobject *" "&_grouper_type" class itertools.teedataobject "teedataobject *" "&teedataobject_type" class itertools._tee "teeobject *" "&tee_type" +class itertools.batched "batchedobject *" "&batched_type" class itertools.cycle "cycleobject *" "&cycle_type" class itertools.dropwhile "dropwhileobject *" "&dropwhile_type" class itertools.takewhile "takewhileobject *" "&takewhile_type" @@ -30,12 +31,13 @@ class itertools.filterfalse "filterfalseobject *" "&filterfalse_type" class itertools.count "countobject *" "&count_type" class itertools.pairwise "pairwiseobject *" "&pairwise_type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6498ed21fbe1bf94]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1168b274011ce21b]*/ static PyTypeObject groupby_type; static PyTypeObject _grouper_type; static PyTypeObject teedataobject_type; static PyTypeObject tee_type; +static PyTypeObject batched_type; static PyTypeObject cycle_type; static PyTypeObject dropwhile_type; static PyTypeObject takewhile_type; @@ -51,6 +53,169 @@ static PyTypeObject pairwise_type; #include "clinic/itertoolsmodule.c.h" +/* batched object ************************************************************/ + +/* Note: The built-in zip() function includes a "strict" argument + that is needed because that function can silently truncate data + and there is no easy way for a user to detect that condition. + The same reasoning does not apply to batched() which never drops + data. Instead, it produces a shorter list which can be handled + as the user sees fit. 
+ */ + +typedef struct { + PyObject_HEAD + Py_ssize_t batch_size; + PyObject *it; +} batchedobject; + +/*[clinic input] +@classmethod +itertools.batched.__new__ as batched_new + iterable: object + n: Py_ssize_t +Batch data into lists of length n. The last batch may be shorter. + +Loops over the input iterable and accumulates data into lists +upto size n. The input is consumed lazily, just enough to +fill a list. The result is yielded as soon as a list is full +or when the input iterable is exhausted. + + >>> for batch in batched('ABCDEFG', 3): + ... print(batch) + ... + ['A', 'B', 'C'] + ['D', 'E', 'F'] + ['G'] + +[clinic start generated code]*/ + +static PyObject * +batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) +/*[clinic end generated code: output=7ebc954d655371b6 input=cde5922eeb597020]*/ +{ + PyObject *it; + batchedobject *bo; + + /* XXX Do we want do defined batched(it, n=0) as just an empty iterator ?*/ + if (n < 1) { + PyErr_SetString(PyExc_ValueError, "n must be >= 1"); + return NULL; + } + it = PyObject_GetIter(iterable); + if (it == NULL) { + Py_DECREF(it); + return NULL; + } + + /* create batchedobject structure */ + bo = (batchedobject *)type->tp_alloc(type, 0); + if (bo == NULL) { + Py_DECREF(it); + return NULL; + } + bo->batch_size = n; + bo->it = it; + bo->it = it; + return (PyObject *)bo; +} + +static void +batched_dealloc(batchedobject *bo) +{ + PyObject_GC_UnTrack(bo); + Py_XDECREF(bo->it); + Py_TYPE(bo)->tp_free(bo); +} + +static int +batched_traverse(batchedobject *bo, visitproc visit, void *arg) +{ + Py_VISIT(bo->it); + return 0; +} + +static PyObject * +batched_next(batchedobject *bo) +{ + Py_ssize_t i; + PyObject *it = bo->it; + PyObject *item; + PyObject *result; + + // If the iterator has stopped, it should stay stopped + if (it == NULL) { + return NULL; + } + result = PyList_New(0); + if (result == NULL) { + return NULL; + } + for (i=0 ; i < bo->batch_size ; i++) { + item = (*Py_TYPE(it)->tp_iternext)(it); + if 
(item != NULL) { + break; + } + if (PyList_Append(result, item) < 0) { + Py_DECREF(item); + Py_DECREF(result); + return NULL; + } + Py_DECREF(item); + } + if (PyList_GET_SIZE(result) > 0) { + return result; + } + Py_CLEAR(bo->it); + Py_DECREF(result); + return NULL; +} + +static PyTypeObject batched_type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "itertools.batched", /* tp_name */ + sizeof(batchedobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)batched_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + batched_new__doc__, /* tp_doc */ + (traverseproc)batched_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)batched_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + batched_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + + /* pairwise object ***********************************************************/ typedef struct { @@ -4815,6 +4980,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\ \n\ Iterators terminating on the shortest input sequence:\n\ accumulate(p[, func]) --> p0, p0+p1, p0+p1+p2\n\ +batched(p, n) --> [p0, p1, ..., p_n-1], [p_n, p_n+1, ..., p_2n-1], ...\n\ chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ...\n\ chain.from_iterable([p, q, ...]) --> p0, p1, ... 
plast, q0, q1, ...\n\ compress(data, selectors) --> (d[0] if s[0]), (d[1] if s[1]), ...\n\ @@ -4841,6 +5007,7 @@ itertoolsmodule_exec(PyObject *m) { PyTypeObject *typelist[] = { &accumulate_type, + &batched_type, &combinations_type, &cwr_type, &cycle_type, From 047c4410a77fc719d58099e0a9941b5d4e6ce3c8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 16 Oct 2022 20:09:44 -0500 Subject: [PATCH 02/23] . --- Modules/itertoolsmodule.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index e701a3fe8ff91c..229b7388ff6a25 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -116,8 +116,7 @@ batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) } bo->batch_size = n; bo->it = it; - bo->it = it; - return (PyObject *)bo; + return (PyObject *)bo; } static void @@ -131,7 +130,9 @@ batched_dealloc(batchedobject *bo) static int batched_traverse(batchedobject *bo, visitproc visit, void *arg) { - Py_VISIT(bo->it); + if (bo->it != NULL) { + Py_VISIT(bo->it); + } return 0; } From 9061042eb208afa20c0812d54370cb46fd37640f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 16 Oct 2022 20:53:03 -0500 Subject: [PATCH 03/23] Fix NULL test --- Modules/itertoolsmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 229b7388ff6a25..93b034856a613f 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -154,7 +154,7 @@ batched_next(batchedobject *bo) } for (i=0 ; i < bo->batch_size ; i++) { item = (*Py_TYPE(it)->tp_iternext)(it); - if (item != NULL) { + if (item == NULL) { break; } if (PyList_Append(result, item) < 0) { From 72a03c6e84080b08561665df7ce3ea5c1c34eb1b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 02:16:06 -0500 Subject: [PATCH 04/23] Improve the docstring and add the regular docs --- Doc/library/itertools.rst | 71 
++++++++++++++---------------- Modules/clinic/itertoolsmodule.c.h | 8 ++-- Modules/itertoolsmodule.c | 8 ++-- 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 88e1e5aa6ef7f3..f7fae4cbf7d4d4 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -170,6 +170,40 @@ loops that truncate the stream. .. versionchanged:: 3.8 Added the optional *initial* parameter. + +.. function:: batched(iterable, n) + + Batch data from the *iterable* into lists of length *n*. The last + batch may be shorter. + + Loops over the input iterable and accumulates data into lists up to + size *n*. The input is consumed lazily, just enough to fill a list. + The result is yielded as soon as the batch is full or when the input + iterable is exhausted: + + .. doctest:: + + >>> for batch in batched('ABCDEFG', 3): + ... print(batch) + ... + ['A', 'B', 'C'] + ['D', 'E', 'F'] + ['G'] + + Roughly equivalent to:: + + def batched(iterable, n): + "Batch data into lists of length n. The last batch may be shorter." + # batched('ABCDEFG', 3) --> ABC DEF G + if n < 1: + raise ValueError + it = iter(iterable) + while (batch := list(islice(it, n))): + yield batch + + .. versionadded:: 3.12 + + .. function:: chain(*iterables) Make an iterator that returns elements from the first iterable until it is @@ -858,13 +892,6 @@ which incur interpreter overhead. else: raise ValueError('Expected fill, strict, or ignore') - def batched(iterable, n): - "Batch data into lists of length n. The last batch may be shorter." - # batched('ABCDEFG', 3) --> ABC DEF G - it = iter(iterable) - while (batch := list(islice(it, n))): - yield batch - def triplewise(iterable): "Return overlapping triplets from an iterable" # triplewise('ABCDEFG') --> ABC BCD CDE DEF EFG @@ -1236,36 +1263,6 @@ which incur interpreter overhead. 
>>> list(grouper('abcdefg', n=3, incomplete='ignore')) [('a', 'b', 'c'), ('d', 'e', 'f')] - >>> list(batched('ABCDEFG', 3)) - [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']] - >>> list(batched('ABCDEF', 3)) - [['A', 'B', 'C'], ['D', 'E', 'F']] - >>> list(batched('ABCDE', 3)) - [['A', 'B', 'C'], ['D', 'E']] - >>> list(batched('ABCD', 3)) - [['A', 'B', 'C'], ['D']] - >>> list(batched('ABC', 3)) - [['A', 'B', 'C']] - >>> list(batched('AB', 3)) - [['A', 'B']] - >>> list(batched('A', 3)) - [['A']] - >>> list(batched('', 3)) - [] - >>> list(batched('ABCDEFG', 2)) - [['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']] - >>> list(batched('ABCDEFG', 1)) - [['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']] - >>> list(batched('ABCDEFG', 0)) - [] - >>> list(batched('ABCDEFG', -1)) - Traceback (most recent call last): - ... - ValueError: Stop argument for islice() must be None or an integer: 0 <= x <= sys.maxsize. - >>> s = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - >>> all(list(flatten(batched(s[:n], 5))) == list(s[:n]) for n in range(len(s))) - True - >>> list(triplewise('ABCDEFG')) [('A', 'B', 'C'), ('B', 'C', 'D'), ('C', 'D', 'E'), ('D', 'E', 'F'), ('E', 'F', 'G')] diff --git a/Modules/clinic/itertoolsmodule.c.h b/Modules/clinic/itertoolsmodule.c.h index 0c20483bc51089..17f9ebb249390f 100644 --- a/Modules/clinic/itertoolsmodule.c.h +++ b/Modules/clinic/itertoolsmodule.c.h @@ -12,11 +12,11 @@ PyDoc_STRVAR(batched_new__doc__, "batched(iterable, n)\n" "--\n" "\n" -"Batch data into lists of length n. The last batch may be shorter.\n" +"Batch data into lists of length n. The last batch may be shorter than n.\n" "\n" "Loops over the input iterable and accumulates data into lists\n" -"upto size n. The input is consumed lazily, just enough to\n" -"fill a list. The result is yielded as soon as a list is full\n" +"up to size n. The input is consumed lazily, just enough to\n" +"fill a list. 
The result is yielded as soon as a batch is full\n" "or when the input iterable is exhausted.\n" "\n" " >>> for batch in batched(\'ABCDEFG\', 3):\n" @@ -913,4 +913,4 @@ itertools_count(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=3c5876fbb1c45b4b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=efea8cd1e647bd17 input=a9049054013a1b77]*/ diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 93b034856a613f..ef99295c9f1ab4 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -74,11 +74,11 @@ typedef struct { itertools.batched.__new__ as batched_new iterable: object n: Py_ssize_t -Batch data into lists of length n. The last batch may be shorter. +Batch data into lists of length n. The last batch may be shorter than n. Loops over the input iterable and accumulates data into lists -upto size n. The input is consumed lazily, just enough to -fill a list. The result is yielded as soon as a list is full +up to size n. The input is consumed lazily, just enough to +fill a list. The result is yielded as soon as a batch is full or when the input iterable is exhausted. >>> for batch in batched('ABCDEFG', 3): @@ -92,7 +92,7 @@ or when the input iterable is exhausted. static PyObject * batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) -/*[clinic end generated code: output=7ebc954d655371b6 input=cde5922eeb597020]*/ +/*[clinic end generated code: output=7ebc954d655371b6 input=f28fd12cb52365f0]*/ { PyObject *it; batchedobject *bo; From 21a6f6707451507881c809b675d25b1b8185fe8e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 02:24:40 -0500 Subject: [PATCH 05/23] . 
--- Doc/library/itertools.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index f7fae4cbf7d4d4..ca729d4171714d 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -174,7 +174,7 @@ loops that truncate the stream. .. function:: batched(iterable, n) Batch data from the *iterable* into lists of length *n*. The last - batch may be shorter. + batch may be shorter than *n*. Loops over the input iterable and accumulates data into lists up to size *n*. The input is consumed lazily, just enough to fill a list. From 61b83df6e498e4d1795f997412593e648ca7e68a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 12:00:51 -0500 Subject: [PATCH 06/23] Add tests --- Lib/test/test_itertools.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index f469bfe185e65b..cf826a17d99bb3 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -159,6 +159,44 @@ def test_accumulate(self): with self.assertRaises(TypeError): list(accumulate([10, 20], 100)) + def test_batched(self): + self.assertEqual(list(batched('ABCDEFG', 3)), + [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]) + self.assertEqual(list(batched('ABCDEFG', 2)), + [['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']]) + self.assertEqual(list(batched('ABCDEFG', 1)), + [['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']]) + + with self.assertRaises(TypeError): # Too few arguments + list(batched('ABCDEFG')) + with self.assertRaises(TypeError): + list(batched('ABCDEFG', 3, None)) # Too many arguments + with self.assertRaises(TypeError): + list(batched(None, 3)) # Non-iterable input + with self.assertRaises(TypeError): + list(batched('ABCDEFG', 'hello')) # n is a string + with self.assertRaises(ValueError): + list(batched('ABCDEFG', 0)) # n is zero + with self.assertRaises(ValueError): + list(batched('ABCDEFG', -1)) # n is 
negative + + data = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + for n in range(1, 6): + for i in range(len(data)): + s = data[:i] + batches = list(batched(s, n)) + with self.subTest(s, n, batches): + # Order is preserved and no data is lost + self.assertEqual(''.join(batches), s) + # Each batch is an exact list + self.assertTrue(all(type(batch) == list for batch in batches)) + # All but the last batch is of size n + if batches: + last_batch = batches.pop() + self.assertTrue(all(len(batch) == n for batch in batches)) + self.assertTrue(len(last_batch) <= n) + batches.append(last_batch) + def test_chain(self): def chain2(*iterables): From 93bf0134f2b8e6a02742a8327927e968d695455e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 12:09:59 -0500 Subject: [PATCH 07/23] Add entry to toplevel table in the docs --- Doc/library/itertools.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index ca729d4171714d..a33ed47b8f3459 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -48,6 +48,7 @@ Iterator Arguments Results Iterator Arguments Results Example ============================ ============================ ================================================= ============================================================= :func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15`` +:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG') --> ABC DEF G`` :func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F`` :func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F`` :func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... 
``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F`` From eb6949c8bd40b81451cacde4a2c773b1cebbf79d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 12:30:52 -0500 Subject: [PATCH 08/23] Remove bogus decref --- Lib/test/test_itertools.py | 2 +- Modules/itertoolsmodule.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index cf826a17d99bb3..00ee9ab3f4a583 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -185,7 +185,7 @@ def test_batched(self): for i in range(len(data)): s = data[:i] batches = list(batched(s, n)) - with self.subTest(s, n, batches): + with self.subTest(s=s, n=n, batches=batches): # Order is preserved and no data is lost self.assertEqual(''.join(batches), s) # Each batch is an exact list diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index ef99295c9f1ab4..7cbdf4f4f92a37 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -104,7 +104,6 @@ batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) } it = PyObject_GetIter(iterable); if (it == NULL) { - Py_DECREF(it); return NULL; } From 7fd40169f6fdb9c12183db67cef3851dd017e775 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 12:35:02 -0500 Subject: [PATCH 09/23] Flatten before joining --- Lib/test/test_itertools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 00ee9ab3f4a583..c3452da9b4b75d 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -187,7 +187,7 @@ def test_batched(self): batches = list(batched(s, n)) with self.subTest(s=s, n=n, batches=batches): # Order is preserved and no data is lost - self.assertEqual(''.join(batches), s) + self.assertEqual(''.join(chain(*batches)), s) # Each batch is an exact list self.assertTrue(all(type(batch) == list for batch in batches)) # All but the last batch is of 
size n From 8fb8bdf0e2cfaa2c3d74f522feaa7e5dbc1390ec Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 12:50:06 -0500 Subject: [PATCH 10/23] Add blurb --- .../next/Library/2022-10-17-12-49-02.gh-issue-98363.aFmSP-.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-10-17-12-49-02.gh-issue-98363.aFmSP-.rst diff --git a/Misc/NEWS.d/next/Library/2022-10-17-12-49-02.gh-issue-98363.aFmSP-.rst b/Misc/NEWS.d/next/Library/2022-10-17-12-49-02.gh-issue-98363.aFmSP-.rst new file mode 100644 index 00000000000000..9c6e7552a3f440 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-10-17-12-49-02.gh-issue-98363.aFmSP-.rst @@ -0,0 +1,2 @@ +Added itertools.batched() to batch data into lists of a given length with +the last list possibly being shorter than the others. From 2b994a3a31b155207ac14b6de5f4bfe11aff5ce8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 13:08:34 -0500 Subject: [PATCH 11/23] Add motivating use case and make equivalent code more accurate --- Doc/library/itertools.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index a33ed47b8f3459..08859ed80731bf 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -184,6 +184,10 @@ loops that truncate the stream. .. doctest:: + >>> flattened_data = ['roses', 'red', 'violets', 'blue', 'sugar', 'sweet'] + >>> list(batched(flattened_data, 2)) + [['roses', 'red'], ['violets', 'blue'], ['sugar', 'sweet']] + >>> for batch in batched('ABCDEFG', 3): ... print(batch) ... @@ -197,7 +201,7 @@ loops that truncate the stream. "Batch data into lists of length n. The last batch may be shorter." 
# batched('ABCDEFG', 3) --> ABC DEF G if n < 1: - raise ValueError + raise ValueError('n must be >= 1') it = iter(iterable) while (batch := list(islice(it, n))): yield batch From fa9c30699145922065dd3a8fdf447689631f7020 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 13:11:10 -0500 Subject: [PATCH 12/23] Remove TODO comment --- Modules/itertoolsmodule.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 7cbdf4f4f92a37..d210158fe3ca9f 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -97,7 +97,6 @@ batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) PyObject *it; batchedobject *bo; - /* XXX Do we want do defined batched(it, n=0) as just an empty iterator ?*/ if (n < 1) { PyErr_SetString(PyExc_ValueError, "n must be >= 1"); return NULL; From c49fb17573ecdf5ef5d33691b1868c6def51ee39 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 14:14:22 -0500 Subject: [PATCH 13/23] Update Lib/test/test_itertools.py Co-authored-by: Jelle Zijlstra --- Lib/test/test_itertools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index c3452da9b4b75d..2a5749a3a3af21 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -189,7 +189,7 @@ def test_batched(self): # Order is preserved and no data is lost self.assertEqual(''.join(chain(*batches)), s) # Each batch is an exact list - self.assertTrue(all(type(batch) == list for batch in batches)) + self.assertTrue(all(type(batch) is list for batch in batches)) # All but the last batch is of size n if batches: last_batch = batches.pop() From 4c25341c56b8f3a9eb7df55e1f4964e5b6328522 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 14:31:45 -0500 Subject: [PATCH 14/23] Neaten-up --- Doc/library/itertools.rst | 2 +- Modules/itertoolsmodule.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 
deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 08859ed80731bf..66cd3af6fbeede 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -201,7 +201,7 @@ loops that truncate the stream. "Batch data into lists of length n. The last batch may be shorter." # batched('ABCDEFG', 3) --> ABC DEF G if n < 1: - raise ValueError('n must be >= 1') + raise ValueError('n must be at least one') it = iter(iterable) while (batch := list(islice(it, n))): yield batch diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index d210158fe3ca9f..afa736328fd1c4 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -65,8 +65,8 @@ static PyTypeObject pairwise_type; typedef struct { PyObject_HEAD - Py_ssize_t batch_size; PyObject *it; + Py_ssize_t batch_size; } batchedobject; /*[clinic input] @@ -98,7 +98,11 @@ batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) batchedobject *bo; if (n < 1) { - PyErr_SetString(PyExc_ValueError, "n must be >= 1"); + /* We could define the n==0 case to return an empty iterator + but that is at odds with the idea that batching should + never throw away input data. + */ + PyErr_SetString(PyExc_ValueError, "n must be at least one"); return NULL; } it = PyObject_GetIter(iterable); From 41acb455fe8c8d20ab379079e0f9c1da8bb35557 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 14:35:00 -0500 Subject: [PATCH 15/23] Make the example more self-explanatory --- Doc/library/itertools.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 66cd3af6fbeede..55c94cc2256549 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -185,7 +185,8 @@ loops that truncate the stream. .. 
doctest:: >>> flattened_data = ['roses', 'red', 'violets', 'blue', 'sugar', 'sweet'] - >>> list(batched(flattened_data, 2)) + >>> unflattened = list(batched(flattened_data, 2)) + >>> unflattened [['roses', 'red'], ['violets', 'blue'], ['sugar', 'sweet']] >>> for batch in batched('ABCDEFG', 3): From 77de77c15666b798257cfe3dedf6ca916ceb2da6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 14:54:37 -0500 Subject: [PATCH 16/23] Test pure python equivalent --- Lib/test/test_itertools.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 2a5749a3a3af21..3a62352c364210 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1775,6 +1775,31 @@ def test_takewhile(self): class TestPurePythonRoughEquivalents(unittest.TestCase): + def test_batched_recipe(self): + def batched_recipe(iterable, n): + "Batch data into lists of length n. The last batch may be shorter." + # batched('ABCDEFG', 3) --> ABC DEF G + if n < 1: + raise ValueError('n must be at least one') + it = iter(iterable) + while (batch := list(islice(it, n))): + yield batch + + for iterable, n in product( + ['', 'a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef', 'abcdefg'], + [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, None]): + with self.subTest(iterable=iterable, n=n): + try: + e1, r1 = None, list(batched(iterable, n)) + except Exception as e: + e1, r1 = type(e), None + try: + e2, r2 = None, list(batched_recipe(iterable, n)) + except Exception as e: + e2, r2 = type(e), None + self.assertEqual(r1, r2) + self.assertEqual(e1, e2) + @staticmethod def islice(iterable, *args): s = slice(*args) From 7390670f7bd7ddc56067d7a67919d7e3fec8ba97 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 14:54:49 -0500 Subject: [PATCH 17/23] Test pure python equivalent --- Lib/test/test_itertools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_itertools.py 
b/Lib/test/test_itertools.py index 3a62352c364210..db051f54a0852a 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1786,7 +1786,7 @@ def batched_recipe(iterable, n): yield batch for iterable, n in product( - ['', 'a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef', 'abcdefg'], + ['', 'a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef', 'abcdefg', None], [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, None]): with self.subTest(iterable=iterable, n=n): try: From c836bc73ea42bf4c8b699ad9bd14bf1df61af40d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 14:57:41 -0500 Subject: [PATCH 18/23] Add GC test --- Lib/test/test_itertools.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index db051f54a0852a..80358687722119 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1851,6 +1851,10 @@ def test_accumulate(self): a = [] self.makecycle(accumulate([1,2,a,3]), a) + def test_batched(self): + a = [] + self.makecycle(batched([1,2,a,3], 2), a) + def test_chain(self): a = [] self.makecycle(chain(a), a) From c0ca79d9a295c4ecce320cd4e026575ecff61bd8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 15:25:46 -0500 Subject: [PATCH 19/23] Add "TestVariousIteratorArgs" tests --- Lib/test/test_itertools.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 80358687722119..f174ba135fa6d5 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -2039,6 +2039,18 @@ def test_accumulate(self): self.assertRaises(TypeError, accumulate, N(s)) self.assertRaises(ZeroDivisionError, list, accumulate(E(s))) + def test_batched(self): + s = 'abcde' + r = [['a', 'b'], ['c', 'd'], ['e']] + n = 2 + for g in (G, Ig, L, R): # XXX I(s) is failing + with self.subTest(g=g): + self.assertEqual(list(batched(g(s), n)), r) + self.assertEqual(list(batched(S(s), 2)), []) + 
self.assertRaises(TypeError, batched, X(s), 2) + self.assertRaises(TypeError, batched, N(s), 2) + self.assertRaises(ZeroDivisionError, list, batched(E(s), 2)) + def test_chain(self): for s in ("123", "", range(1000), ('do', 1.2), range(2000,2200,5)): for g in (G, I, Ig, S, L, R): From 8238c28a71d1bff685a3e0d5d2eb54766ce2f65f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 15:46:23 -0500 Subject: [PATCH 20/23] Fix test case I() --- Lib/test/test_itertools.py | 2 +- Modules/itertoolsmodule.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index f174ba135fa6d5..c0e35711a2b3dd 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -2043,7 +2043,7 @@ def test_batched(self): s = 'abcde' r = [['a', 'b'], ['c', 'd'], ['e']] n = 2 - for g in (G, Ig, L, R): # XXX I(s) is failing + for g in (G, I, Ig, L, R): with self.subTest(g=g): self.assertEqual(list(batched(g(s), n)), r) self.assertEqual(list(batched(S(s), 2)), []) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index afa736328fd1c4..280bc4a1ac51b3 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -155,7 +155,7 @@ batched_next(batchedobject *bo) return NULL; } for (i=0 ; i < bo->batch_size ; i++) { - item = (*Py_TYPE(it)->tp_iternext)(it); + item = PyIter_Next(it); if (item == NULL) { break; } From b9cbb469bee21049a1a0211b67484ed0471b5170 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 15:50:12 -0500 Subject: [PATCH 21/23] Remove development note --- Modules/itertoolsmodule.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 280bc4a1ac51b3..99dc30eb412cfb 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -146,7 +146,6 @@ batched_next(batchedobject *bo) PyObject *item; PyObject *result; - // If the iterator has stopped, it should stay stopped if (it == 
NULL) { return NULL; } From 4799e6a661823bf181c6cd6ea4a55d13680fda58 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 16:28:03 -0500 Subject: [PATCH 22/23] Remove docstring from pure Python equivalent. --- Doc/library/itertools.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 55c94cc2256549..b5074f6a1cf73c 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -199,7 +199,6 @@ loops that truncate the stream. Roughly equivalent to:: def batched(iterable, n): - "Batch data into lists of length n. The last batch may be shorter." # batched('ABCDEFG', 3) --> ABC DEF G if n < 1: raise ValueError('n must be at least one') From bac2f6c6a19388a68ae4a48f03653ced641aa876 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 17 Oct 2022 17:16:44 -0500 Subject: [PATCH 23/23] Fix missing argument in the docs table entry. --- Doc/library/itertools.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index b5074f6a1cf73c..0e41ec1464a0ec 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -48,7 +48,7 @@ Iterator Arguments Results Iterator Arguments Results Example ============================ ============================ ================================================= ============================================================= :func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15`` -:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG') --> ABC DEF G`` +:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG', n=3) --> ABC DEF G`` :func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F`` :func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F`` :func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... 
``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F``