Skip to content

Commit a231428

Browse files
bpo-20047: Make bytearray methods partition() and rpartition() reject (python#4158)
separators that are not bytes-like objects.
1 parent 5a4bbcd commit a231428

File tree

9 files changed

+89
-38
lines changed

9 files changed

+89
-38
lines changed

Doc/library/stdtypes.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2573,8 +2573,9 @@ arbitrary binary data.
25732573
bytearray.partition(sep)
25742574

25752575
Split the sequence at the first occurrence of *sep*, and return a 3-tuple
2576-
containing the part before the separator, the separator, and the part
2577-
after the separator. If the separator is not found, return a 3-tuple
2576+
containing the part before the separator, the separator itself or its
2577+
bytearray copy, and the part after the separator.
2578+
If the separator is not found, return a 3-tuple
25782579
containing a copy of the original sequence, followed by two empty bytes or
25792580
bytearray objects.
25802581

@@ -2629,8 +2630,9 @@ arbitrary binary data.
26292630
bytearray.rpartition(sep)
26302631

26312632
Split the sequence at the last occurrence of *sep*, and return a 3-tuple
2632-
containing the part before the separator, the separator, and the part
2633-
after the separator. If the separator is not found, return a 3-tuple
2633+
containing the part before the separator, the separator itself or its
2634+
bytearray copy, and the part after the separator.
2635+
If the separator is not found, return a 3-tuple
26342636
containing a copy of the original sequence, followed by two empty bytes or
26352637
bytearray objects.
26362638

Lib/test/test_bytes.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -548,8 +548,16 @@ def test_replace(self):
548548
self.assertEqual(b.replace(b'i', b'a'), b'massassappa')
549549
self.assertEqual(b.replace(b'ss', b'x'), b'mixixippi')
550550

551+
def test_replace_int_error(self):
552+
self.assertRaises(TypeError, self.type2test(b'a b').replace, 32, b'')
553+
551554
def test_split_string_error(self):
552555
self.assertRaises(TypeError, self.type2test(b'a b').split, ' ')
556+
self.assertRaises(TypeError, self.type2test(b'a b').rsplit, ' ')
557+
558+
def test_split_int_error(self):
559+
self.assertRaises(TypeError, self.type2test(b'a b').split, 32)
560+
self.assertRaises(TypeError, self.type2test(b'a b').rsplit, 32)
553561

554562
def test_split_unicodewhitespace(self):
555563
for b in (b'a\x1Cb', b'a\x1Db', b'a\x1Eb', b'a\x1Fb'):
@@ -558,9 +566,6 @@ def test_split_unicodewhitespace(self):
558566
b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F")
559567
self.assertEqual(b.split(), [b'\x1c\x1d\x1e\x1f'])
560568

561-
def test_rsplit_string_error(self):
562-
self.assertRaises(TypeError, self.type2test(b'a b').rsplit, ' ')
563-
564569
def test_rsplit_unicodewhitespace(self):
565570
b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F")
566571
self.assertEqual(b.rsplit(), [b'\x1c\x1d\x1e\x1f'])
@@ -576,6 +581,14 @@ def test_rpartition(self):
576581
self.assertEqual(b.rpartition(b'i'), (b'mississipp', b'i', b''))
577582
self.assertEqual(b.rpartition(b'w'), (b'', b'', b'mississippi'))
578583

584+
def test_partition_string_error(self):
585+
self.assertRaises(TypeError, self.type2test(b'a b').partition, ' ')
586+
self.assertRaises(TypeError, self.type2test(b'a b').rpartition, ' ')
587+
588+
def test_partition_int_error(self):
589+
self.assertRaises(TypeError, self.type2test(b'a b').partition, 32)
590+
self.assertRaises(TypeError, self.type2test(b'a b').rpartition, 32)
591+
579592
def test_pickling(self):
580593
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
581594
for b in b"", b"a", b"abc", b"\xffab\x80", b"\0\0\377\0\0":
@@ -608,9 +621,14 @@ def test_strip_bytearray(self):
608621
self.assertEqual(self.type2test(b'abc').rstrip(memoryview(b'ac')), b'ab')
609622

610623
def test_strip_string_error(self):
611-
self.assertRaises(TypeError, self.type2test(b'abc').strip, 'b')
612-
self.assertRaises(TypeError, self.type2test(b'abc').lstrip, 'b')
613-
self.assertRaises(TypeError, self.type2test(b'abc').rstrip, 'b')
624+
self.assertRaises(TypeError, self.type2test(b'abc').strip, 'ac')
625+
self.assertRaises(TypeError, self.type2test(b'abc').lstrip, 'ac')
626+
self.assertRaises(TypeError, self.type2test(b'abc').rstrip, 'ac')
627+
628+
def test_strip_int_error(self):
629+
self.assertRaises(TypeError, self.type2test(b' abc ').strip, 32)
630+
self.assertRaises(TypeError, self.type2test(b' abc ').lstrip, 32)
631+
self.assertRaises(TypeError, self.type2test(b' abc ').rstrip, 32)
614632

615633
def test_center(self):
616634
# Fill character can be either bytes or bytearray (issue 12380)
@@ -633,6 +651,11 @@ def test_rjust(self):
633651
self.assertEqual(b.rjust(7, fill_type(b'-')),
634652
self.type2test(b'----abc'))
635653

654+
def test_xjust_int_error(self):
655+
self.assertRaises(TypeError, self.type2test(b'abc').center, 7, 32)
656+
self.assertRaises(TypeError, self.type2test(b'abc').ljust, 7, 32)
657+
self.assertRaises(TypeError, self.type2test(b'abc').rjust, 7, 32)
658+
636659
def test_ord(self):
637660
b = self.type2test(b'\0A\x7f\x80\xff')
638661
self.assertEqual([ord(b[i:i+1]) for i in range(len(b))],
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Bytearray methods partition() and rpartition() now accept only bytes-like
2+
objects as separator, as documented. In particular they now raise TypeError
3+
instead of returning a bogus result when an integer is passed as a separator.

Objects/bytearrayobject.c

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,26 @@ PyByteArray_FromObject(PyObject *input)
104104
input, NULL);
105105
}
106106

107+
static PyObject *
108+
_PyByteArray_FromBufferObject(PyObject *obj)
109+
{
110+
PyObject *result;
111+
Py_buffer view;
112+
113+
if (PyObject_GetBuffer(obj, &view, PyBUF_FULL_RO) < 0) {
114+
return NULL;
115+
}
116+
result = PyByteArray_FromStringAndSize(NULL, view.len);
117+
if (result != NULL &&
118+
PyBuffer_ToContiguous(PyByteArray_AS_STRING(result),
119+
&view, view.len, 'C') < 0)
120+
{
121+
Py_CLEAR(result);
122+
}
123+
PyBuffer_Release(&view);
124+
return result;
125+
}
126+
107127
PyObject *
108128
PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
109129
{
@@ -536,7 +556,8 @@ bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
536556
if (values == (PyObject *)self) {
537557
/* Make a copy and call this function recursively */
538558
int err;
539-
values = PyByteArray_FromObject(values);
559+
values = PyByteArray_FromStringAndSize(PyByteArray_AS_STRING(values),
560+
PyByteArray_GET_SIZE(values));
540561
if (values == NULL)
541562
return -1;
542563
err = bytearray_setslice(self, lo, hi, values);
@@ -1387,19 +1408,19 @@ Partition the bytearray into three parts using the given separator.
13871408
13881409
This will search for the separator sep in the bytearray. If the separator is
13891410
found, returns a 3-tuple containing the part before the separator, the
1390-
separator itself, and the part after it.
1411+
separator itself, and the part after it as new bytearray objects.
13911412
1392-
If the separator is not found, returns a 3-tuple containing the original
1393-
bytearray object and two empty bytearray objects.
1413+
If the separator is not found, returns a 3-tuple containing the copy of the
1414+
original bytearray object and two empty bytearray objects.
13941415
[clinic start generated code]*/
13951416

13961417
static PyObject *
13971418
bytearray_partition(PyByteArrayObject *self, PyObject *sep)
1398-
/*[clinic end generated code: output=45d2525ddd35f957 input=86f89223892b70b5]*/
1419+
/*[clinic end generated code: output=45d2525ddd35f957 input=8f644749ee4fc83a]*/
13991420
{
14001421
PyObject *bytesep, *result;
14011422

1402-
bytesep = PyByteArray_FromObject(sep);
1423+
bytesep = _PyByteArray_FromBufferObject(sep);
14031424
if (! bytesep)
14041425
return NULL;
14051426

@@ -1420,23 +1441,24 @@ bytearray.rpartition
14201441
sep: object
14211442
/
14221443
1423-
Partition the bytes into three parts using the given separator.
1444+
Partition the bytearray into three parts using the given separator.
14241445
1425-
This will search for the separator sep in the bytearray, starting and the end.
1446+
This will search for the separator sep in the bytearray, starting at the end.
14261447
If the separator is found, returns a 3-tuple containing the part before the
1427-
separator, the separator itself, and the part after it.
1448+
separator, the separator itself, and the part after it as new bytearray
1449+
objects.
14281450
14291451
If the separator is not found, returns a 3-tuple containing two empty bytearray
1430-
objects and the original bytearray object.
1452+
objects and the copy of the original bytearray object.
14311453
[clinic start generated code]*/
14321454

14331455
static PyObject *
14341456
bytearray_rpartition(PyByteArrayObject *self, PyObject *sep)
1435-
/*[clinic end generated code: output=440de3c9426115e8 input=5f4094f2de87c8f3]*/
1457+
/*[clinic end generated code: output=440de3c9426115e8 input=7e3df3e6cb8fa0ac]*/
14361458
{
14371459
PyObject *bytesep, *result;
14381460

1439-
bytesep = PyByteArray_FromObject(sep);
1461+
bytesep = _PyByteArray_FromBufferObject(sep);
14401462
if (! bytesep)
14411463
return NULL;
14421464

Objects/bytesobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,7 @@ bytes.rpartition
18341834
18351835
Partition the bytes into three parts using the given separator.
18361836
1837-
This will search for the separator sep in the bytes, starting and the end. If
1837+
This will search for the separator sep in the bytes, starting at the end. If
18381838
the separator is found, returns a 3-tuple containing the part before the
18391839
separator, the separator itself, and the part after it.
18401840
@@ -1844,7 +1844,7 @@ objects and the original bytes object.
18441844

18451845
static PyObject *
18461846
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1847-
/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
1847+
/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
18481848
{
18491849
return stringlib_rpartition(
18501850
(PyObject*) self,

Objects/clinic/bytearrayobject.c.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,10 @@ PyDoc_STRVAR(bytearray_partition__doc__,
214214
"\n"
215215
"This will search for the separator sep in the bytearray. If the separator is\n"
216216
"found, returns a 3-tuple containing the part before the separator, the\n"
217-
"separator itself, and the part after it.\n"
217+
"separator itself, and the part after it as new bytearray objects.\n"
218218
"\n"
219-
"If the separator is not found, returns a 3-tuple containing the original\n"
220-
"bytearray object and two empty bytearray objects.");
219+
"If the separator is not found, returns a 3-tuple containing the copy of the\n"
220+
"original bytearray object and two empty bytearray objects.");
221221

222222
#define BYTEARRAY_PARTITION_METHODDEF \
223223
{"partition", (PyCFunction)bytearray_partition, METH_O, bytearray_partition__doc__},
@@ -226,14 +226,15 @@ PyDoc_STRVAR(bytearray_rpartition__doc__,
226226
"rpartition($self, sep, /)\n"
227227
"--\n"
228228
"\n"
229-
"Partition the bytes into three parts using the given separator.\n"
229+
"Partition the bytearray into three parts using the given separator.\n"
230230
"\n"
231-
"This will search for the separator sep in the bytearray, starting and the end.\n"
231+
"This will search for the separator sep in the bytearray, starting at the end.\n"
232232
"If the separator is found, returns a 3-tuple containing the part before the\n"
233-
"separator, the separator itself, and the part after it.\n"
233+
"separator, the separator itself, and the part after it as new bytearray\n"
234+
"objects.\n"
234235
"\n"
235236
"If the separator is not found, returns a 3-tuple containing two empty bytearray\n"
236-
"objects and the original bytearray object.");
237+
"objects and the copy of the original bytearray object.");
237238

238239
#define BYTEARRAY_RPARTITION_METHODDEF \
239240
{"rpartition", (PyCFunction)bytearray_rpartition, METH_O, bytearray_rpartition__doc__},
@@ -711,4 +712,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored))
711712
{
712713
return bytearray_sizeof_impl(self);
713714
}
714-
/*[clinic end generated code: output=e53f10084457a46b input=a9049054013a1b77]*/
715+
/*[clinic end generated code: output=c2804d009182328c input=a9049054013a1b77]*/

Objects/clinic/bytesobject.c.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ PyDoc_STRVAR(bytes_rpartition__doc__,
8686
"\n"
8787
"Partition the bytes into three parts using the given separator.\n"
8888
"\n"
89-
"This will search for the separator sep in the bytes, starting and the end. If\n"
89+
"This will search for the separator sep in the bytes, starting at the end. If\n"
9090
"the separator is found, returns a 3-tuple containing the part before the\n"
9191
"separator, the separator itself, and the part after it.\n"
9292
"\n"
@@ -499,4 +499,4 @@ bytes_fromhex(PyTypeObject *type, PyObject *arg)
499499
exit:
500500
return return_value;
501501
}
502-
/*[clinic end generated code: output=9e3374bd7d04c163 input=a9049054013a1b77]*/
502+
/*[clinic end generated code: output=fc9e02359cc56d36 input=a9049054013a1b77]*/

Objects/clinic/unicodeobject.c.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ PyDoc_STRVAR(unicode_rpartition__doc__,
682682
"\n"
683683
"Partition the string into three parts using the given separator.\n"
684684
"\n"
685-
"This will search for the separator in the string, starting and the end. If\n"
685+
"This will search for the separator in the string, starting at the end. If\n"
686686
"the separator is found, returns a 3-tuple containing the part before the\n"
687687
"separator, the separator itself, and the part after it.\n"
688688
"\n"
@@ -930,4 +930,4 @@ unicode_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored))
930930
{
931931
return unicode_sizeof_impl(self);
932932
}
933-
/*[clinic end generated code: output=8fd799fd7f2cc724 input=a9049054013a1b77]*/
933+
/*[clinic end generated code: output=816292e81a8a732e input=a9049054013a1b77]*/

Objects/unicodeobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13067,7 +13067,7 @@ str.rpartition as unicode_rpartition = str.partition
1306713067
1306813068
Partition the string into three parts using the given separator.
1306913069
13070-
This will search for the separator in the string, starting and the end. If
13070+
This will search for the separator in the string, starting at the end. If
1307113071
the separator is found, returns a 3-tuple containing the part before the
1307213072
separator, the separator itself, and the part after it.
1307313073
@@ -13077,7 +13077,7 @@ and the original string.
1307713077

1307813078
static PyObject *
1307913079
unicode_rpartition(PyObject *self, PyObject *sep)
13080-
/*[clinic end generated code: output=1aa13cf1156572aa input=e77c7acb69bdfca6]*/
13080+
/*[clinic end generated code: output=1aa13cf1156572aa input=c4b7db3ef5cf336a]*/
1308113081
{
1308213082
return PyUnicode_RPartition(self, sep);
1308313083
}

0 commit comments

Comments
 (0)