Skip to content

Commit 5623ac8

Browse files
shihai1991 authored and
zhangyangyu committed
bpo-37476: Adding tests for asutf8 and asutf8andsize (GH-14531)
1 parent eed5e9a commit 5623ac8

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

Lib/test/test_unicode.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2819,6 +2819,34 @@ def test_asucs4(self):
28192819
self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
28202820
self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
28212821

2822+
# Test PyUnicode_AsUTF8()
2823+
@support.cpython_only
def test_asutf8(self):
    """Check the bytes produced by _testcapi.unicode_asutf8().

    Covers code points whose UTF-8 form is two, three and four bytes
    long, and verifies that a string containing lone surrogates is
    rejected with UnicodeEncodeError.
    """
    from _testcapi import unicode_asutf8

    expected_encodings = (
        ('\u0100', b'\xc4\x80'),                # BMP, two bytes
        ('\uffff', b'\xef\xbf\xbf'),            # BMP, three bytes
        (chr(0x10ffff), b'\xf4\x8f\xbf\xbf'),   # non-BMP, four bytes
    )
    for text, encoded in expected_encodings:
        self.assertEqual(unicode_asutf8(text), encoded)

    # Lone surrogates have no valid UTF-8 representation.
    with self.assertRaises(UnicodeEncodeError):
        unicode_asutf8('a\ud800b\udfffc')
2835+
2836+
# Test PyUnicode_AsUTF8AndSize()
2837+
@support.cpython_only
def test_asutf8andsize(self):
    """Check the (bytes, size) pairs from _testcapi.unicode_asutf8andsize().

    Covers code points whose UTF-8 form is two, three and four bytes
    long, and verifies that a string containing lone surrogates is
    rejected with UnicodeEncodeError.
    """
    from _testcapi import unicode_asutf8andsize

    expected_results = (
        ('\u0100', (b'\xc4\x80', 2)),               # BMP, two bytes
        ('\uffff', (b'\xef\xbf\xbf', 3)),           # BMP, three bytes
        (chr(0x10ffff), (b'\xf4\x8f\xbf\xbf', 4)),  # non-BMP, four bytes
    )
    for text, encoded_and_size in expected_results:
        self.assertEqual(unicode_asutf8andsize(text), encoded_and_size)

    # Lone surrogates have no valid UTF-8 representation.
    with self.assertRaises(UnicodeEncodeError):
        unicode_asutf8andsize('a\ud800b\udfffc')
2849+
28222850
# Test PyUnicode_FindChar()
28232851
@support.cpython_only
28242852
def test_findchar(self):

Modules/_testcapimodule.c

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1921,6 +1921,48 @@ unicode_asucs4(PyObject *self, PyObject *args)
19211921
return result;
19221922
}
19231923

1924+
/* Test helper for PyUnicode_AsUTF8().
 *
 * Expects a single str argument ("U" format) and returns a new bytes
 * object built from the NUL-terminated buffer PyUnicode_AsUTF8() yields.
 * Returns NULL when argument parsing or PyUnicode_AsUTF8() fails.
 */
static PyObject *
unicode_asutf8(PyObject *self, PyObject *args)
{
    PyObject *str_arg;
    const char *utf8;

    if (!PyArg_ParseTuple(args, "U", &str_arg)) {
        return NULL;
    }

    utf8 = PyUnicode_AsUTF8(str_arg);

    /* A NULL buffer means the call failed; propagate it unchanged. */
    return (utf8 != NULL) ? PyBytes_FromString(utf8) : NULL;
}
1941+
1942+
/* Test helper for PyUnicode_AsUTF8AndSize().
 *
 * Expects a single str argument ("U" format) and returns a 2-tuple
 * (bytes, size): the UTF-8 buffer as a bytes object and the length
 * reported by PyUnicode_AsUTF8AndSize().
 * Returns NULL when argument parsing, the conversion, or the bytes
 * allocation fails.
 */
static PyObject *
unicode_asutf8andsize(PyObject *self, PyObject *args)
{
    PyObject *unicode, *result;
    const char *buffer;
    Py_ssize_t utf8_len;

    if (!PyArg_ParseTuple(args, "U", &unicode)) {
        return NULL;
    }

    buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len);
    if (buffer == NULL) {
        return NULL;
    }

    /* Copy exactly utf8_len bytes rather than stopping at the first NUL:
       a str may contain embedded '\0' characters, and the bytes object
       must agree with the size returned alongside it. */
    result = PyBytes_FromStringAndSize(buffer, utf8_len);
    if (result == NULL) {
        return NULL;
    }

    /* "N" hands our reference to result over to the new tuple. */
    return Py_BuildValue("(Nn)", result, utf8_len);
}
1965+
19241966
static PyObject *
19251967
unicode_findchar(PyObject *self, PyObject *args)
19261968
{
@@ -5174,6 +5216,8 @@ static PyMethodDef TestMethods[] = {
51745216
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
51755217
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
51765218
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
5219+
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
5220+
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
51775221
{"unicode_findchar", unicode_findchar, METH_VARARGS},
51785222
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
51795223
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},

0 commit comments

Comments
 (0)