From 9f1012d37cc499306ef432bfd1eff99bbfbaf660 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sat, 15 Aug 2020 15:25:51 -0500 Subject: [PATCH 01/37] Reimplement memrchr (for Mac) --- src/cstring.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index f279226..45befdb 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -11,6 +11,17 @@ struct cstring { #define CSTRING_VALUE_AT(self, i) (&CSTRING_VALUE(self)[(i)]) +/* + * memrchr not available on some systems, so reimplement. + */ +const char *_memrchr(const char *s, int c, size_t n) { + for(const char *p = s + n - 1; p >= s; --p) { + if(*p == c) + return p; + } + return NULL; +} + static PyObject *_cstring_new(PyTypeObject *type, const char *value, size_t len) { struct cstring *new = (struct cstring *)type->tp_alloc(type, len + 1); new->hash = -1; @@ -268,7 +279,7 @@ static const char *_substr_params_str(const struct _substr_params *params) { static const char *_substr_params_rstr(const struct _substr_params *params) { const char *p = params->end - params->substr_len + 1; for(;;) { - p = memrchr(params->start, *params->substr, p - params->start); + p = _memrchr(params->start, *params->substr, p - params->start); if(!p) goto done; if(memcmp(p, params->substr, params->substr_len) == 0) @@ -316,6 +327,7 @@ PyObject *cstring_rfind(PyObject *self, PyObject *args) { return NULL; const char *p = _substr_params_rstr(&params); + if(!p) return PyLong_FromLong(-1); From 01777ff0f7da300a35e8ba2f7b027d38536dcfae Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 20:45:58 -0500 Subject: [PATCH 02/37] Apply PyObject_TypeCheck when possible --- src/cstring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index 5b941fa..da9ab76 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -49,7 +49,7 @@ static void cstring_dealloc(PyObject *self) { static PyTypeObject cstring_type; static int _ensure_cstring(PyObject 
*self) { - if(Py_TYPE(self) == &cstring_type) + if(PyObject_TypeCheck(self, &cstring_type)) return 1; PyErr_Format( PyExc_TypeError, @@ -201,7 +201,7 @@ static PyObject *cstring_subscript(PyObject *self, PyObject *key) { static const char *_obj_to_utf8(PyObject *o, Py_ssize_t *len_p) { if(PyUnicode_Check(o)) return PyUnicode_AsUTF8AndSize(o, len_p); - if(Py_TYPE(o) == &cstring_type) { + if(PyObject_TypeCheck(o, &cstring_type)) { *len_p = cstring_len(o); return CSTRING_VALUE(o); } From d3fbffc268358d100ae9d566c666b29428b094aa Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 21:13:05 -0500 Subject: [PATCH 03/37] Replace 'new empty' macro --- src/cstring.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index da9ab76..2e99269 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -20,6 +20,8 @@ struct cstring { char value[]; }; +static PyTypeObject cstring_type; + #define CSTRING_HASH(self) (((struct cstring *)self)->hash) #define CSTRING_VALUE(self) (((struct cstring *)self)->value) #define CSTRING_VALUE_AT(self, i) (&CSTRING_VALUE(self)[(i)]) @@ -32,7 +34,9 @@ static PyObject *_cstring_new(PyTypeObject *type, const char *value, size_t len) return (PyObject *)new; } -#define CSTRING_NEW_EMPTY(tp) (_cstring_new(tp, "", 0)) +static PyObject *cstring_new_empty(void) { + return _cstring_new(&cstring_type, "", 0); +} static PyObject *cstring_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { char *value = NULL; @@ -46,8 +50,6 @@ static void cstring_dealloc(PyObject *self) { Py_TYPE(self)->tp_free(self); } -static PyTypeObject cstring_type; - static int _ensure_cstring(PyObject *self) { if(PyObject_TypeCheck(self, &cstring_type)) return 1; @@ -127,7 +129,7 @@ static PyObject *cstring_repeat(PyObject *self, Py_ssize_t count) { if(!_ensure_cstring(self)) return NULL; if(count <= 0) - return CSTRING_NEW_EMPTY(Py_TYPE(self)); + return cstring_new_empty(); Py_ssize_t size = (cstring_len(self) * 
count) + 1; From 96a7e03a455b4ea187955741ea7ed7de6f124ba0 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 21:29:17 -0500 Subject: [PATCH 04/37] Introduce alloc macro --- src/cstring.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index 2e99269..19fde83 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -26,8 +26,10 @@ static PyTypeObject cstring_type; #define CSTRING_VALUE(self) (((struct cstring *)self)->value) #define CSTRING_VALUE_AT(self, i) (&CSTRING_VALUE(self)[(i)]) +#define CSTRING_ALLOC(tp, len) ((struct cstring *)(tp)->tp_alloc((tp), (len))) + static PyObject *_cstring_new(PyTypeObject *type, const char *value, size_t len) { - struct cstring *new = (struct cstring *)type->tp_alloc(type, len + 1); + struct cstring *new = CSTRING_ALLOC(type, len + 1); new->hash = -1; memcpy(new->value, value, len); new->value[len] = '\0'; @@ -119,7 +121,7 @@ static PyObject *cstring_concat(PyObject *left, PyObject *right) { Py_ssize_t size = cstring_len(left) + cstring_len(right) + 1; - struct cstring *new = (struct cstring *)Py_TYPE(left)->tp_alloc(Py_TYPE(left), size); + struct cstring *new = CSTRING_ALLOC(Py_TYPE(left), size); memcpy(new->value, CSTRING_VALUE(left), Py_SIZE(left)); memcpy(&new->value[cstring_len(left)], CSTRING_VALUE(right), Py_SIZE(right)); return (PyObject *)new; @@ -133,7 +135,7 @@ static PyObject *cstring_repeat(PyObject *self, Py_ssize_t count) { Py_ssize_t size = (cstring_len(self) * count) + 1; - struct cstring *new = (struct cstring *)Py_TYPE(self)->tp_alloc(Py_TYPE(self), size); + struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), size); for(Py_ssize_t i = 0; i < size - 1; i += cstring_len(self)) { memcpy(&new->value[i], CSTRING_VALUE(self), Py_SIZE(self)); } @@ -180,7 +182,7 @@ static PyObject *_cstring_subscript_slice(PyObject *self, PyObject *slice) { return NULL; } - struct cstring *new = (struct cstring *)Py_TYPE(self)->tp_alloc(Py_TYPE(self), slicelen + 1); + 
struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), slicelen + 1); char *src = CSTRING_VALUE_AT(self, start); for(Py_ssize_t i = 0; i < slicelen; ++i) { new->value[i] = *src; From c208d181c660a97fe27da393b234355938ee2107 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 21:36:52 -0500 Subject: [PATCH 05/37] Indicate qualified type name --- src/cstring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 19fde83..2b40ee6 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -404,7 +404,7 @@ static PyMethodDef cstring_methods[] = { static PyTypeObject cstring_type = { PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "cstring", + .tp_name = "cstring.cstring", .tp_doc = "", .tp_basicsize = sizeof(struct cstring), .tp_itemsize = sizeof(char), From 38dbc46c798c6b1cc36336b94bfdf4208deffd3f Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 22:15:09 -0500 Subject: [PATCH 06/37] Support bytes initialization --- src/cstring.c | 32 +++++++++++++++++++++++++++----- test/test_slots.py | 4 ++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index 2b40ee6..1a4a5cd 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -28,7 +28,7 @@ static PyTypeObject cstring_type; #define CSTRING_ALLOC(tp, len) ((struct cstring *)(tp)->tp_alloc((tp), (len))) -static PyObject *_cstring_new(PyTypeObject *type, const char *value, size_t len) { +static PyObject *_cstring_new(PyTypeObject *type, const char *value, Py_ssize_t len) { struct cstring *new = CSTRING_ALLOC(type, len + 1); new->hash = -1; memcpy(new->value, value, len); @@ -40,12 +40,34 @@ static PyObject *cstring_new_empty(void) { return _cstring_new(&cstring_type, "", 0); } +static const char *_obj_as_string_and_size(PyObject *o, Py_ssize_t *s) { + if(PyUnicode_Check(o)) + return PyUnicode_AsUTF8AndSize(o, s); + + if(PyBytes_Check(o)) { + char *buffer = NULL; + PyBytes_AsStringAndSize(o, &buffer, s); + return buffer; + } + 
+ PyErr_Format( + PyExc_TypeError, + "Invalid initialization type: %s.", + Py_TYPE(o)->tp_name); + + *s = -1; + return NULL; +} + static PyObject *cstring_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - char *value = NULL; - if(!PyArg_ParseTuple(args, "s", &value)) + PyObject *argobj = NULL; + if(!PyArg_ParseTuple(args, "O", &argobj)) + return NULL; + Py_ssize_t len = 0; + const char *buffer = _obj_as_string_and_size(argobj, &len); + if(!buffer) return NULL; - size_t len = strlen(value); - return _cstring_new(type, value, len); + return _cstring_new(type, buffer, len); } static void cstring_dealloc(PyObject *self) { diff --git a/test/test_slots.py b/test/test_slots.py index 9726425..6b89174 100644 --- a/test/test_slots.py +++ b/test/test_slots.py @@ -1,6 +1,10 @@ from cstring import cstring +def test_new_from_bytes(): + assert cstring(b'hello, world') == cstring('hello, world') + + def test_str(): result = cstring('hello, world') assert str(result) == 'hello, world' From 08cbb987cf932df1e48751f2194b4b9eef7b30c9 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 22:25:19 -0500 Subject: [PATCH 07/37] Support initialization from existing cstrings --- src/cstring.c | 7 +++++++ test/test_slots.py | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/src/cstring.c b/src/cstring.c index 1a4a5cd..b6b4db1 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -63,10 +63,17 @@ static PyObject *cstring_new(PyTypeObject *type, PyObject *args, PyObject *kwarg PyObject *argobj = NULL; if(!PyArg_ParseTuple(args, "O", &argobj)) return NULL; + + if(PyObject_TypeCheck(argobj, type)) { + Py_INCREF(argobj); + return argobj; + } + Py_ssize_t len = 0; const char *buffer = _obj_as_string_and_size(argobj, &len); if(!buffer) return NULL; + return _cstring_new(type, buffer, len); } diff --git a/test/test_slots.py b/test/test_slots.py index 6b89174..b9b1722 100644 --- a/test/test_slots.py +++ b/test/test_slots.py @@ -5,6 +5,10 @@ def test_new_from_bytes(): assert 
cstring(b'hello, world') == cstring('hello, world') +def test_new_from_cstring(): + assert cstring(cstring('hello, world')) == cstring('hello, world') + + def test_str(): result = cstring('hello, world') assert str(result) == 'hello, world' From a4fb014db3adf704db23ee1ca76cbbea61cc2082 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 22:38:02 -0500 Subject: [PATCH 08/37] Support initialization from bytearray --- src/cstring.c | 5 +++++ test/test_slots.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/cstring.c b/src/cstring.c index b6b4db1..7f7f71c 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -50,6 +50,11 @@ static const char *_obj_as_string_and_size(PyObject *o, Py_ssize_t *s) { return buffer; } + if(PyByteArray_Check(o)) { + *s = PyByteArray_Size(o); + return PyByteArray_AsString(o); + } + PyErr_Format( PyExc_TypeError, "Invalid initialization type: %s.", diff --git a/test/test_slots.py b/test/test_slots.py index b9b1722..445cb9f 100644 --- a/test/test_slots.py +++ b/test/test_slots.py @@ -5,6 +5,10 @@ def test_new_from_bytes(): assert cstring(b'hello, world') == cstring('hello, world') +def test_new_from_bytearray(): + assert cstring(bytearray('hello, world', 'utf8')) == cstring('hello, world') + + def test_new_from_cstring(): assert cstring(cstring('hello, world')) == cstring('hello, world') From 1e34bc077c2fb7f26e9a6c64aee84ae23fab6648 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 23:37:38 -0500 Subject: [PATCH 09/37] Support initialization from buffer protocol --- src/cstring.c | 16 ++++++++-------- test/test_slots.py | 9 +++++++++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index 7f7f71c..0e597a8 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -44,17 +44,17 @@ static const char *_obj_as_string_and_size(PyObject *o, Py_ssize_t *s) { if(PyUnicode_Check(o)) return PyUnicode_AsUTF8AndSize(o, s); - if(PyBytes_Check(o)) { - char *buffer = NULL; - 
PyBytes_AsStringAndSize(o, &buffer, s); + if(PyObject_CheckBuffer(o)) { + /* handles bytes, bytearrays, arrays, memoryviews, etc. */ + Py_buffer view; + if(PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) < 0) + return NULL; + *s = view.len; + const char *buffer = view.buf; + PyBuffer_Release(&view); return buffer; } - if(PyByteArray_Check(o)) { - *s = PyByteArray_Size(o); - return PyByteArray_AsString(o); - } - PyErr_Format( PyExc_TypeError, "Invalid initialization type: %s.", diff --git a/test/test_slots.py b/test/test_slots.py index 445cb9f..b72ae68 100644 --- a/test/test_slots.py +++ b/test/test_slots.py @@ -9,6 +9,15 @@ def test_new_from_bytearray(): assert cstring(bytearray('hello, world', 'utf8')) == cstring('hello, world') +def test_new_from_array(): + import array + assert cstring(array.array('B', b'hello, world')) == cstring('hello, world') + + +def test_new_from_memoryview(): + assert cstring(memoryview(b'hello, world')) == cstring('hello, world') + + def test_new_from_cstring(): assert cstring(cstring('hello, world')) == cstring('hello, world') From 0cc1ce6a3c02e43ec69c01d0ba2dde0e3ed9c8a4 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sun, 28 Mar 2021 23:50:11 -0500 Subject: [PATCH 10/37] Update README --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9d14af5..edce698 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,11 @@ Alternate string representation to the built-in `str` type. * Uses C-string representation internally. -* Memory is allocated in one continuous buffer to reduce pointer-hopping. +* Memory is allocated contiguously to reduce pointer-hopping. * UTF-8 encoding. * `len` returns size in _bytes_ (not including terminating zero-byte). * Random access (to _bytes_, *not* Unicode code points) is supported with indices and slices. +* Supports initialization from `str`, `bytes`, `bytearray`, `array`, `memoryview`, `cstring`, and other buffer protocol objects. 
## Methods @@ -85,11 +86,9 @@ Notes: * Write docs (see `str` type docs) * Write docstrings -* Fill out setup.py -* Allow initialization from bytes, bytearray, other cstrings, memoryview?, other? -* Read `__cstring__` "dunder" on objects, if available? +* Fill out setup.py classifiers * Implement iter (iterate over Unicode code points, "runes") * Implement str methods * Include start/end indexes as byte indexes? Calculate code points? Or just don't support? -* Implement buffer interface? +* Implement buffer interface * Decide subclassing protocol From 647dcbce170ce2f870b326336ce8aeea329a2e5a Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 11:59:59 -0500 Subject: [PATCH 11/37] Comment string methods TODOs --- src/cstring.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 0e597a8..4acfcdb 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -426,13 +426,52 @@ static PyMappingMethods cstring_as_mapping = { }; static PyMethodDef cstring_methods[] = { + /* TODO: capitalize */ + /* TODO: casefold */ + /* TODO: center */ {"count", cstring_count, METH_VARARGS, count__doc__}, + /* TODO: encode (decode???) 
*/ + {"endswith", cstring_endswith, METH_VARARGS, endswith__doc__}, + /* TODO: expandtabs */ {"find", cstring_find, METH_VARARGS, find__doc__}, + /* TODO: format */ + /* TODO: format_map */ {"index", cstring_index, METH_VARARGS, index__doc__}, + /* TODO: isalnum */ + /* TODO: isalpha */ + /* TODO: isascii */ + /* TODO: isdecimal */ + /* TODO: isdigit */ + /* TODO: isidentifier */ + /* TODO: islower */ + /* TODO: isnumeric */ + /* TODO: isprintable */ + /* TODO: isspace */ + /* TODO: istitle */ + /* TODO: isupper */ + /* TODO: join */ + /* TODO: ljust */ + /* TODO: lower */ + /* TODO: lstrip */ + /* TODO: maketrans */ + /* TODO: partition */ + /* TODO: removeprefix */ + /* TODO: replace */ {"rfind", cstring_rfind, METH_VARARGS, rfind__doc__}, {"rindex", cstring_rindex, METH_VARARGS, rindex__doc__}, + /* TODO: rjust */ + /* TODO: rpartition */ + /* TODO: rsplit */ + /* TODO: rstrip */ + /* TODO: split */ + /* TODO: splitlines */ {"startswith", cstring_startswith, METH_VARARGS, startswith__doc__}, - {"endswith", cstring_endswith, METH_VARARGS, endswith__doc__}, + /* TODO: strip */ + /* TODO: swapcase */ + /* TODO: title */ + /* TODO: translate */ + /* TODO: upper */ + /* TODO: zfill */ {0}, }; From d80c70502a3c92ef29ff8bcf476e0b8790a59dcd Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 13:13:49 -0500 Subject: [PATCH 12/37] Implement isalpha method --- src/cstring.c | 13 ++++++++++++- test/test_methods.py | 10 ++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 4acfcdb..9d44cef 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -359,6 +359,17 @@ PyObject *cstring_index(PyObject *self, PyObject *args) { return PyLong_FromSsize_t(p - CSTRING_VALUE(self)); } +PyDoc_STRVAR(isalpha__doc__, ""); +PyObject *cstring_isalpha(PyObject *self, PyObject *args) { + const char *p = CSTRING_VALUE(self); + while(*p) { + if(!isalpha(*p)) + Py_RETURN_FALSE; + ++p; + } + Py_RETURN_TRUE; +} + 
PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -438,7 +449,7 @@ static PyMethodDef cstring_methods[] = { /* TODO: format_map */ {"index", cstring_index, METH_VARARGS, index__doc__}, /* TODO: isalnum */ - /* TODO: isalpha */ + {"isalpha", cstring_isalpha, METH_VARARGS, isalpha__doc__}, /* TODO: isascii */ /* TODO: isdecimal */ /* TODO: isdigit */ diff --git a/test/test_methods.py b/test/test_methods.py index 67bd3f5..e720d73 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -53,6 +53,16 @@ def test_index_missing(): return target.index('lo', 0, 4) +def test_isalpha_True(): + target = cstring('hello') + assert target.isalpha() == True + + +def test_isalpha_False(): + target = cstring('hello123') + assert target.isalpha() == False + + def test_rfind(): target = cstring('hello') assert target.rfind('o') == 4 From 96adc3967184c43b84590651746da43dedc1f0ad Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 13:41:21 -0500 Subject: [PATCH 13/37] Implement isdigit method --- src/cstring.c | 13 ++++++++++++- test/test_methods.py | 10 ++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 9d44cef..adef85b 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -370,6 +370,17 @@ PyObject *cstring_isalpha(PyObject *self, PyObject *args) { Py_RETURN_TRUE; } +PyDoc_STRVAR(isdigit__doc__, ""); +PyObject *cstring_isdigit(PyObject *self, PyObject *args) { + const char *p = CSTRING_VALUE(self); + while(*p) { + if(!isdigit(*p)) + Py_RETURN_FALSE; + ++p; + } + Py_RETURN_TRUE; +} + PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -452,7 +463,7 @@ static PyMethodDef cstring_methods[] = { {"isalpha", cstring_isalpha, METH_VARARGS, isalpha__doc__}, /* TODO: isascii */ /* TODO: isdecimal */ - /* TODO: isdigit */ + {"isdigit", cstring_isdigit, METH_VARARGS, 
isdigit__doc__}, /* TODO: isidentifier */ /* TODO: islower */ /* TODO: isnumeric */ diff --git a/test/test_methods.py b/test/test_methods.py index e720d73..c8655ed 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -63,6 +63,16 @@ def test_isalpha_False(): assert target.isalpha() == False +def test_isdigit_True(): + target = cstring('123') + assert target.isdigit() == True + + +def test_isdigit_False(): + target = cstring('123.4') + assert target.isdigit() == False + + def test_rfind(): target = cstring('hello') assert target.rfind('o') == 4 From 0b944f25a5c7d43baf7fe265f30be3ed5b7c3e3c Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 13:45:31 -0500 Subject: [PATCH 14/37] Implement isalnum method --- src/cstring.c | 13 ++++++++++++- test/test_methods.py | 10 ++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index adef85b..85a6fd0 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -359,6 +359,17 @@ PyObject *cstring_index(PyObject *self, PyObject *args) { return PyLong_FromSsize_t(p - CSTRING_VALUE(self)); } +PyDoc_STRVAR(isalnum__doc__, ""); +PyObject *cstring_isalnum(PyObject *self, PyObject *args) { + const char *p = CSTRING_VALUE(self); + while(*p) { + if(!isalnum(*p)) + Py_RETURN_FALSE; + ++p; + } + Py_RETURN_TRUE; +} + PyDoc_STRVAR(isalpha__doc__, ""); PyObject *cstring_isalpha(PyObject *self, PyObject *args) { const char *p = CSTRING_VALUE(self); @@ -459,7 +470,7 @@ static PyMethodDef cstring_methods[] = { /* TODO: format */ /* TODO: format_map */ {"index", cstring_index, METH_VARARGS, index__doc__}, - /* TODO: isalnum */ + {"isalnum", cstring_isalnum, METH_VARARGS, isalnum__doc__}, {"isalpha", cstring_isalpha, METH_VARARGS, isalpha__doc__}, /* TODO: isascii */ /* TODO: isdecimal */ diff --git a/test/test_methods.py b/test/test_methods.py index c8655ed..dccdfd8 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -53,6 +53,16 @@ def test_index_missing(): return 
target.index('lo', 0, 4) +def test_isalnum_True(): + target = cstring('hello123') + assert target.isalnum() == True + + +def test_isalnum_False(): + target = cstring('hello_123') + assert target.isalnum() == False + + def test_isalpha_True(): target = cstring('hello') assert target.isalpha() == True From 456902db8f476f8aec598307446a104fb43b9042 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 14:21:28 -0500 Subject: [PATCH 15/37] Implement islower method --- src/cstring.c | 23 ++++++++++++++++++++++- test/test_methods.py | 15 +++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 85a6fd0..c978b1f 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -392,6 +392,27 @@ PyObject *cstring_isdigit(PyObject *self, PyObject *args) { Py_RETURN_TRUE; } +PyDoc_STRVAR(islower__doc__, ""); +PyObject *cstring_islower(PyObject *self, PyObject *args) { + const char *p = CSTRING_VALUE(self); + while(*p) { + if(isalpha(*p)) { + if(!islower(*p)) + Py_RETURN_FALSE; + ++p; + while(*p) { + if(isalpha(*p) && !islower(*p)) + Py_RETURN_FALSE; + ++p; + } + /* at least one lc alpha and no uc alphas */ + Py_RETURN_TRUE; + } + ++p; + } + Py_RETURN_FALSE; +} + PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -476,7 +497,7 @@ static PyMethodDef cstring_methods[] = { /* TODO: isdecimal */ {"isdigit", cstring_isdigit, METH_VARARGS, isdigit__doc__}, /* TODO: isidentifier */ - /* TODO: islower */ + {"islower", cstring_islower, METH_VARARGS, islower__doc__}, /* TODO: isnumeric */ /* TODO: isprintable */ /* TODO: isspace */ diff --git a/test/test_methods.py b/test/test_methods.py index dccdfd8..f4b33d2 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -83,6 +83,21 @@ def test_isdigit_False(): assert target.isdigit() == False +def test_islower_numeric(): + target = cstring('123') + assert target.islower() == False + + +def 
test_islower_alnum_lc(): + target = cstring('hello123') + assert target.islower() == True + + +def test_islower_alnum_uc(): + target = cstring('Hello123') + assert target.islower() == False + + def test_rfind(): target = cstring('hello') assert target.rfind('o') == 4 From 5cf468103a200e9ac8090bca1b31e4fb1bd0285a Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 14:30:41 -0500 Subject: [PATCH 16/37] Implement isprintable method --- src/cstring.c | 13 ++++++++++++- test/test_methods.py | 10 ++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index c978b1f..40d3279 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -413,6 +413,17 @@ PyObject *cstring_islower(PyObject *self, PyObject *args) { Py_RETURN_FALSE; } +PyDoc_STRVAR(isprintable__doc__, ""); +PyObject *cstring_isprintable(PyObject *self, PyObject *args) { + const char *p = CSTRING_VALUE(self); + while(*p) { + if(!isprint(*p)) + Py_RETURN_FALSE; + ++p; + } + Py_RETURN_TRUE; +} + PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -499,7 +510,7 @@ static PyMethodDef cstring_methods[] = { /* TODO: isidentifier */ {"islower", cstring_islower, METH_VARARGS, islower__doc__}, /* TODO: isnumeric */ - /* TODO: isprintable */ + {"isprintable", cstring_isprintable, METH_VARARGS, isprintable__doc__}, /* TODO: isspace */ /* TODO: istitle */ /* TODO: isupper */ diff --git a/test/test_methods.py b/test/test_methods.py index f4b33d2..b96e1b5 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -98,6 +98,16 @@ def test_islower_alnum_uc(): assert target.islower() == False +def test_isprintable_True(): + target = cstring('hello, world') + assert target.isprintable() == True + + +def test_isprintable_False(): + target = cstring(b'hello, world\x01') + assert target.isprintable() == False + + def test_rfind(): target = cstring('hello') assert target.rfind('o') == 4 From 
6a5916bf94346c076da51724531330b17222ea57 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 16:23:59 -0500 Subject: [PATCH 17/37] Implement isspace method --- src/cstring.c | 13 ++++++++++++- test/test_methods.py | 15 +++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 40d3279..0f9a8ab 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -424,6 +424,17 @@ PyObject *cstring_isprintable(PyObject *self, PyObject *args) { Py_RETURN_TRUE; } +PyDoc_STRVAR(isspace__doc__, ""); +PyObject *cstring_isspace(PyObject *self, PyObject *args) { + const char *p = CSTRING_VALUE(self); + while(*p) { + if(!isspace(*p)) + Py_RETURN_FALSE; + ++p; + } + return PyBool_FromLong(p != CSTRING_VALUE(self)); +} + PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -511,7 +522,7 @@ static PyMethodDef cstring_methods[] = { {"islower", cstring_islower, METH_VARARGS, islower__doc__}, /* TODO: isnumeric */ {"isprintable", cstring_isprintable, METH_VARARGS, isprintable__doc__}, - /* TODO: isspace */ + {"isspace", cstring_isspace, METH_VARARGS, isspace__doc__}, /* TODO: istitle */ /* TODO: isupper */ /* TODO: join */ diff --git a/test/test_methods.py b/test/test_methods.py index b96e1b5..27c04da 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -108,6 +108,21 @@ def test_isprintable_False(): assert target.isprintable() == False +def test_isspace_empty(): + target = cstring('') + assert target.isspace() == False + + +def test_isspace_True(): + target = cstring('\t\n ') + assert target.isspace() == True + + +def test_isspace_False(): + target = cstring('hello, world\n') + assert target.isspace() == False + + def test_rfind(): target = cstring('hello') assert target.rfind('o') == 4 From 6db73c9f9ccdb6819f0bfb1175102561660c4cb8 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 16:37:24 -0500 Subject: [PATCH 18/37] Implement 
isupper method --- src/cstring.c | 23 ++++++++++++++++++++++- test/test_methods.py | 15 +++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 0f9a8ab..2b59f72 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -435,6 +435,27 @@ PyObject *cstring_isspace(PyObject *self, PyObject *args) { return PyBool_FromLong(p != CSTRING_VALUE(self)); } +PyDoc_STRVAR(isupper__doc__, ""); +PyObject *cstring_isupper(PyObject *self, PyObject *args) { + const char *p = CSTRING_VALUE(self); + while(*p) { + if(isalpha(*p)) { + if(!isupper(*p)) + Py_RETURN_FALSE; + ++p; + while(*p) { + if(isalpha(*p) && !isupper(*p)) + Py_RETURN_FALSE; + ++p; + } + /* at least one uc alpha and no lc alphas */ + Py_RETURN_TRUE; + } + ++p; + } + Py_RETURN_FALSE; +} + PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -524,7 +545,7 @@ static PyMethodDef cstring_methods[] = { {"isprintable", cstring_isprintable, METH_VARARGS, isprintable__doc__}, {"isspace", cstring_isspace, METH_VARARGS, isspace__doc__}, /* TODO: istitle */ - /* TODO: isupper */ + {"isupper", cstring_isupper, METH_VARARGS, isupper__doc__}, /* TODO: join */ /* TODO: ljust */ /* TODO: lower */ diff --git a/test/test_methods.py b/test/test_methods.py index 27c04da..6f27c14 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -123,6 +123,21 @@ def test_isspace_False(): assert target.isspace() == False +def test_isupper_numeric(): + target = cstring('123') + assert target.isupper() == False + + +def test_isupper_alnum_uc(): + target = cstring('HELLO123') + assert target.isupper() == True + + +def test_isupper_alnum_lc(): + target = cstring('HELLo123') + assert target.isupper() == False + + def test_rfind(): target = cstring('hello') assert target.rfind('o') == 4 From 24c4e5c182c353fbad39b8a717dff39de5ee841d Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 16:53:26 -0500 Subject: [PATCH 
19/37] Implement lower method --- src/cstring.c | 14 +++++++++++++- test/test_methods.py | 5 +++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 2b59f72..38011b0 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -456,6 +456,18 @@ PyObject *cstring_isupper(PyObject *self, PyObject *args) { Py_RETURN_FALSE; } +PyDoc_STRVAR(lower__doc__, ""); +PyObject *cstring_lower(PyObject *self, PyObject *args) { + struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), Py_SIZE(self)); + const char *s = CSTRING_VALUE(self); + char *d = CSTRING_VALUE(new); + + while((*d++ = tolower(*s++)) != '\0') + ; + + return (PyObject *)new; +} + PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -548,7 +560,7 @@ static PyMethodDef cstring_methods[] = { {"isupper", cstring_isupper, METH_VARARGS, isupper__doc__}, /* TODO: join */ /* TODO: ljust */ - /* TODO: lower */ + {"lower", cstring_lower, METH_VARARGS, lower__doc__}, /* TODO: lstrip */ /* TODO: maketrans */ /* TODO: partition */ diff --git a/test/test_methods.py b/test/test_methods.py index 6f27c14..0e75a0e 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -138,6 +138,11 @@ def test_isupper_alnum_lc(): assert target.isupper() == False +def test_lower(): + target = cstring('HELLO123') + assert target.lower() == cstring('hello123') + + def test_rfind(): target = cstring('hello') assert target.rfind('o') == 4 From 2e1c31558492d07e31e588a3fcd5eb17dd30e685 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 16:59:35 -0500 Subject: [PATCH 20/37] Implement upper method --- src/cstring.c | 14 +++++++++++++- test/test_methods.py | 5 +++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 38011b0..67141af 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -521,6 +521,18 @@ PyObject *cstring_endswith(PyObject *self, PyObject *args) { return 
PyBool_FromLong(cmp == 0); } +PyDoc_STRVAR(upper__doc__, ""); +PyObject *cstring_upper(PyObject *self, PyObject *args) { + struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), Py_SIZE(self)); + const char *s = CSTRING_VALUE(self); + char *d = CSTRING_VALUE(new); + + while((*d++ = toupper(*s++)) != '\0') + ; + + return (PyObject *)new; +} + static PySequenceMethods cstring_as_sequence = { .sq_length = cstring_len, .sq_concat = cstring_concat, @@ -579,7 +591,7 @@ static PyMethodDef cstring_methods[] = { /* TODO: swapcase */ /* TODO: title */ /* TODO: translate */ - /* TODO: upper */ + {"upper", cstring_upper, METH_VARARGS, upper__doc__}, /* TODO: zfill */ {0}, }; diff --git a/test/test_methods.py b/test/test_methods.py index 0e75a0e..375f8a0 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -223,3 +223,8 @@ def test_endswith_with_start_and_end(): target = cstring('hello, world') assert target.endswith('wo', 7, 9) is True + +def test_upper(): + target = cstring('hello123') + assert target.upper() == cstring('HELLO123') + From 0f870c258e08030490641a18d731843a4b60cb91 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 17:11:15 -0500 Subject: [PATCH 21/37] Implement swapcase method --- src/cstring.c | 21 ++++++++++++++++++++- test/test_methods.py | 5 +++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 67141af..f7f7c7e 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -521,6 +521,25 @@ PyObject *cstring_endswith(PyObject *self, PyObject *args) { return PyBool_FromLong(cmp == 0); } +PyDoc_STRVAR(swapcase__doc__, ""); +PyObject *cstring_swapcase(PyObject *self, PyObject *args) { + struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), Py_SIZE(self)); + const char *s = CSTRING_VALUE(self); + char *d = CSTRING_VALUE(new); + + for(;*s; ++s, ++d) { + if(islower(*s)) { + *d = toupper(*s); + } else if(isupper(*s)) { + *d = tolower(*s); + } else { + *d = *s; + } + } + + return (PyObject *)new; +} + 
PyDoc_STRVAR(upper__doc__, ""); PyObject *cstring_upper(PyObject *self, PyObject *args) { struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), Py_SIZE(self)); @@ -588,7 +607,7 @@ static PyMethodDef cstring_methods[] = { /* TODO: splitlines */ {"startswith", cstring_startswith, METH_VARARGS, startswith__doc__}, /* TODO: strip */ - /* TODO: swapcase */ + {"swapcase", cstring_swapcase, METH_VARARGS, swapcase__doc__}, /* TODO: title */ /* TODO: translate */ {"upper", cstring_upper, METH_VARARGS, upper__doc__}, diff --git a/test/test_methods.py b/test/test_methods.py index 375f8a0..bb03dbe 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -228,3 +228,8 @@ def test_upper(): target = cstring('hello123') assert target.upper() == cstring('HELLO123') + +def test_swapcase(): + target = cstring('hElLo, WoRlD 123') + assert target.swapcase() == cstring('HeLlO, wOrLd 123') + From 4895fcc79547540a838da70f94f2fc4f199bc9b6 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Mon, 29 Mar 2021 17:18:00 -0500 Subject: [PATCH 22/37] Declare methods as METH_NOARGS where possible --- src/cstring.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index f7f7c7e..2b16856 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -577,21 +577,21 @@ static PyMethodDef cstring_methods[] = { /* TODO: format */ /* TODO: format_map */ {"index", cstring_index, METH_VARARGS, index__doc__}, - {"isalnum", cstring_isalnum, METH_VARARGS, isalnum__doc__}, - {"isalpha", cstring_isalpha, METH_VARARGS, isalpha__doc__}, + {"isalnum", cstring_isalnum, METH_NOARGS, isalnum__doc__}, + {"isalpha", cstring_isalpha, METH_NOARGS, isalpha__doc__}, /* TODO: isascii */ /* TODO: isdecimal */ - {"isdigit", cstring_isdigit, METH_VARARGS, isdigit__doc__}, + {"isdigit", cstring_isdigit, METH_NOARGS, isdigit__doc__}, /* TODO: isidentifier */ - {"islower", cstring_islower, METH_VARARGS, islower__doc__}, + {"islower", cstring_islower, METH_NOARGS, 
islower__doc__}, /* TODO: isnumeric */ - {"isprintable", cstring_isprintable, METH_VARARGS, isprintable__doc__}, - {"isspace", cstring_isspace, METH_VARARGS, isspace__doc__}, + {"isprintable", cstring_isprintable, METH_NOARGS, isprintable__doc__}, + {"isspace", cstring_isspace, METH_NOARGS, isspace__doc__}, /* TODO: istitle */ - {"isupper", cstring_isupper, METH_VARARGS, isupper__doc__}, + {"isupper", cstring_isupper, METH_NOARGS, isupper__doc__}, /* TODO: join */ /* TODO: ljust */ - {"lower", cstring_lower, METH_VARARGS, lower__doc__}, + {"lower", cstring_lower, METH_NOARGS, lower__doc__}, /* TODO: lstrip */ /* TODO: maketrans */ /* TODO: partition */ @@ -607,10 +607,10 @@ static PyMethodDef cstring_methods[] = { /* TODO: splitlines */ {"startswith", cstring_startswith, METH_VARARGS, startswith__doc__}, /* TODO: strip */ - {"swapcase", cstring_swapcase, METH_VARARGS, swapcase__doc__}, + {"swapcase", cstring_swapcase, METH_NOARGS, swapcase__doc__}, /* TODO: title */ /* TODO: translate */ - {"upper", cstring_upper, METH_VARARGS, upper__doc__}, + {"upper", cstring_upper, METH_NOARGS, upper__doc__}, /* TODO: zfill */ {0}, }; From 08ef20c17dcceee84b99ad37ec2d806a7034a3c3 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 00:29:03 -0500 Subject: [PATCH 23/37] Implement join method --- src/cstring.c | 70 +++++++++++++++++++++++++++++++++++++++++++- test/test_methods.py | 6 ++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 2b16856..e90e608 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -36,10 +36,24 @@ static PyObject *_cstring_new(PyTypeObject *type, const char *value, Py_ssize_t return (PyObject *)new; } +static PyObject *_cstring_realloc(PyObject *self, Py_ssize_t len) { + struct cstring *new = (struct cstring *)PyObject_Realloc(self, sizeof(struct cstring) + len + 1); + if(!new) + return PyErr_NoMemory(); + Py_SET_SIZE(new, len + 1); + new->hash = -1; + return (PyObject *)new; +} + +static 
PyObject *_cstring_copy(PyObject *self) { + return _cstring_new(Py_TYPE(self), CSTRING_VALUE(self), Py_SIZE(self) - 1); +} + static PyObject *cstring_new_empty(void) { return _cstring_new(&cstring_type, "", 0); } + static const char *_obj_as_string_and_size(PyObject *o, Py_ssize_t *s) { if(PyUnicode_Check(o)) return PyUnicode_AsUTF8AndSize(o, s); @@ -147,6 +161,29 @@ static Py_ssize_t cstring_len(PyObject *self) { return Py_SIZE(self) - 1; } +static PyObject *_concat_in_place(PyObject *self, PyObject *other) { + if(!other) + return PyErr_BadArgument(), NULL; + if(!_ensure_cstring(other)) + return NULL; + if(!self) + return _cstring_copy(other); /* new (mutable) copy with refcnt=1 */ + if(!_ensure_cstring(self)) + return NULL; + if(Py_REFCNT(self) > 1) + return PyErr_BadInternalCall(), NULL; + + Py_ssize_t origlen = cstring_len(self); + Py_ssize_t newlen = origlen + cstring_len(other); + PyObject *new = _cstring_realloc(self, newlen); + if(!new) + return NULL; + + memcpy(CSTRING_VALUE_AT(new, origlen), CSTRING_VALUE(other), cstring_len(other)); + *CSTRING_VALUE_AT(new, newlen) = '\0'; + return new; +} + static PyObject *cstring_concat(PyObject *left, PyObject *right) { if(!_ensure_cstring(left)) return NULL; @@ -456,6 +493,37 @@ PyObject *cstring_isupper(PyObject *self, PyObject *args) { Py_RETURN_FALSE; } +PyDoc_STRVAR(join__doc__, ""); +PyObject *cstring_join(PyObject *self, PyObject *arg) { + PyObject *iter = PyObject_GetIter(arg); + if(!iter) + return NULL; + + PyObject *result = NULL; + PyObject *item = NULL; + + while((item = PyIter_Next(iter)) != NULL) { + if(result) { + PyObject *next = _concat_in_place(result, self); + if(!next) + goto fail; + result = next; + } + PyObject *next = _concat_in_place(result, item); + if(!next) + goto fail; + Py_DECREF(item); + result = next; + } + + return result; + +fail: + Py_XDECREF(item); + Py_XDECREF(result); + return NULL; +} + PyDoc_STRVAR(lower__doc__, ""); PyObject *cstring_lower(PyObject *self, PyObject *args) { 
struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), Py_SIZE(self)); @@ -589,7 +657,7 @@ static PyMethodDef cstring_methods[] = { {"isspace", cstring_isspace, METH_NOARGS, isspace__doc__}, /* TODO: istitle */ {"isupper", cstring_isupper, METH_NOARGS, isupper__doc__}, - /* TODO: join */ + {"join", cstring_join, METH_O, join__doc__}, /* TODO: ljust */ {"lower", cstring_lower, METH_NOARGS, lower__doc__}, /* TODO: lstrip */ diff --git a/test/test_methods.py b/test/test_methods.py index bb03dbe..5993943 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -138,6 +138,12 @@ def test_isupper_alnum_lc(): assert target.isupper() == False +def test_join(): + sep = cstring(', ') + items = [cstring('hello'), cstring('world')] + assert sep.join(items) == cstring('hello, world') + + def test_lower(): target = cstring('HELLO123') assert target.lower() == cstring('hello123') From 89ec5c8b0dbf6af67738add99ccd7ab39b2005cb Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 02:25:15 -0500 Subject: [PATCH 24/37] Move REFCNT check to realloc function --- src/cstring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index e90e608..6387e1f 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -37,6 +37,8 @@ static PyObject *_cstring_new(PyTypeObject *type, const char *value, Py_ssize_t } static PyObject *_cstring_realloc(PyObject *self, Py_ssize_t len) { + if(Py_REFCNT(self) > 1) + return PyErr_BadInternalCall(), NULL; struct cstring *new = (struct cstring *)PyObject_Realloc(self, sizeof(struct cstring) + len + 1); if(!new) return PyErr_NoMemory(); @@ -170,8 +172,6 @@ static PyObject *_concat_in_place(PyObject *self, PyObject *other) { return _cstring_copy(other); /* new (mutable) copy with refcnt=1 */ if(!_ensure_cstring(self)) return NULL; - if(Py_REFCNT(self) > 1) - return PyErr_BadInternalCall(), NULL; Py_ssize_t origlen = cstring_len(self); Py_ssize_t newlen = origlen + cstring_len(other); From 
e9cbda3ddf7c98a3320320a2e9796a06ce71ac57 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 02:41:46 -0500 Subject: [PATCH 25/37] NULL-check memory allocations --- src/cstring.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/cstring.c b/src/cstring.c index 6387e1f..22ae4d3 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -30,6 +30,8 @@ static PyTypeObject cstring_type; static PyObject *_cstring_new(PyTypeObject *type, const char *value, Py_ssize_t len) { struct cstring *new = CSTRING_ALLOC(type, len + 1); + if(!new) + return NULL; new->hash = -1; memcpy(new->value, value, len); new->value[len] = '\0'; @@ -193,6 +195,8 @@ static PyObject *cstring_concat(PyObject *left, PyObject *right) { Py_ssize_t size = cstring_len(left) + cstring_len(right) + 1; struct cstring *new = CSTRING_ALLOC(Py_TYPE(left), size); + if(!new) + return NULL; memcpy(new->value, CSTRING_VALUE(left), Py_SIZE(left)); memcpy(&new->value[cstring_len(left)], CSTRING_VALUE(right), Py_SIZE(right)); return (PyObject *)new; @@ -207,6 +211,8 @@ static PyObject *cstring_repeat(PyObject *self, Py_ssize_t count) { Py_ssize_t size = (cstring_len(self) * count) + 1; struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), size); + if(!new) + return NULL; for(Py_ssize_t i = 0; i < size - 1; i += cstring_len(self)) { memcpy(&new->value[i], CSTRING_VALUE(self), Py_SIZE(self)); } @@ -254,6 +260,8 @@ static PyObject *_cstring_subscript_slice(PyObject *self, PyObject *slice) { } struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), slicelen + 1); + if(!new) + return NULL; char *src = CSTRING_VALUE_AT(self, start); for(Py_ssize_t i = 0; i < slicelen; ++i) { new->value[i] = *src; @@ -527,6 +535,8 @@ PyObject *cstring_join(PyObject *self, PyObject *arg) { PyDoc_STRVAR(lower__doc__, ""); PyObject *cstring_lower(PyObject *self, PyObject *args) { struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), Py_SIZE(self)); + if(!new) + return NULL; const char *s = CSTRING_VALUE(self); char *d = 
CSTRING_VALUE(new); @@ -592,6 +602,8 @@ PyObject *cstring_endswith(PyObject *self, PyObject *args) { PyDoc_STRVAR(swapcase__doc__, ""); PyObject *cstring_swapcase(PyObject *self, PyObject *args) { struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), Py_SIZE(self)); + if(!new) + return NULL; const char *s = CSTRING_VALUE(self); char *d = CSTRING_VALUE(new); @@ -611,6 +623,8 @@ PyObject *cstring_swapcase(PyObject *self, PyObject *args) { PyDoc_STRVAR(upper__doc__, ""); PyObject *cstring_upper(PyObject *self, PyObject *args) { struct cstring *new = CSTRING_ALLOC(Py_TYPE(self), Py_SIZE(self)); + if(!new) + return NULL; const char *s = CSTRING_VALUE(self); char *d = CSTRING_VALUE(new); From fcf1ca529bb63e1abd4ea4c1aa92105f0f6f4b0f Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 02:44:44 -0500 Subject: [PATCH 26/37] Introduce CSTRING_LAST_BYTE macro --- src/cstring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index 22ae4d3..f32afd3 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -25,6 +25,7 @@ static PyTypeObject cstring_type; #define CSTRING_HASH(self) (((struct cstring *)self)->hash) #define CSTRING_VALUE(self) (((struct cstring *)self)->value) #define CSTRING_VALUE_AT(self, i) (&CSTRING_VALUE(self)[(i)]) +#define CSTRING_LAST_BYTE(self) (CSTRING_VALUE(self)[Py_SIZE(self) - 1]) #define CSTRING_ALLOC(tp, len) ((struct cstring *)(tp)->tp_alloc((tp), (len))) @@ -34,7 +35,7 @@ static PyObject *_cstring_new(PyTypeObject *type, const char *value, Py_ssize_t return NULL; new->hash = -1; memcpy(new->value, value, len); - new->value[len] = '\0'; + CSTRING_LAST_BYTE(new) = '\0'; return (PyObject *)new; } @@ -182,7 +183,7 @@ static PyObject *_concat_in_place(PyObject *self, PyObject *other) { return NULL; memcpy(CSTRING_VALUE_AT(new, origlen), CSTRING_VALUE(other), cstring_len(other)); - *CSTRING_VALUE_AT(new, newlen) = '\0'; + CSTRING_LAST_BYTE(new) = '\0'; return new; } From 
de02314c08207ed40e7d184f2c2c59ea177b60d8 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 02:49:49 -0500 Subject: [PATCH 27/37] Remove unneeded cast --- src/cstring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index f32afd3..d5ac68a 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -42,7 +42,7 @@ static PyObject *_cstring_new(PyTypeObject *type, const char *value, Py_ssize_t static PyObject *_cstring_realloc(PyObject *self, Py_ssize_t len) { if(Py_REFCNT(self) > 1) return PyErr_BadInternalCall(), NULL; - struct cstring *new = (struct cstring *)PyObject_Realloc(self, sizeof(struct cstring) + len + 1); + struct cstring *new = PyObject_Realloc(self, sizeof(struct cstring) + len + 1); if(!new) return PyErr_NoMemory(); Py_SET_SIZE(new, len + 1); From 10291c980105314e5d5e8ca01a8b102974805b9d Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 11:16:35 -0500 Subject: [PATCH 28/37] Clean up error handling --- src/cstring.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index d5ac68a..b9b8124 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -156,9 +156,7 @@ static PyObject *cstring_richcompare(PyObject *self, PyObject *other, int op) { case Py_GE: return PyBool_FromLong(*left >= *right); default: - /* Should be unreachable */ - PyErr_Format(PyExc_SystemError, "Invalid compare operation: %d", op); - return NULL; + Py_UNREACHABLE(); } } @@ -254,15 +252,12 @@ static PyObject *_cstring_subscript_slice(PyObject *self, PyObject *slice) { Py_ssize_t start, stop, step; if(PySlice_Unpack(slice, &start, &stop, &step) < 0) return NULL; - Py_ssize_t slicelen = PySlice_AdjustIndices(cstring_len(self), &start, &stop, step); - if(slicelen < 0) { - PyErr_Format(PyExc_SystemError, "Internal error: Invalid slicelen: %d", slicelen); - return NULL; - } + Py_ssize_t slicelen = PySlice_AdjustIndices(cstring_len(self), &start, &stop, step); struct 
cstring *new = CSTRING_ALLOC(Py_TYPE(self), slicelen + 1); if(!new) return NULL; + char *src = CSTRING_VALUE_AT(self, start); for(Py_ssize_t i = 0; i < slicelen; ++i) { new->value[i] = *src; From ecd8bc1b92f9dc2592f320709938c9caa4540e8c Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 12:21:34 -0500 Subject: [PATCH 29/37] Combine similar utility functions --- src/cstring.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index b9b8124..e839f71 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -29,6 +29,14 @@ static PyTypeObject cstring_type; #define CSTRING_ALLOC(tp, len) ((struct cstring *)(tp)->tp_alloc((tp), (len))) +static void *_bad_argument_type(PyObject *o) { + PyErr_Format( + PyExc_TypeError, + "Bad argument type: %s", + Py_TYPE(o)->tp_name); + return NULL; +} + static PyObject *_cstring_new(PyTypeObject *type, const char *value, Py_ssize_t len) { struct cstring *new = CSTRING_ALLOC(type, len + 1); if(!new) @@ -58,7 +66,6 @@ static PyObject *cstring_new_empty(void) { return _cstring_new(&cstring_type, "", 0); } - static const char *_obj_as_string_and_size(PyObject *o, Py_ssize_t *s) { if(PyUnicode_Check(o)) return PyUnicode_AsUTF8AndSize(o, s); @@ -74,13 +81,14 @@ static const char *_obj_as_string_and_size(PyObject *o, Py_ssize_t *s) { return buffer; } - PyErr_Format( - PyExc_TypeError, - "Invalid initialization type: %s.", - Py_TYPE(o)->tp_name); + if(PyObject_TypeCheck(o, &cstring_type)) { + /* TODO: implement buffer protocol for cstring */ + *s = Py_SIZE(o) - 1; + return CSTRING_VALUE(o); + } *s = -1; - return NULL; + return _bad_argument_type(o); } static PyObject *cstring_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { @@ -277,18 +285,6 @@ static PyObject *cstring_subscript(PyObject *self, PyObject *key) { return NULL; } -static const char *_obj_to_utf8(PyObject *o, Py_ssize_t *len_p) { - if(PyUnicode_Check(o)) - return 
PyUnicode_AsUTF8AndSize(o, len_p); - if(PyObject_TypeCheck(o, &cstring_type)) { - *len_p = cstring_len(o); - return CSTRING_VALUE(o); - } - PyErr_Format( - PyExc_TypeError, "Object cannot be type %s.", Py_TYPE(o)->tp_name); - return NULL; -} - static Py_ssize_t _fix_index(Py_ssize_t i, Py_ssize_t len) { Py_ssize_t result = i; if(result < 0) @@ -316,7 +312,7 @@ static struct _substr_params *_parse_substr_args(PyObject *self, PyObject *args, return NULL; Py_ssize_t substr_len; - const char *substr = _obj_to_utf8(substr_obj, &substr_len); + const char *substr = _obj_as_string_and_size(substr_obj, &substr_len); if(!substr) return NULL; From f6312b546b5ad856a37cb744204de0600919cd3d Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 14:45:22 -0500 Subject: [PATCH 30/37] Implement strip method --- src/cstring.c | 29 ++++++++++++++++++++++++++++- test/test_methods.py | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index e839f71..d7f437d 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -580,6 +580,33 @@ PyObject *cstring_startswith(PyObject *self, PyObject *args) { return PyBool_FromLong(cmp == 0); } +PyDoc_STRVAR(strip__doc__, ""); +PyObject *cstring_strip(PyObject *self, PyObject *args) { + PyObject *charsobj = NULL; + if(!PyArg_ParseTuple(args, "|O", &charsobj)) + return NULL; + + const char *chars = " \t\n\v\f\r"; + + if(charsobj && charsobj != Py_None) { + if(!PyUnicode_Check(charsobj)) + return _bad_argument_type(charsobj); + chars = PyUnicode_AsUTF8(charsobj); + } + + const char *start = CSTRING_VALUE(self); + while(strchr(chars, *start)) + ++start; + + const char *end = &CSTRING_LAST_BYTE(self) - 1; + while(strchr(chars, *end)) + --end; + + Py_ssize_t newsize = end - start + 1; + + return _cstring_new(Py_TYPE(self), start, newsize); +} + PyDoc_STRVAR(endswith__doc__, ""); PyObject *cstring_endswith(PyObject *self, PyObject *args) { struct _substr_params params; @@ -680,7 +707,7 @@
static PyMethodDef cstring_methods[] = { /* TODO: split */ /* TODO: splitlines */ {"startswith", cstring_startswith, METH_VARARGS, startswith__doc__}, - /* TODO: strip */ + {"strip", cstring_strip, METH_VARARGS, strip__doc__}, {"swapcase", cstring_swapcase, METH_NOARGS, swapcase__doc__}, /* TODO: title */ /* TODO: translate */ diff --git a/test/test_methods.py b/test/test_methods.py index 5993943..e89c1c0 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -190,6 +190,21 @@ def test_rindex_missing(): return target.rindex('lo', 0, 4) +def test_strip_noargs(): + target = cstring('\r\n\n\n\t hello, world \t\r\n\n ') + assert target.strip() == cstring('hello, world') + + +def test_strip_None(): + target = cstring('\r\n\n\n\t hello, world \t\r\n\n ') + assert target.strip(None) == cstring('hello, world') + + +def test_strip_arg(): + target = cstring('hello, world') + assert target.strip('held') == cstring('o, wor') + + def test_startswith(): target = cstring('hello, world') assert target.startswith('hello,') is True From e5dc80a6bf8e8689207af05ba401ddd022a0607e Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 15:19:18 -0500 Subject: [PATCH 31/37] Implement lstrip and rstrip methods --- src/cstring.c | 39 +++++++++++++++++++++++++++++++++++---- test/test_methods.py | 10 ++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index d7f437d..4e1e49d 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -580,8 +580,7 @@ PyObject *cstring_startswith(PyObject *self, PyObject *args) { return PyBool_FromLong(cmp == 0); } -PyDoc_STRVAR(strip__doc__, ""); -PyObject *cstring_strip(PyObject *self, PyObject *args) { +const char *_strip_chars_from_args(PyObject *args) { PyObject *charsobj = NULL; if(!PyArg_ParseTuple(args, "|O", &charsobj)) return NULL; @@ -594,6 +593,13 @@ PyObject *cstring_strip(PyObject *self, PyObject *args) { chars = PyUnicode_AsUTF8(charsobj); } + return chars; +} + 
+PyDoc_STRVAR(strip__doc__, ""); +PyObject *cstring_strip(PyObject *self, PyObject *args) { + const char *chars = _strip_chars_from_args(args); + const char *start = CSTRING_VALUE(self); while(strchr(chars, *start)) ++start; @@ -603,7 +609,32 @@ PyObject *cstring_strip(PyObject *self, PyObject *args) { --end; Py_ssize_t newsize = end - start + 1; + return _cstring_new(Py_TYPE(self), start, newsize); +} + +PyDoc_STRVAR(lstrip__doc__, ""); +PyObject *cstring_lstrip(PyObject *self, PyObject *args) { + const char *chars = _strip_chars_from_args(args); + + const char *start = CSTRING_VALUE(self); + while(strchr(chars, *start)) + ++start; + const char *end = &CSTRING_LAST_BYTE(self) - 1; + + Py_ssize_t newsize = end - start + 1; + return _cstring_new(Py_TYPE(self), start, newsize); +} + +PyDoc_STRVAR(rstrip__doc__, ""); +PyObject *cstring_rstrip(PyObject *self, PyObject *args) { + const char *chars = _strip_chars_from_args(args); + const char *start = CSTRING_VALUE(self); + const char *end = &CSTRING_LAST_BYTE(self) - 1; + while(strchr(chars, *end)) + --end; + + Py_ssize_t newsize = end - start + 1; return _cstring_new(Py_TYPE(self), start, newsize); } @@ -693,7 +724,7 @@ static PyMethodDef cstring_methods[] = { {"join", cstring_join, METH_O, join__doc__}, /* TODO: ljust */ {"lower", cstring_lower, METH_NOARGS, lower__doc__}, - /* TODO: lstrip */ + {"lstrip", cstring_lstrip, METH_VARARGS, lstrip__doc__}, /* TODO: maketrans */ /* TODO: partition */ /* TODO: removeprefix */ @@ -703,7 +734,7 @@ static PyMethodDef cstring_methods[] = { /* TODO: rjust */ /* TODO: rpartition */ /* TODO: rsplit */ - /* TODO: rstrip */ + {"rstrip", cstring_rstrip, METH_VARARGS, rstrip__doc__}, /* TODO: split */ /* TODO: splitlines */ {"startswith", cstring_startswith, METH_VARARGS, startswith__doc__}, diff --git a/test/test_methods.py b/test/test_methods.py index e89c1c0..ee832e9 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -205,6 +205,16 @@ def test_strip_arg(): assert 
target.strip('held') == cstring('o, wor') +def test_lstrip_arg(): + target = cstring('hello, world') + assert target.lstrip('held') == cstring('o, world') + + +def test_rstrip_arg(): + target = cstring('hello, world') + assert target.rstrip('held') == cstring('hello, wor') + + def test_startswith(): target = cstring('hello, world') assert target.startswith('hello,') is True From 247281a07158fb227320fba258bdda91cbdd1db8 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 19:21:19 -0500 Subject: [PATCH 32/37] Implement partition method --- src/cstring.c | 48 +++++++++++++++++++++++++++++++++++++++++++- test/test_methods.py | 12 +++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index 4e1e49d..5a2e95f 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -63,6 +63,7 @@ static PyObject *_cstring_copy(PyObject *self) { } static PyObject *cstring_new_empty(void) { + /* TODO: empty cstring should be a singleton */ return _cstring_new(&cstring_type, "", 0); } @@ -538,6 +539,51 @@ PyObject *cstring_lower(PyObject *self, PyObject *args) { return (PyObject *)new; } +static PyObject *_tuple_steal_refs(Py_ssize_t count, ...) 
{ + PyObject *result = PyTuple_New(count); + if(!result) + return NULL; + + va_list va; + va_start(va, count); + for(int i = 0; i < count; ++i) { + PyObject *o = va_arg(va, PyObject *); + if(!o) + goto fail; + PyTuple_SET_ITEM(result, i, o); + } + va_end(va); + + return result; + +fail: + Py_DECREF(result); + return NULL; +} + +PyDoc_STRVAR(partition__doc__, ""); +PyObject *cstring_partition(PyObject *self, PyObject *arg) { + if(!_ensure_cstring(arg)) + return NULL; + + const char *search = CSTRING_VALUE(arg); + + const char *left = CSTRING_VALUE(self); + const char *mid = strstr(left, search); + if(!mid) { + return _tuple_steal_refs(3, + (Py_INCREF(self), self), + cstring_new_empty(), + cstring_new_empty()); + } + const char *right = mid + strlen(search); + + return _tuple_steal_refs(3, + _cstring_new(Py_TYPE(self), left, mid - left), + _cstring_new(Py_TYPE(self), mid, right - mid), + _cstring_new(Py_TYPE(self), right, &CSTRING_LAST_BYTE(self) - right)); +} + PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -726,7 +772,7 @@ static PyMethodDef cstring_methods[] = { {"lower", cstring_lower, METH_NOARGS, lower__doc__}, {"lstrip", cstring_lstrip, METH_VARARGS, lstrip__doc__}, /* TODO: maketrans */ - /* TODO: partition */ + {"partition", cstring_partition, METH_O, partition__doc__}, /* TODO: removeprefix */ /* TODO: replace */ {"rfind", cstring_rfind, METH_VARARGS, rfind__doc__}, diff --git a/test/test_methods.py b/test/test_methods.py index ee832e9..f3e18a7 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -215,6 +215,18 @@ def test_rstrip_arg(): assert target.rstrip('held') == cstring('hello, wor') +def test_partition(): + target = cstring('hello, world') + result = (cstring('hello'), cstring(', '), cstring('world')) + assert target.partition(cstring(', ')) == result + + +def test_partition_sep_not_found(): + target = cstring('hello, world') + result = (cstring('hello, world'), 
cstring(''), cstring('')) + assert target.partition(cstring(': ')) == result + + def test_startswith(): target = cstring('hello, world') assert target.startswith('hello,') is True From 2c6a6e515208307c20e02a150694b4e732f39df4 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Wed, 31 Mar 2021 20:24:41 -0500 Subject: [PATCH 33/37] Implement rpartition method --- src/cstring.c | 37 +++++++++++++++++++++++++++++++++---- test/test_methods.py | 12 ++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index 5a2e95f..ab9a847 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -1,9 +1,7 @@ #include -/* - * memrchr not available on some systems, so reimplement. - */ +/* memrchr not available on some systems, so reimplement. */ const char *_memrchr(const char *s, int c, size_t n) { for(const char *p = s + n - 1; p >= s; --p) { if(*p == c) @@ -12,6 +10,14 @@ const char *_memrchr(const char *s, int c, size_t n) { return NULL; } +const char *_strrstr(const char *s, const char *find) { + const char *p = s + strlen(s) - 1; + for(;p > s; --p) { + if(memcmp(p, find, strlen(find)) == 0) + return p; + } + return NULL; +} struct cstring { @@ -584,6 +590,29 @@ PyObject *cstring_partition(PyObject *self, PyObject *arg) { _cstring_new(Py_TYPE(self), right, &CSTRING_LAST_BYTE(self) - right)); } +PyDoc_STRVAR(rpartition__doc__, ""); +PyObject *cstring_rpartition(PyObject *self, PyObject *arg) { + if(!_ensure_cstring(arg)) + return NULL; + + const char *search = CSTRING_VALUE(arg); + + const char *left = CSTRING_VALUE(self); + const char *mid = _strrstr(left, search); + if(!mid) { + return _tuple_steal_refs(3, + cstring_new_empty(), + cstring_new_empty(), + (Py_INCREF(self), self)); + } + const char *right = mid + strlen(search); + + return _tuple_steal_refs(3, + _cstring_new(Py_TYPE(self), left, mid - left), + _cstring_new(Py_TYPE(self), mid, right - mid), + _cstring_new(Py_TYPE(self), right, &CSTRING_LAST_BYTE(self) - right)); +} + 
PyDoc_STRVAR(rfind__doc__, ""); PyObject *cstring_rfind(PyObject *self, PyObject *args) { struct _substr_params params; @@ -778,7 +807,7 @@ static PyMethodDef cstring_methods[] = { {"rfind", cstring_rfind, METH_VARARGS, rfind__doc__}, {"rindex", cstring_rindex, METH_VARARGS, rindex__doc__}, /* TODO: rjust */ - /* TODO: rpartition */ + {"rpartition", cstring_rpartition, METH_O, rpartition__doc__}, /* TODO: rsplit */ {"rstrip", cstring_rstrip, METH_VARARGS, rstrip__doc__}, /* TODO: split */ diff --git a/test/test_methods.py b/test/test_methods.py index f3e18a7..4ae7a77 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -227,6 +227,18 @@ def test_partition_sep_not_found(): assert target.partition(cstring(': ')) == result +def test_rpartition(): + target = cstring('hello, world') + result = (cstring('hello, wor'), cstring('l'), cstring('d')) + assert target.rpartition(cstring('l')) == result + + +def test_rpartition_sep_not_found(): + target = cstring('hello, world') + result = (cstring(''), cstring(''), cstring('hello, world')) + assert target.rpartition(cstring(': ')) == result + + def test_startswith(): target = cstring('hello, world') assert target.startswith('hello,') is True From 5e3ad6166e1b5f85cbf8e01804a8f14551e23e51 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Thu, 1 Apr 2021 09:50:59 -0500 Subject: [PATCH 34/37] Extract WHITESPACE_CHARS constant --- src/cstring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cstring.c b/src/cstring.c index ab9a847..e33fadc 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -1,5 +1,6 @@ #include +#define WHITESPACE_CHARS " \t\n\v\f\r" /* memrchr not available on some systems, so reimplement. 
*/ const char *_memrchr(const char *s, int c, size_t n) { @@ -660,7 +661,7 @@ const char *_strip_chars_from_args(PyObject *args) { if(!PyArg_ParseTuple(args, "|O", &charsobj)) return NULL; - const char *chars = " \t\n\v\f\r"; + const char *chars = WHITESPACE_CHARS; if(charsobj && charsobj != Py_None) { if(!PyUnicode_Check(charsobj)) From 3c0c2b85205a2a45d98bfcfc4cc9910615cf264e Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sat, 3 Apr 2021 22:43:36 -0500 Subject: [PATCH 35/37] Make empty cstring singleton --- src/cstring.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/cstring.c b/src/cstring.c index e33fadc..f567516 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -36,6 +36,9 @@ static PyTypeObject cstring_type; #define CSTRING_ALLOC(tp, len) ((struct cstring *)(tp)->tp_alloc((tp), (len))) +/* singleton, initialized in cstring_new_empty */ +static const struct cstring *cstring_EMPTY = NULL; + static void *_bad_argument_type(PyObject *o) { PyErr_Format( PyExc_TypeError, @@ -70,8 +73,12 @@ static PyObject *_cstring_copy(PyObject *self) { } static PyObject *cstring_new_empty(void) { - /* TODO: empty cstring should be a singleton */ - return _cstring_new(&cstring_type, "", 0); + if(!cstring_EMPTY) { + cstring_EMPTY = (struct cstring *)_cstring_new(&cstring_type, "", 0); + } + /* leaking one reference for singleton cache (never cleaned up) */ + Py_INCREF(cstring_EMPTY); + return (PyObject *)cstring_EMPTY; } static const char *_obj_as_string_and_size(PyObject *o, Py_ssize_t *s) { From 62365a88d937e9accd9a3d4e0077b1c005ed0228 Mon Sep 17 00:00:00 2001 From: Andrew Palmer Date: Sat, 3 Apr 2021 22:53:49 -0500 Subject: [PATCH 36/37] Optimize initialization from empty buffer --- src/cstring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cstring.c b/src/cstring.c index f567516..77d1f29 100644 --- a/src/cstring.c +++ b/src/cstring.c @@ -121,6 +121,9 @@ static PyObject *cstring_new(PyTypeObject *type, PyObject *args, PyObject 
*kwarg if(!buffer) return NULL; + if(len == 0) + return cstring_new_empty(); + return _cstring_new(type, buffer, len); }
From 59b963445d2519b3ca3ffd0cec89222e7afa919b Mon Sep 17 00:00:00 2001
From: Andrew Palmer
Date: Sat, 3 Apr 2021 22:24:38 -0500
Subject: [PATCH 37/37] Implement split method

---
Review notes (below the "---" marker, so not part of the commit message):
 * split() without sep used to step one byte past the terminating NUL after
   the last word; it now walks the value with strspn()/strcspn().
 * split() without sep follows str.split(): leading whitespace yields no
   empty item and an empty remainder is not appended.
 * split(sep) released no reference for its final item and never returned
   for an empty separator; the latter now raises ValueError.
 * PyList_Append() failures are propagated; maxsplit=0 performs no split.
 * The blob ids on the "index" lines predate these fixes.

 src/cstring.c        | 125 ++++++++++++++++++++++++++++++++++++++++++-
 test/test_methods.py |  14 +++++
 2 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/src/cstring.c b/src/cstring.c
index 77d1f29..3acd91f 100644
--- a/src/cstring.c
+++ b/src/cstring.c
@@ -655,6 +655,129 @@ PyObject *cstring_rindex(PyObject *self, PyObject *args) {
     return PyLong_FromSsize_t(p - CSTRING_VALUE(self));
 }
 
+/*
+ * Append item to list and release the caller's reference to it.
+ * A NULL item (a failed _cstring_new) is treated as a failure.
+ * Returns 0 on success, -1 on failure.
+ */
+static int _cstring_split_append(PyObject *list, PyObject *item) {
+    if(!item)
+        return -1;
+    int rc = PyList_Append(list, item);
+    Py_DECREF(item);
+    return rc;
+}
+
+/*
+ * Split self on runs of any character in seps, like str.split() with no
+ * separator: leading runs are skipped and no empty items are produced.
+ * At most maxsplit splits are made (negative means no limit); any text left
+ * after the last split is kept verbatim as the final item.
+ * Returns a new list, or NULL on failure.
+ */
+PyObject *_cstring_split_on_chars(PyObject *self, const char seps[], Py_ssize_t maxsplit) {
+    if(maxsplit < 0)
+        maxsplit = PY_SSIZE_T_MAX;
+
+    PyObject *list = PyList_New(0);
+    if(!list)
+        return NULL;
+
+    const char *start = CSTRING_VALUE(self);
+    for(;;) {
+        /* Skip the separator run; strspn stops at NUL, so we stay in bounds. */
+        start += strspn(start, seps);
+        if(!*start)
+            break;
+
+        /* Split budget spent: keep the rest verbatim as the last item. */
+        if(PyList_GET_SIZE(list) >= maxsplit) {
+            PyObject *rest = _cstring_new(Py_TYPE(self), start, strlen(start));
+            if(_cstring_split_append(list, rest) < 0)
+                goto fail;
+            break;
+        }
+
+        const char *end = start + strcspn(start, seps);
+        PyObject *item = _cstring_new(Py_TYPE(self), start, end - start);
+        if(_cstring_split_append(list, item) < 0)
+            goto fail;
+        start = end;
+    }
+
+    return list;
+
+fail:
+    Py_DECREF(list);
+    return NULL;
+}
+
+/*
+ * Split self on each occurrence of the cstring sepobj, like str.split(sep).
+ * At most maxsplit splits are made (negative means no limit).
+ * Returns a new list, or NULL on failure; an empty separator raises
+ * ValueError.
+ */
+PyObject *_cstring_split_on_cstring(PyObject *self, PyObject *sepobj, Py_ssize_t maxsplit) {
+    if(!_ensure_cstring(sepobj))
+        return NULL;
+
+    const char *sep = CSTRING_VALUE(sepobj);
+    const size_t seplen = strlen(sep);
+    if(seplen == 0) {
+        /* strstr() matches "" at every position, which would never end. */
+        PyErr_SetString(PyExc_ValueError, "empty separator");
+        return NULL;
+    }
+
+    if(maxsplit < 0)
+        maxsplit = PY_SSIZE_T_MAX;
+
+    PyObject *list = PyList_New(0);
+    if(!list)
+        return NULL;
+
+    const char *s = CSTRING_VALUE(self);
+    while(PyList_GET_SIZE(list) < maxsplit) {
+        const char *e = strstr(s, sep);
+        if(!e)
+            break;
+        PyObject *item = _cstring_new(Py_TYPE(self), s, e - s);
+        if(_cstring_split_append(list, item) < 0)
+            goto fail;
+        s = e + seplen;
+    }
+
+    /* Whatever follows the last split (possibly empty) is the final item. */
+    PyObject *rest = _cstring_new(Py_TYPE(self), s, strlen(s));
+    if(_cstring_split_append(list, rest) < 0)
+        goto fail;
+
+    return list;
+
+fail:
+    Py_DECREF(list);
+    return NULL;
+}
+
+/*
+ * cstring.split(sep=None, maxsplit=-1): split on whitespace runs when sep
+ * is None, otherwise on the cstring sep.
+ */
+PyDoc_STRVAR(split__doc__, "");
+PyObject *cstring_split(PyObject *self, PyObject *args, PyObject *kwargs) {
+    PyObject *sepobj = Py_None;
+    Py_ssize_t maxsplit = -1;
+    char *kwlist[] = {"sep", "maxsplit", NULL};
+    /* "n" parses straight into Py_ssize_t, the type the helpers take. */
+    if(!PyArg_ParseTupleAndKeywords(args, kwargs, "|On", kwlist, &sepobj, &maxsplit))
+        return NULL;
+
+    return (sepobj == Py_None)
+        ? _cstring_split_on_chars(self, WHITESPACE_CHARS, maxsplit)
+        : _cstring_split_on_cstring(self, sepobj, maxsplit);
+}
+
 PyDoc_STRVAR(startswith__doc__, "");
 PyObject *cstring_startswith(PyObject *self, PyObject *args) {
     struct _substr_params params;
@@ -821,7 +944,7 @@ static PyMethodDef cstring_methods[] = {
     {"rpartition", cstring_rpartition, METH_O, rpartition__doc__},
     /* TODO: rsplit */
     {"rstrip", cstring_rstrip, METH_VARARGS, rstrip__doc__},
-    /* TODO: split */
+    {"split", (PyCFunction)cstring_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
     /* TODO: splitlines */
     {"startswith", cstring_startswith, METH_VARARGS, startswith__doc__},
     {"strip", cstring_strip, METH_VARARGS, strip__doc__},
diff --git a/test/test_methods.py b/test/test_methods.py
index 4ae7a77..8a1c24e 100644
--- a/test/test_methods.py
+++ b/test/test_methods.py
@@ -239,6 +239,20 @@ def test_rpartition_sep_not_found():
     assert target.rpartition(cstring(': ')) == result
 
 
+def test_split():
+    assert cstring('hello, world').split(cstring(', ')) == [cstring('hello'), cstring('world')]
+    assert cstring('1,2,3').split(cstring(',')) == [
+        cstring('1'), cstring('2'), cstring('3')]
+    assert cstring('hello, world').split() == [cstring('hello,'), cstring('world')]
+    assert cstring('hello\t \n world').split() == [cstring('hello'), cstring('world')]
+    assert cstring('1,2,3').split(cstring(','), maxsplit=1) == [
+        cstring('1'), cstring('2,3')]
+    assert cstring('1 2 3').split(maxsplit=1) == [
+        cstring('1'), cstring('2 3')]
+    assert cstring('  a b  ').split() == [cstring('a'), cstring('b')]
+    assert cstring('a,b').split(cstring(','), maxsplit=0) == [cstring('a,b')]
+
+
 def test_startswith():
     target = cstring('hello, world')
     assert target.startswith('hello,') is True