Skip to content

gh-93033: Use wmemchr in find_char and replace_1char_inplace #93034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Search in some strings (platform dependent i.e [U+0xFFFF, U+0x0100] on Windows or [U+0xFFFFFFFF, U+0x00010000] on Linux 64-bit) are now up to 10 times faster.
1 change: 1 addition & 0 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,7 @@ bytearray_dealloc(PyByteArrayObject *self)
#define STRINGLIB_ISSPACE Py_ISSPACE
#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_MUTABLE 1

#include "stringlib/fastsearch.h"
Expand Down
1 change: 1 addition & 0 deletions Objects/bytes_methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
#define STRINGLIB(F) stringlib_##F
#define STRINGLIB_CHAR char
#define STRINGLIB_SIZEOF_CHAR 1
#define STRINGLIB_FAST_MEMCHR memchr

#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
Expand Down
1 change: 1 addition & 0 deletions Objects/stringlib/asciilib.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
#define STRINGLIB_FAST_MEMCHR memchr

#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
34 changes: 22 additions & 12 deletions Objects/stringlib/fastsearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))

#if STRINGLIB_SIZEOF_CHAR == 1
#ifdef STRINGLIB_FAST_MEMCHR
# define MEMCHR_CUT_OFF 15
#else
# define MEMCHR_CUT_OFF 40
Expand All @@ -53,8 +53,8 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
p = s;
e = s + n;
if (n > MEMCHR_CUT_OFF) {
#if STRINGLIB_SIZEOF_CHAR == 1
p = memchr(s, ch, n);
#ifdef STRINGLIB_FAST_MEMCHR
p = STRINGLIB_FAST_MEMCHR(s, ch, n);
if (p != NULL)
return (p - s);
return -1;
Expand Down Expand Up @@ -102,16 +102,26 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
return -1;
}

#undef MEMCHR_CUT_OFF

#if STRINGLIB_SIZEOF_CHAR == 1
# define MEMRCHR_CUT_OFF 15
#else
# define MEMRCHR_CUT_OFF 40
#endif


Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{
const STRINGLIB_CHAR *p;
#ifdef HAVE_MEMRCHR
/* memrchr() is a GNU extension, available since glibc 2.1.91.
it doesn't seem as optimized as memchr(), but is still quite
faster than our hand-written loop below */
/* memrchr() is a GNU extension, available since glibc 2.1.91. it
doesn't seem as optimized as memchr(), but is still quite
faster than our hand-written loop below. There is no wmemrchr
for 4-byte chars. */

if (n > MEMCHR_CUT_OFF) {
if (n > MEMRCHR_CUT_OFF) {
#if STRINGLIB_SIZEOF_CHAR == 1
p = memrchr(s, ch, n);
if (p != NULL)
Expand Down Expand Up @@ -139,19 +149,19 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
if (*p == ch)
return n;
/* False positive */
if (n1 - n > MEMCHR_CUT_OFF)
if (n1 - n > MEMRCHR_CUT_OFF)
continue;
if (n <= MEMCHR_CUT_OFF)
if (n <= MEMRCHR_CUT_OFF)
break;
s1 = p - MEMCHR_CUT_OFF;
s1 = p - MEMRCHR_CUT_OFF;
while (p > s1) {
p--;
if (*p == ch)
return (p - s);
}
n = p - s;
}
while (n > MEMCHR_CUT_OFF);
while (n > MEMRCHR_CUT_OFF);
}
#endif
}
Expand All @@ -165,7 +175,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
return -1;
}

#undef MEMCHR_CUT_OFF
#undef MEMRCHR_CUT_OFF

/* Change to a 1 to see logging comments walk through the algorithm. */
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
Expand Down
4 changes: 2 additions & 2 deletions Objects/stringlib/replace.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
if (!--attempts) {
/* if u1 was not found for attempts iterations,
use FASTSEARCH() or memchr() */
#if STRINGLIB_SIZEOF_CHAR == 1
#ifdef STRINGLIB_FAST_MEMCHR
s++;
s = memchr(s, u1, end - s);
s = STRINGLIB_FAST_MEMCHR(s, u1, end - s);
if (s == NULL)
return;
#else
Expand Down
1 change: 1 addition & 0 deletions Objects/stringlib/stringdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_Repr
#define STRINGLIB_FAST_MEMCHR memchr
#endif /* !STRINGLIB_STRINGDEFS_H */
1 change: 1 addition & 0 deletions Objects/stringlib/ucs1lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#define STRINGLIB_NEW _PyUnicode_FromUCS1
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_MUTABLE 0

#define STRINGLIB_TOSTR PyObject_Str
Expand Down
4 changes: 4 additions & 0 deletions Objects/stringlib/ucs2lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
#if SIZEOF_WCHAR_T == 2
#define STRINGLIB_FAST_MEMCHR(s, c, n) \
(Py_UCS2 *)wmemchr((const wchar_t *)(s), c, n)
#endif

#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
4 changes: 4 additions & 0 deletions Objects/stringlib/ucs4lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
#if SIZEOF_WCHAR_T == 4
#define STRINGLIB_FAST_MEMCHR(s, c, n) \
(Py_UCS4 *)wmemchr((const wchar_t *)(s), c, n)
#endif

#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
Expand Down
1 change: 1 addition & 0 deletions Objects/stringlib/undef.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
#undef STRINGLIB_NEW
#undef STRINGLIB_IS_UNICODE
#undef STRINGLIB_MUTABLE
#undef STRINGLIB_FAST_MEMCHR