From 035b3e23c7c3f40ebe3423e6fa890184d80e1efa Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Thu, 11 Apr 2024 21:40:12 +0200
Subject: [PATCH 1/9] Improve performance of startswith by eliminating double
 work in tailmatch

---
 Objects/unicodeobject.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2c259b7e869efe..f8e487a3c92c0a 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9272,24 +9272,23 @@ tailmatch(PyObject *self,
     else
         offset = start;
 
-    if (PyUnicode_READ(kind_self, data_self, offset) ==
-        PyUnicode_READ(kind_sub, data_sub, 0) &&
-        PyUnicode_READ(kind_self, data_self, offset + end_sub) ==
-        PyUnicode_READ(kind_sub, data_sub, end_sub)) {
+    int last_character_matches = PyUnicode_READ(kind_self, data_self, offset + end_sub) ==
+        PyUnicode_READ(kind_sub, data_sub, end_sub);
+
+    if (last_character_matches) {
+        if (end_sub==0)
+            return 1;
         /* If both are of the same kind, memcmp is sufficient */
         if (kind_self == kind_sub) {
-            return ! memcmp((char *)data_self +
-                                (offset * PyUnicode_KIND(substring)),
-                            data_sub,
-                            PyUnicode_GET_LENGTH(substring) *
-                                PyUnicode_KIND(substring));
+            return ! memcmp((char *)data_self + (offset * kind_sub),
+                            data_sub, end_sub * kind_sub);
         }
         /* otherwise we have to compare each character by first accessing it */
         else {
             /* We do not need to compare 0 and len(substring)-1 because
                the if statement above ensured already that they are equal
                when we end up here. */
-            for (i = 1; i < end_sub; ++i) {
+            for (i = 0; i < end_sub; ++i) {
                 if (PyUnicode_READ(kind_self, data_self, offset + i) !=
                     PyUnicode_READ(kind_sub, data_sub, i))
                     return 0;

From 4f4b084eadd50e65f165ad011777e5a7991ff240 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Thu, 11 Apr 2024 22:57:02 +0200
Subject: [PATCH 2/9] Fix code style in tailmatch (shorter flag name, braces)

---
 Objects/unicodeobject.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f8e487a3c92c0a..e9417adf7035b4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9272,12 +9272,13 @@ tailmatch(PyObject *self,
     else
         offset = start;
 
-    int last_character_matches = PyUnicode_READ(kind_self, data_self, offset + end_sub) ==
-        PyUnicode_READ(kind_sub, data_sub, end_sub);
+    int match_last = PyUnicode_READ(kind_self, data_self, offset + end_sub) ==
+                        PyUnicode_READ(kind_sub, data_sub, end_sub);
 
-    if (last_character_matches) {
-        if (end_sub==0)
+    if (match_last) {
+        if (end_sub==0) {
             return 1;
+        }
         /* If both are of the same kind, memcmp is sufficient */
         if (kind_self == kind_sub) {
             return ! memcmp((char *)data_self + (offset * kind_sub),

From 9f201b16c6d38a3b89a54fdc794410a1c0eb5f0a Mon Sep 17 00:00:00 2001
From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 21:17:26 +0000
Subject: [PATCH 3/9] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?=
 =?UTF-8?q?rb=5Fit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst               | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst

diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst
new file mode 100644
index 00000000000000..ea449637abc68e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst	
@@ -0,0 +1 @@
+Improve performance of :func:`str.startswith` and `str.endswith`.

From 8792d0b9d001a4c8a1b7e523e60f9450098a1e21 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Thu, 11 Apr 2024 23:44:01 +0200
Subject: [PATCH 4/9] Fix reST role markup for str.endswith in NEWS entry

---
 .../2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst
index ea449637abc68e..b6be9b7b66ba4f 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst	
@@ -1 +1 @@
-Improve performance of :func:`str.startswith` and `str.endswith`.
+Improve performance of :func:`str.startswith` and :func:`str.endswith`.

From 2a2cfb36840bd096ed4d1679d9ac37290f8c75e6 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Mon, 20 May 2024 23:04:00 +0200
Subject: [PATCH 5/9] Update Objects/unicodeobject.c

Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com>
---
 Objects/unicodeobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1d13227fef282a..7fd29531ad55a4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9276,7 +9276,7 @@ tailmatch(PyObject *self,
                         PyUnicode_READ(kind_sub, data_sub, end_sub);
 
     if (match_last) {
-        if (end_sub==0) {
+        if (end_sub == 0) {
             return 1;
         }
         /* If both are of the same kind, memcmp is sufficient */

From 9f8e4b880c9c8d08f4a4f5973e1b64db846ab0c7 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Mon, 20 May 2024 23:04:18 +0200
Subject: [PATCH 6/9] update comment

---
 Objects/unicodeobject.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1d13227fef282a..c27cb27763dc35 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9286,9 +9286,9 @@ tailmatch(PyObject *self,
         }
         /* otherwise we have to compare each character by first accessing it */
         else {
-            /* We do not need to compare 0 and len(substring)-1 because
-               the if statement above ensured already that they are equal
-               when we end up here. */
+            /* We do not need to compare len(substring)-1 because the if
+               statement above ensured already that they are equal when we
+               end up here. */
             for (i = 0; i < end_sub; ++i) {
                 if (PyUnicode_READ(kind_self, data_self, offset + i) !=
                     PyUnicode_READ(kind_sub, data_sub, i))

From 8a7b9fe363d838b8d7e7930e3cf9487a55ec23c8 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Tue, 21 May 2024 22:12:33 +0200
Subject: [PATCH 7/9] update news entry

---
 .../2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst
index b6be9b7b66ba4f..19dc551118ae67 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst	
@@ -1 +1 @@
-Improve performance of :func:`str.startswith` and :func:`str.endswith`.
+Improve performance of :func:`str.startswith`, :func:`str.endswith`, :func:`str.removeprefix` and :func:`str.removesuffix`.

From ea862985e973c1b456421bd7b8197e4850c346ac Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Tue, 21 May 2024 22:52:14 +0200
Subject: [PATCH 8/9] Check first character before last in tailmatch

---
 Objects/unicodeobject.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 10737760dcceef..0d9fdc443cd315 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9254,7 +9254,8 @@ tailmatch(PyObject *self,
     Py_ssize_t end_sub;
 
     ADJUST_INDICES(start, end, PyUnicode_GET_LENGTH(self));
-    end -= PyUnicode_GET_LENGTH(substring);
+    Py_ssize_t substring_length = PyUnicode_GET_LENGTH(substring);
+    end -= substring_length;
     if (end < start)
         return 0;
 
@@ -9265,18 +9266,25 @@ tailmatch(PyObject *self,
     data_self = PyUnicode_DATA(self);
     kind_sub = PyUnicode_KIND(substring);
     data_sub = PyUnicode_DATA(substring);
-    end_sub = PyUnicode_GET_LENGTH(substring) - 1;
+    end_sub = substring_length - 1;
 
     if (direction > 0)
         offset = end;
     else
         offset = start;
 
-    int match_last = PyUnicode_READ(kind_self, data_self, offset + end_sub) ==
-                        PyUnicode_READ(kind_sub, data_sub, end_sub);
+    int match_first = PyUnicode_READ(kind_self, data_self, offset) ==
+                        PyUnicode_READ(kind_sub, data_sub, 0);
 
-    if (match_last) {
-        if (end_sub == 0) {
+    if (match_first) {
+        if (substring_length == 1) {
+            // single-character case
+            return 1;
+        }
+        int match_last = PyUnicode_READ(kind_self, data_self, offset + end_sub) ==
+                        PyUnicode_READ(kind_sub, data_sub, end_sub);
+        if (!match_last && substring_length == 2) {
+            // failing two-character case
             return 1;
         }
         /* If both are of the same kind, memcmp is sufficient */
@@ -9286,10 +9294,10 @@ tailmatch(PyObject *self,
         }
         /* otherwise we have to compare each character by first accessing it */
         else {
-            /* We do not need to compare len(substring)-1 because the if
-               statement above ensured already that they are equal when we
+            /* We do not need to compare 0 and len(substring)-1 because the if
+               statements above ensured already that they are equal when we
                end up here. */
-            for (i = 0; i < end_sub; ++i) {
+            for (i = 1; i < end_sub; ++i) {
                 if (PyUnicode_READ(kind_self, data_self, offset + i) !=
                     PyUnicode_READ(kind_sub, data_sub, i))
                     return 0;

From 6aea844959e8ddc3b5b3a843b83106f5c4aead95 Mon Sep 17 00:00:00 2001
From: Pieter Eendebak <pieter.eendebak@gmail.com>
Date: Tue, 21 May 2024 22:17:38 +0200
Subject: [PATCH 9/9] Update Misc/NEWS.d/next/Core and
 Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst

Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com>
---
 .../2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst
index 19dc551118ae67..1e77d5ba1413b8 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst	
@@ -1 +1 @@
-Improve performance of :func:`str.startswith`, :func:`str.endswith`, :func:`str.removeprefix` and :func:`str.removesuffix`.
+Improve performance of :meth:`str.startswith`, :meth:`str.endswith`, :meth:`str.removeprefix` and :meth:`str.removesuffix`.