From f620f3b63ebc8eefee37ff90d8816e9f370c62d7 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Wed, 16 Apr 2025 01:20:27 +0900
Subject: [PATCH 01/75] Support NPY_STRING, NPY_UNICODE

---
 numpy/_core/src/multiarray/unique.cpp | 139 ++++++++++++++++++++++----
 1 file changed, 119 insertions(+), 20 deletions(-)
diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index f36acfdef49a..d2006ae2109a 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,6 +3,7 @@
 
 #include <Python.h>
 
+#include <string>
 #include <unordered_set>
 #include <functional>
 
@@ -26,7 +27,7 @@ FinalAction<F> finally(F f) {
 
 template<typename T>
 static PyObject*
-unique(PyArrayObject *self)
+unique_int(PyArrayObject *self)
 {
     /* This function takes a numpy array and returns a numpy array containing
     the unique values.
@@ -118,28 +119,126 @@ unique(PyArrayObject *self)
 }
 
 
+template<typename T>
+static PyObject*
+unique_string(PyArrayObject *self)
+{
+    /* This function takes a numpy array and returns a numpy array containing
+    the unique values.
+
+    It assumes the numpy array includes data that can be viewed as fixed-size
+    strings of a certain size (sizeof(T)).
+    */
+    NPY_ALLOW_C_API_DEF;
+    std::unordered_set<std::basic_string<T>> hashset;
+
+    NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
+                                      NPY_ITER_EXTERNAL_LOOP |
+                                      NPY_ITER_REFS_OK |
+                                      NPY_ITER_ZEROSIZE_OK |
+                                      NPY_ITER_GROWINNER,
+                                NPY_KEEPORDER, NPY_NO_CASTING,
+                                NULL);
+    // Making sure the iterator is deallocated when the function returns, with
+    // or w/o an exception
+    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
+    if (iter == NULL) {
+        return NULL;
+    }
+
+    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
+    if (iternext == NULL) {
+        return NULL;
+    }
+    char **dataptr = NpyIter_GetDataPtrArray(iter);
+    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
+    npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
+
+    // release the GIL
+    PyThreadState *_save;
+    _save = PyEval_SaveThread();
+    // Making sure the GIL is re-acquired when the function returns, with
+    // or w/o an exception
+    auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
+    // first we put the data in a hash map
+
+    // item size for the string type
+    npy_intp itemsize = PyArray_ITEMSIZE(self);
+
+    // the number of characters (For Unicode, itemsize / 4 for UCS4)
+    npy_intp num_chars = itemsize / sizeof(T);
+
+    if (NpyIter_GetIterSize(iter) > 0) {
+        do {
+            char* data = *dataptr;
+            npy_intp stride = *strideptr;
+            npy_intp count = *innersizeptr;
+
+            while (count--) {
+                hasher.emplace((T *)data, num_chars);
+                data += stride;
+            }
+        } while (iternext(iter));
+    }
+
+    npy_intp length = hashset.size();
+
+    NPY_ALLOW_C_API;
+    PyArray_Descr *descr = PyArray_DESCR(self);
+    Py_INCREF(descr);
+    PyObject *res_obj = PyArray_NewFromDescr(
+        &PyArray_Type,
+        descr,
+        1, // ndim
+        &length, // shape
+        NULL, // strides
+        NULL, // data
+        // This flag is needed to be able to call .sort on it.
+        NPY_ARRAY_WRITEABLE, // flags
+        NULL // obj
+    );
+    NPY_DISABLE_C_API;
+
+    if (res_obj == NULL) {
+        return NULL;
+    }
+
+    // then we iterate through the map's keys to get the unique values
+    T * data = (T *)PyArray_DATA((PyArrayObject *)res_obj);
+    auto it = hashset.begin();
+    size_t i = 0;
+    for (; it != hashset.end(); it++, i++) {
+        memcpy(data + i * num_chars, it->c_str(), itemsize);
+    }
+
+    return res_obj;
+}
+
+
 // this map contains the functions used for each item size.
 typedef std::function<PyObject *(PyArrayObject *)> function_type;
 std::unordered_map<int, function_type> unique_funcs = {
-    {NPY_BYTE, unique<npy_byte>},
-    {NPY_UBYTE, unique<npy_ubyte>},
-    {NPY_SHORT, unique<npy_short>},
-    {NPY_USHORT, unique<npy_ushort>},
-    {NPY_INT, unique<npy_int>},
-    {NPY_UINT, unique<npy_uint>},
-    {NPY_LONG, unique<npy_long>},
-    {NPY_ULONG, unique<npy_ulong>},
-    {NPY_LONGLONG, unique<npy_longlong>},
-    {NPY_ULONGLONG, unique<npy_ulonglong>},
-    {NPY_INT8, unique<npy_int8>},
-    {NPY_INT16, unique<npy_int16>},
-    {NPY_INT32, unique<npy_int32>},
-    {NPY_INT64, unique<npy_int64>},
-    {NPY_UINT8, unique<npy_uint8>},
-    {NPY_UINT16, unique<npy_uint16>},
-    {NPY_UINT32, unique<npy_uint32>},
-    {NPY_UINT64, unique<npy_uint64>},
-    {NPY_DATETIME, unique<npy_uint64>},
+    {NPY_BYTE, unique_int<npy_byte>},
+    {NPY_UBYTE, unique_int<npy_ubyte>},
+    {NPY_SHORT, unique_int<npy_short>},
+    {NPY_USHORT, unique_int<npy_ushort>},
+    {NPY_INT, unique_int<npy_int>},
+    {NPY_UINT, unique_int<npy_uint>},
+    {NPY_LONG, unique_int<npy_long>},
+    {NPY_ULONG, unique_int<npy_ulong>},
+    {NPY_LONGLONG, unique_int<npy_longlong>},
+    {NPY_ULONGLONG, unique_int<npy_ulonglong>},
+    {NPY_INT8, unique_int<npy_int8>},
+    {NPY_INT16, unique_int<npy_int16>},
+    {NPY_INT32, unique_int<npy_int32>},
+    {NPY_INT64, unique_int<npy_int64>},
+    {NPY_UINT8, unique_int<npy_uint8>},
+    {NPY_UINT16, unique_int<npy_uint16>},
+    {NPY_UINT32, unique_int<npy_uint32>},
+    {NPY_UINT64, unique_int<npy_uint64>},
+    {NPY_DATETIME, unique_int<npy_uint64>},
+    {NPY_STRING, unique_string<char>},
+    {NPY_UNICODE, unique_string<npy_ucs4>},
 };
 
 

From 20ccefe82582620ac9d27647dae8127ac921e8a8 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Wed, 16 Apr 2025 22:13:35 +0900
Subject: [PATCH 02/75] unique for NPY_STRING and NPY_UNICODE

---
 numpy/_core/src/multiarray/unique.cpp | 32 ++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index d2006ae2109a..35d622a49e3c 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,9 +3,10 @@
 
 #include <Python.h>
 
-#include <string>
+#include <iostream>
 #include <unordered_set>
 #include <functional>
+#include <vector>
 
 #include <numpy/npy_common.h>
 #include "numpy/arrayobject.h"
@@ -25,6 +26,25 @@ FinalAction<F> finally(F f) {
     return FinalAction<F>(f);
 }
 
+template <typename T>
+struct VectorHash {
+    std::size_t operator()(const std::vector<T>& v) const {
+        // https://www.boost.org/doc/libs/1_88_0/libs/container_hash/doc/html/hash.html#notes_hash_combine
+        std::size_t h = v.size();
+        for (const T& x : v) {
+            h ^= std::hash<T>{}(x) + 0x9e3779b9 + (h << 6) + (h >> 2);
+        }
+        return h;
+    }
+};
+
+template <typename T>
+struct VectorEqual {
+    bool operator()(const std::vector<T>& lhs, const std::vector<T>& rhs) const {
+        return lhs == rhs;
+    }
+};
+
 template<typename T>
 static PyObject*
 unique_int(PyArrayObject *self)
@@ -130,7 +150,7 @@ unique_string(PyArrayObject *self)
     strings of a certain size (sizeof(T)).
     */
     NPY_ALLOW_C_API_DEF;
-    std::unordered_set<std::basic_string<T>> hashset;
+    std::unordered_set<std::vector<T>, VectorHash<T>, VectorEqual<T>> hashset;
 
     NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
                                       NPY_ITER_EXTERNAL_LOOP |
@@ -175,7 +195,7 @@ unique_string(PyArrayObject *self)
             npy_intp count = *innersizeptr;
 
             while (count--) {
-                hasher.emplace((T *)data, num_chars);
+                hashset.emplace((T *) data, (T *) data + num_chars);
                 data += stride;
             }
         } while (iternext(iter));
@@ -204,11 +224,11 @@ unique_string(PyArrayObject *self)
     }
 
     // then we iterate through the map's keys to get the unique values
-    T * data = (T *)PyArray_DATA((PyArrayObject *)res_obj);
+    T ** data = (T **)PyArray_DATA((PyArrayObject *)res_obj);
     auto it = hashset.begin();
     size_t i = 0;
     for (; it != hashset.end(); it++, i++) {
-        memcpy(data + i * num_chars, it->c_str(), itemsize);
+        memcpy(data + i, it->data(), num_chars);
     }
 
     return res_obj;
@@ -237,7 +257,7 @@ std::unordered_map<int, function_type> unique_funcs = {
     {NPY_UINT32, unique_int<npy_uint32>},
     {NPY_UINT64, unique_int<npy_uint64>},
     {NPY_DATETIME, unique_int<npy_uint64>},
-    {NPY_STRING, unique_string<char>},
+    {NPY_STRING, unique_string<npy_byte>},
     {NPY_UNICODE, unique_string<npy_ucs4>},
 };
 

From 38626b9517f96871da368e0fe59af7015cf4cfab Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Thu, 17 Apr 2025 00:49:49 +0900
Subject: [PATCH 03/75] fix construct array

---
 numpy/_core/src/multiarray/unique.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 35d622a49e3c..044b725e40b4 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,6 +3,7 @@
 
 #include <Python.h>
 
+#include <algorithm>
 #include <iostream>
 #include <unordered_set>
 #include <functional>
@@ -224,11 +225,15 @@ unique_string(PyArrayObject *self)
     }
 
     // then we iterate through the map's keys to get the unique values
-    T ** data = (T **)PyArray_DATA((PyArrayObject *)res_obj);
+    char* data = PyArray_BYTES((const PyArrayObject *)res_obj);
     auto it = hashset.begin();
     size_t i = 0;
-    for (; it != hashset.end(); it++, i++) {
-        memcpy(data + i, it->data(), num_chars);
+    for (; it != hashset.end(); it++, i++, data += itemsize) {
+        std::size_t byte_to_copy = std::min(it->size() * sizeof(T), (std::size_t)itemsize);
+        memcpy(data, it->data(), byte_to_copy);
+        if (byte_to_copy < (std::size_t)itemsize) {
+            memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
+        }
     }
 
     return res_obj;
@@ -259,6 +264,7 @@ std::unordered_map<int, function_type> unique_funcs = {
     {NPY_DATETIME, unique_int<npy_uint64>},
     {NPY_STRING, unique_string<npy_byte>},
     {NPY_UNICODE, unique_string<npy_ucs4>},
+    {NPY_VSTRING, unique_string<npy_byte>},
 };
 
 

From 56bd858ea1ac1ed51f16a7ba8613d2b8685069c0 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Thu, 17 Apr 2025 00:50:24 +0900
Subject: [PATCH 04/75] remove unneccessary include

---
 numpy/_core/src/multiarray/unique.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 044b725e40b4..da031d6602a0 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,7 +3,6 @@
 
 #include <Python.h>
 
-#include <algorithm>
 #include <iostream>
 #include <unordered_set>
 #include <functional>

From f79736ab0725f1d0bcaddd36af6c620edc7f0592 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Thu, 17 Apr 2025 01:03:38 +0900
Subject: [PATCH 05/75] refactor

---
 numpy/_core/src/multiarray/unique.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index da031d6602a0..e5c1c6d3dbdc 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,6 +3,8 @@
 
 #include <Python.h>
 
+#include <algorithm>
+#include <cstring>
 #include <iostream>
 #include <unordered_set>
 #include <functional>
@@ -227,11 +229,11 @@ unique_string(PyArrayObject *self)
     char* data = PyArray_BYTES((const PyArrayObject *)res_obj);
     auto it = hashset.begin();
     size_t i = 0;
-    for (; it != hashset.end(); it++, i++, data += itemsize) {
+    for (; it != hashset.end(); it++, i++) {
         std::size_t byte_to_copy = std::min(it->size() * sizeof(T), (std::size_t)itemsize);
-        memcpy(data, it->data(), byte_to_copy);
+        std::memcpy(data + i * itemsize, it->data(), byte_to_copy);
         if (byte_to_copy < (std::size_t)itemsize) {
-            memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
+            std::memset(data + i * itemsize + byte_to_copy, 0, itemsize - byte_to_copy);
         }
     }
 

From c4e5438082648d9fa10b0d5916870b9c43a6ba28 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Thu, 17 Apr 2025 22:11:07 +0900
Subject: [PATCH 06/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index e5c1c6d3dbdc..8b804b02ff49 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,7 +3,6 @@
 
 #include <Python.h>
 
-#include <algorithm>
 #include <cstring>
 #include <iostream>
 #include <unordered_set>
@@ -30,9 +29,9 @@ FinalAction<F> finally(F f) {
 
 template <typename T>
 struct VectorHash {
-    std::size_t operator()(const std::vector<T>& v) const {
+    size_t operator()(const std::vector<T>& v) const {
         // https://www.boost.org/doc/libs/1_88_0/libs/container_hash/doc/html/hash.html#notes_hash_combine
-        std::size_t h = v.size();
+        size_t h = v.size();
         for (const T& x : v) {
             h ^= std::hash<T>{}(x) + 0x9e3779b9 + (h << 6) + (h >> 2);
         }
@@ -230,9 +229,9 @@ unique_string(PyArrayObject *self)
     auto it = hashset.begin();
     size_t i = 0;
     for (; it != hashset.end(); it++, i++) {
-        std::size_t byte_to_copy = std::min(it->size() * sizeof(T), (std::size_t)itemsize);
+        size_t byte_to_copy = it->size() * sizeof(T);
         std::memcpy(data + i * itemsize, it->data(), byte_to_copy);
-        if (byte_to_copy < (std::size_t)itemsize) {
+        if (byte_to_copy < (size_t)itemsize) {
             std::memset(data + i * itemsize + byte_to_copy, 0, itemsize - byte_to_copy);
         }
     }
@@ -265,7 +264,6 @@ std::unordered_map<int, function_type> unique_funcs = {
     {NPY_DATETIME, unique_int<npy_uint64>},
     {NPY_STRING, unique_string<npy_byte>},
     {NPY_UNICODE, unique_string<npy_ucs4>},
-    {NPY_VSTRING, unique_string<npy_byte>},
 };
 
 

From 7c51049e27a3a94dc488229827b250deb845efaa Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Thu, 17 Apr 2025 22:13:02 +0900
Subject: [PATCH 07/75] comment

---
 numpy/_core/src/multiarray/unique.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 8b804b02ff49..fbaac49da3e2 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -148,7 +148,7 @@ unique_string(PyArrayObject *self)
     the unique values.
 
     It assumes the numpy array includes data that can be viewed as fixed-size
-    strings of a certain size (sizeof(T)).
+    strings of a certain size (itemsize / sizeof(T)).
     */
     NPY_ALLOW_C_API_DEF;
     std::unordered_set<std::vector<T>, VectorHash<T>, VectorEqual<T>> hashset;

From bd70552de1d8459f400de4424cd726e10a8ea63e Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 17:07:32 +0900
Subject: [PATCH 08/75] feature: unique for NPY_VSTRING

---
 numpy/_core/src/multiarray/unique.cpp | 122 +++++++++++++++++++++++++-
 1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index fbaac49da3e2..7483e7cb6472 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -181,7 +181,6 @@ unique_string(PyArrayObject *self)
     // Making sure the GIL is re-acquired when the function returns, with
     // or w/o an exception
     auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
-    // first we put the data in a hash map
 
     // item size for the string type
     npy_intp itemsize = PyArray_ITEMSIZE(self);
@@ -189,6 +188,7 @@ unique_string(PyArrayObject *self)
     // the number of characters (For Unicode, itemsize / 4 for UCS4)
     npy_intp num_chars = itemsize / sizeof(T);
 
+    // first we put the data in a hash map
     if (NpyIter_GetIterSize(iter) > 0) {
         do {
             char* data = *dataptr;
@@ -240,6 +240,125 @@ unique_string(PyArrayObject *self)
 }
 
 
+static PyObject*
+unique_vstring(PyArrayObject *self)
+{
+    /* This function takes a numpy array and returns a numpy array containing
+    the unique values.
+
+    It assumes the numpy array includes data that can be viewed as variable width
+    strings (StringDType).
+    */
+    NPY_ALLOW_C_API_DEF;
+    std::unordered_set<std::vector<npy_byte>, VectorHash<npy_byte>, VectorEqual<npy_byte>> hashset;
+
+    NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
+                                      NPY_ITER_EXTERNAL_LOOP |
+                                      NPY_ITER_REFS_OK |
+                                      NPY_ITER_ZEROSIZE_OK |
+                                      NPY_ITER_GROWINNER,
+                                NPY_KEEPORDER, NPY_NO_CASTING,
+                                NULL);
+    // Making sure the iterator is deallocated when the function returns, with
+    // or w/o an exception
+    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
+    if (iter == NULL) {
+        return NULL;
+    }
+
+    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
+    if (iternext == NULL) {
+        return NULL;
+    }
+    char **dataptr = NpyIter_GetDataPtrArray(iter);
+    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
+    npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
+
+    // release the GIL
+    PyThreadState *_save;
+    _save = PyEval_SaveThread();
+    // Making sure the GIL is re-acquired when the function returns, with
+    // or w/o an exception
+    auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
+
+    // https://numpy.org/doc/stable/reference/c-api/strings.html
+    PyArray_Descr *descr = PyArray_DESCR(self);
+    npy_string_allocator *allocator = NpyString_acquire_allocator(
+        (PyArray_StringDTypeObject *)descr);
+
+    // whether the array contains null values
+    bool contains_null = false;
+
+    // first we put the data in a hash map
+    if (NpyIter_GetIterSize(iter) > 0) {
+        do {
+            char* data = *dataptr;
+            npy_intp stride = *strideptr;
+            npy_intp count = *innersizeptr;
+
+            while (count--) {
+                npy_static_string sdata = {0, NULL};
+                npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
+                int is_null = NpyString_load(allocator, packed_string, &sdata);
+
+                if (is_null) {
+                    std::cerr << "add null" << std::endl;
+                    contains_null = true;
+                }
+                else {
+                    std::cerr << "add string : " << std::string(sdata.buf, sdata.size) << std::endl;
+                    hashset.emplace((npy_byte *)sdata.buf, (npy_byte *)sdata.buf + sdata.size);
+                }
+                data += stride;
+            }
+        } while (iternext(iter));
+    }
+
+    npy_intp length = hashset.size();
+    if (contains_null) {
+        length++;
+    }
+
+    NPY_ALLOW_C_API;
+    Py_INCREF(descr);
+    PyObject *res_obj = PyArray_NewFromDescr(
+        &PyArray_Type,
+        descr,
+        1, // ndim
+        &length, // shape
+        NULL, // strides
+        NULL, // data
+        // This flag is needed to be able to call .sort on it.
+        NPY_ARRAY_WRITEABLE, // flags
+        NULL // obj
+    );
+    NPY_DISABLE_C_API;
+
+    if (res_obj == NULL) {
+        return NULL;
+    }
+
+    // then we iterate through the map's keys to get the unique values
+    auto it = hashset.begin();
+    size_t i = 0;
+    while (i < length) {
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
+        if (contains_null) {
+            NpyString_pack_null(allocator, packed_string);
+            contains_null = false;
+        } else {
+            NpyString_pack(allocator, packed_string, (const char *)it->data(), it->size());
+            it++;
+        }
+        i++;
+    }
+
+    NpyString_release_allocator(allocator);
+
+    return res_obj;
+}
+
+
 // this map contains the functions used for each item size.
 typedef std::function<PyObject *(PyArrayObject *)> function_type;
 std::unordered_map<int, function_type> unique_funcs = {
@@ -264,6 +383,7 @@ std::unordered_map<int, function_type> unique_funcs = {
     {NPY_DATETIME, unique_int<npy_uint64>},
     {NPY_STRING, unique_string<npy_byte>},
     {NPY_UNICODE, unique_string<npy_ucs4>},
+    {NPY_VSTRING, unique_vstring},
 };
 
 

From cc8ece629d19741b8f04b9905e3393102c0a07be Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 18:31:04 +0900
Subject: [PATCH 09/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 43 ++++++++++++++-------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 7483e7cb6472..bc1d955c409c 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -182,10 +182,11 @@ unique_string(PyArrayObject *self)
     // or w/o an exception
     auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
 
-    // item size for the string type
+    // size of each entries
     npy_intp itemsize = PyArray_ITEMSIZE(self);
 
-    // the number of characters (For Unicode, itemsize / 4 for UCS4)
+    // the number of characters of each entries
+    // (For Unicode, itemsize / 4 for UCS4)
     npy_intp num_chars = itemsize / sizeof(T);
 
     // first we put the data in a hash map
@@ -225,14 +226,14 @@ unique_string(PyArrayObject *self)
     }
 
     // then we iterate through the map's keys to get the unique values
-    char* data = PyArray_BYTES((const PyArrayObject *)res_obj);
     auto it = hashset.begin();
     size_t i = 0;
     for (; it != hashset.end(); it++, i++) {
+        char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
         size_t byte_to_copy = it->size() * sizeof(T);
-        std::memcpy(data + i * itemsize, it->data(), byte_to_copy);
+        std::memcpy(data, it->data(), byte_to_copy);
         if (byte_to_copy < (size_t)itemsize) {
-            std::memset(data + i * itemsize + byte_to_copy, 0, itemsize - byte_to_copy);
+            std::memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
         }
     }
 
@@ -281,10 +282,14 @@ unique_vstring(PyArrayObject *self)
     // or w/o an exception
     auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
 
-    // https://numpy.org/doc/stable/reference/c-api/strings.html
+    // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
     PyArray_Descr *descr = PyArray_DESCR(self);
+    Py_INCREF(descr);
     npy_string_allocator *allocator = NpyString_acquire_allocator(
         (PyArray_StringDTypeObject *)descr);
+    auto allocator_dealloc = finally([&]() {
+        NpyString_release_allocator(allocator);
+    });
 
     // whether the array contains null values
     bool contains_null = false;
@@ -302,11 +307,9 @@ unique_vstring(PyArrayObject *self)
                 int is_null = NpyString_load(allocator, packed_string, &sdata);
 
                 if (is_null) {
-                    std::cerr << "add null" << std::endl;
                     contains_null = true;
                 }
                 else {
-                    std::cerr << "add string : " << std::string(sdata.buf, sdata.size) << std::endl;
                     hashset.emplace((npy_byte *)sdata.buf, (npy_byte *)sdata.buf + sdata.size);
                 }
                 data += stride;
@@ -320,7 +323,6 @@ unique_vstring(PyArrayObject *self)
     }
 
     NPY_ALLOW_C_API;
-    Py_INCREF(descr);
     PyObject *res_obj = PyArray_NewFromDescr(
         &PyArray_Type,
         descr,
@@ -332,7 +334,6 @@ unique_vstring(PyArrayObject *self)
         NPY_ARRAY_WRITEABLE, // flags
         NULL // obj
     );
-    NPY_DISABLE_C_API;
 
     if (res_obj == NULL) {
         return NULL;
@@ -341,19 +342,19 @@ unique_vstring(PyArrayObject *self)
     // then we iterate through the map's keys to get the unique values
     auto it = hashset.begin();
     size_t i = 0;
-    while (i < length) {
-        npy_packed_static_string *packed_string = (npy_packed_static_string *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
-        if (contains_null) {
-            NpyString_pack_null(allocator, packed_string);
-            contains_null = false;
-        } else {
-            NpyString_pack(allocator, packed_string, (const char *)it->data(), it->size());
-            it++;
-        }
+    if (contains_null) {
+        // insert null if original array contains null
+        char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
+        NpyString_pack_null(allocator, packed_string);
         i++;
     }
-
-    NpyString_release_allocator(allocator);
+    for (; it != hashset.end(); it++, i++) {
+        char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
+        NpyString_pack(allocator, packed_string, (const char *)it->data(), it->size());
+    }
+    NPY_DISABLE_C_API;
 
     return res_obj;
 }

From f7b20a06fa32322a12948db44ca285a6dadeac13 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 19:59:31 +0900
Subject: [PATCH 10/75] remove unneccessary include

---
 numpy/_core/src/multiarray/unique.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index bc1d955c409c..f8e8fbb63de7 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -4,7 +4,6 @@
 #include <Python.h>
 
 #include <cstring>
-#include <iostream>
 #include <unordered_set>
 #include <functional>
 #include <vector>

From d0170edd10448158b7ef0f30e81e94b9d2ef48fe Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 19:59:55 +0900
Subject: [PATCH 11/75] add test

---
 numpy/lib/tests/test_arraysetops.py | 38 ++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 788a4cecdb44..893cfe395a0f 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -6,6 +6,7 @@
 from numpy import (
     ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, isin
     )
+from numpy.dtypes import StringDType
 from numpy.exceptions import AxisError
 from numpy.testing import (assert_array_equal, assert_equal,
                            assert_raises, assert_raises_regex)
@@ -811,7 +812,9 @@ def test_unique_1d(self):
 
     def test_unique_zero_sized(self):
         # test for zero-sized arrays
-        for dt in self.get_types():
+        types = self.get_types()
+        types.extend('SU')
+        for dt in types:
             a = np.array([], dt)
             b = np.array([], dt)
             i1 = np.array([], np.int64)
@@ -836,6 +839,39 @@ class Subclass(np.ndarray):
             bb = Subclass(b.shape, dtype=dt, buffer=b)
             self.check_all(aa, bb, i1, i2, c, dt)
 
+    def test_unique_byte_string(self):
+        # test for byte string arrays
+        a = ["apple", "banana", "apple", "cherry", "date", "banana", "fig", "grape"] * 10
+        b = ["apple", "banana", "cherry", "date", "fig", "grape"]
+        i1 = [0, 1, 3, 4, 6, 7]
+        i2 = [0, 1, 0, 2, 3, 1, 4, 5] * 10
+        c = np.multiply([2, 2, 1, 1, 1, 1], 10)
+        # test for string types
+        for dt in ['S', 'U']:
+            aa = np.array(a, dt)
+            bb = np.array(b, dt)
+            self.check_all(aa, bb, i1, i2, c, dt)
+
+    def test_unique_unicode_string(self):
+        # test for unicode string arrays
+        a = ["こんにちは", "こんばんは", "こんにちは", "😊", "さようなら", "こんばんは", "🌸", "😊"] * 10
+        b = ['こんにちは', 'こんばんは', 'さようなら', '🌸', '😊']
+        i1 = [0, 1, 4, 6, 3]
+        i2 = [0, 1, 0, 4, 2, 1, 3, 4] * 10
+        c = np.multiply([2, 2, 1, 1, 2], 10)
+        # test for string types
+        for dt in ['U']:
+            aa = np.array(a, dt)
+            bb = np.array(b, dt)
+            self.check_all(aa, bb, i1, i2, c, dt)
+
+    def test_unique_vstring(self):
+        # test for unicode and nullable string arrays
+        a = np.array(["apple", None, "りんご", "", "apple", "🍎", None, "banana", "", "バナナ", "🍌"], dtype=StringDType(na_object=None))
+        unq = np.array([None, 'バナナ', '🍎', '🍌', '', 'りんご', 'banana', 'apple'], dtype=StringDType(na_object=None))
+        a1 = unique(a, sorted=False)
+        assert_array_equal(a1, unq)
+
     @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])
     def test_unsupported_hash_based(self, arg):
         """These currently never use the hash-based solution.  However,

From dbb140f7192defbb54200866b8a5fa4734d1e5aa Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 20:00:14 +0900
Subject: [PATCH 12/75] add error message

---
 numpy/lib/_arraysetops_impl.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py
index a284a9204112..56946718b9ff 100644
--- a/numpy/lib/_arraysetops_impl.py
+++ b/numpy/lib/_arraysetops_impl.py
@@ -361,6 +361,28 @@ def _unique1d(ar, return_index=False, return_inverse=False,
         # two dimensions for all operations. Coerce to an ndarray in such cases.
         ar = np.asarray(ar).flatten()
 
+    if ar.dtype.kind == 'T':
+        if return_index:
+            raise ValueError(
+                "Currently, `return_index` can only be False dtype is `T` (StringDType)."
+            )
+        if return_inverse:
+            raise ValueError(
+                "Currently, `return_inverse` can only be False dtype is `T` (StringDType)."
+            )
+        if return_counts:
+            raise ValueError(
+                "Currently, `return_counts` can only be False dtype is `T` (StringDType)."
+            )
+        if not equal_nan:
+            raise ValueError(
+                "Currently, `equal_nan` can only be True dtype is `T` (StringDType)."
+            )
+        if sorted:
+            raise ValueError(
+                "Currently, `sorted` can only be False dtype is `T` (StringDType)."
+            )
+
     optional_indices = return_index or return_inverse
 
     # masked arrays are not supported yet.

From 49ed502a8ba94eafef6624f10b8688eb6a302bb4 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 20:11:07 +0900
Subject: [PATCH 13/75] linter

---
 numpy/lib/tests/test_arraysetops.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 893cfe395a0f..06cb96f30a9f 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -871,6 +871,14 @@ def test_unique_vstring(self):
         unq = np.array([None, 'バナナ', '🍎', '🍌', '', 'りんご', 'banana', 'apple'], dtype=StringDType(na_object=None))
         a1 = unique(a, sorted=False)
         assert_array_equal(a1, unq)
+    
+    def test_unique_vstring_errors(self):
+        a = np.array(["apple", None, "りんご", "", "apple", "🍎", None, "banana", "", "バナナ", "🍌"], dtype=StringDType(na_object=None))
+        assert_raises(ValueError, unique, a, return_index=True)
+        assert_raises(ValueError, unique, a, return_inverse=True)
+        assert_raises(ValueError, unique, a, return_counts=True)
+        assert_raises(ValueError, unique, a, equal_nan=False)
+        assert_raises(ValueError, unique, a, sorted=True)
 
     @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])
     def test_unsupported_hash_based(self, arg):

From 0238cee37f1cbcff302a0714cf6345808c84d38c Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 20:16:42 +0900
Subject: [PATCH 14/75] linter

---
 numpy/lib/tests/test_arraysetops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 06cb96f30a9f..46bb7ec62279 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -871,7 +871,7 @@ def test_unique_vstring(self):
         unq = np.array([None, 'バナナ', '🍎', '🍌', '', 'りんご', 'banana', 'apple'], dtype=StringDType(na_object=None))
         a1 = unique(a, sorted=False)
         assert_array_equal(a1, unq)
-    
+
     def test_unique_vstring_errors(self):
         a = np.array(["apple", None, "りんご", "", "apple", "🍎", None, "banana", "", "バナナ", "🍌"], dtype=StringDType(na_object=None))
         assert_raises(ValueError, unique, a, return_index=True)

From 6905978781d686b383b1b5b647f538a456ba5f31 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 20:51:24 +0900
Subject: [PATCH 15/75] reserve bucket

---
 numpy/_core/src/multiarray/unique.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index f8e8fbb63de7..9c25e672adf7 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -151,6 +151,9 @@ unique_string(PyArrayObject *self)
     */
     NPY_ALLOW_C_API_DEF;
     std::unordered_set<std::vector<T>, VectorHash<T>, VectorEqual<T>> hashset;
+    // reserve the hashset to avoid reallocations
+    // reallocations are expensive, especially for string arrays
+    hashset.reserve(PyArray_SIZE(self) * 2);
 
     NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
                                       NPY_ITER_EXTERNAL_LOOP |
@@ -251,6 +254,9 @@ unique_vstring(PyArrayObject *self)
     */
     NPY_ALLOW_C_API_DEF;
     std::unordered_set<std::vector<npy_byte>, VectorHash<npy_byte>, VectorEqual<npy_byte>> hashset;
+    // reserve the hashset to avoid reallocations
+    // reallocations are expensive, especially for string arrays
+    hashset.reserve(PyArray_SIZE(self) * 2);
 
     NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
                                       NPY_ITER_EXTERNAL_LOOP |

From 2fc1378c85dcf42dc3b7453228af391cd75d5a33 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 20:56:21 +0900
Subject: [PATCH 16/75] remove emoji from testcase

---
 numpy/lib/tests/test_arraysetops.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 46bb7ec62279..0ae7550ecb1f 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -854,11 +854,11 @@ def test_unique_byte_string(self):
 
     def test_unique_unicode_string(self):
         # test for unicode string arrays
-        a = ["こんにちは", "こんばんは", "こんにちは", "😊", "さようなら", "こんばんは", "🌸", "😊"] * 10
-        b = ['こんにちは', 'こんばんは', 'さようなら', '🌸', '😊']
-        i1 = [0, 1, 4, 6, 3]
-        i2 = [0, 1, 0, 4, 2, 1, 3, 4] * 10
-        c = np.multiply([2, 2, 1, 1, 2], 10)
+        a = ["こんにちは", "こんばんは", "こんにちは", "さようなら", "こんばんは"] * 10
+        b = ['こんにちは', 'こんばんは', 'さようなら']
+        i1 = [0, 1, 3]
+        i2 = [0, 1, 0, 2, 1] * 10
+        c = np.multiply([2, 2, 1], 10)
         # test for string types
         for dt in ['U']:
             aa = np.array(a, dt)
@@ -867,13 +867,13 @@ def test_unique_unicode_string(self):
 
     def test_unique_vstring(self):
         # test for unicode and nullable string arrays
-        a = np.array(["apple", None, "りんご", "", "apple", "🍎", None, "banana", "", "バナナ", "🍌"], dtype=StringDType(na_object=None))
-        unq = np.array([None, 'バナナ', '🍎', '🍌', '', 'りんご', 'banana', 'apple'], dtype=StringDType(na_object=None))
+        a = np.array(["apple", None, "りんご", "", "apple", None, "banana", "", "バナナ", "りんご"], dtype=StringDType(na_object=None))
+        unq = np.array([None, 'バナナ', '', 'banana', 'りんご', 'apple'], dtype=StringDType(na_object=None))
         a1 = unique(a, sorted=False)
         assert_array_equal(a1, unq)
 
     def test_unique_vstring_errors(self):
-        a = np.array(["apple", None, "りんご", "", "apple", "🍎", None, "banana", "", "バナナ", "🍌"], dtype=StringDType(na_object=None))
+        a = np.array(["apple", None, "りんご", "", "apple", None, "banana", "", "バナナ", "りんご"], dtype=StringDType(na_object=None))
         assert_raises(ValueError, unique, a, return_index=True)
         assert_raises(ValueError, unique, a, return_inverse=True)
         assert_raises(ValueError, unique, a, return_counts=True)

From 1ad6d6ce81debfd9e2a6db33d994f3d263165ae5 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 21:24:30 +0900
Subject: [PATCH 17/75] fix testcase

---
 numpy/lib/tests/test_arraysetops.py | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 0ae7550ecb1f..984c0278d18b 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -839,10 +839,10 @@ class Subclass(np.ndarray):
             bb = Subclass(b.shape, dtype=dt, buffer=b)
             self.check_all(aa, bb, i1, i2, c, dt)
 
-    def test_unique_byte_string(self):
+    def test_unique_string(self):
         # test for byte string arrays
-        a = ["apple", "banana", "apple", "cherry", "date", "banana", "fig", "grape"] * 10
-        b = ["apple", "banana", "cherry", "date", "fig", "grape"]
+        a = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 10
+        b = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
         i1 = [0, 1, 3, 4, 6, 7]
         i2 = [0, 1, 0, 2, 3, 1, 4, 5] * 10
         c = np.multiply([2, 2, 1, 1, 1, 1], 10)
@@ -852,28 +852,15 @@ def test_unique_byte_string(self):
             bb = np.array(b, dt)
             self.check_all(aa, bb, i1, i2, c, dt)
 
-    def test_unique_unicode_string(self):
-        # test for unicode string arrays
-        a = ["こんにちは", "こんばんは", "こんにちは", "さようなら", "こんばんは"] * 10
-        b = ['こんにちは', 'こんばんは', 'さようなら']
-        i1 = [0, 1, 3]
-        i2 = [0, 1, 0, 2, 1] * 10
-        c = np.multiply([2, 2, 1], 10)
-        # test for string types
-        for dt in ['U']:
-            aa = np.array(a, dt)
-            bb = np.array(b, dt)
-            self.check_all(aa, bb, i1, i2, c, dt)
-
     def test_unique_vstring(self):
         # test for unicode and nullable string arrays
-        a = np.array(["apple", None, "りんご", "", "apple", None, "banana", "", "バナナ", "りんご"], dtype=StringDType(na_object=None))
-        unq = np.array([None, 'バナナ', '', 'banana', 'りんご', 'apple'], dtype=StringDType(na_object=None))
+        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 10, dtype=StringDType(na_object=None))
+        unq = np.array([None, 'grape', 'fig', 'date', 'cherry', 'banana', 'apple'], dtype=StringDType(na_object=None))
         a1 = unique(a, sorted=False)
         assert_array_equal(a1, unq)
 
     def test_unique_vstring_errors(self):
-        a = np.array(["apple", None, "りんご", "", "apple", None, "banana", "", "バナナ", "りんご"], dtype=StringDType(na_object=None))
+        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 10, dtype=StringDType(na_object=None))
         assert_raises(ValueError, unique, a, return_index=True)
         assert_raises(ValueError, unique, a, return_inverse=True)
         assert_raises(ValueError, unique, a, return_counts=True)

From b478e152690da687c9d078fc4e891c42a6bc9743 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 21:59:57 +0900
Subject: [PATCH 18/75] remove error

---
 numpy/lib/_arraysetops_impl.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py
index 56946718b9ff..a284a9204112 100644
--- a/numpy/lib/_arraysetops_impl.py
+++ b/numpy/lib/_arraysetops_impl.py
@@ -361,28 +361,6 @@ def _unique1d(ar, return_index=False, return_inverse=False,
         # two dimensions for all operations. Coerce to an ndarray in such cases.
         ar = np.asarray(ar).flatten()
 
-    if ar.dtype.kind == 'T':
-        if return_index:
-            raise ValueError(
-                "Currently, `return_index` can only be False dtype is `T` (StringDType)."
-            )
-        if return_inverse:
-            raise ValueError(
-                "Currently, `return_inverse` can only be False dtype is `T` (StringDType)."
-            )
-        if return_counts:
-            raise ValueError(
-                "Currently, `return_counts` can only be False dtype is `T` (StringDType)."
-            )
-        if not equal_nan:
-            raise ValueError(
-                "Currently, `equal_nan` can only be True dtype is `T` (StringDType)."
-            )
-        if sorted:
-            raise ValueError(
-                "Currently, `sorted` can only be False dtype is `T` (StringDType)."
-            )
-
     optional_indices = return_index or return_inverse
 
     # masked arrays are not supported yet.

From 95bc4051b026227f04f9683495716209588dc5bb Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 22:00:43 +0900
Subject: [PATCH 19/75] fix testcase

---
 numpy/lib/tests/test_arraysetops.py | 49 ++++++++++++++++-------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 984c0278d18b..51a6b165a0cc 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -839,33 +839,40 @@ class Subclass(np.ndarray):
             bb = Subclass(b.shape, dtype=dt, buffer=b)
             self.check_all(aa, bb, i1, i2, c, dt)
 
-    def test_unique_string(self):
+    def test_unique_byte_string_hash_based(self):
         # test for byte string arrays
-        a = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 10
-        b = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
-        i1 = [0, 1, 3, 4, 6, 7]
-        i2 = [0, 1, 0, 2, 3, 1, 4, 5] * 10
-        c = np.multiply([2, 2, 1, 1, 1, 1], 10)
-        # test for string types
-        for dt in ['S', 'U']:
-            aa = np.array(a, dt)
-            bb = np.array(b, dt)
-            self.check_all(aa, bb, i1, i2, c, dt)
+        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 10
+        unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
+
+        a1 = unique(arr, sorted=False)
+        # the result varies depending on the hash function used,
+        # so we check them by sorting
+        a1_sorted = np.sort(a1)
+        assert_array_equal(a1_sorted, unq_sorted)
+
+    def test_unique_unicode_string_hash_based(self):
+        # test for unicode string arrays
+        arr = ['こんにちは', 'こんばんは', 'こんにちは', '😊', 'さようなら', 'こんばんは', '🌸', '😊'] * 10
+        unq_sorted = ['こんにちは', 'こんばんは', 'さようなら', '🌸', '😊']
+
+        a1 = unique(arr, sorted=False)
+        # the result varies depending on the hash function used,
+        # so we check them by sorting
+        a1_sorted = np.sort(a1)
+        assert_array_equal(a1_sorted, unq_sorted)
 
     def test_unique_vstring(self):
         # test for unicode and nullable string arrays
-        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 10, dtype=StringDType(na_object=None))
-        unq = np.array([None, 'grape', 'fig', 'date', 'cherry', 'banana', 'apple'], dtype=StringDType(na_object=None))
+        a = np.array(['apple', None, 'りんご', '', 'apple', '🍎', None, 'banana', '', 'バナナ', '🍌'], dtype=StringDType(na_object=None))
+        unq_sorted = np.array([None, '', 'apple', 'banana', 'りんご', 'バナナ', '🍌', '🍎'], dtype=StringDType(na_object=None))
+
         a1 = unique(a, sorted=False)
-        assert_array_equal(a1, unq)
+        # the result varies depending on the hash function used,
+        # so we check them by sorting
 
-    def test_unique_vstring_errors(self):
-        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 10, dtype=StringDType(na_object=None))
-        assert_raises(ValueError, unique, a, return_index=True)
-        assert_raises(ValueError, unique, a, return_inverse=True)
-        assert_raises(ValueError, unique, a, return_counts=True)
-        assert_raises(ValueError, unique, a, equal_nan=False)
-        assert_raises(ValueError, unique, a, sorted=True)
+        # nan
+        assert_equal(a1[0], unq_sorted[0])
+        assert_array_equal(np.sort(a1[1:]), unq_sorted[1:])
 
     @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])
     def test_unsupported_hash_based(self, arg):

From 3f1811bf99bc1e228eade505e08f6b5a18821a4f Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 22:01:42 +0900
Subject: [PATCH 20/75] fix testcase name

---
 numpy/lib/tests/test_arraysetops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 51a6b165a0cc..35ea2e19193e 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -861,7 +861,7 @@ def test_unique_unicode_string_hash_based(self):
         a1_sorted = np.sort(a1)
         assert_array_equal(a1_sorted, unq_sorted)
 
-    def test_unique_vstring(self):
+    def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
         a = np.array(['apple', None, 'りんご', '', 'apple', '🍎', None, 'banana', '', 'バナナ', '🍌'], dtype=StringDType(na_object=None))
         unq_sorted = np.array([None, '', 'apple', 'banana', 'りんご', 'バナナ', '🍌', '🍎'], dtype=StringDType(na_object=None))

From 99e3662d7ae9d5e9ba736149cb4ccd916aab0cbd Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 18 Apr 2025 22:33:00 +0900
Subject: [PATCH 21/75] use basic_string

---
 numpy/_core/src/multiarray/unique.cpp | 40 ++++++++-------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 9c25e672adf7..b7a80935f40b 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,9 +3,11 @@
 
 #include <Python.h>
 
-#include <cstring>
+#include <algorithm>
+#include <iostream>
 #include <unordered_set>
 #include <functional>
+#include <string>
 #include <vector>
 
 #include <numpy/npy_common.h>
@@ -26,25 +28,6 @@ FinalAction<F> finally(F f) {
     return FinalAction<F>(f);
 }
 
-template <typename T>
-struct VectorHash {
-    size_t operator()(const std::vector<T>& v) const {
-        // https://www.boost.org/doc/libs/1_88_0/libs/container_hash/doc/html/hash.html#notes_hash_combine
-        size_t h = v.size();
-        for (const T& x : v) {
-            h ^= std::hash<T>{}(x) + 0x9e3779b9 + (h << 6) + (h >> 2);
-        }
-        return h;
-    }
-};
-
-template <typename T>
-struct VectorEqual {
-    bool operator()(const std::vector<T>& lhs, const std::vector<T>& rhs) const {
-        return lhs == rhs;
-    }
-};
-
 template<typename T>
 static PyObject*
 unique_int(PyArrayObject *self)
@@ -150,7 +133,7 @@ unique_string(PyArrayObject *self)
     strings of a certain size (itemsize / sizeof(T)).
     */
     NPY_ALLOW_C_API_DEF;
-    std::unordered_set<std::vector<T>, VectorHash<T>, VectorEqual<T>> hashset;
+    std::unordered_set<std::basic_string<T>> hashset;
     // reserve the hashset to avoid reallocations
     // reallocations are expensive, especially for string arrays
     hashset.reserve(PyArray_SIZE(self) * 2);
@@ -199,7 +182,8 @@ unique_string(PyArrayObject *self)
             npy_intp count = *innersizeptr;
 
             while (count--) {
-                hashset.emplace((T *) data, (T *) data + num_chars);
+                T * str = reinterpret_cast<T *>(data);
+                hashset.emplace(str, str + num_chars);
                 data += stride;
             }
         } while (iternext(iter));
@@ -233,9 +217,9 @@ unique_string(PyArrayObject *self)
     for (; it != hashset.end(); it++, i++) {
         char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
         size_t byte_to_copy = it->size() * sizeof(T);
-        std::memcpy(data, it->data(), byte_to_copy);
+        memcpy(data, (char *)it->data(), byte_to_copy);
         if (byte_to_copy < (size_t)itemsize) {
-            std::memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
+            memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
         }
     }
 
@@ -253,7 +237,7 @@ unique_vstring(PyArrayObject *self)
     strings (StringDType).
     */
     NPY_ALLOW_C_API_DEF;
-    std::unordered_set<std::vector<npy_byte>, VectorHash<npy_byte>, VectorEqual<npy_byte>> hashset;
+    std::unordered_set<std::string> hashset;
     // reserve the hashset to avoid reallocations
     // reallocations are expensive, especially for string arrays
     hashset.reserve(PyArray_SIZE(self) * 2);
@@ -357,7 +341,7 @@ unique_vstring(PyArrayObject *self)
     for (; it != hashset.end(); it++, i++) {
         char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
         npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
-        NpyString_pack(allocator, packed_string, (const char *)it->data(), it->size());
+        NpyString_pack(allocator, packed_string, it->data(), it->size());
     }
     NPY_DISABLE_C_API;
 
@@ -387,8 +371,8 @@ std::unordered_map<int, function_type> unique_funcs = {
     {NPY_UINT32, unique_int<npy_uint32>},
     {NPY_UINT64, unique_int<npy_uint64>},
     {NPY_DATETIME, unique_int<npy_uint64>},
-    {NPY_STRING, unique_string<npy_byte>},
-    {NPY_UNICODE, unique_string<npy_ucs4>},
+    {NPY_STRING, unique_string<char>},
+    {NPY_UNICODE, unique_string<char32_t>},
     {NPY_VSTRING, unique_vstring},
 };
 

From b99542a7a251c91aecb9164d04c4364ff460d19d Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 00:09:13 +0900
Subject: [PATCH 22/75] fix testcase

---
 numpy/lib/tests/test_arraysetops.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 35ea2e19193e..b22d55b81465 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -847,8 +847,7 @@ def test_unique_byte_string_hash_based(self):
         a1 = unique(arr, sorted=False)
         # the result varies depending on the hash function used,
         # so we check them by sorting
-        a1_sorted = np.sort(a1)
-        assert_array_equal(a1_sorted, unq_sorted)
+        assert_array_equal(sorted(a1.tolist()), unq_sorted)
 
     def test_unique_unicode_string_hash_based(self):
         # test for unicode string arrays
@@ -858,8 +857,7 @@ def test_unique_unicode_string_hash_based(self):
         a1 = unique(arr, sorted=False)
         # the result varies depending on the hash function used,
         # so we check them by sorting
-        a1_sorted = np.sort(a1)
-        assert_array_equal(a1_sorted, unq_sorted)
+        assert_array_equal(sorted(a1.tolist()), unq_sorted)
 
     def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
@@ -872,7 +870,7 @@ def test_unique_vstring_hash_based(self):
 
         # nan
         assert_equal(a1[0], unq_sorted[0])
-        assert_array_equal(np.sort(a1[1:]), unq_sorted[1:])
+        assert_array_equal(sorted(a1[1:].tolist()), unq_sorted[1:])
 
     @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])
     def test_unsupported_hash_based(self, arg):

From 2589dd7489f6a3474bfa7ea241129664c0ff2564 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 00:12:33 +0900
Subject: [PATCH 23/75] add ValueError

---
 numpy/lib/_arraysetops_impl.py      | 5 +++++
 numpy/lib/tests/test_arraysetops.py | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py
index a284a9204112..bf111221a7fb 100644
--- a/numpy/lib/_arraysetops_impl.py
+++ b/numpy/lib/_arraysetops_impl.py
@@ -361,6 +361,11 @@ def _unique1d(ar, return_index=False, return_inverse=False,
         # two dimensions for all operations. Coerce to an ndarray in such cases.
         ar = np.asarray(ar).flatten()
 
+    if ar.dtype.kind == 'T' and not equal_nan:
+        raise ValueError(
+            "Currently, `equal_nan` can only be True dtype is `T` (StringDType)."
+        )
+
     optional_indices = return_index or return_inverse
 
     # masked arrays are not supported yet.
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index b22d55b81465..21a5217eb8be 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -872,6 +872,10 @@ def test_unique_vstring_hash_based(self):
         assert_equal(a1[0], unq_sorted[0])
         assert_array_equal(sorted(a1[1:].tolist()), unq_sorted[1:])
 
+    def test_unique_vstring_errors(self):
+        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana'] * 10, dtype=StringDType(na_object=None))
+        assert_raises(ValueError, unique, a, equal_nan=False)
+
     @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])
     def test_unsupported_hash_based(self, arg):
         """These currently never use the hash-based solution.  However,

From 3f40cdca70d7a83b5f1a8b25f3909932faa2f83b Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 00:31:47 +0900
Subject: [PATCH 24/75] fix testcase

---
 numpy/lib/tests/test_arraysetops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 21a5217eb8be..77b5ce7d6865 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -861,8 +861,8 @@ def test_unique_unicode_string_hash_based(self):
 
     def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
-        a = np.array(['apple', None, 'りんご', '', 'apple', '🍎', None, 'banana', '', 'バナナ', '🍌'], dtype=StringDType(na_object=None))
-        unq_sorted = np.array([None, '', 'apple', 'banana', 'りんご', 'バナナ', '🍌', '🍎'], dtype=StringDType(na_object=None))
+        a = np.array(['apple', None, 'りんご', '', 'apple', '🍎', None, 'banana', '', 'バナナ', '🍌'] * 10, dtype=StringDType(na_object=None))
+        unq_sorted = [None, '', 'apple', 'banana', 'りんご', 'バナナ', '🍌', '🍎']
 
         a1 = unique(a, sorted=False)
         # the result varies depending on the hash function used,

From 68d5a7b072d5bcf6c814019be80d7f30c5bfec8e Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 01:08:00 +0900
Subject: [PATCH 25/75] fix memory error

---
 numpy/_core/src/multiarray/unique.cpp | 11 +++++++++--
 numpy/lib/tests/test_arraysetops.py   |  8 ++++----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index b7a80935f40b..348ef3ad6d7e 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -295,6 +295,9 @@ unique_vstring(PyArrayObject *self)
                 npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
                 int is_null = NpyString_load(allocator, packed_string, &sdata);
 
+                if (is_null == -1) {
+                    return NULL;
+                }
                 if (is_null) {
                     contains_null = true;
                 }
@@ -335,13 +338,17 @@ unique_vstring(PyArrayObject *self)
         // insert null if original array contains null
         char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
         npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
-        NpyString_pack_null(allocator, packed_string);
+        if (NpyString_pack_null(allocator, packed_string) == -1) {
+            return NULL;
+        }
         i++;
     }
     for (; it != hashset.end(); it++, i++) {
         char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
         npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
-        NpyString_pack(allocator, packed_string, it->data(), it->size());
+        if (NpyString_pack(allocator, packed_string, it->data(), it->size()) == -1) {
+            return NULL;
+        }
     }
     NPY_DISABLE_C_API;
 
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 77b5ce7d6865..f84d8c371710 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -841,7 +841,7 @@ class Subclass(np.ndarray):
 
     def test_unique_byte_string_hash_based(self):
         # test for byte string arrays
-        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 10
+        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 3
         unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
 
         a1 = unique(arr, sorted=False)
@@ -851,7 +851,7 @@ def test_unique_byte_string_hash_based(self):
 
     def test_unique_unicode_string_hash_based(self):
         # test for unicode string arrays
-        arr = ['こんにちは', 'こんばんは', 'こんにちは', '😊', 'さようなら', 'こんばんは', '🌸', '😊'] * 10
+        arr = ['こんにちは', 'こんばんは', 'こんにちは', '😊', 'さようなら', 'こんばんは', '🌸', '😊'] * 3
         unq_sorted = ['こんにちは', 'こんばんは', 'さようなら', '🌸', '😊']
 
         a1 = unique(arr, sorted=False)
@@ -861,7 +861,7 @@ def test_unique_unicode_string_hash_based(self):
 
     def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
-        a = np.array(['apple', None, 'りんご', '', 'apple', '🍎', None, 'banana', '', 'バナナ', '🍌'] * 10, dtype=StringDType(na_object=None))
+        a = np.array(['apple', None, 'りんご', '', 'apple', '🍎', None, 'banana', '', 'バナナ', '🍌'] * 3, dtype=StringDType(na_object=None))
         unq_sorted = [None, '', 'apple', 'banana', 'りんご', 'バナナ', '🍌', '🍎']
 
         a1 = unique(a, sorted=False)
@@ -873,7 +873,7 @@ def test_unique_vstring_hash_based(self):
         assert_array_equal(sorted(a1[1:].tolist()), unq_sorted[1:])
 
     def test_unique_vstring_errors(self):
-        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana'] * 10, dtype=StringDType(na_object=None))
+        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana'] * 3, dtype=StringDType(na_object=None))
         assert_raises(ValueError, unique, a, equal_nan=False)
 
     @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])

From d38c3e3c3b9931bac825c4d05a518e9614e435db Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 01:27:59 +0900
Subject: [PATCH 26/75] remove multibyte char

---
 numpy/lib/tests/test_arraysetops.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index f84d8c371710..ea2ae39846f3 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -851,8 +851,8 @@ def test_unique_byte_string_hash_based(self):
 
     def test_unique_unicode_string_hash_based(self):
         # test for unicode string arrays
-        arr = ['こんにちは', 'こんばんは', 'こんにちは', '😊', 'さようなら', 'こんばんは', '🌸', '😊'] * 3
-        unq_sorted = ['こんにちは', 'こんばんは', 'さようなら', '🌸', '😊']
+        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 3
+        unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
 
         a1 = unique(arr, sorted=False)
         # the result varies depending on the hash function used,
@@ -861,19 +861,19 @@ def test_unique_unicode_string_hash_based(self):
 
     def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
-        a = np.array(['apple', None, 'りんご', '', 'apple', '🍎', None, 'banana', '', 'バナナ', '🍌'] * 3, dtype=StringDType(na_object=None))
-        unq_sorted = [None, '', 'apple', 'banana', 'りんご', 'バナナ', '🍌', '🍎']
+        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 3, dtype=StringDType(na_object=None))
+        unq_sorted = [None, 'apple', 'banana', 'cherry', 'date', 'fig', 'grape']
 
         a1 = unique(a, sorted=False)
         # the result varies depending on the hash function used,
         # so we check them by sorting
 
         # nan
-        assert_equal(a1[0], unq_sorted[0])
+        #assert_equal(a1[0], unq_sorted[0])
         assert_array_equal(sorted(a1[1:].tolist()), unq_sorted[1:])
 
     def test_unique_vstring_errors(self):
-        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana'] * 3, dtype=StringDType(na_object=None))
+        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 3, dtype=StringDType(na_object=None))
         assert_raises(ValueError, unique, a, equal_nan=False)
 
     @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])

From 8cf2c636d4463c021f62c1c61297b61ccb8787a6 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 02:05:09 +0900
Subject: [PATCH 27/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 48 +++++++++++++--------------
 numpy/lib/tests/test_arraysetops.py   |  8 ++---
 2 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 348ef3ad6d7e..e387451ab0cc 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,12 +3,10 @@
 
 #include <Python.h>
 
-#include <algorithm>
 #include <iostream>
 #include <unordered_set>
 #include <functional>
 #include <string>
-#include <vector>
 
 #include <numpy/npy_common.h>
 #include "numpy/arrayobject.h"
@@ -30,7 +28,7 @@ FinalAction<F> finally(F f) {
 
 template<typename T>
 static PyObject*
-unique_int(PyArrayObject *self)
+unique_integer(PyArrayObject *self)
 {
     /* This function takes a numpy array and returns a numpy array containing
     the unique values.
@@ -217,7 +215,7 @@ unique_string(PyArrayObject *self)
     for (; it != hashset.end(); it++, i++) {
         char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
         size_t byte_to_copy = it->size() * sizeof(T);
-        memcpy(data, (char *)it->data(), byte_to_copy);
+        memcpy(data, it->c_str(), byte_to_copy);
         if (byte_to_copy < (size_t)itemsize) {
             memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
         }
@@ -302,7 +300,7 @@ unique_vstring(PyArrayObject *self)
                     contains_null = true;
                 }
                 else {
-                    hashset.emplace((npy_byte *)sdata.buf, (npy_byte *)sdata.buf + sdata.size);
+                    hashset.emplace(sdata.buf, sdata.buf + sdata.size);
                 }
                 data += stride;
             }
@@ -346,7 +344,7 @@ unique_vstring(PyArrayObject *self)
     for (; it != hashset.end(); it++, i++) {
         char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
         npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
-        if (NpyString_pack(allocator, packed_string, it->data(), it->size()) == -1) {
+        if (NpyString_pack(allocator, packed_string, it->c_str(), it->size()) == -1) {
             return NULL;
         }
     }
@@ -359,25 +357,25 @@ unique_vstring(PyArrayObject *self)
 // this map contains the functions used for each item size.
 typedef std::function<PyObject *(PyArrayObject *)> function_type;
 std::unordered_map<int, function_type> unique_funcs = {
-    {NPY_BYTE, unique_int<npy_byte>},
-    {NPY_UBYTE, unique_int<npy_ubyte>},
-    {NPY_SHORT, unique_int<npy_short>},
-    {NPY_USHORT, unique_int<npy_ushort>},
-    {NPY_INT, unique_int<npy_int>},
-    {NPY_UINT, unique_int<npy_uint>},
-    {NPY_LONG, unique_int<npy_long>},
-    {NPY_ULONG, unique_int<npy_ulong>},
-    {NPY_LONGLONG, unique_int<npy_longlong>},
-    {NPY_ULONGLONG, unique_int<npy_ulonglong>},
-    {NPY_INT8, unique_int<npy_int8>},
-    {NPY_INT16, unique_int<npy_int16>},
-    {NPY_INT32, unique_int<npy_int32>},
-    {NPY_INT64, unique_int<npy_int64>},
-    {NPY_UINT8, unique_int<npy_uint8>},
-    {NPY_UINT16, unique_int<npy_uint16>},
-    {NPY_UINT32, unique_int<npy_uint32>},
-    {NPY_UINT64, unique_int<npy_uint64>},
-    {NPY_DATETIME, unique_int<npy_uint64>},
+    {NPY_BYTE, unique_integer<npy_byte>},
+    {NPY_UBYTE, unique_integer<npy_ubyte>},
+    {NPY_SHORT, unique_integer<npy_short>},
+    {NPY_USHORT, unique_integer<npy_ushort>},
+    {NPY_INT, unique_integer<npy_int>},
+    {NPY_UINT, unique_integer<npy_uint>},
+    {NPY_LONG, unique_integer<npy_long>},
+    {NPY_ULONG, unique_integer<npy_ulong>},
+    {NPY_LONGLONG, unique_integer<npy_longlong>},
+    {NPY_ULONGLONG, unique_integer<npy_ulonglong>},
+    {NPY_INT8, unique_integer<npy_int8>},
+    {NPY_INT16, unique_integer<npy_int16>},
+    {NPY_INT32, unique_integer<npy_int32>},
+    {NPY_INT64, unique_integer<npy_int64>},
+    {NPY_UINT8, unique_integer<npy_uint8>},
+    {NPY_UINT16, unique_integer<npy_uint16>},
+    {NPY_UINT32, unique_integer<npy_uint32>},
+    {NPY_UINT64, unique_integer<npy_uint64>},
+    {NPY_DATETIME, unique_integer<npy_uint64>},
     {NPY_STRING, unique_string<char>},
     {NPY_UNICODE, unique_string<char32_t>},
     {NPY_VSTRING, unique_vstring},
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index ea2ae39846f3..e921bf07725c 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -841,7 +841,7 @@ class Subclass(np.ndarray):
 
     def test_unique_byte_string_hash_based(self):
         # test for byte string arrays
-        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 3
+        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 2
         unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
 
         a1 = unique(arr, sorted=False)
@@ -851,7 +851,7 @@ def test_unique_byte_string_hash_based(self):
 
     def test_unique_unicode_string_hash_based(self):
         # test for unicode string arrays
-        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 3
+        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 2
         unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
 
         a1 = unique(arr, sorted=False)
@@ -861,7 +861,7 @@ def test_unique_unicode_string_hash_based(self):
 
     def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
-        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 3, dtype=StringDType(na_object=None))
+        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 2, dtype=StringDType(na_object=None))
         unq_sorted = [None, 'apple', 'banana', 'cherry', 'date', 'fig', 'grape']
 
         a1 = unique(a, sorted=False)
@@ -873,7 +873,7 @@ def test_unique_vstring_hash_based(self):
         assert_array_equal(sorted(a1[1:].tolist()), unq_sorted[1:])
 
     def test_unique_vstring_errors(self):
-        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 3, dtype=StringDType(na_object=None))
+        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 2, dtype=StringDType(na_object=None))
         assert_raises(ValueError, unique, a, equal_nan=False)
 
     @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])

From 0165d6a4c33d573c5573af194b44a62068ae32bf Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 02:26:00 +0900
Subject: [PATCH 28/75] add multibyte char

---
 numpy/lib/tests/test_arraysetops.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index e921bf07725c..ec2c040ddd86 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -841,7 +841,7 @@ class Subclass(np.ndarray):
 
     def test_unique_byte_string_hash_based(self):
         # test for byte string arrays
-        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 2
+        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape']
         unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
 
         a1 = unique(arr, sorted=False)
@@ -851,8 +851,8 @@ def test_unique_byte_string_hash_based(self):
 
     def test_unique_unicode_string_hash_based(self):
         # test for unicode string arrays
-        arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] * 2
-        unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
+        arr = ['café', 'cafe', 'café', 'naïve', 'naive', 'résumé', 'naïve', 'resume', 'résumé']
+        unq_sorted = ['cafe', 'café', 'naive', 'naïve', 'resume', 'résumé']
 
         a1 = unique(arr, sorted=False)
         # the result varies depending on the hash function used,
@@ -861,8 +861,8 @@ def test_unique_unicode_string_hash_based(self):
 
     def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
-        a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 2, dtype=StringDType(na_object=None))
-        unq_sorted = [None, 'apple', 'banana', 'cherry', 'date', 'fig', 'grape']
+        a = np.array(['straße', None, 'strasse', 'straße', None, 'niño', 'nino', 'élève', 'eleve', 'niño', 'élève'], dtype=StringDType(na_object=None))
+        unq_sorted = [None, 'eleve', 'nino', 'niño', 'strasse', 'straße', 'élève']
 
         a1 = unique(a, sorted=False)
         # the result varies depending on the hash function used,

From 243be6b436debbbbfd392b159d0004f37a36e6c9 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 02:42:26 +0900
Subject: [PATCH 29/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index e387451ab0cc..39ba2d5d4df9 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -180,8 +180,8 @@ unique_string(PyArrayObject *self)
             npy_intp count = *innersizeptr;
 
             while (count--) {
-                T * str = reinterpret_cast<T *>(data);
-                hashset.emplace(str, str + num_chars);
+                T * sdata = reinterpret_cast<T *>(data);
+                hashset.emplace(sdata, sdata + num_chars);
                 data += stride;
             }
         } while (iternext(iter));
@@ -296,7 +296,7 @@ unique_vstring(PyArrayObject *self)
                 if (is_null == -1) {
                     return NULL;
                 }
-                if (is_null) {
+                else if (is_null) {
                     contains_null = true;
                 }
                 else {

From a6e5d3c9a0269dd2ec9be9eebcb9743b583cf61e Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 03:22:12 +0900
Subject: [PATCH 30/75] fix memory error

---
 numpy/_core/src/multiarray/unique.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 39ba2d5d4df9..fce896391130 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -7,6 +7,7 @@
 #include <unordered_set>
 #include <functional>
 #include <string>
+#include <utility>
 
 #include <numpy/npy_common.h>
 #include "numpy/arrayobject.h"
@@ -181,7 +182,8 @@ unique_string(PyArrayObject *self)
 
             while (count--) {
                 T * sdata = reinterpret_cast<T *>(data);
-                hashset.emplace(sdata, sdata + num_chars);
+                std::basic_string<T> sdata_str(sdata, num_chars);
+                hashset.emplace(std::move(sdata_str));
                 data += stride;
             }
         } while (iternext(iter));
@@ -300,7 +302,8 @@ unique_vstring(PyArrayObject *self)
                     contains_null = true;
                 }
                 else {
-                    hashset.emplace(sdata.buf, sdata.buf + sdata.size);
+                    std::string sdata_str(sdata.buf, sdata.size);
+                    hashset.emplace(std::move(sdata_str));
                 }
                 data += stride;
             }

From 78b9dc6d4f7b44f83d9623ea9cad652b767f2bc9 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 04:50:50 +0900
Subject: [PATCH 31/75] fix GIL

---
 numpy/_core/src/multiarray/unique.cpp | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index fce896391130..44fadeeebf15 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -144,12 +144,12 @@ unique_string(PyArrayObject *self)
                                       NPY_ITER_GROWINNER,
                                 NPY_KEEPORDER, NPY_NO_CASTING,
                                 NULL);
-    // Making sure the iterator is deallocated when the function returns, with
-    // or w/o an exception
-    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
     if (iter == NULL) {
         return NULL;
     }
+    // Making sure the iterator is deallocated when the function returns, with
+    // or w/o an exception
+    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
 
     NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
     if (iternext == NULL) {
@@ -166,8 +166,10 @@ unique_string(PyArrayObject *self)
     // or w/o an exception
     auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
 
+    NPY_ALLOW_C_API;
     // size of each entries
     npy_intp itemsize = PyArray_ITEMSIZE(self);
+    NPY_DISABLE_C_API;
 
     // the number of characters of each entries
     // (For Unicode, itemsize / 4 for UCS4)
@@ -205,7 +207,6 @@ unique_string(PyArrayObject *self)
         NPY_ARRAY_WRITEABLE, // flags
         NULL // obj
     );
-    NPY_DISABLE_C_API;
 
     if (res_obj == NULL) {
         return NULL;
@@ -222,6 +223,7 @@ unique_string(PyArrayObject *self)
             memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
         }
     }
+    NPY_DISABLE_C_API;
 
     return res_obj;
 }
@@ -249,12 +251,12 @@ unique_vstring(PyArrayObject *self)
                                       NPY_ITER_GROWINNER,
                                 NPY_KEEPORDER, NPY_NO_CASTING,
                                 NULL);
-    // Making sure the iterator is deallocated when the function returns, with
-    // or w/o an exception
-    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
     if (iter == NULL) {
         return NULL;
     }
+    // Making sure the iterator is deallocated when the function returns, with
+    // or w/o an exception
+    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
 
     NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
     if (iternext == NULL) {
@@ -271,9 +273,12 @@ unique_vstring(PyArrayObject *self)
     // or w/o an exception
     auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
 
+    NPY_ALLOW_C_API;
     // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
     PyArray_Descr *descr = PyArray_DESCR(self);
     Py_INCREF(descr);
+    NPY_DISABLE_C_API;
+
     npy_string_allocator *allocator = NpyString_acquire_allocator(
         (PyArray_StringDTypeObject *)descr);
     auto allocator_dealloc = finally([&]() {

From 0464617e48f0e71376b4936a73b9f64404cac9fc Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 05:30:56 +0900
Subject: [PATCH 32/75] fix strlen

---
 numpy/_core/src/multiarray/unique.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 44fadeeebf15..38002eb73362 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,6 +3,7 @@
 
 #include <Python.h>
 
+#include <algorithm>
 #include <iostream>
 #include <unordered_set>
 #include <functional>
@@ -184,7 +185,8 @@ unique_string(PyArrayObject *self)
 
             while (count--) {
                 T * sdata = reinterpret_cast<T *>(data);
-                std::basic_string<T> sdata_str(sdata, num_chars);
+                size_t byte_to_copy = std::find(sdata, sdata + num_chars, 0) - sdata;
+                std::basic_string<T> sdata_str(sdata, byte_to_copy);
                 hashset.emplace(std::move(sdata_str));
                 data += stride;
             }

From 908f495b05e446ebd8f723ea9d9ed2b3e98d9ddc Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 12:13:59 +0900
Subject: [PATCH 33/75] remove PyArray_GETPTR1

---
 numpy/_core/src/multiarray/unique.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 38002eb73362..b527de1b0f74 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -217,8 +217,10 @@ unique_string(PyArrayObject *self)
     // then we iterate through the map's keys to get the unique values
     auto it = hashset.begin();
     size_t i = 0;
-    for (; it != hashset.end(); it++, i++) {
-        char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
+    char *data = PyArray_BYTES((PyArrayObject *)res_obj);
+    npy_intp stride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+
+    for (; it != hashset.end(); it++, i++, data += stride) {
         size_t byte_to_copy = it->size() * sizeof(T);
         memcpy(data, it->c_str(), byte_to_copy);
         if (byte_to_copy < (size_t)itemsize) {
@@ -342,17 +344,19 @@ unique_vstring(PyArrayObject *self)
     // then we iterate through the map's keys to get the unique values
     auto it = hashset.begin();
     size_t i = 0;
+    char *data = PyArray_BYTES((PyArrayObject *)res_obj);
+    npy_intp stride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+
     if (contains_null) {
         // insert null if original array contains null
-        char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
         npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
         if (NpyString_pack_null(allocator, packed_string) == -1) {
             return NULL;
         }
+        data += stride;
         i++;
     }
-    for (; it != hashset.end(); it++, i++) {
-        char* data = (char *)PyArray_GETPTR1((PyArrayObject *)res_obj, i);
+    for (; it != hashset.end(); it++, i++, data += stride) {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
         if (NpyString_pack(allocator, packed_string, it->c_str(), it->size()) == -1) {
             return NULL;

From 30d1d1aac0d4af40fbc89d320b49e52e77f77834 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 13:17:50 +0900
Subject: [PATCH 34/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index b527de1b0f74..3a011f1c50cc 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -4,7 +4,6 @@
 #include <Python.h>
 
 #include <algorithm>
-#include <iostream>
 #include <unordered_set>
 #include <functional>
 #include <string>
@@ -219,6 +218,7 @@ unique_string(PyArrayObject *self)
     size_t i = 0;
     char *data = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp stride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+    NPY_DISABLE_C_API;
 
     for (; it != hashset.end(); it++, i++, data += stride) {
         size_t byte_to_copy = it->size() * sizeof(T);
@@ -227,7 +227,6 @@ unique_string(PyArrayObject *self)
             memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
         }
     }
-    NPY_DISABLE_C_API;
 
     return res_obj;
 }

From 36c167cc2ffebb84ed52e0215d1b50cd68b7a10b Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 13:58:13 +0900
Subject: [PATCH 35/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 3a011f1c50cc..fec5c8dac91e 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -345,6 +345,7 @@ unique_vstring(PyArrayObject *self)
     size_t i = 0;
     char *data = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp stride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+    NPY_DISABLE_C_API;
 
     if (contains_null) {
         // insert null if original array contains null
@@ -361,7 +362,6 @@ unique_vstring(PyArrayObject *self)
             return NULL;
         }
     }
-    NPY_DISABLE_C_API;
 
     return res_obj;
 }

From 79d31e4f2bcc1d6344dff66ac9a61b7556350856 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 15:10:37 +0900
Subject: [PATCH 36/75] use optional

---
 numpy/_core/src/multiarray/unique.cpp | 31 +++++++++++----------------
 numpy/lib/tests/test_arraysetops.py   | 11 ++++++----
 2 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index fec5c8dac91e..12f4cc036bc0 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -6,6 +6,7 @@
 #include <algorithm>
 #include <unordered_set>
 #include <functional>
+#include <optional>
 #include <string>
 #include <utility>
 
@@ -242,7 +243,7 @@ unique_vstring(PyArrayObject *self)
     strings (StringDType).
     */
     NPY_ALLOW_C_API_DEF;
-    std::unordered_set<std::string> hashset;
+    std::unordered_set<std::optional<std::string>> hashset;
     // reserve the hashset to avoid reallocations
     // reallocations are expensive, especially for string arrays
     hashset.reserve(PyArray_SIZE(self) * 2);
@@ -288,9 +289,6 @@ unique_vstring(PyArrayObject *self)
         NpyString_release_allocator(allocator);
     });
 
-    // whether the array contains null values
-    bool contains_null = false;
-
     // first we put the data in a hash map
     if (NpyIter_GetIterSize(iter) > 0) {
         do {
@@ -307,7 +305,7 @@ unique_vstring(PyArrayObject *self)
                     return NULL;
                 }
                 else if (is_null) {
-                    contains_null = true;
+                    hashset.emplace(std::nullopt);
                 }
                 else {
                     std::string sdata_str(sdata.buf, sdata.size);
@@ -319,9 +317,6 @@ unique_vstring(PyArrayObject *self)
     }
 
     npy_intp length = hashset.size();
-    if (contains_null) {
-        length++;
-    }
 
     NPY_ALLOW_C_API;
     PyObject *res_obj = PyArray_NewFromDescr(
@@ -347,19 +342,17 @@ unique_vstring(PyArrayObject *self)
     npy_intp stride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
     NPY_DISABLE_C_API;
 
-    if (contains_null) {
-        // insert null if original array contains null
-        npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
-        if (NpyString_pack_null(allocator, packed_string) == -1) {
-            return NULL;
-        }
-        data += stride;
-        i++;
-    }
     for (; it != hashset.end(); it++, i++, data += stride) {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
-        if (NpyString_pack(allocator, packed_string, it->c_str(), it->size()) == -1) {
-            return NULL;
+        if (it->has_value()) {
+            std::string str = it->value();
+            if (NpyString_pack(allocator, packed_string, str.c_str(), str.size()) == -1) {
+                return NULL;
+            }
+        } else {
+            if (NpyString_pack_null(allocator, packed_string) == -1) {
+                return NULL;
+            }
         }
     }
 
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index ec2c040ddd86..82957792674a 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -862,15 +862,18 @@ def test_unique_unicode_string_hash_based(self):
     def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
         a = np.array(['straße', None, 'strasse', 'straße', None, 'niño', 'nino', 'élève', 'eleve', 'niño', 'élève'], dtype=StringDType(na_object=None))
-        unq_sorted = [None, 'eleve', 'nino', 'niño', 'strasse', 'straße', 'élève']
+        unq_sorted_wo_none = ['eleve', 'nino', 'niño', 'strasse', 'straße', 'élève']
 
         a1 = unique(a, sorted=False)
         # the result varies depending on the hash function used,
         # so we check them by sorting
 
-        # nan
-        #assert_equal(a1[0], unq_sorted[0])
-        assert_array_equal(sorted(a1[1:].tolist()), unq_sorted[1:])
+        # a1 should have exactly one None
+        count_none = sum(x is None for x in a1)
+        assert_equal(count_none, 1)
+
+        a1_wo_none = sorted(x for x in a1 if x is not None)
+        assert_array_equal(a1_wo_none, unq_sorted_wo_none)
 
     def test_unique_vstring_errors(self):
         a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 2, dtype=StringDType(na_object=None))

From 00143f98d7311732925101b22db6398a6342a14a Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 20:09:56 +0900
Subject: [PATCH 37/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 260 +++++++++-----------------
 1 file changed, 85 insertions(+), 175 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 12f4cc036bc0..c8a2be964de6 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -43,53 +43,27 @@ unique_integer(PyArrayObject *self)
     custom or complicated dtypes or string values.
     */
     NPY_ALLOW_C_API_DEF;
-    std::unordered_set<T> hashset;
 
-    NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
-                                      NPY_ITER_EXTERNAL_LOOP |
-                                      NPY_ITER_REFS_OK |
-                                      NPY_ITER_ZEROSIZE_OK |
-                                      NPY_ITER_GROWINNER,
-                                NPY_KEEPORDER, NPY_NO_CASTING,
-                                NULL);
-    // Making sure the iterator is deallocated when the function returns, with
-    // or w/o an exception
-    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
-    if (iter == NULL) {
-        return NULL;
-    }
+    // release the GIL
+    PyThreadState *_save1 = PyEval_SaveThread();
 
-    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
-    if (iternext == NULL) {
-        return NULL;
-    }
-    char **dataptr = NpyIter_GetDataPtrArray(iter);
-    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
-    npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
+    npy_intp isize = PyArray_SIZE(self);
+    char *idata = PyArray_BYTES(self);
+    npy_intp istride = PyArray_STRIDES(self)[0];
 
-    // release the GIL
-    PyThreadState *_save;
-    _save = PyEval_SaveThread();
-    // Making sure the GIL is re-acquired when the function returns, with
-    // or w/o an exception
-    auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
-    // first we put the data in a hash map
-
-    if (NpyIter_GetIterSize(iter) > 0) {
-        do {
-            char* data = *dataptr;
-            npy_intp stride = *strideptr;
-            npy_intp count = *innersizeptr;
-
-            while (count--) {
-                hashset.insert(*((T *) data));
-                data += stride;
-            }
-        } while (iternext(iter));
+    std::unordered_set<T> hashset;
+    // reserve the hashset to avoid reallocations
+    // reallocations are expensive, especially for string arrays
+    hashset.reserve(isize * 2);
+
+    // As input is 1d, we can use the strides to iterate through the array.
+    for (npy_intp i = 0; i < isize; i++, idata += istride) {
+        hashset.insert(*((T *) idata));
     }
 
     npy_intp length = hashset.size();
 
+    PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
     PyArray_Descr *descr = PyArray_DESCR(self);
     Py_INCREF(descr);
@@ -104,11 +78,12 @@ unique_integer(PyArrayObject *self)
         NPY_ARRAY_WRITEABLE, // flags
         NULL // obj
     );
-    NPY_DISABLE_C_API;
 
     if (res_obj == NULL) {
         return NULL;
     }
+    NPY_DISABLE_C_API;
+    PyThreadState *_save2 = PyEval_SaveThread();
 
     // then we iterate through the map's keys to get the unique values
     T* data = (T *)PyArray_DATA((PyArrayObject *)res_obj);
@@ -118,6 +93,7 @@ unique_integer(PyArrayObject *self)
         data[i] = *it;
     }
 
+    PyEval_RestoreThread(_save2);
     return res_obj;
 }
 
@@ -133,68 +109,36 @@ unique_string(PyArrayObject *self)
     strings of a certain size (itemsize / sizeof(T)).
     */
     NPY_ALLOW_C_API_DEF;
-    std::unordered_set<std::basic_string<T>> hashset;
-    // reserve the hashset to avoid reallocations
-    // reallocations are expensive, especially for string arrays
-    hashset.reserve(PyArray_SIZE(self) * 2);
-
-    NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
-                                      NPY_ITER_EXTERNAL_LOOP |
-                                      NPY_ITER_REFS_OK |
-                                      NPY_ITER_ZEROSIZE_OK |
-                                      NPY_ITER_GROWINNER,
-                                NPY_KEEPORDER, NPY_NO_CASTING,
-                                NULL);
-    if (iter == NULL) {
-        return NULL;
-    }
-    // Making sure the iterator is deallocated when the function returns, with
-    // or w/o an exception
-    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
-
-    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
-    if (iternext == NULL) {
-        return NULL;
-    }
-    char **dataptr = NpyIter_GetDataPtrArray(iter);
-    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
-    npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
 
     // release the GIL
-    PyThreadState *_save;
-    _save = PyEval_SaveThread();
-    // Making sure the GIL is re-acquired when the function returns, with
-    // or w/o an exception
-    auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
+    PyThreadState *_save1 = PyEval_SaveThread();
 
-    NPY_ALLOW_C_API;
     // size of each entries
     npy_intp itemsize = PyArray_ITEMSIZE(self);
-    NPY_DISABLE_C_API;
-
     // the number of characters of each entries
     // (For Unicode, itemsize / 4 for UCS4)
-    npy_intp num_chars = itemsize / sizeof(T);
-
-    // first we put the data in a hash map
-    if (NpyIter_GetIterSize(iter) > 0) {
-        do {
-            char* data = *dataptr;
-            npy_intp stride = *strideptr;
-            npy_intp count = *innersizeptr;
-
-            while (count--) {
-                T * sdata = reinterpret_cast<T *>(data);
-                size_t byte_to_copy = std::find(sdata, sdata + num_chars, 0) - sdata;
-                std::basic_string<T> sdata_str(sdata, byte_to_copy);
-                hashset.emplace(std::move(sdata_str));
-                data += stride;
-            }
-        } while (iternext(iter));
+    npy_intp num_chars = itemsize / sizeof(typename T::value_type);
+
+    npy_intp isize = PyArray_SIZE(self);
+    char *idata = PyArray_BYTES(self);
+    npy_intp istride = PyArray_STRIDES(self)[0];
+
+    std::unordered_set<T> hashset;
+    // reserve the hashset to avoid reallocations
+    // reallocations are expensive, especially for string arrays
+    hashset.reserve(isize * 2);
+
+    // As input is 1d, we can use the strides to iterate through the array.
+    for (npy_intp i = 0; i < isize; i++, idata += istride) {
+        typename T::value_type *sdata = reinterpret_cast<typename T::value_type *>(idata);
+        size_t byte_to_copy = std::find(sdata, sdata + num_chars, 0) - sdata;
+        T sdata_str(sdata, byte_to_copy);
+        hashset.emplace(std::move(sdata_str));
     }
 
     npy_intp length = hashset.size();
 
+    PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
     PyArray_Descr *descr = PyArray_DESCR(self);
     Py_INCREF(descr);
@@ -213,22 +157,19 @@ unique_string(PyArrayObject *self)
     if (res_obj == NULL) {
         return NULL;
     }
-
-    // then we iterate through the map's keys to get the unique values
-    auto it = hashset.begin();
-    size_t i = 0;
-    char *data = PyArray_BYTES((PyArrayObject *)res_obj);
-    npy_intp stride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
     NPY_DISABLE_C_API;
+    PyThreadState *_save2 = PyEval_SaveThread();
 
-    for (; it != hashset.end(); it++, i++, data += stride) {
-        size_t byte_to_copy = it->size() * sizeof(T);
-        memcpy(data, it->c_str(), byte_to_copy);
-        if (byte_to_copy < (size_t)itemsize) {
-            memset(data + byte_to_copy, 0, itemsize - byte_to_copy);
-        }
+    char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
+    npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+
+    memset(odata, 0, itemsize * length);
+    for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
+        size_t byte_to_copy = it->size() * sizeof(typename T::value_type);
+        memcpy(odata, it->c_str(), byte_to_copy);
     }
 
+    PyEval_RestoreThread(_save2);
     return res_obj;
 }
 
@@ -243,45 +184,13 @@ unique_vstring(PyArrayObject *self)
     strings (StringDType).
     */
     NPY_ALLOW_C_API_DEF;
-    std::unordered_set<std::optional<std::string>> hashset;
-    // reserve the hashset to avoid reallocations
-    // reallocations are expensive, especially for string arrays
-    hashset.reserve(PyArray_SIZE(self) * 2);
-
-    NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
-                                      NPY_ITER_EXTERNAL_LOOP |
-                                      NPY_ITER_REFS_OK |
-                                      NPY_ITER_ZEROSIZE_OK |
-                                      NPY_ITER_GROWINNER,
-                                NPY_KEEPORDER, NPY_NO_CASTING,
-                                NULL);
-    if (iter == NULL) {
-        return NULL;
-    }
-    // Making sure the iterator is deallocated when the function returns, with
-    // or w/o an exception
-    auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
-
-    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
-    if (iternext == NULL) {
-        return NULL;
-    }
-    char **dataptr = NpyIter_GetDataPtrArray(iter);
-    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
-    npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
 
-    // release the GIL
-    PyThreadState *_save;
-    _save = PyEval_SaveThread();
-    // Making sure the GIL is re-acquired when the function returns, with
-    // or w/o an exception
-    auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
-
-    NPY_ALLOW_C_API;
-    // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
     PyArray_Descr *descr = PyArray_DESCR(self);
+    // this macro requires the GIL to be held
     Py_INCREF(descr);
-    NPY_DISABLE_C_API;
+
+    // release the GIL
+    PyThreadState *_save1 = PyEval_SaveThread();
 
     npy_string_allocator *allocator = NpyString_acquire_allocator(
         (PyArray_StringDTypeObject *)descr);
@@ -289,35 +198,37 @@ unique_vstring(PyArrayObject *self)
         NpyString_release_allocator(allocator);
     });
 
-    // first we put the data in a hash map
-    if (NpyIter_GetIterSize(iter) > 0) {
-        do {
-            char* data = *dataptr;
-            npy_intp stride = *strideptr;
-            npy_intp count = *innersizeptr;
-
-            while (count--) {
-                npy_static_string sdata = {0, NULL};
-                npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
-                int is_null = NpyString_load(allocator, packed_string, &sdata);
-
-                if (is_null == -1) {
-                    return NULL;
-                }
-                else if (is_null) {
-                    hashset.emplace(std::nullopt);
-                }
-                else {
-                    std::string sdata_str(sdata.buf, sdata.size);
-                    hashset.emplace(std::move(sdata_str));
-                }
-                data += stride;
-            }
-        } while (iternext(iter));
+    npy_intp isize = PyArray_SIZE(self);
+    char *idata = PyArray_BYTES(self);
+    npy_intp istride = PyArray_STRIDES(self)[0];
+
+    std::unordered_set<std::optional<std::string>> hashset;
+    // reserve the hashset to avoid reallocations
+    // reallocations are expensive, especially for string arrays
+    hashset.reserve(isize * 2);
+
+    // As input is 1d, we can use the strides to iterate through the array.
+    for (npy_intp i = 0; i < isize; i++, idata += istride) {
+        // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
+        npy_static_string sdata = {0, NULL};
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
+        int is_null = NpyString_load(allocator, packed_string, &sdata);
+
+        if (is_null == -1) {
+            return NULL;
+        }
+        else if (is_null) {
+            hashset.emplace(std::nullopt);
+        }
+        else {
+            std::string sdata_str(sdata.buf, sdata.size);
+            hashset.emplace(std::move(sdata_str));
+        }
     }
 
     npy_intp length = hashset.size();
 
+    PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
     PyObject *res_obj = PyArray_NewFromDescr(
         &PyArray_Type,
@@ -334,16 +245,14 @@ unique_vstring(PyArrayObject *self)
     if (res_obj == NULL) {
         return NULL;
     }
-
-    // then we iterate through the map's keys to get the unique values
-    auto it = hashset.begin();
-    size_t i = 0;
-    char *data = PyArray_BYTES((PyArrayObject *)res_obj);
-    npy_intp stride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
     NPY_DISABLE_C_API;
+    PyThreadState *_save2 = PyEval_SaveThread();
 
-    for (; it != hashset.end(); it++, i++, data += stride) {
-        npy_packed_static_string *packed_string = (npy_packed_static_string *)data;
+    char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
+    npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+    
+    for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
         if (it->has_value()) {
             std::string str = it->value();
             if (NpyString_pack(allocator, packed_string, str.c_str(), str.size()) == -1) {
@@ -356,6 +265,7 @@ unique_vstring(PyArrayObject *self)
         }
     }
 
+    PyEval_RestoreThread(_save2);
     return res_obj;
 }
 
@@ -382,8 +292,8 @@ std::unordered_map<int, function_type> unique_funcs = {
     {NPY_UINT32, unique_integer<npy_uint32>},
     {NPY_UINT64, unique_integer<npy_uint64>},
     {NPY_DATETIME, unique_integer<npy_uint64>},
-    {NPY_STRING, unique_string<char>},
-    {NPY_UNICODE, unique_string<char32_t>},
+    {NPY_STRING, unique_string<std::string>},
+    {NPY_UNICODE, unique_string<std::u32string>},
     {NPY_VSTRING, unique_vstring},
 };
 

From 1cc09f3b35a94e62ba9131b8e443349b60b88768 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 20:55:52 +0900
Subject: [PATCH 38/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index c8a2be964de6..d8f939d8519d 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -85,12 +85,10 @@ unique_integer(PyArrayObject *self)
     NPY_DISABLE_C_API;
     PyThreadState *_save2 = PyEval_SaveThread();
 
-    // then we iterate through the map's keys to get the unique values
-    T* data = (T *)PyArray_DATA((PyArrayObject *)res_obj);
-    auto it = hashset.begin();
-    size_t i = 0;
-    for (; it != hashset.end(); it++, i++) {
-        data[i] = *it;
+    char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
+    npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+    for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
+        *reinterpret_cast<T *>(odata) = *it;
     }
 
     PyEval_RestoreThread(_save2);

From b29981d605b6a5f8263bac31631c49915f73a4cc Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 21:01:34 +0900
Subject: [PATCH 39/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index d8f939d8519d..c0e7a33b6063 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -58,7 +58,7 @@ unique_integer(PyArrayObject *self)
 
     // As input is 1d, we can use the strides to iterate through the array.
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
-        hashset.insert(*((T *) idata));
+        hashset.emplace(*((T *) idata));
     }
 
     npy_intp length = hashset.size();
@@ -130,8 +130,7 @@ unique_string(PyArrayObject *self)
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
         typename T::value_type *sdata = reinterpret_cast<typename T::value_type *>(idata);
         size_t byte_to_copy = std::find(sdata, sdata + num_chars, 0) - sdata;
-        T sdata_str(sdata, byte_to_copy);
-        hashset.emplace(std::move(sdata_str));
+        hashset.emplace(sdata, sdata + byte_to_copy);
     }
 
     npy_intp length = hashset.size();
@@ -219,8 +218,9 @@ unique_vstring(PyArrayObject *self)
             hashset.emplace(std::nullopt);
         }
         else {
-            std::string sdata_str(sdata.buf, sdata.size);
-            hashset.emplace(std::move(sdata_str));
+            hashset.emplace(
+                std::make_optional<std::string>(sdata.buf, sdata.buf + sdata.size)
+            );
         }
     }
 

From 91c5d42b369ffa597979e01ad0b495a8f3d38c3b Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 23:32:05 +0900
Subject: [PATCH 40/75] refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 295 +++++++++-----------------
 1 file changed, 106 insertions(+), 189 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index c0e7a33b6063..294710ff6dad 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -28,206 +28,131 @@ FinalAction<F> finally(F f) {
     return FinalAction<F>(f);
 }
 
-template<typename T>
-static PyObject*
-unique_integer(PyArrayObject *self)
-{
-    /* This function takes a numpy array and returns a numpy array containing
-    the unique values.
-
-    It assumes the numpy array includes data that can be viewed as unsigned integers
-    of a certain size (sizeof(T)).
-
-    It doesn't need to know the actual type, since it needs to find unique values
-    among binary representations of the input data. This means it won't apply to
-    custom or complicated dtypes or string values.
-    */
-    NPY_ALLOW_C_API_DEF;
-
-    // release the GIL
-    PyThreadState *_save1 = PyEval_SaveThread();
+template <typename T>
+T read_integer(char *idata, npy_intp num_chars, npy_string_allocator *allocator) {
+    return *(T *)idata;
+};
 
-    npy_intp isize = PyArray_SIZE(self);
-    char *idata = PyArray_BYTES(self);
-    npy_intp istride = PyArray_STRIDES(self)[0];
+template <typename T>
+int write_integer(char *odata, const T &value, npy_intp itemsize, npy_string_allocator *allocator) {
+    *reinterpret_cast<T *>(odata) = value;
+    return 0;
+};
 
-    std::unordered_set<T> hashset;
-    // reserve the hashset to avoid reallocations
-    // reallocations are expensive, especially for string arrays
-    hashset.reserve(isize * 2);
+template <typename T>
+T read_string(char *idata, npy_intp num_chars, npy_string_allocator *allocator) {
+    typename T::value_type *sdata = reinterpret_cast<typename T::value_type *>(idata);
+    size_t byte_to_copy = std::find(sdata, sdata + num_chars, 0) - sdata;
+    return T(sdata, sdata + byte_to_copy);
+};
 
-    // As input is 1d, we can use the strides to iterate through the array.
-    for (npy_intp i = 0; i < isize; i++, idata += istride) {
-        hashset.emplace(*((T *) idata));
+template <typename T>
+int write_string(char *odata, const T &value, npy_intp itemsize, npy_string_allocator *allocator) {
+    size_t byte_to_copy = value.size() * sizeof(typename T::value_type);
+    memcpy(odata, value.c_str(), byte_to_copy);
+    if (byte_to_copy < (size_t)itemsize) {
+        memset(odata + byte_to_copy, 0, itemsize - byte_to_copy);
     }
+    return 0;
+};
 
-    npy_intp length = hashset.size();
-
-    PyEval_RestoreThread(_save1);
-    NPY_ALLOW_C_API;
-    PyArray_Descr *descr = PyArray_DESCR(self);
-    Py_INCREF(descr);
-    PyObject *res_obj = PyArray_NewFromDescr(
-        &PyArray_Type,
-        descr,
-        1, // ndim
-        &length, // shape
-        NULL, // strides
-        NULL, // data
-        // This flag is needed to be able to call .sort on it.
-        NPY_ARRAY_WRITEABLE, // flags
-        NULL // obj
-    );
+template <typename T>
+T read_vstring(char *idata, npy_intp num_chars, npy_string_allocator *allocator) {
+    // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
+    npy_static_string sdata = {0, NULL};
+    npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
+    int is_null = NpyString_load(allocator, packed_string, &sdata);
 
-    if (res_obj == NULL) {
-        return NULL;
+    if (is_null == -1 || is_null) {
+        return std::nullopt;
     }
-    NPY_DISABLE_C_API;
-    PyThreadState *_save2 = PyEval_SaveThread();
-
-    char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
-    npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
-    for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
-        *reinterpret_cast<T *>(odata) = *it;
+    else {
+        return std::make_optional<std::string>(sdata.buf, sdata.buf + sdata.size);
     }
+};
 
-    PyEval_RestoreThread(_save2);
-    return res_obj;
-}
-
+template <typename T>
+int write_vstring(char *odata, const T &value, npy_intp itemsize, npy_string_allocator *allocator) {
+    npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
+    if (value.has_value()) {
+        std::string str = value.value();
+        if (NpyString_pack(allocator, packed_string, str.c_str(), str.size()) == -1) {
+            return -1;
+        }
+    } else {
+        if (NpyString_pack_null(allocator, packed_string) == -1) {
+            return -1;
+        }
+    }
+    return 0;
+};
 
-template<typename T>
+template <typename T, auto read, auto write>
 static PyObject*
-unique_string(PyArrayObject *self)
+unique(PyArrayObject *self)
 {
     /* This function takes a numpy array and returns a numpy array containing
     the unique values.
 
-    It assumes the numpy array includes data that can be viewed as fixed-size
-    strings of a certain size (itemsize / sizeof(T)).
+    It doesn't need to know the actual type, since it needs to find unique values
+    among binary representations of the input data. This means it won't apply to
+    custom or complicated dtypes or string values.
     */
     NPY_ALLOW_C_API_DEF;
-
-    // release the GIL
-    PyThreadState *_save1 = PyEval_SaveThread();
-
-    // size of each entries
-    npy_intp itemsize = PyArray_ITEMSIZE(self);
-    // the number of characters of each entries
-    // (For Unicode, itemsize / 4 for UCS4)
-    npy_intp num_chars = itemsize / sizeof(typename T::value_type);
-
-    npy_intp isize = PyArray_SIZE(self);
-    char *idata = PyArray_BYTES(self);
-    npy_intp istride = PyArray_STRIDES(self)[0];
-
-    std::unordered_set<T> hashset;
-    // reserve the hashset to avoid reallocations
-    // reallocations are expensive, especially for string arrays
-    hashset.reserve(isize * 2);
-
-    // As input is 1d, we can use the strides to iterate through the array.
-    for (npy_intp i = 0; i < isize; i++, idata += istride) {
-        typename T::value_type *sdata = reinterpret_cast<typename T::value_type *>(idata);
-        size_t byte_to_copy = std::find(sdata, sdata + num_chars, 0) - sdata;
-        hashset.emplace(sdata, sdata + byte_to_copy);
-    }
-
-    npy_intp length = hashset.size();
-
-    PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
     PyArray_Descr *descr = PyArray_DESCR(self);
+    // this operation requires the GIL to be held
     Py_INCREF(descr);
-    PyObject *res_obj = PyArray_NewFromDescr(
-        &PyArray_Type,
-        descr,
-        1, // ndim
-        &length, // shape
-        NULL, // strides
-        NULL, // data
-        // This flag is needed to be able to call .sort on it.
-        NPY_ARRAY_WRITEABLE, // flags
-        NULL // obj
-    );
-
-    if (res_obj == NULL) {
-        return NULL;
-    }
     NPY_DISABLE_C_API;
-    PyThreadState *_save2 = PyEval_SaveThread();
-
-    char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
-    npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
-
-    memset(odata, 0, itemsize * length);
-    for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
-        size_t byte_to_copy = it->size() * sizeof(typename T::value_type);
-        memcpy(odata, it->c_str(), byte_to_copy);
-    }
-
-    PyEval_RestoreThread(_save2);
-    return res_obj;
-}
-
-
-static PyObject*
-unique_vstring(PyArrayObject *self)
-{
-    /* This function takes a numpy array and returns a numpy array containing
-    the unique values.
-
-    It assumes the numpy array includes data that can be viewed as variable width
-    strings (StringDType).
-    */
-    NPY_ALLOW_C_API_DEF;
-
-    PyArray_Descr *descr = PyArray_DESCR(self);
-    // this macro requires the GIL to be held
-    Py_INCREF(descr);
 
     // release the GIL
     PyThreadState *_save1 = PyEval_SaveThread();
 
-    npy_string_allocator *allocator = NpyString_acquire_allocator(
-        (PyArray_StringDTypeObject *)descr);
+    int dtype = PyArray_TYPE(self);
+
+    // This is for NPY_STRING and NPY_UNICODE
+    // size and number of characters of each entries
+    npy_intp itemsize = 0, num_chars = 0;
+    if (dtype == NPY_STRING || dtype == NPY_UNICODE) {
+        itemsize = PyArray_ITEMSIZE(self);
+        if constexpr (std::is_same_v<T, std::string> || std::is_same_v<T, std::u32string>) {
+            // (For Unicode, itemsize / 4 for UCS4)
+            num_chars = itemsize / sizeof(typename T::value_type);
+        }
+    }
+
+    // This is for NPY_VSTRING
+    npy_string_allocator *allocator = nullptr;
+    if (dtype == NPY_VSTRING) {
+        allocator = NpyString_acquire_allocator(
+            (PyArray_StringDTypeObject *)descr);
+    }
     auto allocator_dealloc = finally([&]() {
-        NpyString_release_allocator(allocator);
+        if (allocator != nullptr) {
+            NpyString_release_allocator(allocator);
+        }
     });
 
     npy_intp isize = PyArray_SIZE(self);
     char *idata = PyArray_BYTES(self);
     npy_intp istride = PyArray_STRIDES(self)[0];
 
-    std::unordered_set<std::optional<std::string>> hashset;
+    std::unordered_set<T> hashset;
     // reserve the hashset to avoid reallocations
     // reallocations are expensive, especially for string arrays
     hashset.reserve(isize * 2);
 
     // As input is 1d, we can use the strides to iterate through the array.
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
-        // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
-        npy_static_string sdata = {0, NULL};
-        npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
-        int is_null = NpyString_load(allocator, packed_string, &sdata);
-
-        if (is_null == -1) {
-            return NULL;
-        }
-        else if (is_null) {
-            hashset.emplace(std::nullopt);
-        }
-        else {
-            hashset.emplace(
-                std::make_optional<std::string>(sdata.buf, sdata.buf + sdata.size)
-            );
-        }
+        T value = read(idata, num_chars, allocator);
+        hashset.emplace(std::move(value));
     }
 
     npy_intp length = hashset.size();
 
     PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
+    // this operation requires the GIL to be held
     PyObject *res_obj = PyArray_NewFromDescr(
         &PyArray_Type,
         descr,
@@ -248,18 +173,10 @@ unique_vstring(PyArrayObject *self)
 
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
-    
+    // As output is 1d, we can use the strides to iterate through the array.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
-        npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
-        if (it->has_value()) {
-            std::string str = it->value();
-            if (NpyString_pack(allocator, packed_string, str.c_str(), str.size()) == -1) {
-                return NULL;
-            }
-        } else {
-            if (NpyString_pack_null(allocator, packed_string) == -1) {
-                return NULL;
-            }
+        if (write(odata, *it, itemsize, allocator) == -1) {
+            return NULL;
         }
     }
 
@@ -271,28 +188,28 @@ unique_vstring(PyArrayObject *self)
 // this map contains the functions used for each item size.
 typedef std::function<PyObject *(PyArrayObject *)> function_type;
 std::unordered_map<int, function_type> unique_funcs = {
-    {NPY_BYTE, unique_integer<npy_byte>},
-    {NPY_UBYTE, unique_integer<npy_ubyte>},
-    {NPY_SHORT, unique_integer<npy_short>},
-    {NPY_USHORT, unique_integer<npy_ushort>},
-    {NPY_INT, unique_integer<npy_int>},
-    {NPY_UINT, unique_integer<npy_uint>},
-    {NPY_LONG, unique_integer<npy_long>},
-    {NPY_ULONG, unique_integer<npy_ulong>},
-    {NPY_LONGLONG, unique_integer<npy_longlong>},
-    {NPY_ULONGLONG, unique_integer<npy_ulonglong>},
-    {NPY_INT8, unique_integer<npy_int8>},
-    {NPY_INT16, unique_integer<npy_int16>},
-    {NPY_INT32, unique_integer<npy_int32>},
-    {NPY_INT64, unique_integer<npy_int64>},
-    {NPY_UINT8, unique_integer<npy_uint8>},
-    {NPY_UINT16, unique_integer<npy_uint16>},
-    {NPY_UINT32, unique_integer<npy_uint32>},
-    {NPY_UINT64, unique_integer<npy_uint64>},
-    {NPY_DATETIME, unique_integer<npy_uint64>},
-    {NPY_STRING, unique_string<std::string>},
-    {NPY_UNICODE, unique_string<std::u32string>},
-    {NPY_VSTRING, unique_vstring},
+    {NPY_BYTE, unique<npy_byte, read_integer<npy_byte>, write_integer<npy_byte>>},
+    {NPY_UBYTE, unique<npy_ubyte, read_integer<npy_ubyte>, write_integer<npy_ubyte>>},
+    {NPY_SHORT, unique<npy_short, read_integer<npy_short>, write_integer<npy_short>>},
+    {NPY_USHORT, unique<npy_ushort, read_integer<npy_ushort>, write_integer<npy_ushort>>},
+    {NPY_INT, unique<npy_int, read_integer<npy_int>, write_integer<npy_int>>},
+    {NPY_UINT, unique<npy_uint, read_integer<npy_uint>, write_integer<npy_uint>>},
+    {NPY_LONG, unique<npy_long, read_integer<npy_long>, write_integer<npy_long>>},
+    {NPY_ULONG, unique<npy_ulong, read_integer<npy_ulong>, write_integer<npy_ulong>>},
+    {NPY_LONGLONG, unique<npy_longlong, read_integer<npy_longlong>, write_integer<npy_longlong>>},
+    {NPY_ULONGLONG, unique<npy_ulonglong, read_integer<npy_ulonglong>, write_integer<npy_ulonglong>>},
+    {NPY_INT8, unique<npy_int8, read_integer<npy_int8>, write_integer<npy_int8>>},
+    {NPY_INT16, unique<npy_int16, read_integer<npy_int16>, write_integer<npy_int16>>},
+    {NPY_INT32, unique<npy_int32, read_integer<npy_int32>, write_integer<npy_int32>>},
+    {NPY_INT64, unique<npy_int64, read_integer<npy_int64>, write_integer<npy_int64>>},
+    {NPY_UINT8, unique<npy_uint8, read_integer<npy_uint8>, write_integer<npy_uint8>>},
+    {NPY_UINT16, unique<npy_uint16, read_integer<npy_uint16>, write_integer<npy_uint16>>},
+    {NPY_UINT32, unique<npy_uint32, read_integer<npy_uint32>, write_integer<npy_uint32>>},
+    {NPY_UINT64, unique<npy_uint64, read_integer<npy_uint64>, write_integer<npy_uint64>>},
+    {NPY_DATETIME, unique<npy_uint64, read_integer<npy_uint64>, write_integer<npy_uint64>>},
+    {NPY_STRING, unique<std::string, read_string<std::string>, write_string<std::string>>},
+    {NPY_UNICODE, unique<std::u32string, read_string<std::u32string>, write_string<std::u32string>>},
+    {NPY_VSTRING, unique<std::optional<std::string>, read_vstring<std::optional<std::string>>, write_vstring<std::optional<std::string>>>},
 };
 
 

From e9c3aac2b23db1654d946149eacde10ac69378b2 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 19 Apr 2025 23:46:09 +0900
Subject: [PATCH 41/75] fix comment

---
 numpy/_core/src/multiarray/unique.cpp | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 294710ff6dad..0028004d666e 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -91,17 +91,14 @@ template <typename T, auto read, auto write>
 static PyObject*
 unique(PyArrayObject *self)
 {
-    /* This function takes a numpy array and returns a numpy array containing
-    the unique values.
-
-    It doesn't need to know the actual type, since it needs to find unique values
-    among binary representations of the input data. This means it won't apply to
-    custom or complicated dtypes or string values.
+    /*
+    * Returns a new NumPy array containing the unique values of the input array.
+    * This function uses hashing to identify uniqueness efficiently.
     */
     NPY_ALLOW_C_API_DEF;
     NPY_ALLOW_C_API;
     PyArray_Descr *descr = PyArray_DESCR(self);
-    // this operation requires the GIL to be held
+    // NumPy API calls and Python object manipulations require holding the GIL.
     Py_INCREF(descr);
     NPY_DISABLE_C_API;
 
@@ -110,8 +107,8 @@ unique(PyArrayObject *self)
 
     int dtype = PyArray_TYPE(self);
 
-    // This is for NPY_STRING and NPY_UNICODE
-    // size and number of characters of each entries
+    // For NPY_STRING and NPY_UNICODE arrays,
+    // retrieve item size and character count per entry.
     npy_intp itemsize = 0, num_chars = 0;
     if (dtype == NPY_STRING || dtype == NPY_UNICODE) {
         itemsize = PyArray_ITEMSIZE(self);
@@ -127,6 +124,7 @@ unique(PyArrayObject *self)
         allocator = NpyString_acquire_allocator(
             (PyArray_StringDTypeObject *)descr);
     }
+    // Ensure that the allocator is properly released upon function exit.
     auto allocator_dealloc = finally([&]() {
         if (allocator != nullptr) {
             NpyString_release_allocator(allocator);
@@ -138,11 +136,11 @@ unique(PyArrayObject *self)
     npy_intp istride = PyArray_STRIDES(self)[0];
 
     std::unordered_set<T> hashset;
-    // reserve the hashset to avoid reallocations
-    // reallocations are expensive, especially for string arrays
+    // Reserve hashset capacity in advance to minimize reallocations,
+    // which are particularly costly for string-based arrays.
     hashset.reserve(isize * 2);
 
-    // As input is 1d, we can use the strides to iterate through the array.
+    // Input array is one-dimensional, enabling efficient iteration using strides.
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
         T value = read(idata, num_chars, allocator);
         hashset.emplace(std::move(value));
@@ -152,7 +150,7 @@ unique(PyArrayObject *self)
 
     PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
-    // this operation requires the GIL to be held
+    // NumPy API calls and Python object manipulations require holding the GIL.
     PyObject *res_obj = PyArray_NewFromDescr(
         &PyArray_Type,
         descr,
@@ -173,7 +171,7 @@ unique(PyArrayObject *self)
 
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
-    // As output is 1d, we can use the strides to iterate through the array.
+    // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
         if (write(odata, *it, itemsize, allocator) == -1) {
             return NULL;

From 8191f5f7d6d0afdddc7822b818ea44f5a8584c8b Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sun, 20 Apr 2025 02:13:41 +0900
Subject: [PATCH 42/75] linter

---
 numpy/_core/src/multiarray/unique.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 0028004d666e..f6b9a866188c 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -4,10 +4,10 @@
 #include <Python.h>
 
 #include <algorithm>
-#include <unordered_set>
 #include <functional>
 #include <optional>
 #include <string>
+#include <unordered_set>
 #include <utility>
 
 #include <numpy/npy_common.h>

From 4faf36a5710a54aba46ed562274b3e54b0d38241 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sun, 20 Apr 2025 02:23:10 +0900
Subject: [PATCH 43/75] add doc

---
 doc/release/upcoming_changes/28767.change.rst      |  9 +++++++++
 doc/release/upcoming_changes/28767.performance.rst | 10 ++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 doc/release/upcoming_changes/28767.change.rst
 create mode 100644 doc/release/upcoming_changes/28767.performance.rst

diff --git a/doc/release/upcoming_changes/28767.change.rst b/doc/release/upcoming_changes/28767.change.rst
new file mode 100644
index 000000000000..33004091e4ef
--- /dev/null
+++ b/doc/release/upcoming_changes/28767.change.rst
@@ -0,0 +1,9 @@
+``unique_values`` for string dtypes may return unsorted data
+------------------------------------------
+np.unique now supports hash‐based duplicate removal for string dtypes.
+This enhancement extends the hash-table algorithm to byte strings ('S'),
+Unicode strings ('U'), and the experimental string dtype ('T', StringDType).
+As a result, calling np.unique() on an array of strings will use
+the faster hash-based method to obtain unique values.
+This also works for StringDType arrays containing None (missing values)
+when using equal_nan=True (treating missing values as equal). (gh-28767)
diff --git a/doc/release/upcoming_changes/28767.performance.rst b/doc/release/upcoming_changes/28767.performance.rst
new file mode 100644
index 000000000000..795f69c561fc
--- /dev/null
+++ b/doc/release/upcoming_changes/28767.performance.rst
@@ -0,0 +1,10 @@
+Performance improvements to ``np.unique`` for string dtypes
+-----------------------------------------
+The hash-based algorithm for unique extraction provides
+an order-of-magnitude speedup on large string arrays. 
+In an internal benchmark with about 1 billion string elements,
+the hash-based np.unique completed in roughly 48 seconds,
+compared to 498 seconds with the sort-based method
+– about 10–11× faster for unsorted unique operations on strings.
+This improvement greatly reduces the time to find unique values
+in very large string datasets. (gh-28767)

From c6aaf3923cf289f8a57b870637648a2cb8dcb4b5 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sun, 20 Apr 2025 03:04:23 +0900
Subject: [PATCH 44/75] DOC: fix

---
 doc/release/upcoming_changes/28767.change.rst      | 2 +-
 doc/release/upcoming_changes/28767.performance.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/release/upcoming_changes/28767.change.rst b/doc/release/upcoming_changes/28767.change.rst
index 33004091e4ef..07b9b0cb3e3a 100644
--- a/doc/release/upcoming_changes/28767.change.rst
+++ b/doc/release/upcoming_changes/28767.change.rst
@@ -6,4 +6,4 @@ Unicode strings ('U'), and the experimental string dtype ('T', StringDType).
 As a result, calling np.unique() on an array of strings will use
 the faster hash-based method to obtain unique values.
 This also works for StringDType arrays containing None (missing values)
-when using equal_nan=True (treating missing values as equal). (gh-28767)
+when using equal_nan=True (treating missing values as equal).
diff --git a/doc/release/upcoming_changes/28767.performance.rst b/doc/release/upcoming_changes/28767.performance.rst
index 795f69c561fc..9118e83b9992 100644
--- a/doc/release/upcoming_changes/28767.performance.rst
+++ b/doc/release/upcoming_changes/28767.performance.rst
@@ -7,4 +7,4 @@ the hash-based np.unique completed in roughly 48 seconds,
 compared to 498 seconds with the sort-based method
 – about 10–11× faster for unsorted unique operations on strings.
 This improvement greatly reduces the time to find unique values
-in very large string datasets. (gh-28767)
+in very large string datasets.

From 1053bcb2e9d969bbeab39655ad85b97280caf43e Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sun, 20 Apr 2025 18:58:00 +0900
Subject: [PATCH 45/75] DOC: fix format

---
 doc/release/upcoming_changes/28767.change.rst      | 2 +-
 doc/release/upcoming_changes/28767.performance.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/release/upcoming_changes/28767.change.rst b/doc/release/upcoming_changes/28767.change.rst
index 07b9b0cb3e3a..1d330e862c9e 100644
--- a/doc/release/upcoming_changes/28767.change.rst
+++ b/doc/release/upcoming_changes/28767.change.rst
@@ -1,5 +1,5 @@
 ``unique_values`` for string dtypes may return unsorted data
-------------------------------------------
+------------------------------------------------------------
 np.unique now supports hash‐based duplicate removal for string dtypes.
 This enhancement extends the hash-table algorithm to byte strings ('S'),
 Unicode strings ('U'), and the experimental string dtype ('T', StringDType).
diff --git a/doc/release/upcoming_changes/28767.performance.rst b/doc/release/upcoming_changes/28767.performance.rst
index 9118e83b9992..c06534b98c43 100644
--- a/doc/release/upcoming_changes/28767.performance.rst
+++ b/doc/release/upcoming_changes/28767.performance.rst
@@ -1,5 +1,5 @@
 Performance improvements to ``np.unique`` for string dtypes
------------------------------------------
+-----------------------------------------------------------
 The hash-based algorithm for unique extraction provides
 an order-of-magnitude speedup on large string arrays. 
 In an internal benchmark with about 1 billion string elements,

From 1afefbe934c91010b8afc79aa418cc4e5cba179f Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sun, 20 Apr 2025 20:23:56 +0900
Subject: [PATCH 46/75] MNT: refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 202 +++++++++++++-------------
 1 file changed, 98 insertions(+), 104 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index f6b9a866188c..a51ca0d167e6 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -13,81 +13,98 @@
 #include <numpy/npy_common.h>
 #include "numpy/arrayobject.h"
 
-// This is to use RAII pattern to handle cpp exceptions while avoiding memory leaks.
-// Adapted from https://stackoverflow.com/a/25510879/2536294
-template <typename F>
-struct FinalAction {
-    FinalAction(F f) : clean_{f} {}
-    ~FinalAction() { clean_(); }
-  private:
-    F clean_;
-};
-
-template <typename F>
-FinalAction<F> finally(F f) {
-    return FinalAction<F>(f);
-}
-
 template <typename T>
-T read_integer(char *idata, npy_intp num_chars, npy_string_allocator *allocator) {
-    return *(T *)idata;
+class IReadWrite {
+public:
+    virtual T read(char *idata) const = 0;
+    virtual int write(char *odata, const T &value) const = 0;
+    virtual ~IReadWrite() = default;
 };
 
 template <typename T>
-int write_integer(char *odata, const T &value, npy_intp itemsize, npy_string_allocator *allocator) {
-    *reinterpret_cast<T *>(odata) = value;
-    return 0;
-};
+class IntegerReadWrite : public IReadWrite<T> {
+public:
+    IntegerReadWrite(PyArray_Descr *descr) {}
 
-template <typename T>
-T read_string(char *idata, npy_intp num_chars, npy_string_allocator *allocator) {
-    typename T::value_type *sdata = reinterpret_cast<typename T::value_type *>(idata);
-    size_t byte_to_copy = std::find(sdata, sdata + num_chars, 0) - sdata;
-    return T(sdata, sdata + byte_to_copy);
-};
+    T read(char *idata) const override {
+        return *(T *)idata;
+    }
 
-template <typename T>
-int write_string(char *odata, const T &value, npy_intp itemsize, npy_string_allocator *allocator) {
-    size_t byte_to_copy = value.size() * sizeof(typename T::value_type);
-    memcpy(odata, value.c_str(), byte_to_copy);
-    if (byte_to_copy < (size_t)itemsize) {
-        memset(odata + byte_to_copy, 0, itemsize - byte_to_copy);
+    int write(char *odata, const T &value) const override {
+        *(T *)(odata) = value;
+        return 0;
     }
-    return 0;
 };
 
+// T is a string type (std::string or std::u32string)
 template <typename T>
-T read_vstring(char *idata, npy_intp num_chars, npy_string_allocator *allocator) {
-    // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
-    npy_static_string sdata = {0, NULL};
-    npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
-    int is_null = NpyString_load(allocator, packed_string, &sdata);
+class StringReadWrite : public IReadWrite<T> {
+private:
+    npy_intp itemsize_;
+    npy_intp num_chars_;
+
+public:
+    StringReadWrite(PyArray_Descr *descr) {
+        itemsize_ = descr->elsize;
+        num_chars_ = itemsize_ / sizeof(typename T::value_type);
+    }
 
-    if (is_null == -1 || is_null) {
-        return std::nullopt;
+    T read(char *idata) const override {
+        typename T::value_type *sdata = reinterpret_cast<typename T::value_type *>(idata);
+        size_t byte_to_copy = std::find(sdata, sdata + num_chars_, 0) - sdata;
+        return T(sdata, sdata + byte_to_copy);
     }
-    else {
-        return std::make_optional<std::string>(sdata.buf, sdata.buf + sdata.size);
+
+    int write(char *odata, const T &value) const override {
+        size_t byte_to_copy = value.size() * sizeof(typename T::value_type);
+        memcpy(odata, value.c_str(), byte_to_copy);
+        if (byte_to_copy < (size_t)itemsize_) {
+            memset(odata + byte_to_copy, 0, itemsize_ - byte_to_copy);
+        }
+        return 0;
     }
 };
 
-template <typename T>
-int write_vstring(char *odata, const T &value, npy_intp itemsize, npy_string_allocator *allocator) {
-    npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
-    if (value.has_value()) {
-        std::string str = value.value();
-        if (NpyString_pack(allocator, packed_string, str.c_str(), str.size()) == -1) {
-            return -1;
+class VStringReadWrite : public IReadWrite<std::optional<std::string>> {
+private:
+    npy_string_allocator *allocator_;
+
+public:
+    VStringReadWrite(PyArray_Descr *descr) {
+        allocator_ = NpyString_acquire_allocator(
+            (PyArray_StringDTypeObject *)descr);
+    }
+
+    ~VStringReadWrite() {
+        NpyString_release_allocator(allocator_);
+    }
+
+    std::optional<std::string> read(char *idata) const override {
+        // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
+        npy_static_string sdata = {0, nullptr};
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)(idata);
+        int is_null = NpyString_load(allocator_, packed_string, &sdata);
+
+        if (is_null == -1 || is_null) {
+            return std::nullopt;
+        }
+        else {
+            return std::make_optional<std::string>(sdata.buf, sdata.buf + sdata.size);
         }
-    } else {
-        if (NpyString_pack_null(allocator, packed_string) == -1) {
-            return -1;
+    }
+
+    int write(char *odata, const std::optional<std::string> &value) const override {
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)(odata);
+        if (value.has_value()) {
+            const std::string &str = value.value();
+            return NpyString_pack(allocator_, packed_string, str.c_str(), str.size());
+        } else {
+            return NpyString_pack_null(allocator_, packed_string);
         }
     }
-    return 0;
 };
 
-template <typename T, auto read, auto write>
+template <typename T, typename ReadWrite>
 static PyObject*
 unique(PyArrayObject *self)
 {
@@ -105,31 +122,8 @@ unique(PyArrayObject *self)
     // release the GIL
     PyThreadState *_save1 = PyEval_SaveThread();
 
-    int dtype = PyArray_TYPE(self);
-
-    // For NPY_STRING and NPY_UNICODE arrays,
-    // retrieve item size and character count per entry.
-    npy_intp itemsize = 0, num_chars = 0;
-    if (dtype == NPY_STRING || dtype == NPY_UNICODE) {
-        itemsize = PyArray_ITEMSIZE(self);
-        if constexpr (std::is_same_v<T, std::string> || std::is_same_v<T, std::u32string>) {
-            // (For Unicode, itemsize / 4 for UCS4)
-            num_chars = itemsize / sizeof(typename T::value_type);
-        }
-    }
-
-    // This is for NPY_VSTRING
-    npy_string_allocator *allocator = nullptr;
-    if (dtype == NPY_VSTRING) {
-        allocator = NpyString_acquire_allocator(
-            (PyArray_StringDTypeObject *)descr);
-    }
-    // Ensure that the allocator is properly released upon function exit.
-    auto allocator_dealloc = finally([&]() {
-        if (allocator != nullptr) {
-            NpyString_release_allocator(allocator);
-        }
-    });
+    // ReadWrite is a template class that provides read and write methods
+    ReadWrite read_write = ReadWrite(descr);
 
     npy_intp isize = PyArray_SIZE(self);
     char *idata = PyArray_BYTES(self);
@@ -142,7 +136,7 @@ unique(PyArrayObject *self)
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
-        T value = read(idata, num_chars, allocator);
+        T value = read_write.read(idata);
         hashset.emplace(std::move(value));
     }
 
@@ -173,7 +167,7 @@ unique(PyArrayObject *self)
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
     // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
-        if (write(odata, *it, itemsize, allocator) == -1) {
+        if (read_write.write(odata, *it) == -1) {
             return NULL;
         }
     }
@@ -186,28 +180,28 @@ unique(PyArrayObject *self)
 // this map contains the functions used for each item size.
 typedef std::function<PyObject *(PyArrayObject *)> function_type;
 std::unordered_map<int, function_type> unique_funcs = {
-    {NPY_BYTE, unique<npy_byte, read_integer<npy_byte>, write_integer<npy_byte>>},
-    {NPY_UBYTE, unique<npy_ubyte, read_integer<npy_ubyte>, write_integer<npy_ubyte>>},
-    {NPY_SHORT, unique<npy_short, read_integer<npy_short>, write_integer<npy_short>>},
-    {NPY_USHORT, unique<npy_ushort, read_integer<npy_ushort>, write_integer<npy_ushort>>},
-    {NPY_INT, unique<npy_int, read_integer<npy_int>, write_integer<npy_int>>},
-    {NPY_UINT, unique<npy_uint, read_integer<npy_uint>, write_integer<npy_uint>>},
-    {NPY_LONG, unique<npy_long, read_integer<npy_long>, write_integer<npy_long>>},
-    {NPY_ULONG, unique<npy_ulong, read_integer<npy_ulong>, write_integer<npy_ulong>>},
-    {NPY_LONGLONG, unique<npy_longlong, read_integer<npy_longlong>, write_integer<npy_longlong>>},
-    {NPY_ULONGLONG, unique<npy_ulonglong, read_integer<npy_ulonglong>, write_integer<npy_ulonglong>>},
-    {NPY_INT8, unique<npy_int8, read_integer<npy_int8>, write_integer<npy_int8>>},
-    {NPY_INT16, unique<npy_int16, read_integer<npy_int16>, write_integer<npy_int16>>},
-    {NPY_INT32, unique<npy_int32, read_integer<npy_int32>, write_integer<npy_int32>>},
-    {NPY_INT64, unique<npy_int64, read_integer<npy_int64>, write_integer<npy_int64>>},
-    {NPY_UINT8, unique<npy_uint8, read_integer<npy_uint8>, write_integer<npy_uint8>>},
-    {NPY_UINT16, unique<npy_uint16, read_integer<npy_uint16>, write_integer<npy_uint16>>},
-    {NPY_UINT32, unique<npy_uint32, read_integer<npy_uint32>, write_integer<npy_uint32>>},
-    {NPY_UINT64, unique<npy_uint64, read_integer<npy_uint64>, write_integer<npy_uint64>>},
-    {NPY_DATETIME, unique<npy_uint64, read_integer<npy_uint64>, write_integer<npy_uint64>>},
-    {NPY_STRING, unique<std::string, read_string<std::string>, write_string<std::string>>},
-    {NPY_UNICODE, unique<std::u32string, read_string<std::u32string>, write_string<std::u32string>>},
-    {NPY_VSTRING, unique<std::optional<std::string>, read_vstring<std::optional<std::string>>, write_vstring<std::optional<std::string>>>},
+    {NPY_BYTE, unique<npy_byte, IntegerReadWrite<npy_byte>>},
+    {NPY_UBYTE, unique<npy_ubyte, IntegerReadWrite<npy_ubyte>>},
+    {NPY_SHORT, unique<npy_short, IntegerReadWrite<npy_short>>},
+    {NPY_USHORT, unique<npy_ushort, IntegerReadWrite<npy_ushort>>},
+    {NPY_INT, unique<npy_int, IntegerReadWrite<npy_int>>},
+    {NPY_UINT, unique<npy_uint, IntegerReadWrite<npy_uint>>},
+    {NPY_LONG, unique<npy_long, IntegerReadWrite<npy_long>>},
+    {NPY_ULONG, unique<npy_ulong, IntegerReadWrite<npy_ulong>>},
+    {NPY_LONGLONG, unique<npy_longlong, IntegerReadWrite<npy_longlong>>},
+    {NPY_ULONGLONG, unique<npy_ulonglong, IntegerReadWrite<npy_ulonglong>>},
+    {NPY_INT8, unique<npy_int8, IntegerReadWrite<npy_int8>>},
+    {NPY_INT16, unique<npy_int16, IntegerReadWrite<npy_int16>>},
+    {NPY_INT32, unique<npy_int32, IntegerReadWrite<npy_int32>>},
+    {NPY_INT64, unique<npy_int64, IntegerReadWrite<npy_int64>>},
+    {NPY_UINT8, unique<npy_uint8, IntegerReadWrite<npy_uint8>>},
+    {NPY_UINT16, unique<npy_uint16, IntegerReadWrite<npy_uint16>>},
+    {NPY_UINT32, unique<npy_uint32, IntegerReadWrite<npy_uint32>>},
+    {NPY_UINT64, unique<npy_uint64, IntegerReadWrite<npy_uint64>>},
+    {NPY_DATETIME, unique<npy_uint64, IntegerReadWrite<npy_uint64>>},
+    {NPY_STRING, unique<std::string, StringReadWrite<std::string>>},
+    {NPY_UNICODE, unique<std::u32string, StringReadWrite<std::u32string>>},
+    {NPY_VSTRING, unique<std::optional<std::string>, VStringReadWrite>},
 };
 
 

From b5610b17dde5a9608591edd25891db832d14d786 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sun, 20 Apr 2025 20:38:19 +0900
Subject: [PATCH 47/75] MNT: refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index a51ca0d167e6..021f3d7a3a87 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -65,7 +65,9 @@ class StringReadWrite : public IReadWrite<T> {
     }
 };
 
-class VStringReadWrite : public IReadWrite<std::optional<std::string>> {
+// T is std::optional<std::string>
+template <typename T>
+class VStringReadWrite : public IReadWrite<T> {
 private:
     npy_string_allocator *allocator_;
 
@@ -79,7 +81,7 @@ class VStringReadWrite : public IReadWrite<std::optional<std::string>> {
         NpyString_release_allocator(allocator_);
     }
 
-    std::optional<std::string> read(char *idata) const override {
+    T read(char *idata) const override {
         // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
         npy_static_string sdata = {0, nullptr};
         npy_packed_static_string *packed_string = (npy_packed_static_string *)(idata);
@@ -89,14 +91,14 @@ class VStringReadWrite : public IReadWrite<std::optional<std::string>> {
             return std::nullopt;
         }
         else {
-            return std::make_optional<std::string>(sdata.buf, sdata.buf + sdata.size);
+            return std::make_optional<typename T::value_type>(sdata.buf, sdata.buf + sdata.size);
         }
     }
 
-    int write(char *odata, const std::optional<std::string> &value) const override {
+    int write(char *odata, const T &value) const override {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)(odata);
         if (value.has_value()) {
-            const std::string &str = value.value();
+            const auto &str = value.value();
             return NpyString_pack(allocator_, packed_string, str.c_str(), str.size());
         } else {
             return NpyString_pack_null(allocator_, packed_string);
@@ -201,7 +203,7 @@ std::unordered_map<int, function_type> unique_funcs = {
     {NPY_DATETIME, unique<npy_uint64, IntegerReadWrite<npy_uint64>>},
     {NPY_STRING, unique<std::string, StringReadWrite<std::string>>},
     {NPY_UNICODE, unique<std::u32string, StringReadWrite<std::u32string>>},
-    {NPY_VSTRING, unique<std::optional<std::string>, VStringReadWrite>},
+    {NPY_VSTRING, unique<std::optional<std::string>, VStringReadWrite<std::optional<std::string>>>},
 };
 
 

From c28a7ce14ea2026de6061ea6ec88731b6eff933b Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Thu, 24 Apr 2025 23:11:21 +0900
Subject: [PATCH 48/75] ENH: Store pointers to strings in the set instead of
 the strings themselves.

---
 numpy/_core/src/multiarray/unique.cpp | 324 +++++++++++++++++---------
 1 file changed, 211 insertions(+), 113 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 021f3d7a3a87..84669363d1d9 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,112 +3,182 @@
 
 #include <Python.h>
 
-#include <algorithm>
+#include <cstring>
 #include <functional>
-#include <optional>
-#include <string>
 #include <unordered_set>
-#include <utility>
 
 #include <numpy/npy_common.h>
 #include "numpy/arrayobject.h"
 
-template <typename T>
-class IReadWrite {
-public:
-    virtual T read(char *idata) const = 0;
-    virtual int write(char *odata, const T &value) const = 0;
-    virtual ~IReadWrite() = default;
+// This is to use RAII pattern to handle cpp exceptions while avoiding memory leaks.
+// Adapted from https://stackoverflow.com/a/25510879/2536294
+template <typename F>
+struct FinalAction {
+    FinalAction(F f) : clean_{f} {}
+    ~FinalAction() { clean_(); }
+  private:
+    F clean_;
 };
+template <typename F>
+FinalAction<F> finally(F f) {
+    return FinalAction<F>(f);
+}
 
+// function to caluculate the hash of a string
 template <typename T>
-class IntegerReadWrite : public IReadWrite<T> {
-public:
-    IntegerReadWrite(PyArray_Descr *descr) {}
-
-    T read(char *idata) const override {
-        return *(T *)idata;
-    }
-
-    int write(char *odata, const T &value) const override {
-        *(T *)(odata) = value;
-        return 0;
+size_t str_hash(const T *str, npy_intp num_chars) {
+    // https://www.boost.org/doc/libs/1_88_0/libs/container_hash/doc/html/hash.html#notes_hash_combine
+    size_t h = 0;
+    for (npy_intp i = 0; i < num_chars; ++i) {
+        h ^= std::hash<T>{}(str[i]) + 0x9e3779b9 + (h << 6) + (h >> 2);
     }
-};
+    return h;
+}
 
-// T is a string type (std::string or std::u32string)
 template <typename T>
-class StringReadWrite : public IReadWrite<T> {
-private:
-    npy_intp itemsize_;
-    npy_intp num_chars_;
-
-public:
-    StringReadWrite(PyArray_Descr *descr) {
-        itemsize_ = descr->elsize;
-        num_chars_ = itemsize_ / sizeof(typename T::value_type);
+static PyObject*
+unique_integer(PyArrayObject *self)
+{
+    /*
+    * Returns a new NumPy array containing the unique values of the input array of integer.
+    * This function uses hashing to identify uniqueness efficiently.
+    */
+    NPY_ALLOW_C_API_DEF;
+    NPY_ALLOW_C_API;
+    PyArray_Descr *descr = PyArray_DESCR(self);
+    // NumPy API calls and Python object manipulations require holding the GIL.
+    Py_INCREF(descr);
+    NPY_DISABLE_C_API;
+
+    // release the GIL
+    PyThreadState *_save1 = PyEval_SaveThread();
+
+    // number of elements in the input array
+    npy_intp isize = PyArray_SIZE(self);
+
+    // Reserve hashset capacity in advance to minimize reallocations.
+    std::unordered_set<T> hashset(isize * 2);
+
+    // Input array is one-dimensional, enabling efficient iteration using strides.
+    char *idata = PyArray_BYTES(self);
+    npy_intp istride = PyArray_STRIDES(self)[0];
+    for (npy_intp i = 0; i < isize; i++, idata += istride) {
+        hashset.insert(*(T *)idata);
     }
 
-    T read(char *idata) const override {
-        typename T::value_type *sdata = reinterpret_cast<typename T::value_type *>(idata);
-        size_t byte_to_copy = std::find(sdata, sdata + num_chars_, 0) - sdata;
-        return T(sdata, sdata + byte_to_copy);
+    npy_intp length = hashset.size();
+
+    PyEval_RestoreThread(_save1);
+    NPY_ALLOW_C_API;
+    // NumPy API calls and Python object manipulations require holding the GIL.
+    PyObject *res_obj = PyArray_NewFromDescr(
+        &PyArray_Type,
+        descr,
+        1, // ndim
+        &length, // shape
+        NULL, // strides
+        NULL, // data
+        // This flag is needed to be able to call .sort on it.
+        NPY_ARRAY_WRITEABLE, // flags
+        NULL // obj
+    );
+
+    if (res_obj == NULL) {
+        return NULL;
     }
+    NPY_DISABLE_C_API;
+    PyThreadState *_save2 = PyEval_SaveThread();
 
-    int write(char *odata, const T &value) const override {
-        size_t byte_to_copy = value.size() * sizeof(typename T::value_type);
-        memcpy(odata, value.c_str(), byte_to_copy);
-        if (byte_to_copy < (size_t)itemsize_) {
-            memset(odata + byte_to_copy, 0, itemsize_ - byte_to_copy);
-        }
-        return 0;
+    char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
+    npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+    // Output array is one-dimensional, enabling efficient iteration using strides.
+    for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
+        *(T *)odata = *it;
     }
-};
 
-// T is std::optional<std::string>
+    PyEval_RestoreThread(_save2);
+    return res_obj;
+}
+
 template <typename T>
-class VStringReadWrite : public IReadWrite<T> {
-private:
-    npy_string_allocator *allocator_;
-
-public:
-    VStringReadWrite(PyArray_Descr *descr) {
-        allocator_ = NpyString_acquire_allocator(
-            (PyArray_StringDTypeObject *)descr);
-    }
+static PyObject*
+unique_string(PyArrayObject *self)
+{
+    /*
+    * Returns a new NumPy array containing the unique values of the input array of fixed size strings.
+    * This function uses hashing to identify uniqueness efficiently.
+    */
+    NPY_ALLOW_C_API_DEF;
+    NPY_ALLOW_C_API;
+    PyArray_Descr *descr = PyArray_DESCR(self);
+    // NumPy API calls and Python object manipulations require holding the GIL.
+    Py_INCREF(descr);
+    NPY_DISABLE_C_API;
 
-    ~VStringReadWrite() {
-        NpyString_release_allocator(allocator_);
+    // release the GIL
+    PyThreadState *_save1 = PyEval_SaveThread();
+
+    // number of elements in the input array
+    npy_intp isize = PyArray_SIZE(self);
+
+    // variables for the string
+    npy_intp itemsize = descr->elsize;
+    npy_intp num_chars = itemsize / sizeof(T);
+    auto hash = [num_chars](const T *value) -> size_t {
+        return str_hash(value, num_chars);
+    };
+    auto equal = [num_chars](const T *lhs, const T *rhs) -> bool {
+        return std::memcmp(lhs, rhs, num_chars) == 0;
+    };
+
+    // Reserve hashset capacity in advance to minimize reallocations.
+    std::unordered_set<T *, decltype(hash), decltype(equal)> hashset(
+        isize * 2, hash, equal
+    );
+
+    // Input array is one-dimensional, enabling efficient iteration using strides.
+    char *idata = PyArray_BYTES(self);
+    npy_intp istride = PyArray_STRIDES(self)[0];
+    for (npy_intp i = 0; i < isize; i++, idata += istride) {
+        hashset.insert((T *)idata);
     }
 
-    T read(char *idata) const override {
-        // https://numpy.org/doc/stable/reference/c-api/strings.html#loading-a-string
-        npy_static_string sdata = {0, nullptr};
-        npy_packed_static_string *packed_string = (npy_packed_static_string *)(idata);
-        int is_null = NpyString_load(allocator_, packed_string, &sdata);
+    npy_intp length = hashset.size();
 
-        if (is_null == -1 || is_null) {
-            return std::nullopt;
-        }
-        else {
-            return std::make_optional<typename T::value_type>(sdata.buf, sdata.buf + sdata.size);
-        }
+    PyEval_RestoreThread(_save1);
+    NPY_ALLOW_C_API;
+    // NumPy API calls and Python object manipulations require holding the GIL.
+    PyObject *res_obj = PyArray_NewFromDescr(
+        &PyArray_Type,
+        descr,
+        1, // ndim
+        &length, // shape
+        NULL, // strides
+        NULL, // data
+        // This flag is needed to be able to call .sort on it.
+        NPY_ARRAY_WRITEABLE, // flags
+        NULL // obj
+    );
+
+    if (res_obj == NULL) {
+        return NULL;
     }
+    NPY_DISABLE_C_API;
+    PyThreadState *_save2 = PyEval_SaveThread();
 
-    int write(char *odata, const T &value) const override {
-        npy_packed_static_string *packed_string = (npy_packed_static_string *)(odata);
-        if (value.has_value()) {
-            const auto &str = value.value();
-            return NpyString_pack(allocator_, packed_string, str.c_str(), str.size());
-        } else {
-            return NpyString_pack_null(allocator_, packed_string);
-        }
+    char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
+    npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+    // Output array is one-dimensional, enabling efficient iteration using strides.
+    for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
+        std::memcpy(odata, *it, itemsize);
     }
-};
 
-template <typename T, typename ReadWrite>
+    PyEval_RestoreThread(_save2);
+    return res_obj;
+}
+
 static PyObject*
-unique(PyArrayObject *self)
+unique_vstring(PyArrayObject *self)
 {
     /*
     * Returns a new NumPy array containing the unique values of the input array.
@@ -124,22 +194,47 @@ unique(PyArrayObject *self)
     // release the GIL
     PyThreadState *_save1 = PyEval_SaveThread();
 
-    // ReadWrite is a template class that provides read and write methods
-    ReadWrite read_write = ReadWrite(descr);
-
+    // number of elements in the input array
     npy_intp isize = PyArray_SIZE(self);
-    char *idata = PyArray_BYTES(self);
-    npy_intp istride = PyArray_STRIDES(self)[0];
 
-    std::unordered_set<T> hashset;
-    // Reserve hashset capacity in advance to minimize reallocations,
-    // which are particularly costly for string-based arrays.
-    hashset.reserve(isize * 2);
+    // variables for the vstring
+    npy_string_allocator *allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)descr);
+    auto allocator_dealloc = finally([&]() {
+        NpyString_release_allocator(allocator);
+    });
+    // unpacked_strings need to be allocated outside of the loop because of lifetime.
+    std::vector<npy_static_string> unpacked_strings(isize, {0, NULL});
+    auto hash = [](const npy_static_string *value) -> size_t {
+        if (value->buf == NULL) {
+            return 0;
+        }
+        return str_hash(value->buf, value->size);
+    };
+    auto equal = [](const npy_static_string *lhs, const npy_static_string *rhs) -> bool {
+        if (lhs->buf == NULL && rhs->buf == NULL) {
+            return true;
+        }
+        if (lhs->buf == NULL || rhs->buf == NULL) {
+            return false;
+        }
+        if (lhs->size != rhs->size) {
+            return false;
+        }
+        return std::memcmp(lhs->buf, rhs->buf, lhs->size) == 0;
+    };
+
+    // Reserve hashset capacity in advance to minimize reallocations.
+    std::unordered_set<npy_static_string *, decltype(hash), decltype(equal)> hashset(
+        isize * 2, hash, equal
+    );
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
+    char *idata = PyArray_BYTES(self);
+    npy_intp istride = PyArray_STRIDES(self)[0];
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
-        T value = read_write.read(idata);
-        hashset.emplace(std::move(value));
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
+        NpyString_load(allocator, packed_string, &unpacked_strings[i]);
+        hashset.insert(&unpacked_strings[i]);
     }
 
     npy_intp length = hashset.size();
@@ -169,8 +264,11 @@ unique(PyArrayObject *self)
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
     // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
-        if (read_write.write(odata, *it) == -1) {
-            return NULL;
+        npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
+        if ((*it)->buf == NULL) {
+            NpyString_pack_null(allocator, packed_string);
+        } else {
+            NpyString_pack(allocator, packed_string, (*it)->buf, (*it)->size);
         }
     }
 
@@ -182,28 +280,28 @@ unique(PyArrayObject *self)
 // this map contains the functions used for each item size.
 typedef std::function<PyObject *(PyArrayObject *)> function_type;
 std::unordered_map<int, function_type> unique_funcs = {
-    {NPY_BYTE, unique<npy_byte, IntegerReadWrite<npy_byte>>},
-    {NPY_UBYTE, unique<npy_ubyte, IntegerReadWrite<npy_ubyte>>},
-    {NPY_SHORT, unique<npy_short, IntegerReadWrite<npy_short>>},
-    {NPY_USHORT, unique<npy_ushort, IntegerReadWrite<npy_ushort>>},
-    {NPY_INT, unique<npy_int, IntegerReadWrite<npy_int>>},
-    {NPY_UINT, unique<npy_uint, IntegerReadWrite<npy_uint>>},
-    {NPY_LONG, unique<npy_long, IntegerReadWrite<npy_long>>},
-    {NPY_ULONG, unique<npy_ulong, IntegerReadWrite<npy_ulong>>},
-    {NPY_LONGLONG, unique<npy_longlong, IntegerReadWrite<npy_longlong>>},
-    {NPY_ULONGLONG, unique<npy_ulonglong, IntegerReadWrite<npy_ulonglong>>},
-    {NPY_INT8, unique<npy_int8, IntegerReadWrite<npy_int8>>},
-    {NPY_INT16, unique<npy_int16, IntegerReadWrite<npy_int16>>},
-    {NPY_INT32, unique<npy_int32, IntegerReadWrite<npy_int32>>},
-    {NPY_INT64, unique<npy_int64, IntegerReadWrite<npy_int64>>},
-    {NPY_UINT8, unique<npy_uint8, IntegerReadWrite<npy_uint8>>},
-    {NPY_UINT16, unique<npy_uint16, IntegerReadWrite<npy_uint16>>},
-    {NPY_UINT32, unique<npy_uint32, IntegerReadWrite<npy_uint32>>},
-    {NPY_UINT64, unique<npy_uint64, IntegerReadWrite<npy_uint64>>},
-    {NPY_DATETIME, unique<npy_uint64, IntegerReadWrite<npy_uint64>>},
-    {NPY_STRING, unique<std::string, StringReadWrite<std::string>>},
-    {NPY_UNICODE, unique<std::u32string, StringReadWrite<std::u32string>>},
-    {NPY_VSTRING, unique<std::optional<std::string>, VStringReadWrite<std::optional<std::string>>>},
+    {NPY_BYTE, unique_integer<npy_byte>},
+    {NPY_UBYTE, unique_integer<npy_ubyte>},
+    {NPY_SHORT, unique_integer<npy_short>},
+    {NPY_USHORT, unique_integer<npy_ushort>},
+    {NPY_INT, unique_integer<npy_int>},
+    {NPY_UINT, unique_integer<npy_uint>},
+    {NPY_LONG, unique_integer<npy_long>},
+    {NPY_ULONG, unique_integer<npy_ulong>},
+    {NPY_LONGLONG, unique_integer<npy_longlong>},
+    {NPY_ULONGLONG, unique_integer<npy_ulonglong>},
+    {NPY_INT8, unique_integer<npy_int8>},
+    {NPY_INT16, unique_integer<npy_int16>},
+    {NPY_INT32, unique_integer<npy_int32>},
+    {NPY_INT64, unique_integer<npy_int64>},
+    {NPY_UINT8, unique_integer<npy_uint8>},
+    {NPY_UINT16, unique_integer<npy_uint16>},
+    {NPY_UINT32, unique_integer<npy_uint32>},
+    {NPY_UINT64, unique_integer<npy_uint64>},
+    {NPY_DATETIME, unique_integer<npy_uint64>},
+    {NPY_STRING, unique_string<npy_byte>},
+    {NPY_UNICODE, unique_string<npy_ucs4>},
+    {NPY_VSTRING, unique_vstring},
 };
 
 

From b17011eb2e50dd562ae6ece4c56212981002b1ce Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 25 Apr 2025 00:05:17 +0900
Subject: [PATCH 49/75] FIX: length in memcmp

---
 numpy/_core/src/multiarray/unique.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 84669363d1d9..598066e726a7 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -127,8 +127,8 @@ unique_string(PyArrayObject *self)
     auto hash = [num_chars](const T *value) -> size_t {
         return str_hash(value, num_chars);
     };
-    auto equal = [num_chars](const T *lhs, const T *rhs) -> bool {
-        return std::memcmp(lhs, rhs, num_chars) == 0;
+    auto equal = [itemsize](const T *lhs, const T *rhs) -> bool {
+        return std::memcmp(lhs, rhs, itemsize) == 0;
     };
 
     // Reserve hashset capacity in advance to minimize reallocations.

From c2d5868c3b6b3c39fec2be68e06f636c9abc4f89 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 25 Apr 2025 00:50:14 +0900
Subject: [PATCH 50/75] ENH: refactoring

---
 numpy/_core/src/multiarray/unique.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 598066e726a7..8458a01fe6fd 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -202,8 +202,6 @@ unique_vstring(PyArrayObject *self)
     auto allocator_dealloc = finally([&]() {
         NpyString_release_allocator(allocator);
     });
-    // unpacked_strings need to be allocated outside of the loop because of lifetime.
-    std::vector<npy_static_string> unpacked_strings(isize, {0, NULL});
     auto hash = [](const npy_static_string *value) -> size_t {
         if (value->buf == NULL) {
             return 0;
@@ -231,6 +229,8 @@ unique_vstring(PyArrayObject *self)
     // Input array is one-dimensional, enabling efficient iteration using strides.
     char *idata = PyArray_BYTES(self);
     npy_intp istride = PyArray_STRIDES(self)[0];
+    // unpacked_strings need to be allocated outside of the loop because of the lifetime problem.
+    std::vector<npy_static_string> unpacked_strings(isize, {0, NULL});
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
         NpyString_load(allocator, packed_string, &unpacked_strings[i]);

From 7d4afe0a2b48b22373b349f72bb0fd5b5d159f22 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 25 Apr 2025 00:50:40 +0900
Subject: [PATCH 51/75] DOC: 49sec -> 34sec

---
 doc/release/upcoming_changes/28767.performance.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/release/upcoming_changes/28767.performance.rst b/doc/release/upcoming_changes/28767.performance.rst
index c06534b98c43..7e429d566c08 100644
--- a/doc/release/upcoming_changes/28767.performance.rst
+++ b/doc/release/upcoming_changes/28767.performance.rst
@@ -3,8 +3,8 @@ Performance improvements to ``np.unique`` for string dtypes
 The hash-based algorithm for unique extraction provides
 an order-of-magnitude speedup on large string arrays. 
 In an internal benchmark with about 1 billion string elements,
-the hash-based np.unique completed in roughly 48 seconds,
+the hash-based np.unique completed in roughly 34 seconds,
 compared to 498 seconds with the sort-based method
-– about 10–11× faster for unsorted unique operations on strings.
+– about 14–15× faster for unsorted unique operations on strings.
 This improvement greatly reduces the time to find unique values
 in very large string datasets.

From ad843b00bdabd2eb5bf8833544871540a59e9d97 Mon Sep 17 00:00:00 2001
From: koki watanabe <56009584+math-hiyoko@users.noreply.github.com>
Date: Fri, 25 Apr 2025 12:22:19 +0900
Subject: [PATCH 52/75] Update numpy/lib/_arraysetops_impl.py

Co-authored-by: Nathan Goldbaum <nathan.goldbaum@gmail.com>
---
 numpy/lib/_arraysetops_impl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py
index bf111221a7fb..c24775e33baa 100644
--- a/numpy/lib/_arraysetops_impl.py
+++ b/numpy/lib/_arraysetops_impl.py
@@ -363,7 +363,7 @@ def _unique1d(ar, return_index=False, return_inverse=False,
 
     if ar.dtype.kind == 'T' and not equal_nan:
         raise ValueError(
-            "Currently, `equal_nan` can only be True dtype is `T` (StringDType)."
+            "Currently, `equal_nan` can only be True if dtype is `T` (StringDType)."
         )
 
     optional_indices = return_index or return_inverse

From 45ec2b38a3786726ec0b82f51f855f4a646e09c2 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Fri, 25 Apr 2025 12:43:06 +0900
Subject: [PATCH 53/75] DOC: Mention that hash-based np.unique returns unsorted
 strings

---
 doc/release/upcoming_changes/28767.change.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/release/upcoming_changes/28767.change.rst b/doc/release/upcoming_changes/28767.change.rst
index 1d330e862c9e..ec173c3672b0 100644
--- a/doc/release/upcoming_changes/28767.change.rst
+++ b/doc/release/upcoming_changes/28767.change.rst
@@ -5,5 +5,6 @@ This enhancement extends the hash-table algorithm to byte strings ('S'),
 Unicode strings ('U'), and the experimental string dtype ('T', StringDType).
 As a result, calling np.unique() on an array of strings will use
 the faster hash-based method to obtain unique values.
+Note that this hash-based method does not guarantee that the returned unique values will be sorted.
 This also works for StringDType arrays containing None (missing values)
 when using equal_nan=True (treating missing values as equal).

From fff254e8ddbb97c0ffd4a1d52231bf82f515fe36 Mon Sep 17 00:00:00 2001
From: koki watanabe <address8128@gmail.com>
Date: Sat, 26 Apr 2025 17:15:20 +0900
Subject: [PATCH 54/75] ENH: support medium and long vstrings

---
 numpy/_core/src/multiarray/unique.cpp | 19 +++++---
 numpy/lib/tests/test_arraysetops.py   | 68 +++++++++++++++++++++++++--
 2 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 8458a01fe6fd..600b2cce50a0 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -198,9 +198,9 @@ unique_vstring(PyArrayObject *self)
     npy_intp isize = PyArray_SIZE(self);
 
     // variables for the vstring
-    npy_string_allocator *allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)descr);
-    auto allocator_dealloc = finally([&]() {
-        NpyString_release_allocator(allocator);
+    npy_string_allocator *in_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)descr);
+    auto in_allocator_dealloc = finally([&]() {
+        NpyString_release_allocator(in_allocator);
     });
     auto hash = [](const npy_static_string *value) -> size_t {
         if (value->buf == NULL) {
@@ -233,7 +233,7 @@ unique_vstring(PyArrayObject *self)
     std::vector<npy_static_string> unpacked_strings(isize, {0, NULL});
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
-        NpyString_load(allocator, packed_string, &unpacked_strings[i]);
+        NpyString_load(in_allocator, packed_string, &unpacked_strings[i]);
         hashset.insert(&unpacked_strings[i]);
     }
 
@@ -257,18 +257,25 @@ unique_vstring(PyArrayObject *self)
     if (res_obj == NULL) {
         return NULL;
     }
+    PyArray_Descr *res_descr = PyArray_DESCR((PyArrayObject *)res_obj);
+    // NumPy API calls and Python object manipulations require holding the GIL.
+    Py_INCREF(res_descr);
     NPY_DISABLE_C_API;
     PyThreadState *_save2 = PyEval_SaveThread();
 
+    npy_string_allocator *out_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)res_descr);
+    auto out_allocator_dealloc = finally([&]() {
+        NpyString_release_allocator(out_allocator);
+    });
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
     // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
         if ((*it)->buf == NULL) {
-            NpyString_pack_null(allocator, packed_string);
+            NpyString_pack_null(out_allocator, packed_string);
         } else {
-            NpyString_pack(allocator, packed_string, (*it)->buf, (*it)->size);
+            NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size);
         }
     }
 
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 82957792674a..1da3f7bc2334 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -845,7 +845,7 @@ def test_unique_byte_string_hash_based(self):
         unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
 
         a1 = unique(arr, sorted=False)
-        # the result varies depending on the hash function used,
+        # the result varies depending on the impl of std::unordered_set,
         # so we check them by sorting
         assert_array_equal(sorted(a1.tolist()), unq_sorted)
 
@@ -855,17 +855,75 @@ def test_unique_unicode_string_hash_based(self):
         unq_sorted = ['cafe', 'café', 'naive', 'naïve', 'resume', 'résumé']
 
         a1 = unique(arr, sorted=False)
-        # the result varies depending on the hash function used,
+        # the result varies depending on the impl of std::unordered_set,
         # so we check them by sorting
         assert_array_equal(sorted(a1.tolist()), unq_sorted)
 
     def test_unique_vstring_hash_based(self):
         # test for unicode and nullable string arrays
-        a = np.array(['straße', None, 'strasse', 'straße', None, 'niño', 'nino', 'élève', 'eleve', 'niño', 'élève'], dtype=StringDType(na_object=None))
-        unq_sorted_wo_none = ['eleve', 'nino', 'niño', 'strasse', 'straße', 'élève']
+        a = np.array([
+                # short strings
+                'straße',
+                None,
+                'strasse',
+                'straße',
+                None,
+                'niño',
+                'nino',
+                'élève',
+                'eleve',
+                'niño',
+                'élève',
+                # medium strings
+                'b' * 20,
+                'ß' * 30,
+                None,
+                'é' * 30,
+                'e' * 20,
+                'ß' * 30,
+                'n' * 30,
+                'ñ' * 20,
+                None,
+                'e' * 20,
+                'ñ' * 20,
+                # long strings
+                'b' * 300,
+                'ß' * 400,
+                None,
+                'é' * 400,
+                'e' * 300,
+                'ß' * 400,
+                'n' * 400,
+                'ñ' * 300,
+                None,
+                'e' * 300,
+                'ñ' * 300,
+            ],
+            dtype=StringDType(na_object=None)
+        )
+        unq_sorted_wo_none = [
+            'b' * 20,
+            'b' * 300,
+            'e' * 20,
+            'e' * 300,
+            'eleve',
+            'nino',
+            'niño',
+            'n' * 30,
+            'n' * 400,
+            'strasse',
+            'straße',
+            'ß' * 30,
+            'ß' * 400,
+            'élève',
+            'é' * 30,
+            'é' * 400,
+            'ñ' * 20,
+            'ñ' * 300,
+        ]
 
         a1 = unique(a, sorted=False)
-        # the result varies depending on the hash function used,
+        # the result varies depending on the impl of std::unordered_set,
         # so we check them by sorting
 
         # a1 should have exactly one None

From 370bd8f35968bc19356a778f64c5a65bdf3c7c9f Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Tue, 29 Apr 2025 00:06:47 -0600
Subject: [PATCH 55/75] FIX: comment

---
 numpy/_core/src/multiarray/unique.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 600b2cce50a0..7200b30d53f8 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -56,7 +56,7 @@ unique_integer(PyArrayObject *self)
     // number of elements in the input array
     npy_intp isize = PyArray_SIZE(self);
 
-    // Reserve hashset capacity in advance to minimize reallocations.
+    // Reserve enough hashset capacity in advance to avoid reallocations and reduce collisions.
     std::unordered_set<T> hashset(isize * 2);
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
@@ -131,7 +131,7 @@ unique_string(PyArrayObject *self)
         return std::memcmp(lhs, rhs, itemsize) == 0;
     };
 
-    // Reserve hashset capacity in advance to minimize reallocations.
+    // Reserve enough hashset capacity in advance to avoid reallocations and reduce collisions.
     std::unordered_set<T *, decltype(hash), decltype(equal)> hashset(
         isize * 2, hash, equal
     );
@@ -221,7 +221,7 @@ unique_vstring(PyArrayObject *self)
         return std::memcmp(lhs->buf, rhs->buf, lhs->size) == 0;
     };
 
-    // Reserve hashset capacity in advance to minimize reallocations.
+     // Reserve enough hashset capacity in advance to avoid reallocations and reduce collisions.
     std::unordered_set<npy_static_string *, decltype(hash), decltype(equal)> hashset(
         isize * 2, hash, equal
     );

From 49dfcb43d3bfe603b5364a6edea9246d1791a4ab Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Tue, 29 Apr 2025 00:22:04 -0600
Subject: [PATCH 56/75] ENH: use RAII wrapper

---
 numpy/_core/src/multiarray/unique.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 7200b30d53f8..7ecb7c314e8f 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -88,6 +88,9 @@ unique_integer(PyArrayObject *self)
     }
     NPY_DISABLE_C_API;
     PyThreadState *_save2 = PyEval_SaveThread();
+    auto save2_dealloc = finally([&]() {
+        PyEval_RestoreThread(_save2);
+    });
 
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
@@ -96,7 +99,6 @@ unique_integer(PyArrayObject *self)
         *(T *)odata = *it;
     }
 
-    PyEval_RestoreThread(_save2);
     return res_obj;
 }
 
@@ -165,6 +167,9 @@ unique_string(PyArrayObject *self)
     }
     NPY_DISABLE_C_API;
     PyThreadState *_save2 = PyEval_SaveThread();
+    auto save2_dealloc = finally([&]() {
+        PyEval_RestoreThread(_save2);
+    });
 
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
@@ -173,7 +178,6 @@ unique_string(PyArrayObject *self)
         std::memcpy(odata, *it, itemsize);
     }
 
-    PyEval_RestoreThread(_save2);
     return res_obj;
 }
 
@@ -262,6 +266,9 @@ unique_vstring(PyArrayObject *self)
     Py_INCREF(res_descr);
     NPY_DISABLE_C_API;
     PyThreadState *_save2 = PyEval_SaveThread();
+    auto save2_dealloc = finally([&]() {
+        PyEval_RestoreThread(_save2);
+    });
 
     npy_string_allocator *out_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)res_descr);
     auto out_allocator_dealloc = finally([&]() {
@@ -279,7 +286,6 @@ unique_vstring(PyArrayObject *self)
         }
     }
 
-    PyEval_RestoreThread(_save2);
     return res_obj;
 }
 

From c5745bf85a156c4aee08deaea0381773697ba2cd Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Tue, 29 Apr 2025 00:49:36 -0600
Subject: [PATCH 57/75] FIX: error handling of string packing

---
 numpy/_core/src/multiarray/unique.cpp | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 7ecb7c314e8f..87e5aff9c051 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -266,24 +266,34 @@ unique_vstring(PyArrayObject *self)
     Py_INCREF(res_descr);
     NPY_DISABLE_C_API;
     PyThreadState *_save2 = PyEval_SaveThread();
-    auto save2_dealloc = finally([&]() {
-        PyEval_RestoreThread(_save2);
-    });
 
     npy_string_allocator *out_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)res_descr);
     auto out_allocator_dealloc = finally([&]() {
         NpyString_release_allocator(out_allocator);
     });
+
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+    int pack_status = 0;
     // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
         if ((*it)->buf == NULL) {
-            NpyString_pack_null(out_allocator, packed_string);
+            pack_status = NpyString_pack_null(out_allocator, packed_string);
         } else {
-            NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size);
+            pack_status = NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size);
         }
+        if (pack_status == -1) {
+            // string packing failed
+            break;
+        }
+    }
+
+    PyEval_RestoreThread(_save2);
+    if (pack_status == -1) {
+        // PyErr_SetString requires holding the GIL.
+        PyErr_SetString(PyExc_TypeError, "string packing failed");
+        return NULL;
     }
 
     return res_obj;

From 3ba9788a4bee7a7ca23262fb85a09f670fc243b5 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Tue, 29 Apr 2025 00:58:15 -0600
Subject: [PATCH 58/75] FIX: error handling of string packing

---
 numpy/_core/src/multiarray/unique.cpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 87e5aff9c051..294090830361 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -266,6 +266,9 @@ unique_vstring(PyArrayObject *self)
     Py_INCREF(res_descr);
     NPY_DISABLE_C_API;
     PyThreadState *_save2 = PyEval_SaveThread();
+    auto save2_dealloc = finally([&]() {
+        PyEval_RestoreThread(_save2);
+    });
 
     npy_string_allocator *out_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)res_descr);
     auto out_allocator_dealloc = finally([&]() {
@@ -274,10 +277,10 @@ unique_vstring(PyArrayObject *self)
 
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
-    int pack_status = 0;
     // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
+        int pack_status = 0;
         if ((*it)->buf == NULL) {
             pack_status = NpyString_pack_null(out_allocator, packed_string);
         } else {
@@ -285,17 +288,10 @@ unique_vstring(PyArrayObject *self)
         }
         if (pack_status == -1) {
             // string packing failed
-            break;
+            return NULL;
         }
     }
 
-    PyEval_RestoreThread(_save2);
-    if (pack_status == -1) {
-        // PyErr_SetString requires holding the GIL.
-        PyErr_SetString(PyExc_TypeError, "string packing failed");
-        return NULL;
-    }
-
     return res_obj;
 }
 

From 376ad093e16e549f574dd62775a7c45460e512d9 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Tue, 29 Apr 2025 01:07:55 -0600
Subject: [PATCH 59/75] FIX: change default bucket size

---
 numpy/_core/src/multiarray/unique.cpp | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 294090830361..2a3e7dbb6354 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -56,8 +56,12 @@ unique_integer(PyArrayObject *self)
     // number of elements in the input array
     npy_intp isize = PyArray_SIZE(self);
 
-    // Reserve enough hashset capacity in advance to avoid reallocations and reduce collisions.
-    std::unordered_set<T> hashset(isize * 2);
+    // Reserve hashset capacity in advance to minimize reallocations and collisions.
+    // We use min(isize, 1024) as the initial bucket count:
+    // - Reserving for all elements (isize) may over-allocate when there are few unique values.
+    // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
+    // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
+    std::unordered_set<T> hashset(min(isize, 1024));
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
     char *idata = PyArray_BYTES(self);
@@ -133,9 +137,13 @@ unique_string(PyArrayObject *self)
         return std::memcmp(lhs, rhs, itemsize) == 0;
     };
 
-    // Reserve enough hashset capacity in advance to avoid reallocations and reduce collisions.
+    // Reserve hashset capacity in advance to minimize reallocations and collisions.
+    // We use min(isize, 1024) as the initial bucket count:
+    // - Reserving for all elements (isize) may over-allocate when there are few unique values.
+    // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
+    // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
     std::unordered_set<T *, decltype(hash), decltype(equal)> hashset(
-        isize * 2, hash, equal
+        min(isize, 1024), hash, equal
     );
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
@@ -225,9 +233,13 @@ unique_vstring(PyArrayObject *self)
         return std::memcmp(lhs->buf, rhs->buf, lhs->size) == 0;
     };
 
-     // Reserve enough hashset capacity in advance to avoid reallocations and reduce collisions.
+    // Reserve hashset capacity in advance to minimize reallocations and collisions.
+    // We use min(isize, 1024) as the initial bucket count:
+    // - Reserving for all elements (isize) may over-allocate when there are few unique values.
+    // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
+    // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
     std::unordered_set<npy_static_string *, decltype(hash), decltype(equal)> hashset(
-        isize * 2, hash, equal
+        min(isize, 1024), hash, equal
     );
 
     // Input array is one-dimensional, enabling efficient iteration using strides.

From aa0db48c39da02a082711fcccbc8bee86edbf7d6 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Tue, 29 Apr 2025 21:59:33 -0600
Subject: [PATCH 60/75] FIX: include

---
 numpy/_core/src/multiarray/unique.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 2a3e7dbb6354..675b4814f3aa 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -3,6 +3,7 @@
 
 #include <Python.h>
 
+#include <algorithm>
 #include <cstring>
 #include <functional>
 #include <unordered_set>
@@ -61,7 +62,7 @@ unique_integer(PyArrayObject *self)
     // - Reserving for all elements (isize) may over-allocate when there are few unique values.
     // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
     // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
-    std::unordered_set<T> hashset(min(isize, 1024));
+    std::unordered_set<T> hashset(std::min(isize, 1024));
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
     char *idata = PyArray_BYTES(self);
@@ -143,7 +144,7 @@ unique_string(PyArrayObject *self)
     // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
     // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
     std::unordered_set<T *, decltype(hash), decltype(equal)> hashset(
-        min(isize, 1024), hash, equal
+        std::min(isize, 1024), hash, equal
     );
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
@@ -239,7 +240,7 @@ unique_vstring(PyArrayObject *self)
     // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
     // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
     std::unordered_set<npy_static_string *, decltype(hash), decltype(equal)> hashset(
-        min(isize, 1024), hash, equal
+        std::min(isize, 1024), hash, equal
     );
 
     // Input array is one-dimensional, enabling efficient iteration using strides.

From 7a2892fe29e34cd4d3f13ac6c6da7f2c72e332aa Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Tue, 29 Apr 2025 22:02:13 -0600
Subject: [PATCH 61/75] FIX: cast

---
 numpy/_core/src/multiarray/unique.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 675b4814f3aa..d4209c73a398 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -62,7 +62,7 @@ unique_integer(PyArrayObject *self)
     // - Reserving for all elements (isize) may over-allocate when there are few unique values.
     // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
     // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
-    std::unordered_set<T> hashset(std::min(isize, 1024));
+    std::unordered_set<T> hashset(std::min(isize, (npy_intp)1024));
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
     char *idata = PyArray_BYTES(self);
@@ -144,7 +144,7 @@ unique_string(PyArrayObject *self)
     // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
     // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
     std::unordered_set<T *, decltype(hash), decltype(equal)> hashset(
-        std::min(isize, 1024), hash, equal
+        std::min(isize, (npy_intp)1024), hash, equal
     );
 
     // Input array is one-dimensional, enabling efficient iteration using strides.
@@ -240,7 +240,7 @@ unique_vstring(PyArrayObject *self)
     // - Using a moderate upper bound (1024) keeps memory usage reasonable (4 KiB for pointers).
     // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631
     std::unordered_set<npy_static_string *, decltype(hash), decltype(equal)> hashset(
-        std::min(isize, 1024), hash, equal
+        std::min(isize, (npy_intp)1024), hash, equal
     );
 
     // Input array is one-dimensional, enabling efficient iteration using strides.

From 896bcba69bbb502e1b1acb405e7f8c31ebe47fea Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Thu, 1 May 2025 05:26:38 -0600
Subject: [PATCH 62/75] ENH: support equal_nan=False

---
 numpy/_core/src/multiarray/multiarraymodule.c |  2 +-
 numpy/_core/src/multiarray/unique.cpp         | 33 +++++---
 numpy/_core/src/multiarray/unique.h           |  2 +-
 numpy/lib/_arraysetops_impl.py                |  7 +-
 numpy/lib/tests/test_arraysetops.py           | 80 ++++++++++++++++++-
 5 files changed, 103 insertions(+), 21 deletions(-)

diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c
index e74e14bf10c2..dfe529587c36 100644
--- a/numpy/_core/src/multiarray/multiarraymodule.c
+++ b/numpy/_core/src/multiarray/multiarraymodule.c
@@ -4572,7 +4572,7 @@ static struct PyMethodDef array_module_methods[] = {
     {"from_dlpack", (PyCFunction)from_dlpack,
         METH_FASTCALL | METH_KEYWORDS, NULL},
     {"_unique_hash",  (PyCFunction)array__unique_hash,
-        METH_O, "Collect unique values via a hash map."},
+        METH_VARARGS | METH_KEYWORDS, "Collect unique values via a hash map."},
     {NULL, NULL, 0, NULL}                /* sentinel */
 };
 
diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index d4209c73a398..2e3c4354ab96 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -38,7 +38,7 @@ size_t str_hash(const T *str, npy_intp num_chars) {
 
 template <typename T>
 static PyObject*
-unique_integer(PyArrayObject *self)
+unique_integer(PyArrayObject *self, bool equal_nan)
 {
     /*
     * Returns a new NumPy array containing the unique values of the input array of integer.
@@ -109,7 +109,7 @@ unique_integer(PyArrayObject *self)
 
 template <typename T>
 static PyObject*
-unique_string(PyArrayObject *self)
+unique_string(PyArrayObject *self, bool equal_nan)
 {
     /*
     * Returns a new NumPy array containing the unique values of the input array of fixed size strings.
@@ -191,7 +191,7 @@ unique_string(PyArrayObject *self)
 }
 
 static PyObject*
-unique_vstring(PyArrayObject *self)
+unique_vstring(PyArrayObject *self, bool equal_nan)
 {
     /*
     * Returns a new NumPy array containing the unique values of the input array.
@@ -215,15 +215,19 @@ unique_vstring(PyArrayObject *self)
     auto in_allocator_dealloc = finally([&]() {
         NpyString_release_allocator(in_allocator);
     });
-    auto hash = [](const npy_static_string *value) -> size_t {
+    auto hash = [equal_nan](const npy_static_string *value) -> size_t {
         if (value->buf == NULL) {
-            return 0;
+            if (equal_nan) {
+                return 0;
+            } else {
+                return std::hash<const npy_static_string *>{}(value);
+            }
         }
         return str_hash(value->buf, value->size);
     };
-    auto equal = [](const npy_static_string *lhs, const npy_static_string *rhs) -> bool {
+    auto equal = [equal_nan](const npy_static_string *lhs, const npy_static_string *rhs) -> bool {
         if (lhs->buf == NULL && rhs->buf == NULL) {
-            return true;
+            return equal_nan;
         }
         if (lhs->buf == NULL || rhs->buf == NULL) {
             return false;
@@ -310,7 +314,7 @@ unique_vstring(PyArrayObject *self)
 
 
 // this map contains the functions used for each item size.
-typedef std::function<PyObject *(PyArrayObject *)> function_type;
+typedef std::function<PyObject *(PyArrayObject *, bool)> function_type;
 std::unordered_map<int, function_type> unique_funcs = {
     {NPY_BYTE, unique_integer<npy_byte>},
     {NPY_UBYTE, unique_integer<npy_ubyte>},
@@ -348,8 +352,17 @@ std::unordered_map<int, function_type> unique_funcs = {
  * type is unsupported or `NULL` with an error set.
  */
 extern "C" NPY_NO_EXPORT PyObject *
-array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *arr_obj)
+array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *args, PyObject *kwargs)
 {
+    static const char *kwlist[] = {"arr", "equal_nan", NULL};
+    PyObject *arr_obj;
+    int equal_nan = true;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|p:_unique_hash", (char **)kwlist,
+                                     &arr_obj, &equal_nan)) {
+        return NULL;
+    }
+
     if (!PyArray_Check(arr_obj)) {
         PyErr_SetString(PyExc_TypeError,
                 "_unique_hash() requires a NumPy array input.");
@@ -364,7 +377,7 @@ array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *arr_obj)
             Py_RETURN_NOTIMPLEMENTED;
         }
 
-        return unique_funcs[type](arr);
+        return unique_funcs[type](arr, equal_nan);
     }
     catch (const std::bad_alloc &e) {
         PyErr_NoMemory();
diff --git a/numpy/_core/src/multiarray/unique.h b/numpy/_core/src/multiarray/unique.h
index 3e258405e8f4..26fc500f05f3 100644
--- a/numpy/_core/src/multiarray/unique.h
+++ b/numpy/_core/src/multiarray/unique.h
@@ -5,7 +5,7 @@
 extern "C" {
 #endif
 
-PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy), PyObject *args);
+PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwargs);
 
 #ifdef __cplusplus
 }
diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py
index c24775e33baa..00b1c29ab4fd 100644
--- a/numpy/lib/_arraysetops_impl.py
+++ b/numpy/lib/_arraysetops_impl.py
@@ -361,11 +361,6 @@ def _unique1d(ar, return_index=False, return_inverse=False,
         # two dimensions for all operations. Coerce to an ndarray in such cases.
         ar = np.asarray(ar).flatten()
 
-    if ar.dtype.kind == 'T' and not equal_nan:
-        raise ValueError(
-            "Currently, `equal_nan` can only be True if dtype is `T` (StringDType)."
-        )
-
     optional_indices = return_index or return_inverse
 
     # masked arrays are not supported yet.
@@ -375,7 +370,7 @@ def _unique1d(ar, return_index=False, return_inverse=False,
         conv = _array_converter(ar)
         ar_, = conv
 
-        if (hash_unique := _unique_hash(ar_)) is not NotImplemented:
+        if (hash_unique := _unique_hash(ar_, equal_nan=equal_nan)) is not NotImplemented:
             if sorted:
                 hash_unique.sort()
             # We wrap the result back in case it was a subclass of numpy.ndarray.
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 1da3f7bc2334..c4d773849127 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -859,8 +859,8 @@ def test_unique_unicode_string_hash_based(self):
         # so we check them by sorting
         assert_array_equal(sorted(a1.tolist()), unq_sorted)
 
-    def test_unique_vstring_hash_based(self):
-        # test for unicode and nullable string arrays
+    def test_unique_vstring_hash_based_equal_nan(self):
+        # test for unicode and nullable string arrays (equal_nan=True)
         a = np.array([
                 # short strings
                 'straße',
@@ -922,7 +922,7 @@ def test_unique_vstring_hash_based(self):
             'ñ' * 300,
         ]
 
-        a1 = unique(a, sorted=False)
+        a1 = unique(a, sorted=False, equal_nan=True)
         # the result varies depending on the impl of std::unordered_set,
         # so we check them by sorting
 
@@ -933,6 +933,80 @@ def test_unique_vstring_hash_based(self):
         a1_wo_none = sorted(x for x in a1 if x is not None)
         assert_array_equal(a1_wo_none, unq_sorted_wo_none)
 
+    def test_unique_vstring_hash_based_not_equal_nan(self):
+        # test for unicode and nullable string arrays (equal_nan=False)
+        a = np.array([
+                # short strings
+                'straße',
+                None,
+                'strasse',
+                'straße',
+                None,
+                'niño',
+                'nino',
+                'élève',
+                'eleve',
+                'niño',
+                'élève',
+                # medium strings
+                'b' * 20,
+                'ß' * 30,
+                None,
+                'é' * 30,
+                'e' * 20,
+                'ß' * 30,
+                'n' * 30,
+                'ñ' * 20,
+                None,
+                'e' * 20,
+                'ñ' * 20,
+                # long strings
+                'b' * 300,
+                'ß' * 400,
+                None,
+                'é' * 400,
+                'e' * 300,
+                'ß' * 400,
+                'n' * 400,
+                'ñ' * 300,
+                None,
+                'e' * 300,
+                'ñ' * 300,
+            ],
+            dtype=StringDType(na_object=None)
+        )
+        unq_sorted_wo_none = [
+            'b' * 20,
+            'b' * 300,
+            'e' * 20,
+            'e' * 300,
+            'eleve',
+            'nino',
+            'niño',
+            'n' * 30,
+            'n' * 400,
+            'strasse',
+            'straße',
+            'ß' * 30,
+            'ß' * 400,
+            'élève',
+            'é' * 30,
+            'é' * 400,
+            'ñ' * 20,
+            'ñ' * 300,
+        ]
+
+        a1 = unique(a, sorted=False, equal_nan=False)
+        # the result varies depending on the impl of std::unordered_set,
+        # so we check them by sorting
+
+        # a1 should have exactly one None
+        count_none = sum(x is None for x in a1)
+        assert_equal(count_none, 6)
+
+        a1_wo_none = sorted(x for x in a1 if x is not None)
+        assert_array_equal(a1_wo_none, unq_sorted_wo_none)
+
     def test_unique_vstring_errors(self):
         a = np.array(['apple', 'banana', 'apple', None, 'cherry', 'date', 'banana', 'fig', None, 'grape'] * 2, dtype=StringDType(na_object=None))
         assert_raises(ValueError, unique, a, equal_nan=False)

From f1c1947231f83b31686b132f1554fb49d7a594c3 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Thu, 1 May 2025 05:39:04 -0600
Subject: [PATCH 63/75] FIX: function equal

---
 numpy/_core/src/multiarray/unique.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 2e3c4354ab96..3cba8309dbb9 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -227,7 +227,11 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
     };
     auto equal = [equal_nan](const npy_static_string *lhs, const npy_static_string *rhs) -> bool {
         if (lhs->buf == NULL && rhs->buf == NULL) {
-            return equal_nan;
+            if (equal_nan) {
+                return true;
+            } else {
+                return lhs == rhs;
+            }
         }
         if (lhs->buf == NULL || rhs->buf == NULL) {
             return false;

From f35123aeb95a6464d3cc1cfe0043782186438335 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Thu, 1 May 2025 08:39:09 -0600
Subject: [PATCH 64/75] FIX: check the case if pack_status douesn't return NULL

---
 numpy/_core/src/multiarray/unique.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 3cba8309dbb9..0f57669aa6cf 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -307,10 +307,6 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
         } else {
             pack_status = NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size);
         }
-        if (pack_status == -1) {
-            // string packing failed
-            return NULL;
-        }
     }
 
     return res_obj;

From e6ea015dc7607122457d1122bf587b0eb9d1bed7 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Thu, 1 May 2025 08:40:17 -0600
Subject: [PATCH 65/75] FIX: check the case if pack_status douesn't return NULL

---
 numpy/_core/src/multiarray/unique.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 0f57669aa6cf..e676d4f49a66 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -301,11 +301,10 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
     // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
         npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
-        int pack_status = 0;
         if ((*it)->buf == NULL) {
-            pack_status = NpyString_pack_null(out_allocator, packed_string);
+            NpyString_pack_null(out_allocator, packed_string);
         } else {
-            pack_status = NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size);
+            NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size);
         }
     }
 

From ddff98f2d755b97e4a2253d2d753aca750da00ca Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Thu, 1 May 2025 09:23:59 -0600
Subject: [PATCH 66/75] FIX: stderr

---
 numpy/_core/src/multiarray/unique.cpp | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index e676d4f49a66..b25c01f36c4c 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -7,6 +7,7 @@
 #include <cstring>
 #include <functional>
 #include <unordered_set>
+#include <iostream>
 
 #include <numpy/npy_common.h>
 #include "numpy/arrayobject.h"
@@ -251,16 +252,21 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
         std::min(isize, (npy_intp)1024), hash, equal
     );
 
+    std::cerr << "Entered unique_vstring" << std::endl;
+    std::cerr << "array size (isize): " << isize << std::endl;
     // Input array is one-dimensional, enabling efficient iteration using strides.
     char *idata = PyArray_BYTES(self);
     npy_intp istride = PyArray_STRIDES(self)[0];
+    std::cerr << "initial idata: " << (void*)idata << ", istride: " << istride << std::endl;
     // unpacked_strings need to be allocated outside of the loop because of the lifetime problem.
     std::vector<npy_static_string> unpacked_strings(isize, {0, NULL});
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
+        std::cerr << "  loop index i: " << i << ", idata: " << (void*)idata << std::endl;
         npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
         NpyString_load(in_allocator, packed_string, &unpacked_strings[i]);
         hashset.insert(&unpacked_strings[i]);
     }
+    std::cerr << "Insertion loop completed successfully." << std::endl;
 
     npy_intp length = hashset.size();
 
@@ -298,15 +304,23 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
 
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
+    std::cerr << "output array odata: " << (void*)odata << ", ostride: " << ostride << std::endl;
     // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
+        std::cerr << "odata: " << (void*)odata << std::endl;
         npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
+        int pack_status = 0;
         if ((*it)->buf == NULL) {
-            NpyString_pack_null(out_allocator, packed_string);
+            pack_status = NpyString_pack_null(out_allocator, packed_string);
         } else {
-            NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size);
+            pack_status = NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size);
+        }
+        if (pack_status == -1) {
+            // string packing failed
+            return NULL;
         }
     }
+    std::cerr << "Packing loop completed successfully." << std::endl;
 
     return res_obj;
 }

From 2758e273ceb534173b0474fb27ee89ace12d78da Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Thu, 1 May 2025 18:39:54 -0600
Subject: [PATCH 67/75] ENH: METH_VARARGS -> METH_FASTCALL

---
 numpy/_core/src/multiarray/multiarraymodule.c |  2 +-
 numpy/_core/src/multiarray/unique.cpp         | 29 ++++++++++---------
 numpy/_core/src/multiarray/unique.h           |  3 +-
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c
index dfe529587c36..0095c38c13c5 100644
--- a/numpy/_core/src/multiarray/multiarraymodule.c
+++ b/numpy/_core/src/multiarray/multiarraymodule.c
@@ -4572,7 +4572,7 @@ static struct PyMethodDef array_module_methods[] = {
     {"from_dlpack", (PyCFunction)from_dlpack,
         METH_FASTCALL | METH_KEYWORDS, NULL},
     {"_unique_hash",  (PyCFunction)array__unique_hash,
-        METH_VARARGS | METH_KEYWORDS, "Collect unique values via a hash map."},
+        METH_FASTCALL | METH_KEYWORDS, "Collect unique values via a hash map."},
     {NULL, NULL, 0, NULL}                /* sentinel */
 };
 
diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index b25c01f36c4c..8e512e6b27c9 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -11,6 +11,9 @@
 
 #include <numpy/npy_common.h>
 #include "numpy/arrayobject.h"
+extern "C" {
+    #include "npy_argparse.h"
+}
 
 // This is to use RAII pattern to handle cpp exceptions while avoiding memory leaks.
 // Adapted from https://stackoverflow.com/a/25510879/2536294
@@ -365,23 +368,21 @@ std::unordered_map<int, function_type> unique_funcs = {
  * type is unsupported or `NULL` with an error set.
  */
 extern "C" NPY_NO_EXPORT PyObject *
-array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *args, PyObject *kwargs)
+array__unique_hash(PyObject *NPY_UNUSED(module),
+                   PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
 {
-    static const char *kwlist[] = {"arr", "equal_nan", NULL};
-    PyObject *arr_obj;
-    int equal_nan = true;
-
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|p:_unique_hash", (char **)kwlist,
-                                     &arr_obj, &equal_nan)) {
-        return NULL;
-    }
-
-    if (!PyArray_Check(arr_obj)) {
-        PyErr_SetString(PyExc_TypeError,
-                "_unique_hash() requires a NumPy array input.");
+    PyArrayObject *arr = NULL;
+    npy_bool equal_nan = NPY_TRUE;  // default to True
+
+    NPY_PREPARE_ARGPARSER;
+    if (npy_parse_arguments("_unique_hash", args, len_args, kwnames,
+                            "arr", &PyArray_Converter, &arr,
+                            "|equal_nan",  &PyArray_BoolConverter, &equal_nan,
+                            NULL, NULL, NULL
+                            ) < 0
+    ) {
         return NULL;
     }
-    PyArrayObject *arr = (PyArrayObject *)arr_obj;
 
     try {
         auto type = PyArray_TYPE(arr);
diff --git a/numpy/_core/src/multiarray/unique.h b/numpy/_core/src/multiarray/unique.h
index 26fc500f05f3..7b3fb143ada4 100644
--- a/numpy/_core/src/multiarray/unique.h
+++ b/numpy/_core/src/multiarray/unique.h
@@ -5,7 +5,8 @@
 extern "C" {
 #endif
 
-PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwargs);
+PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy),
+                             PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames);
 
 #ifdef __cplusplus
 }

From a6dc86a8ef041796da94a5bd4a9dc0496244d157 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Fri, 2 May 2025 04:48:51 -0600
Subject: [PATCH 68/75] FIX: log

---
 numpy/_core/src/multiarray/unique.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 8e512e6b27c9..63f29cbb67c2 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -273,6 +273,7 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
 
     npy_intp length = hashset.size();
 
+    std::cerr << "hashset size: " << length << std::endl;
     PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
     // NumPy API calls and Python object manipulations require holding the GIL.
@@ -287,23 +288,30 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
         NPY_ARRAY_WRITEABLE, // flags
         NULL // obj
     );
-
+    std::cerr << "res_obj: " << (void*)res_obj << std::endl;
     if (res_obj == NULL) {
         return NULL;
     }
+    std::cerr << "res_obj created successfully." << std::endl;
     PyArray_Descr *res_descr = PyArray_DESCR((PyArrayObject *)res_obj);
+    std::cerr << "res_descr: " << (void*)res_descr << std::endl;
     // NumPy API calls and Python object manipulations require holding the GIL.
     Py_INCREF(res_descr);
+    std::cerr << "res_descr incremented successfully." << std::endl;
     NPY_DISABLE_C_API;
     PyThreadState *_save2 = PyEval_SaveThread();
+    std::cerr << "save2: " << (void*)_save2 << std::endl;
     auto save2_dealloc = finally([&]() {
         PyEval_RestoreThread(_save2);
     });
+    std::cerr << "save2_dealloc completed successfully." << std::endl;
 
     npy_string_allocator *out_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)res_descr);
+    std::cerr << "out_allocator: " << (void*)out_allocator << std::endl;
     auto out_allocator_dealloc = finally([&]() {
         NpyString_release_allocator(out_allocator);
     });
+    std::cerr << "out_allocator_dealloc completed successfully." << std::endl;
 
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];

From 9a936eb39bda325b442efca99daeb393e43b178f Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Sat, 3 May 2025 07:23:19 -0600
Subject: [PATCH 69/75] FIX: release allocator

---
 numpy/_core/src/multiarray/unique.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 63f29cbb67c2..4194d1759b3e 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -216,9 +216,6 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
 
     // variables for the vstring
     npy_string_allocator *in_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)descr);
-    auto in_allocator_dealloc = finally([&]() {
-        NpyString_release_allocator(in_allocator);
-    });
     auto hash = [equal_nan](const npy_static_string *value) -> size_t {
         if (value->buf == NULL) {
             if (equal_nan) {
@@ -274,6 +271,10 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
     npy_intp length = hashset.size();
 
     std::cerr << "hashset size: " << length << std::endl;
+
+    // allocator need to be released before operations requiring the GIL
+    NpyString_release_allocator(in_allocator);
+
     PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
     // NumPy API calls and Python object manipulations require holding the GIL.

From 1e967eed1644a27a261b9e438843993f078c0ae9 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Sat, 3 May 2025 07:24:39 -0600
Subject: [PATCH 70/75] FIX: comment

---
 numpy/_core/src/multiarray/unique.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 4194d1759b3e..4cb755d209a6 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -272,7 +272,8 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
 
     std::cerr << "hashset size: " << length << std::endl;
 
-    // allocator need to be released before operations requiring the GIL
+    // allocator need to be released before operations requiring the GIL.
+    // https://github.com/numpy/numpy/blob/49056192330487ff49a142e5280e24461b638723/numpy/_core/src/multiarray/stringdtype/static_string.c#L286-L306
     NpyString_release_allocator(in_allocator);
 
     PyEval_RestoreThread(_save1);

From 52c2326c4526a3f49b44eb8c377b5905bc7fced2 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Sat, 3 May 2025 07:59:11 -0600
Subject: [PATCH 71/75] FIX: delete log

---
 numpy/_core/src/multiarray/unique.cpp | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 4cb755d209a6..9f07afd251bc 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -7,7 +7,6 @@
 #include <cstring>
 #include <functional>
 #include <unordered_set>
-#include <iostream>
 
 #include <numpy/npy_common.h>
 #include "numpy/arrayobject.h"
@@ -252,30 +251,23 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
         std::min(isize, (npy_intp)1024), hash, equal
     );
 
-    std::cerr << "Entered unique_vstring" << std::endl;
-    std::cerr << "array size (isize): " << isize << std::endl;
     // Input array is one-dimensional, enabling efficient iteration using strides.
     char *idata = PyArray_BYTES(self);
     npy_intp istride = PyArray_STRIDES(self)[0];
-    std::cerr << "initial idata: " << (void*)idata << ", istride: " << istride << std::endl;
     // unpacked_strings need to be allocated outside of the loop because of the lifetime problem.
     std::vector<npy_static_string> unpacked_strings(isize, {0, NULL});
     for (npy_intp i = 0; i < isize; i++, idata += istride) {
-        std::cerr << "  loop index i: " << i << ", idata: " << (void*)idata << std::endl;
         npy_packed_static_string *packed_string = (npy_packed_static_string *)idata;
         NpyString_load(in_allocator, packed_string, &unpacked_strings[i]);
         hashset.insert(&unpacked_strings[i]);
     }
-    std::cerr << "Insertion loop completed successfully." << std::endl;
-
-    npy_intp length = hashset.size();
-
-    std::cerr << "hashset size: " << length << std::endl;
 
     // allocator need to be released before operations requiring the GIL.
     // https://github.com/numpy/numpy/blob/49056192330487ff49a142e5280e24461b638723/numpy/_core/src/multiarray/stringdtype/static_string.c#L286-L306
     NpyString_release_allocator(in_allocator);
 
+    npy_intp length = hashset.size();
+
     PyEval_RestoreThread(_save1);
     NPY_ALLOW_C_API;
     // NumPy API calls and Python object manipulations require holding the GIL.
@@ -290,37 +282,29 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
         NPY_ARRAY_WRITEABLE, // flags
         NULL // obj
     );
-    std::cerr << "res_obj: " << (void*)res_obj << std::endl;
     if (res_obj == NULL) {
         return NULL;
     }
-    std::cerr << "res_obj created successfully." << std::endl;
     PyArray_Descr *res_descr = PyArray_DESCR((PyArrayObject *)res_obj);
-    std::cerr << "res_descr: " << (void*)res_descr << std::endl;
     // NumPy API calls and Python object manipulations require holding the GIL.
     Py_INCREF(res_descr);
-    std::cerr << "res_descr incremented successfully." << std::endl;
     NPY_DISABLE_C_API;
+
+    // release the GIL
     PyThreadState *_save2 = PyEval_SaveThread();
-    std::cerr << "save2: " << (void*)_save2 << std::endl;
     auto save2_dealloc = finally([&]() {
         PyEval_RestoreThread(_save2);
     });
-    std::cerr << "save2_dealloc completed successfully." << std::endl;
 
     npy_string_allocator *out_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)res_descr);
-    std::cerr << "out_allocator: " << (void*)out_allocator << std::endl;
     auto out_allocator_dealloc = finally([&]() {
         NpyString_release_allocator(out_allocator);
     });
-    std::cerr << "out_allocator_dealloc completed successfully." << std::endl;
 
     char *odata = PyArray_BYTES((PyArrayObject *)res_obj);
     npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0];
-    std::cerr << "output array odata: " << (void*)odata << ", ostride: " << ostride << std::endl;
     // Output array is one-dimensional, enabling efficient iteration using strides.
     for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) {
-        std::cerr << "odata: " << (void*)odata << std::endl;
         npy_packed_static_string *packed_string = (npy_packed_static_string *)odata;
         int pack_status = 0;
         if ((*it)->buf == NULL) {
@@ -333,7 +317,6 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
             return NULL;
         }
     }
-    std::cerr << "Packing loop completed successfully." << std::endl;
 
     return res_obj;
 }

From 6f18a43b0ecf13cc8fe4673368d00d736b52a3b9 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Sat, 3 May 2025 08:29:27 -0600
Subject: [PATCH 72/75] ENH: implemented FNV-1a as hash function

---
 numpy/_core/src/multiarray/unique.cpp | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 9f07afd251bc..cf0dc6353449 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -31,12 +31,21 @@ FinalAction<F> finally(F f) {
 // function to caluculate the hash of a string
 template <typename T>
 size_t str_hash(const T *str, npy_intp num_chars) {
-    // https://www.boost.org/doc/libs/1_88_0/libs/container_hash/doc/html/hash.html#notes_hash_combine
-    size_t h = 0;
-    for (npy_intp i = 0; i < num_chars; ++i) {
-        h ^= std::hash<T>{}(str[i]) + 0x9e3779b9 + (h << 6) + (h >> 2);
+    // http://www.isthe.com/chongo/tech/comp/fnv/#FNV-1a
+    #if NPY_SIZEOF_INTP == 4
+        static const size_t FNV_OFFSET_BASIS = 2166136261U;
+        static const size_t FNV_PRIME = 16777619U;
+    #else // NPY_SIZEOF_INTP == 8
+        static const size_t FNV_OFFSET_BASIS = 14695981039346656037ULL;
+        static const size_t FNV_PRIME = 1099511628211ULL;
+    #endif
+    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(str);
+    size_t hash = FNV_OFFSET_BASIS;
+    for (npy_intp i = 0; i < num_chars * sizeof(T); ++i) {
+        hash ^= bytes[i];
+        hash *= FNV_PRIME;
     }
-    return h;
+    return hash;
 }
 
 template <typename T>

From 2a1bd41e3d6e8bc2205c71ade09dd4d2967b6ac9 Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Sat, 3 May 2025 08:31:54 -0600
Subject: [PATCH 73/75] bool -> npy_bool

---
 numpy/_core/src/multiarray/unique.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index cf0dc6353449..14482cb9a66a 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -50,7 +50,7 @@ size_t str_hash(const T *str, npy_intp num_chars) {
 
 template <typename T>
 static PyObject*
-unique_integer(PyArrayObject *self, bool equal_nan)
+unique_integer(PyArrayObject *self, npy_bool equal_nan)
 {
     /*
     * Returns a new NumPy array containing the unique values of the input array of integer.
@@ -121,7 +121,7 @@ unique_integer(PyArrayObject *self, bool equal_nan)
 
 template <typename T>
 static PyObject*
-unique_string(PyArrayObject *self, bool equal_nan)
+unique_string(PyArrayObject *self, npy_bool equal_nan)
 {
     /*
     * Returns a new NumPy array containing the unique values of the input array of fixed size strings.
@@ -203,7 +203,7 @@ unique_string(PyArrayObject *self, bool equal_nan)
 }
 
 static PyObject*
-unique_vstring(PyArrayObject *self, bool equal_nan)
+unique_vstring(PyArrayObject *self, npy_bool equal_nan)
 {
     /*
     * Returns a new NumPy array containing the unique values of the input array.
@@ -332,7 +332,7 @@ unique_vstring(PyArrayObject *self, bool equal_nan)
 
 
 // this map contains the functions used for each item size.
-typedef std::function<PyObject *(PyArrayObject *, bool)> function_type;
+typedef std::function<PyObject *(PyArrayObject *, npy_bool)> function_type;
 std::unordered_map<int, function_type> unique_funcs = {
     {NPY_BYTE, unique_integer<npy_byte>},
     {NPY_UBYTE, unique_integer<npy_ubyte>},

From 8b632f281fc97f01d578c811c8f97051aed6b1aa Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Sat, 3 May 2025 08:39:53 -0600
Subject: [PATCH 74/75] FIX: cast

---
 numpy/_core/src/multiarray/unique.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
index 14482cb9a66a..07fc52b92ff9 100644
--- a/numpy/_core/src/multiarray/unique.cpp
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -41,7 +41,7 @@ size_t str_hash(const T *str, npy_intp num_chars) {
     #endif
     const unsigned char* bytes = reinterpret_cast<const unsigned char*>(str);
     size_t hash = FNV_OFFSET_BASIS;
-    for (npy_intp i = 0; i < num_chars * sizeof(T); ++i) {
+    for (npy_intp i = 0; i < num_chars * (npy_intp)sizeof(T); ++i) {
         hash ^= bytes[i];
         hash *= FNV_PRIME;
     }

From a7bfc084b6971608ccdf63d13954318bc81273ba Mon Sep 17 00:00:00 2001
From: kokwatan <kokwatan@lycorp.co.jp>
Date: Sun, 4 May 2025 01:15:30 -0600
Subject: [PATCH 75/75] 34sec -> 35.1sec

---
 doc/release/upcoming_changes/28767.performance.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/release/upcoming_changes/28767.performance.rst b/doc/release/upcoming_changes/28767.performance.rst
index 7e429d566c08..b6b54af0667a 100644
--- a/doc/release/upcoming_changes/28767.performance.rst
+++ b/doc/release/upcoming_changes/28767.performance.rst
@@ -3,8 +3,8 @@ Performance improvements to ``np.unique`` for string dtypes
 The hash-based algorithm for unique extraction provides
 an order-of-magnitude speedup on large string arrays. 
 In an internal benchmark with about 1 billion string elements,
-the hash-based np.unique completed in roughly 34 seconds,
+the hash-based np.unique completed in roughly 35.1 seconds,
 compared to 498 seconds with the sort-based method
-– about 14–15× faster for unsorted unique operations on strings.
+– about 14× faster for unsorted unique operations on strings.
 This improvement greatly reduces the time to find unique values
 in very large string datasets.