Skip to content

ENH: Implement string comparison ufuncs (or almost) #21041

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jun 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions benchmarks/benchmarks/bench_strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from __future__ import absolute_import, division, print_function

from .common import Benchmark

import numpy as np
import operator


# Lookup table from the operator symbol used as a benchmark parameter to the
# callable in the ``operator`` module that implements it.
_OPERATORS = {
    # equality
    '==': operator.eq,
    '!=': operator.ne,
    # ordering
    '<': operator.lt,
    '<=': operator.le,
    '>': operator.gt,
    '>=': operator.ge,
}


class StringComparisons(Benchmark):
    """Time elementwise comparison operators applied to string arrays.

    Every benchmark is parametrized over the array shape, the dtype code
    handed to ``astype`` ('U' or 'S'), whether the operands are used as-is
    or as strided views, and the comparison operator symbol.
    """

    # Basic string comparison speed tests
    params = [
        [100, 10000, (1000, 20)],
        ['U', 'S'],
        [True, False],
        ['==', '!=', '<', '<=', '>', '>='],
    ]
    param_names = ['shape', 'dtype', 'contig', 'operator']
    int64 = np.dtype(np.int64)

    def setup(self, shape, dtype, contig, operator):
        """Create the operand arrays and look up the comparison callable.

        Parameters
        ----------
        shape : int or tuple of int
            Shape of the arrays being compared.
        dtype : str
            Dtype code the integer range is converted to via ``astype``.
        contig : bool
            When false, every array is replaced by a view that takes every
            second element along its last axis.
        operator : str
            Key into ``_OPERATORS`` selecting the comparison to time.
        """
        # Consecutive integers converted to strings, in the requested shape.
        base = np.arange(np.prod(shape)).astype(dtype).reshape(shape)
        same = base.copy()
        # Same values with the order flipped along the first axis.
        flipped = base[::-1].copy()

        if not contig:
            # Equivalent to indexing with ``[..., ::2]``.
            strided = (Ellipsis, slice(None, None, 2))
            base, same, flipped = (a[strided] for a in (base, same, flipped))

        self.arr = base
        self.arr_identical = same
        self.arr_different = flipped

        # NOTE: the ``operator`` parameter shadows the ``operator`` module
        # inside this method; only the symbol string is needed here.
        self.operator = _OPERATORS[operator]

    def time_compare_identical(self, shape, dtype, contig, operator):
        """Time the comparison of the array with an equal-valued copy."""
        compare = self.operator
        compare(self.arr, self.arr_identical)

    def time_compare_different(self, shape, dtype, contig, operator):
        """Time the comparison of the array with its flipped counterpart."""
        compare = self.operator
        compare(self.arr, self.arr_different)
2 changes: 1 addition & 1 deletion numpy/core/include/numpy/experimental_dtype_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ typedef struct {
} PyArrayMethod_Spec;


typedef PyObject *_ufunc_addloop_fromspec_func(
typedef int _ufunc_addloop_fromspec_func(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change seems unrelated to the refactoring part and probably deserves a commit on its own.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, although I don't care about when/how it gets in, hehe

PyObject *ufunc, PyArrayMethod_Spec *spec);
/*
* The main ufunc registration function. This adds a new implementation/loop
Expand Down
1 change: 1 addition & 0 deletions numpy/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,6 +1082,7 @@ def generate_umath_doc_header(ext, build_dir):
join('src', 'umath', 'scalarmath.c.src'),
join('src', 'umath', 'ufunc_type_resolution.c'),
join('src', 'umath', 'override.c'),
join('src', 'umath', 'string_ufuncs.cpp'),
# For testing. Eventually, should use public API and be separate:
join('src', 'umath', '_scaled_float_dtype.c'),
]
Expand Down
8 changes: 8 additions & 0 deletions numpy/core/src/common/numpyos.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#ifndef NUMPY_CORE_SRC_COMMON_NPY_NUMPYOS_H_
#define NUMPY_CORE_SRC_COMMON_NPY_NUMPYOS_H_

#ifdef __cplusplus
extern "C" {
#endif

NPY_NO_EXPORT char*
NumPyOS_ascii_formatd(char *buffer, size_t buf_size,
const char *format,
Expand Down Expand Up @@ -39,4 +43,8 @@ NumPyOS_strtoll(const char *str, char **endptr, int base);
NPY_NO_EXPORT npy_ulonglong
NumPyOS_strtoull(const char *str, char **endptr, int base);

#ifdef __cplusplus
}
#endif

#endif /* NUMPY_CORE_SRC_COMMON_NPY_NUMPYOS_H_ */
9 changes: 8 additions & 1 deletion numpy/core/src/multiarray/array_method.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
#include <Python.h>
#include <numpy/ndarraytypes.h>

#ifdef __cplusplus
extern "C" {
#endif

typedef enum {
/* Flag for whether the GIL is required */
Expand Down Expand Up @@ -249,6 +252,10 @@ PyArrayMethod_FromSpec(PyArrayMethod_Spec *spec);
* need better tests when a public version is exposed.
*/
NPY_NO_EXPORT PyBoundArrayMethodObject *
PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private);
PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int priv);

#ifdef __cplusplus
}
#endif

#endif /* NUMPY_CORE_SRC_MULTIARRAY_ARRAY_METHOD_H_ */
Loading