Skip to content

MAINT utils._fast_dict uses types from utils._typedefs #26025

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,8 +386,7 @@ def check_package_status(package, min_version):
"include_dirs": ["src"],
"include_np": True,
},
{"sources": ["_fast_dict.pyx"], "language": "c++", "include_np": True},
{"sources": ["_fast_dict.pyx"], "language": "c++", "include_np": True},
{"sources": ["_fast_dict.pyx"], "language": "c++"},
{"sources": ["_openmp_helpers.pyx"]},
{"sources": ["_seq_dataset.pyx.tp", "_seq_dataset.pxd.tp"], "include_np": True},
{
Expand Down
10 changes: 3 additions & 7 deletions sklearn/utils/_fast_dict.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@ integers, and values float.

from libcpp.map cimport map as cpp_map

# Import the C-level symbols of numpy
cimport numpy as cnp
from ._typedefs cimport float64_t, intp_t

ctypedef cnp.float64_t DTYPE_t

ctypedef cnp.intp_t ITYPE_t

###############################################################################
# An object to be used in Python

cdef class IntFloatDict:
cdef cpp_map[ITYPE_t, DTYPE_t] my_map
cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values)
cdef cpp_map[intp_t, float64_t] my_map
cdef _to_arrays(self, intp_t [:] keys, float64_t [:] values)
45 changes: 21 additions & 24 deletions sklearn/utils/_fast_dict.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,8 @@ from libcpp.map cimport map as cpp_map

import numpy as np

#DTYPE = np.float64
#ctypedef cnp.float64_t DTYPE_t
from ._typedefs cimport float64_t, intp_t

#ITYPE = np.intp
#ctypedef cnp.intp_t ITYPE_t

###############################################################################
# An object to be used in Python
Expand All @@ -30,8 +27,8 @@ cdef class IntFloatDict:

def __init__(
self,
ITYPE_t[:] keys,
DTYPE_t[:] values,
intp_t[:] keys,
float64_t[:] values,
):
cdef int i
cdef int size = values.size
Expand All @@ -44,7 +41,7 @@ cdef class IntFloatDict:
return self.my_map.size()

def __getitem__(self, int key):
cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = self.my_map.find(key)
cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.find(key)
if it == self.my_map.end():
# The key is not in the dict
raise KeyError('%i' % key)
Expand All @@ -56,20 +53,20 @@ cdef class IntFloatDict:
# Cython 0.20 generates buggy code below. Commenting this out for now
# and relying on the to_arrays method
#def __iter__(self):
# cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = self.my_map.begin()
# cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = self.my_map.end()
# cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
# cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
# while it != end:
# yield deref(it).first, deref(it).second
# inc(it)

def __iter__(self):
cdef int size = self.my_map.size()
cdef ITYPE_t [:] keys = np.empty(size, dtype=np.intp)
cdef DTYPE_t [:] values = np.empty(size, dtype=np.float64)
cdef intp_t [:] keys = np.empty(size, dtype=np.intp)
cdef float64_t [:] values = np.empty(size, dtype=np.float64)
self._to_arrays(keys, values)
cdef int idx
cdef ITYPE_t key
cdef DTYPE_t value
cdef intp_t key
cdef float64_t value
for idx in range(size):
key = keys[idx]
value = values[idx]
Expand All @@ -92,10 +89,10 @@ cdef class IntFloatDict:
self._to_arrays(keys, values)
return keys, values

cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values):
cdef _to_arrays(self, intp_t [:] keys, float64_t [:] values):
# Internal version of to_arrays that takes already-initialized arrays
cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = self.my_map.begin()
cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = self.my_map.end()
cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
cdef int index = 0
while it != end:
keys[index] = deref(it).first
Expand All @@ -104,8 +101,8 @@ cdef class IntFloatDict:
index += 1

def update(self, IntFloatDict other):
cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = other.my_map.begin()
cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = other.my_map.end()
cdef cpp_map[intp_t, float64_t].iterator it = other.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = other.my_map.end()
while it != end:
self.my_map[deref(it).first] = deref(it).second
inc(it)
Expand All @@ -116,9 +113,9 @@ cdef class IntFloatDict:
out_obj.my_map = self.my_map
return out_obj

def append(self, ITYPE_t key, DTYPE_t value):
def append(self, intp_t key, float64_t value):
# Construct our arguments
cdef pair[ITYPE_t, DTYPE_t] args
cdef pair[intp_t, float64_t] args
args.first = key
args.second = value
self.my_map.insert(args)
Expand All @@ -128,10 +125,10 @@ cdef class IntFloatDict:
# operation on dict

def argmin(IntFloatDict d):
cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = d.my_map.begin()
cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = d.my_map.end()
cdef ITYPE_t min_key = -1
cdef DTYPE_t min_value = np.inf
cdef cpp_map[intp_t, float64_t].iterator it = d.my_map.begin()
cdef cpp_map[intp_t, float64_t].iterator end = d.my_map.end()
cdef intp_t min_key = -1
cdef float64_t min_value = np.inf
while it != end:
if deref(it).second < min_value:
min_value = deref(it).second
Expand Down