Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 5ea7a17

Browse files
Moving to numba=0.53.1 (#971)
* Moving to numba=0.53
* Workarounds to avoid Numba regressions in 0.53
* Changing Numba 0.53.0 to 0.53.1
1 parent 18cf6ae commit 5ea7a17

File tree

5 files changed

+57
-43
lines changed

5 files changed

+57
-43
lines changed

conda-recipe/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{% set NUMBA_VERSION = "==0.52.0" %}
1+
{% set NUMBA_VERSION = "==0.53.1" %}
22
{% set PANDAS_VERSION = "==1.2.0" %}
33
{% set PYARROW_VERSION = "==2.0.0" %}
44

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
numpy>=1.16
22
pandas==1.2.0
33
pyarrow==2.0.0
4-
numba==0.52.0
4+
numba==0.53.1
55
tbb
66
tbb-devel

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 53 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -459,13 +459,15 @@ def _series_getitem_idx_bool_indexer_impl(self, idx):
459459
if (isinstance(idx, SeriesType) and index_is_positional
460460
and not isinstance(idx.data.dtype, (types.Boolean, bool))):
461461
def hpat_pandas_series_getitem_idx_list_impl(self, idx):
462-
res = numpy.copy(self._data[:len(idx._data)])
463-
index = numpy.arange(len(self._data))
462+
idx_data = idx._data
463+
self_data = self._data
464+
res = numpy.copy(self._data[:len(idx_data)])
465+
index = numpy.arange(len(self_data))
464466
for i in numba.prange(len(res)):
465467
for j in numba.prange(len(index)):
466-
if j == idx._data[i]:
467-
res[i] = self._data[j]
468-
return pandas.Series(data=res, index=index[idx._data], name=self._name)
468+
if j == idx_data[i]:
469+
res[i] = self_data[j]
470+
return pandas.Series(data=res, index=index[idx_data], name=self._name)
469471
return hpat_pandas_series_getitem_idx_list_impl
470472

471473
# idx is Series and its index is not PositionalIndex, idx.dtype is not Boolean
@@ -647,6 +649,7 @@ def sdc_pandas_series_setitem_no_reindexing_impl(self, idx, value):
647649

648650
def sdc_pandas_series_setitem_idx_bool_array_align_impl(self, idx, value):
649651

652+
series_data = self._data # FIXME_Numba#6960
650653
# if idx is a Boolean array (and value is a series) it's used as a mask for self.index
651654
# and filtered indexes are looked in value.index, and if found corresponding value is set
652655
if value_is_series == True: # noqa
@@ -659,7 +662,7 @@ def sdc_pandas_series_setitem_idx_bool_array_align_impl(self, idx, value):
659662
self_index_has_duplicates = len(unique_self_indices) != len(self_index)
660663
value_index_has_duplicates = len(unique_value_indices) != len(value_index)
661664
if (self_index_has_duplicates or value_index_has_duplicates):
662-
self._data[idx] = value._data
665+
series_data[idx] = value._data
663666
else:
664667
map_index_to_position = Dict.empty(
665668
key_type=indexes_common_dtype,
@@ -674,13 +677,13 @@ def sdc_pandas_series_setitem_idx_bool_array_align_impl(self, idx, value):
674677
if idx[i]:
675678
self_index_value = self_index[i]
676679
if self_index_value in map_index_to_position:
677-
self._data[i] = value._data[map_index_to_position[self_index_value]]
680+
series_data[i] = value._data[map_index_to_position[self_index_value]]
678681
else:
679-
sdc.hiframes.join.setitem_arr_nan(self._data, i)
682+
sdc.hiframes.join.setitem_arr_nan(series_data, i)
680683

681684
else:
682685
# if value has no index - nothing to reindex and assignment is made along positions set by idx mask
683-
self._data[idx] = value
686+
series_data[idx] = value
684687

685688
return self
686689

@@ -755,31 +758,35 @@ def sdc_pandas_series_setitem_idx_bool_series_align_impl(self, idx, value):
755758
value_is_scalar = not (value_is_series or value_is_array)
756759
def sdc_pandas_series_setitem_idx_int_series_align_impl(self, idx, value):
757760

761+
# FIXME_Numba#6960: all changes of this commit are unnecessary - revert when resolved
762+
self_data = self._data
763+
self_index = self._index
764+
self_index_size = len(self_index)
765+
idx_size = len(idx)
766+
758767
_idx = idx._data if idx_is_series == True else idx # noqa
759768
_value = value._data if value_is_series == True else value # noqa
760769

761-
self_index_size = len(self._index)
762-
idx_size = len(_idx)
763770
valid_indices = numpy.repeat(-1, self_index_size)
764771
for i in numba.prange(self_index_size):
765772
for j in numpy.arange(idx_size):
766-
if self._index[i] == _idx[j]:
773+
if self_index[i] == _idx[j]:
767774
valid_indices[i] = j
768775

769776
valid_indices_positions = numpy.arange(self_index_size)[valid_indices != -1]
770777
valid_indices_masked = valid_indices[valid_indices != -1]
771778

772-
indexes_found = self._index[valid_indices_positions]
779+
indexes_found = self_index[valid_indices_positions]
773780
if len(numpy.unique(indexes_found)) != len(indexes_found):
774781
raise ValueError("Reindexing only valid with uniquely valued Index objects")
775782

776783
if len(valid_indices_masked) != idx_size:
777784
raise KeyError("Reindexing not possible: idx has index not found in Series")
778785

779786
if value_is_scalar == True: # noqa
780-
self._data[valid_indices_positions] = _value
787+
self_data[valid_indices_positions] = _value
781788
else:
782-
self._data[valid_indices_positions] = numpy.take(_value, valid_indices_masked)
789+
self_data[valid_indices_positions] = numpy.take(_value, valid_indices_masked)
783790

784791
return self
785792

@@ -1598,17 +1605,18 @@ def hpat_pandas_series_var_impl(self, axis=None, skipna=None, level=None, ddof=1
15981605
if skipna is None:
15991606
skipna = True
16001607

1608+
self_data = self._data # FIXME_Numba#6960
16011609
if skipna:
1602-
valuable_length = len(self._data) - numpy.sum(numpy.isnan(self._data))
1610+
valuable_length = len(self_data) - numpy.sum(numpy.isnan(self_data))
16031611
if valuable_length <= ddof:
16041612
return numpy.nan
16051613

1606-
return numpy_like.nanvar(self._data) * valuable_length / (valuable_length - ddof)
1614+
return numpy_like.nanvar(self_data) * valuable_length / (valuable_length - ddof)
16071615

1608-
if len(self._data) <= ddof:
1616+
if len(self_data) <= ddof:
16091617
return numpy.nan
16101618

1611-
return self._data.var() * len(self._data) / (len(self._data) - ddof)
1619+
return self_data.var() * len(self_data) / (len(self_data) - ddof)
16121620

16131621
return hpat_pandas_series_var_impl
16141622

@@ -2859,8 +2867,9 @@ def hpat_pandas_series_prod_impl(self, axis=None, skipna=None, level=None, numer
28592867
else:
28602868
_skipna = skipna
28612869

2870+
series_data = self._data # FIXME_Numba#6960
28622871
if _skipna:
2863-
return numpy_like.nanprod(self._data)
2872+
return numpy_like.nanprod(series_data)
28642873
else:
28652874
return numpy.prod(self._data)
28662875

@@ -3079,8 +3088,9 @@ def hpat_pandas_series_min_impl(self, axis=None, skipna=None, level=None, numeri
30793088
else:
30803089
_skipna = skipna
30813090

3091+
series_data = self._data # FIXME_Numba#6960
30823092
if _skipna:
3083-
return numpy_like.nanmin(self._data)
3093+
return numpy_like.nanmin(series_data)
30843094

30853095
return self._data.min()
30863096

@@ -3156,8 +3166,9 @@ def hpat_pandas_series_max_impl(self, axis=None, skipna=None, level=None, numeri
31563166
else:
31573167
_skipna = skipna
31583168

3169+
series_data = self._data # FIXME_Numba#6960
31593170
if _skipna:
3160-
return numpy_like.nanmax(self._data)
3171+
return numpy_like.nanmax(series_data)
31613172

31623173
return self._data.max()
31633174

@@ -3222,8 +3233,9 @@ def hpat_pandas_series_mean_impl(self, axis=None, skipna=None, level=None, numer
32223233
else:
32233234
_skipna = skipna
32243235

3236+
series_data = self._data # FIXME_Numba#6960
32253237
if _skipna:
3226-
return numpy_like.nanmean(self._data)
3238+
return numpy_like.nanmean(series_data)
32273239

32283240
return self._data.mean()
32293241

@@ -3780,54 +3792,56 @@ def hpat_pandas_series_argsort(self, axis=0, kind='quicksort', order=None):
37803792

37813793
if not isinstance(self.index, PositionalIndexType):
37823794
def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=None):
3795+
series_data = self._data # FIXME_Numba#6960
37833796
if kind != 'quicksort' and kind != 'mergesort':
37843797
raise ValueError("Method argsort(). Unsupported parameter. Given 'kind' != 'quicksort' or 'mergesort'")
37853798
if kind == 'mergesort':
37863799
# It is impossible to use numpy.argsort(self._data, kind=kind) since numba gives a typing error
3787-
sort = numpy_like.argsort(self._data, kind='mergesort')
3800+
sort = numpy_like.argsort(series_data, kind='mergesort')
37883801
else:
3789-
sort = numpy_like.argsort(self._data)
3802+
sort = numpy_like.argsort(series_data)
37903803
na = self.isna().sum()
3791-
result = numpy.empty(len(self._data), dtype=numpy.int64)
3792-
na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
3804+
result = numpy.empty(len(series_data), dtype=numpy.int64)
3805+
na_data_arr = sdc.hiframes.api.get_nan_mask(series_data)
37933806
if kind == 'mergesort':
3794-
sort_nona = numpy_like.argsort(self._data[~na_data_arr], kind='mergesort')
3807+
sort_nona = numpy_like.argsort(series_data[~na_data_arr], kind='mergesort')
37953808
else:
3796-
sort_nona = numpy_like.argsort(self._data[~na_data_arr])
3809+
sort_nona = numpy_like.argsort(series_data[~na_data_arr])
37973810
q = 0
37983811
for id, i in enumerate(sort):
3799-
if id in set(sort[len(self._data) - na:]):
3812+
if id in set(sort[len(series_data) - na:]):
38003813
q += 1
38013814
else:
38023815
result[id] = sort_nona[id - q]
3803-
for i in sort[len(self._data) - na:]:
3816+
for i in sort[len(series_data) - na:]:
38043817
result[i] = -1
38053818

38063819
return pandas.Series(result, self._index)
38073820

38083821
return hpat_pandas_series_argsort_idx_impl
38093822

38103823
def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=None):
3824+
series_data = self._data # FIXME_Numba#6960
38113825
if kind != 'quicksort' and kind != 'mergesort':
38123826
raise ValueError("Method argsort(). Unsupported parameter. Given 'kind' != 'quicksort' or 'mergesort'")
38133827
if kind == 'mergesort':
3814-
sort = numpy_like.argsort(self._data, kind='mergesort')
3828+
sort = numpy_like.argsort(series_data, kind='mergesort')
38153829
else:
3816-
sort = numpy_like.argsort(self._data)
3830+
sort = numpy_like.argsort(series_data)
38173831
na = self.isna().sum()
3818-
result = numpy.empty(len(self._data), dtype=numpy.int64)
3819-
na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
3832+
result = numpy.empty(len(series_data), dtype=numpy.int64)
3833+
na_data_arr = sdc.hiframes.api.get_nan_mask(series_data)
38203834
if kind == 'mergesort':
3821-
sort_nona = numpy_like.argsort(self._data[~na_data_arr], kind='mergesort')
3835+
sort_nona = numpy_like.argsort(series_data[~na_data_arr], kind='mergesort')
38223836
else:
3823-
sort_nona = numpy_like.argsort(self._data[~na_data_arr])
3837+
sort_nona = numpy_like.argsort(series_data[~na_data_arr])
38243838
q = 0
38253839
for id, i in enumerate(sort):
3826-
if id in set(sort[len(self._data) - na:]):
3840+
if id in set(sort[len(series_data) - na:]):
38273841
q += 1
38283842
else:
38293843
result[id] = sort_nona[id - q]
3830-
for i in sort[len(self._data) - na:]:
3844+
for i in sort[len(series_data) - na:]:
38313845
result[i] = -1
38323846

38333847
return pandas.Series(result)

sdc/functions/numpy_like.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def sdc_astype_number_to_string_impl(self, dtype):
149149
arr_len = len(self)
150150

151151
# Get total bytes for new array
152-
for i in prange(arr_len):
152+
for i in np.arange(arr_len): # FIXME_Numba#6969: prange segfaults, use it when resolved
153153
item = self[i]
154154
num_bytes += get_utf8_size(str(item))
155155

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def run(self):
382382
'numpy>=1.16',
383383
'pandas==1.2.0',
384384
'pyarrow==2.0.0',
385-
'numba==0.52.0',
385+
'numba==0.53.1',
386386
'tbb'
387387
],
388388
cmdclass=sdc_build_commands,

0 commit comments

Comments
 (0)