Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 2cee174

Browse files
Merge pull request #973 from IntelPython/master
Merge from master 2021_w19
2 parents 7e83e15 + 18cf6ae commit 2cee174

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+3861
-1970
lines changed

README.rst

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
**********************************
1+
*****
2+
Sdc
3+
*****
4+
25
Intel® Scalable Dataframe Compiler
3-
**********************************
6+
###################################################
47

58
.. image:: https://travis-ci.com/IntelPython/sdc.svg?branch=master
69
:target: https://travis-ci.com/IntelPython/sdc
@@ -21,7 +24,7 @@ Intel® Scalable Dataframe Compiler (Intel® SDC) is an extension of `Numba*`_
2124
that enables compilation of `Pandas*`_ operations. It automatically vectorizes and parallelizes
2225
the code by leveraging modern hardware instructions and by utilizing all available cores.
2326

24-
Intel® SDC documentation can be found `here <https://intelpython.github.io/sdc-doc/>`_.
27+
Intel® SDC documentation can be found `here <https://intelpython.github.io/sdc-doc/>`__.
2528

2629
.. note::
2730
For maximum performance and stability, please use numba from ``intel/label/beta`` channel.
@@ -171,7 +174,7 @@ The built documentation will be located in the ``./sdc/docs/build/html`` directo
171174
To preview the documentation open ``index.html`` file.
172175

173176

174-
More information about building and adding documentation can be found `here <docs/README.rst>`_.
177+
More information about building and adding documentation can be found `here <docs/README.rst>`__.
175178

176179

177180
Running unit tests

buildscripts/run_examples.py

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,45 +32,82 @@
3232

3333
from pathlib import Path
3434
from utilities import SDC_Build_Utilities
35+
import multiprocessing as mp
3536

3637

3738
EXAMPLES_TO_SKIP = {'basic_usage_nyse_predict.py'}
39+
TEST_TIMEOUT = 120
3840

3941

40-
def run_examples(sdc_utils):
42+
# keep test results global to be visible for async callbacks
43+
class TestResults():
    """Shared tallies for one run of the SDC examples.

    The counters are class attributes and the class is never instantiated,
    so the pool's completion callbacks and ``run_examples`` all update the
    same values.
    """
    total = 0    # every example found, including skipped ones
    failed = 0
    passed = 0
    skipped = 0
    failed_examples = []    # names of failed examples, listed in the summary
4649

50+
51+
def run_single_example(path, sdc_utils):
52+
str_path = str(path)
53+
try:
54+
sdc_utils.log_info(sdc_utils.line_double)
55+
sdc_utils.run_command(f'python {str_path}')
56+
except Exception as e:
57+
raise Exception(str_path).with_traceback(e.__traceback__)
58+
59+
return str_path
60+
61+
62+
def normal_handler(test_name):
    """Success callback for ``pool.apply_async``: count and log a passed example.

    :param test_name: value returned by the worker function (the example path)
    """
    TestResults.passed += 1
    # NOTE(review): ``sdc_utils`` is not a parameter here, so it must exist as
    # a module-level name when the callback fires -- confirm at the entry point.
    sdc_utils.log_info(f'{test_name} PASSED')
65+
66+
67+
def error_handler(error):
    """Error callback for ``pool.apply_async``: count and log a failed example.

    :param error: exception raised by the worker; ``run_single_example`` raises
        with the example path as the whole message
    """
    TestResults.failed += 1
    # Use the complete message: taking only the last whitespace-separated token
    # would truncate an example path that contains spaces.
    test_name = str(error)
    # NOTE(review): ``sdc_utils`` is not a parameter here, so it must exist as
    # a module-level name when the callback fires -- confirm at the entry point.
    sdc_utils.log_info(f'{test_name} FAILED')
    TestResults.failed_examples.append(test_name)
72+
73+
74+
def run_examples(sdc_utils):
    """Run every example under ``sdc_utils.examples_path`` in a process pool.

    Examples named in ``EXAMPLES_TO_SKIP`` are counted as skipped. The others
    are submitted to the pool; ``normal_handler`` and ``error_handler`` record
    results as tasks finish. An example that has produced no result once all
    waits are over is recorded as failed and the pool is terminated, so a hung
    example can neither pass silently nor block the run forever.

    :param sdc_utils: build utilities object providing ``examples_path`` and
        ``log_info``; it is also passed to every worker
    :raises SystemExit: with code -1 when at least one example failed
    """
    os.chdir(str(sdc_utils.examples_path))
    pool = mp.Pool(max(1, mp.cpu_count()))

    # (example name, AsyncResult) pairs, so an unfinished task can be named
    task_queue = []
    for sdc_example in Path('.').glob('**/*.py'):
        TestResults.total += 1

        if sdc_example.name in EXAMPLES_TO_SKIP:
            TestResults.skipped += 1
            continue

        sdc_example = str(sdc_example)
        task_queue.append((sdc_example, pool.apply_async(
            run_single_example,
            [sdc_example, sdc_utils],
            callback=normal_handler,
            error_callback=error_handler
        )))

    for _, promise in task_queue:
        try:
            promise.get(TEST_TIMEOUT)
        except mp.TimeoutError:
            # Decided after the loop: the task may still finish while the
            # remaining results are being awaited.
            pass
        except Exception:
            # The failure itself was already counted by error_handler.
            traceback.print_exc()

    # No callback ever fires for a task that never finishes: count it here.
    hung_examples = [name for name, promise in task_queue if not promise.ready()]
    for example_name in hung_examples:
        TestResults.failed += 1
        TestResults.failed_examples.append(example_name)
        sdc_utils.log_info(f'{example_name} FAILED (no result within {TEST_TIMEOUT}s)')

    if hung_examples:
        # join() after close() would wait on the stuck workers indefinitely
        pool.terminate()
    else:
        pool.close()
    pool.join()

    summary_msg = f'SDC examples summary: {TestResults.total} RUN, {TestResults.passed} PASSED, ' \
                  f'{TestResults.failed} FAILED, {TestResults.skipped} SKIPPED'
    sdc_utils.log_info(summary_msg, separate=True)
    for test_name in TestResults.failed_examples:
        sdc_utils.log_info(f'FAILED: {test_name}')

    if TestResults.failed > 0:
        sdc_utils.log_info('Intel SDC examples FAILED', separate=True)
        exit(-1)
    sdc_utils.log_info('Intel SDC examples PASSED', separate=True)

conda-recipe/run_test.bat

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,33 +9,33 @@ if errorlevel 1 exit 1
99

1010
@rem TODO investigate root cause of NumbaPerformanceWarning
1111
@rem http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics
12-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_basic
12+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_basic
1313
if errorlevel 1 exit 1
14-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_series
14+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_series
1515
if errorlevel 1 exit 1
16-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_dataframe
16+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_dataframe
1717
if errorlevel 1 exit 1
18-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_hiframes
18+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_hiframes
1919
if errorlevel 1 exit 1
20-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_date
20+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_date
2121
if errorlevel 1 exit 1
22-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_strings
22+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_strings
2323
if errorlevel 1 exit 1
24-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_groupby
24+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_groupby
2525
if errorlevel 1 exit 1
26-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_join
26+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_join
2727
if errorlevel 1 exit 1
28-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_rolling
28+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_rolling
2929
if errorlevel 1 exit 1
30-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_ml
30+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_ml
3131
if errorlevel 1 exit 1
32-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_io
32+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_io
3333
if errorlevel 1 exit 1
34-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_hpat_jit
34+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_hpat_jit
3535
if errorlevel 1 exit 1
36-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_sdc_numpy
36+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_sdc_numpy
3737
if errorlevel 1 exit 1
38-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_prange_utils
38+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_prange_utils
3939
if errorlevel 1 exit 1
4040

4141
REM Link check for Documentation using Sphinx's in-built linkchecker

conda-recipe/run_test.sh

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ python -m sdc.tests.gen_test_data
1313

1414
# TODO investigate root cause of NumbaPerformanceWarning
1515
# http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics
16-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_basic
17-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_series
18-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_dataframe
19-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_hiframes
20-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_date
21-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_strings
22-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_groupby
23-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_join
24-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_rolling
25-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_ml
26-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_io
27-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_hpat_jit
28-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_sdc_numpy
29-
python -W ignore -u -m sdc.runtests -v sdc.tests.test_prange_utils
16+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_basic
17+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_series
18+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_dataframe
19+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_hiframes
20+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_date
21+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_strings
22+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_groupby
23+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_join
24+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_rolling
25+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_ml
26+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_io
27+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_hpat_jit
28+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_sdc_numpy
29+
python -W ignore -u -m numba.runtests -m -v sdc.tests.test_prange_utils

sdc/datatypes/categorical/functions.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@
2424
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2525
# *****************************************************************************
2626

27-
from sdc.utilities.utils import sdc_overload_attribute
27+
from sdc.utilities.utils import sdc_overload_attribute, sdc_overload
28+
from numba.extending import intrinsic
29+
from numba import types
2830

29-
from .types import CategoricalDtypeType
31+
from .types import CategoricalDtypeType, Categorical
3032

3133

3234
@sdc_overload_attribute(CategoricalDtypeType, 'ordered')
@@ -36,3 +38,29 @@ def pd_CategoricalDtype_categories_overload(self):
3638
def impl(self):
3739
return ordered
3840
return impl
41+
42+
43+
@intrinsic
def _categorical_len(tyctx, arr_type):
    """Intrinsic that reads the ``nitems`` field of an ``arr_type`` value.

    The result is typed as ``types.intp``.
    """
    ret_type = types.intp

    def codegen(context, builder, sig, args):
        arr_val, = args
        arr_info = context.make_helper(builder, arr_type, arr_val)
        # load the field through its pointer in the helper struct
        res = builder.load(arr_info._get_ptr_by_name('nitems'))
        return res

    return ret_type(arr_type), codegen
54+
55+
56+
@sdc_overload(len)
def pd_Categorical_len_overload(self):
    """Overload of ``len`` for ``Categorical``; declines every other type."""
    if not isinstance(self, Categorical):
        return None

    # Categorical uses ArrayModel and doesn't expose be_type members,
    # hence we use an intrinsic to access those fields. TODO: refactor
    def impl(self):
        return _categorical_len(self)

    return impl

0 commit comments

Comments
 (0)