From 06f366475f8223fa6662f448a69f0efc823fb5ad Mon Sep 17 00:00:00 2001 From: Sean Law Date: Wed, 10 Jul 2024 05:41:00 -0700 Subject: [PATCH 01/54] Added .pypirc support for uploading to PyPI with twine --- pypi.sh | 50 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/pypi.sh b/pypi.sh index 5e00a2a7f..c18c67260 100755 --- a/pypi.sh +++ b/pypi.sh @@ -59,36 +59,62 @@ # GROUP BY DATE(timestamp) # ) # ORDER BY date -# +############### +# Functions # +############### -rm -rf dist -python3 -m build --sdist --wheel +upload_test_pypi() +{ + # Upload to Test PyPi + if ! [ -f $HOME/.pypirc ]; then + # .pypirc file does not exist, prompt for API token + twine upload --verbose --repository-url https://test.pypi.org/legacy/ dist/* + else + # Get API token from .pypirc file + twine upload --verbose -r testpypi dist/* + fi +} + +upload_pypi() +{ + # Upload to PyPi + if ! [ -f $HOME/.pypirc ]; then + # .pypirc file does not exist, prompt for API token + twine upload dist/* + else + # Get API token from .pypirc file + twine upload -r pypi dist/* + fi +} # Use API Token instead of username+password # https://pypi.org/help/#apitoken # Place the API Token(s) in your $HOME/.pypirc +# +# # Example .pypirc file +# # [distutils] # index-servers = # pypi # testpypi -# +# # [pypi] # repository = https://upload.pypi.org/legacy/ # username = __token__ # password = -# +# # [testpypi] # repository = https://test.pypi.org/legacy/ # username = __token__ # password = -# Upload to Test PyPi -# (OLD) twine upload --verbose --repository-url https://test.pypi.org/legacy/ dist/* -twine upload --verbose -r testpypi dist/* - -# Upload to PyPI -# (OLD) twine upload dist/* -# twine upload -r pypi dist/* +########### +# Main # +########### +rm -rf dist +python3 -m build --sdist --wheel +upload_test_pypi +# upload_pypi rm -rf build dist stumpy.egg-info From 4fe6857fdc39d20dd084f0de4d9a1c343e9b683e Mon Sep 17 00:00:00 2001 From: Sean Law Date: 
Wed, 10 Jul 2024 05:42:08 -0700 Subject: [PATCH 02/54] Enabled tasklist extension for Myst --- docs/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/conf.py b/docs/conf.py index 7af59ac58..9c8b1409b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -234,4 +234,5 @@ "colon_fence", "amsmath", "dollarmath", + "tasklist", ] From 0c45b772eba350d248b6b76c8ce05a68d5778587 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Wed, 10 Jul 2024 18:32:24 -0700 Subject: [PATCH 03/54] Fixed bad URL --- docs/Tutorial_Time_Series_Chains.ipynb | 4 ++-- test.sh | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/Tutorial_Time_Series_Chains.ipynb b/docs/Tutorial_Time_Series_Chains.ipynb index 66b8ba77f..0163b3492 100644 --- a/docs/Tutorial_Time_Series_Chains.ipynb +++ b/docs/Tutorial_Time_Series_Chains.ipynb @@ -10,7 +10,7 @@ "\n", "## Forecasting Web Query Data with Anchored Time Series Chains (ATSC)\n", "\n", - "This example is adapted from the [Web Query Volume case study](http://www2015.thewebconf.org/documents/proceedings/companion/p721.pdf) and utilizes the main takeaways from the [Matrix Profile VII](https://www.cs.ucr.edu/~eamonn/chains_ICDM.pdf) research paper." + "This example is adapted from the [Web Query Volume case study: \"The web as a jungle: Nonlinear dynamical systems for co-evolving online activities\"](https://archives.iw3c2.org/www2015/documents/proceedings/proceedings/p721.pdf) and utilizes the main takeaways from the [Matrix Profile VII](https://www.cs.ucr.edu/~eamonn/chains_ICDM.pdf) research paper." 
] }, { @@ -605,7 +605,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.12.4" } }, "nbformat": 4, diff --git a/test.sh b/test.sh index ad29f9863..a85fb07f1 100755 --- a/test.sh +++ b/test.sh @@ -268,7 +268,9 @@ show() check_links() { echo "Checking notebook links" - pytest --check-links docs/Tutorial_*.ipynb + export JUPYTER_PLATFORM_DIRS=1 + jupyter --paths + pytest --check-links docs/Tutorial_*.ipynb notebooks/Tutorial_*.ipynb docs/*.md docs/*.rst ./*.md ./*.rst } clean_up() From d60fa3938eb114af6770b18568f15dbff6de5b18 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 11 Jul 2024 12:47:15 -0700 Subject: [PATCH 04/54] Fixed typo --- test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.sh b/test.sh index a85fb07f1..74c1e3ce4 100755 --- a/test.sh +++ b/test.sh @@ -257,7 +257,7 @@ show() { echo "Current working directory: " `pwd` echo "Black version: " `python -c "import black; print(black.__version__)"` - echo "Flake8 versoin: " `python -c "import flake8; print(flake8.__version__)"` + echo "Flake8 version: " `python -c "import flake8; print(flake8.__version__)"` echo "Python version: " `python -c "import platform; print(platform.python_version())"` echo "NumPy version: " `python -c "import numpy; print(numpy.__version__)"` echo "SciPy version: " `python -c "import scipy; print(scipy.__version__)"` From 268c78ee4de41f1cb2a4098b351181210e28c852 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 11 Jul 2024 18:40:19 -0700 Subject: [PATCH 05/54] Fixed #1002 Added Named Multi-dimensional Matrix Profiles --- stumpy/maamp.py | 3 ++- stumpy/maamped.py | 3 ++- stumpy/mmparray.py | 3 +++ stumpy/mstump.py | 3 ++- stumpy/mstumped.py | 3 ++- test.sh | 11 +++++++++++ tests/test_mmparray.py | 34 ++++++++++++++++++++++++++++++++++ 7 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 stumpy/mmparray.py create mode 100644 tests/test_mmparray.py diff --git 
a/stumpy/maamp.py b/stumpy/maamp.py index 0bc9f8904..d142c8e40 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -8,6 +8,7 @@ from numba import njit, prange from . import config, core +from .mmparray import mparray def _multi_mass_absolute(Q, T, m, Q_subseq_isfinite, T_subseq_isfinite, p=2.0): @@ -979,4 +980,4 @@ def maamp(T, m, include=None, discords=False, p=2.0): discords, ) - return P, I + return mparray(P_=P, I_=I) diff --git a/stumpy/maamped.py b/stumpy/maamped.py index 86bdf53bd..b992b1ec1 100644 --- a/stumpy/maamped.py +++ b/stumpy/maamped.py @@ -8,6 +8,7 @@ from . import config, core from .maamp import _get_first_maamp_profile, _get_multi_p_norm, _maamp +from .mmparray import mparray def _dask_maamped( @@ -412,4 +413,4 @@ def maamped(client, T, m, include=None, discords=False, p=2.0): discords, ) - return P, I + return mparray(P_=P, I_=I) diff --git a/stumpy/mmparray.py b/stumpy/mmparray.py new file mode 100644 index 000000000..087bb156e --- /dev/null +++ b/stumpy/mmparray.py @@ -0,0 +1,3 @@ +from collections import namedtuple + +mparray = namedtuple("mparray", "P_,I_") diff --git a/stumpy/mstump.py b/stumpy/mstump.py index bdb1e8ada..9b2dab7d4 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -10,6 +10,7 @@ from . import config, core from .maamp import maamp, maamp_mdl, maamp_multi_distance_profile, maamp_subspace +from .mmparray import mparray def _multi_mass( @@ -1281,4 +1282,4 @@ def mstump( discords, ) - return P, I + return mparray(P_=P, I_=I) diff --git a/stumpy/mstumped.py b/stumpy/mstumped.py index 5e369a20d..6f7c8dad3 100644 --- a/stumpy/mstumped.py +++ b/stumpy/mstumped.py @@ -8,6 +8,7 @@ from . 
import config, core from .maamped import maamped +from .mmparray import mparray from .mstump import _get_first_mstump_profile, _get_multi_QT, _mstump @@ -531,4 +532,4 @@ def mstumped( discords, ) - return P, I + return mparray(P_=P, I_=I) diff --git a/test.sh b/test.sh index 74c1e3ce4..1133618e5 100755 --- a/test.sh +++ b/test.sh @@ -19,6 +19,8 @@ do test_mode="gpu" elif [[ $var == "show" ]]; then test_mode="show" + elif [[ $var == "count" ]]; then + test_mode="count" elif [[ $var == "custom" ]]; then test_mode="custom" elif [[ $var == "report" ]]; then @@ -273,6 +275,12 @@ check_links() pytest --check-links docs/Tutorial_*.ipynb notebooks/Tutorial_*.ipynb docs/*.md docs/*.rst ./*.md ./*.rst } +count() +{ + test_count=$(pytest --collect-only -q | sed '$d' | sed '$d' | wc -l | sed 's/ //g') + echo "Found $test_count Unit Tests" +} + clean_up() { echo "Cleaning Up" @@ -340,6 +348,9 @@ elif [[ $test_mode == "report" ]]; then elif [[ $test_mode == "gpu" ]]; then echo "Executing GPU Unit Tests Only" test_gpu +elif [[ $test_mode == "count" ]]; then + echo "Counting Unit Tests" + count elif [[ $test_mode == "links" ]]; then echo "Check Notebook Links Only" check_links diff --git a/tests/test_mmparray.py b/tests/test_mmparray.py new file mode 100644 index 000000000..06bf81bfa --- /dev/null +++ b/tests/test_mmparray.py @@ -0,0 +1,34 @@ +import naive +import numpy as np +import numpy.testing as npt +import pytest + +from stumpy import maamp, mstump + +test_data = [ + (np.array([[584, -11, 23, 79, 1001, 0, -19]], dtype=np.float64), 3), + (np.random.uniform(-1000, 1000, [5, 20]).astype(np.float64), 5), +] + + +@pytest.mark.parametrize("T, m", test_data) +def test_mmparray_mstump(T, m): + excl_zone = int(np.ceil(m / 4)) + + ref_P, ref_I = naive.mstump(T, m, excl_zone) + comp = mstump(T, m) + + npt.assert_almost_equal(ref_P, comp.P_) + npt.assert_almost_equal(ref_I, comp.I_) + + +@pytest.mark.parametrize("T, m", test_data) +def test_mmparray_maamp(T, m): + excl_zone = 
int(np.ceil(m / 4)) + + for p in [1.0, 2.0, 3.0]: + ref_P, ref_I = naive.maamp(T, m, excl_zone, p=p) + comp = maamp(T, m, p=p) + + npt.assert_almost_equal(ref_P, comp.P_) + npt.assert_almost_equal(ref_I, comp.I_) From e7bb2b85ee8761683480ac093437e71989252d15 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 11 Jul 2024 18:48:01 -0700 Subject: [PATCH 06/54] Minor change --- test.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test.sh b/test.sh index 1133618e5..e76b54135 100755 --- a/test.sh +++ b/test.sh @@ -258,12 +258,15 @@ test_gpu() show() { echo "Current working directory: " `pwd` - echo "Black version: " `python -c "import black; print(black.__version__)"` - echo "Flake8 version: " `python -c "import flake8; print(flake8.__version__)"` + echo "Black version: " `python -c 'exec("try:\n\timport black;\n\tprint(black.__version__);\nexcept ModuleNotFoundError:\n\tprint(\"Module Not Found\");")'` + echo "Flake8 version: " `python -c 'exec("try:\n\timport flake8;\n\tprint(flake8.__version__);\nexcept ModuleNotFoundError:\n\tprint(\"Module Not Found\");")'` echo "Python version: " `python -c "import platform; print(platform.python_version())"` - echo "NumPy version: " `python -c "import numpy; print(numpy.__version__)"` - echo "SciPy version: " `python -c "import scipy; print(scipy.__version__)"` - echo "Numba version: " `python -c "import numba; print(numba.__version__)"` + echo "NumPy version: " `python -c 'exec("try:\n\timport numpy;\n\tprint(numpy.__version__);\nexcept ModuleNotFoundError:\n\tprint(\"Module Not Found\");")'` + echo "SciPy version: " `python -c 'exec("try:\n\timport scipy;\n\tprint(scipy.__version__);\nexcept ModuleNotFoundError:\n\tprint(\"Module Not Found\");")'` + echo "Numba version: " `python -c 'exec("try:\n\timport numba;\n\tprint(numba.__version__);\nexcept ModuleNotFoundError:\n\tprint(\"Module Not Found\");")'` + echo "Dask version: " `python -c 'exec("try:\n\timport 
dask;\n\tprint(dask.__version__);\nexcept ModuleNotFoundError:\n\tprint(\"Module Not Found\");")'` + echo "Distributed version: " `python -c 'exec("try:\n\timport distributed;\n\tprint(distributed.__version__);\nexcept ModuleNotFoundError:\n\tprint(\"Module Not Found\");")'` + echo "PyTest version: " `python -c 'exec("try:\n\timport pytest;\n\tprint(pytest.__version__);\nexcept ModuleNotFoundError:\n\tprint(\"Module Not Found\");")'` exit 0 } From 87fbd57be8f4e621a76c09a265e52dc1e4dd9c88 Mon Sep 17 00:00:00 2001 From: Nima Sarajpoor Date: Sat, 13 Jul 2024 22:02:43 -0400 Subject: [PATCH 07/54] Fixes #982 Reduce Github Workflow RunTime (#1004) * empty commit * Reduce computational load of coverage test of ostinato gpu functions --- tests/test_gpu_aamp_ostinato.py | 4 ++-- tests/test_gpu_ostinato.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_gpu_aamp_ostinato.py b/tests/test_gpu_aamp_ostinato.py index 4545ae48c..8284aeff2 100644 --- a/tests/test_gpu_aamp_ostinato.py +++ b/tests/test_gpu_aamp_ostinato.py @@ -80,13 +80,13 @@ def test_input_not_overwritten(): def test_extract_several_consensus(): # This test is to further ensure that the function `gpu_aamp_ostinato` # does not tamper with the original data. 
- Ts = [np.random.rand(n) for n in [256, 512, 1024]] + Ts = [np.random.rand(n) for n in [64, 128]] Ts_ref = [T.copy() for T in Ts] Ts_comp = [T.copy() for T in Ts] m = 20 - k = 5 # Get the first `k` consensus motifs + k = 2 # Get the first `k` consensus motifs for _ in range(k): # Find consensus motif and its NN in each time series in Ts_comp # Remove them from Ts_comp as well as Ts_ref, and assert that the diff --git a/tests/test_gpu_ostinato.py b/tests/test_gpu_ostinato.py index 9bd8f86cf..30eb9539c 100644 --- a/tests/test_gpu_ostinato.py +++ b/tests/test_gpu_ostinato.py @@ -141,13 +141,13 @@ def test_input_not_overwritten(): def test_extract_several_consensus(): # This test is to further ensure that the function `gpu_ostinato` # does not tamper with the original data. - Ts = [np.random.rand(n) for n in [256, 512, 1024]] + Ts = [np.random.rand(n) for n in [64, 128]] Ts_ref = [T.copy() for T in Ts] Ts_comp = [T.copy() for T in Ts] m = 20 - k = 5 # Get the first `k` consensus motifs + k = 2 # Get the first `k` consensus motifs for _ in range(k): # Find consensus motif and its NN in each time series in Ts_comp # Remove them from Ts_comp as well as Ts_ref, and assert that the From 0107d39b632b748dffe5f419d08517ea3f6fc866 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sun, 14 Jul 2024 00:39:45 -0400 Subject: [PATCH 08/54] Show elapsed time for unit/coverage tests --- test.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test.sh b/test.sh index e76b54135..bd2485712 100755 --- a/test.sh +++ b/test.sh @@ -196,6 +196,7 @@ test_custom() test_unit() { echo "Testing Numba JIT Compiled Functions" + SECONDS=0 if [[ ${#custom_testfiles[@]} -eq "0" ]]; then for testfile in tests/test_*.py do @@ -209,6 +210,8 @@ test_unit() check_errs $? 
done fi + duration=$SECONDS + echo "Elapsed Time: $((duration / 60)) minutes and $((duration % 60)) seconds" } test_coverage() @@ -226,6 +229,7 @@ test_coverage() # We always attempt to test everything but we may ignore things (ray, helper scripts) when we generate the coverage report + SECONDS=0 if [[ ${#custom_testfiles[@]} -eq "0" ]]; then # Execute all tests for testfile in tests/test_*.py; @@ -241,6 +245,8 @@ test_coverage() check_errs $? done fi + duration=$SECONDS + echo "Elapsed Time: $((duration / 60)) minutes and $((duration % 60)) seconds" show_coverage_report } From 1930d81758ee8de3c994e247a1212b55dd724105 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 18 Jul 2024 15:07:48 -0400 Subject: [PATCH 09/54] Fixed #1008 Support venv in test.sh --- docs/Contribute.ipynb | 54 +++++++++++++++++++++---------------------- setup.sh | 11 ++++----- test.sh | 6 ++--- 3 files changed, 34 insertions(+), 37 deletions(-) diff --git a/docs/Contribute.ipynb b/docs/Contribute.ipynb index 7f356af7a..e50ff0788 100644 --- a/docs/Contribute.ipynb +++ b/docs/Contribute.ipynb @@ -86,41 +86,41 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Make your Changes\n", - "\n", - "When working on a new project, there are often going to be dependencies. In order to isolate dependencies between different projects, it's a good practice to use a virtual environment. STUMPY supports both [venv](https://docs.python.org/3/library/venv.html) and [conda](https://docs.conda.io/en/latest/). After creating and activating either of these virtual environments, any dependencies you install will be isolated (so they don't break anything else on your system).\n", - "\n", - "First, install the dependencies using the [From Source Section](https://stumpy.readthedocs.io/en/latest/install.html#from-source) of the installation instructions. 
If you are building STUMPY within a conda environment, then you may consider using the `conda.sh` shell script which will automatically assist you in installing all of the required dependencies required for testing. \n", + "## Adhere to CONTRIBUTING.md Guidance\n", "\n", - "A good check to make sure everything is working 100% is to run the unit tests. For STUMPY, we have scripts to help you do that. You'll run `./setup.sh && ./test.sh`. In some cases, you may notice an uninstallation message for STUMPY but don't worry. This happens if you have had previously installed STUMPY as the `./setup.sh` command first uninstalls any existing version of STUMPY and then it will re-installs it from source (the local, cloned, development version). So, everything should be all set.\n", + "One of the great benefits of open source is the ability to collaborate with developers from around the world. However, you can imagine that combining their contributions into one coherent codebase while maintaining consistency can be challenging. Luckily, recent gains in automated tooling have made this a lot easier. Remember [CONTRIBUTING.md](https://github.com/TDAmeritrade/stumpy/blob/master/CONTRIBUTING.md)? There are a couple of things we want to make sure we do before we submit a `pull request`.\n", "\n", - "Ideally, you'll see the excellent STUMPY test coverage passing. If things start failing or breaking, then you may have a missing dependency problem. There's nothing worse than finding this out *after* you've made changes.\n", + "First, if you implemented a new feature or changed an existing feature, then you are also responsible for providing the unit test. This can often be just as much work as the feature, so make sure you account for it.\n", "\n", - "If all of the tests pass, then you know that you have a working copy of STUMPY to start your development on. Go ahead and implement your feature or change!\n", + "Next, run `flake8` and `black`. 
[flake8](https://flake8.pycqa.org/en/latest/) is a linter that checks the style and quality of the code. [black](https://pypi.org/project/black/) makes any necessary changes to ensure consistent code format.\n", "\n", "Checklist: \n", - "- [ ] Create a virtual environment \n", - "- [ ] Install dependencies \n", - "- [ ] Run the unit tests" + "- [ ] Write/update any unit tests \n", + "- [ ] Run `black --exclude=\".*\\.ipynb\" --extend-exclude=\".venv\" --diff ./` to reformat any python files you changed \n", + "- [ ] Run `flake8 --extend-exclude=.venv ./` to identify any formatting errors before you submit your `pull request` \n", + "- [ ] Run the unit tests. In STUMPY, you'll run `./setup.sh dev && ./test.sh`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Adhere to CONTRIBUTING.md Guidance\n", + "## Make your Changes\n", "\n", - "One of the great benefits of open source is the ability to collaborate with developers from around the world. However, you can imagine that combining their contributions into one coherent codebase while maintaining consistency can be challenging. Luckily, recent gains in automated tooling have made this a lot easier. Remember [CONTRIBUTING.md](https://github.com/TDAmeritrade/stumpy/blob/master/CONTRIBUTING.md)? There are a couple of things we want to make sure we do before we submit a `pull request`.\n", + "When working on a new project, there are often going to be dependencies. In order to isolate dependencies between different projects, it's a good practice to use a virtual environment. STUMPY supports both [venv](https://docs.python.org/3/library/venv.html) and [conda](https://docs.conda.io/en/latest/). After creating and activating either of these virtual environments, any dependencies you install will be isolated (so they don't break anything else on your system).\n", "\n", - "First, if you implemented a new feature or changed an existing feature, then you are also responsible for providing the unit test. 
This can often be just as much work as the feature, so make sure you account for it.\n", + "First, install the dependencies using the [From Source Section](https://stumpy.readthedocs.io/en/latest/install.html#from-source) of the installation instructions. If you are building STUMPY within a conda environment, then you may consider using the `conda.sh` shell script which will automatically assist you in installing all of the required dependencies required for testing. \n", "\n", - "Next, run `flake8` and `black`. [flake8](https://flake8.pycqa.org/en/latest/) is a linter that checks the style and quality of the code. [black](https://pypi.org/project/black/) makes any necessary changes to ensure consistent code format.\n", + "A good check to make sure everything is working 100% is to run the unit tests. For STUMPY, we have scripts to help you do that. You'll run `./setup.sh dev && ./test.sh`. In some cases, you may notice an uninstallation message for STUMPY but don't worry. This happens if you have had previously installed STUMPY as the `./setup.sh dev` command first uninstalls any existing version of STUMPY and then it will re-installs it from source (the local, cloned, development version). So, everything should be all set.\n", + "\n", + "Ideally, you'll see the excellent STUMPY test coverage passing. If things start failing or breaking, then you may have a missing dependency problem. There's nothing worse than finding this out *after* you've made changes.\n", + "\n", + "If all of the tests pass, then you know that you have a working copy of STUMPY to start your development on. Go ahead and implement your feature or change!\n", "\n", "Checklist: \n", - "- [ ] Write/update any unit tests \n", - "- [ ] Run `black` to reformat any python files you changed \n", - "- [ ] Run `flake8` to identify any formatting errors before you submit your `pull request` \n", - "- [ ] Run the unit tests. 
In STUMPY, you'll run `./setup.sh && ./test.sh`" + "- [ ] Create a virtual environment \n", + "- [ ] Install dependencies \n", + "- [ ] Run the unit tests" ] }, { @@ -157,7 +157,7 @@ "- [ ] `git pull` \n", "- [ ] `git checkout branch_name` \n", "- [ ] `git merge main` \n", - "- [ ] Run the unit tests: `./setup.sh && ./test.sh` \n", + "- [ ] Run the unit tests: `./setup.sh dev && ./test.sh` \n", "\n", "Checklist (fetching upstream locally): \n", "- [ ] `git fetch upstream`\n", @@ -165,7 +165,7 @@ "- [ ] `git merge upstream/main` \n", "- [ ] `git checkout branch_name` \n", "- [ ] `git merge main` \n", - "- [ ] Run the unit tests: `./setup.sh && ./test.sh` \n", + "- [ ] Run the unit tests: `./setup.sh dev && ./test.sh` \n", "\n", "![Fetch Upstream Fork](images/fetch_upstream_fork.png) " ] @@ -180,7 +180,7 @@ "\n", "Here's your next chance to communicate with the maintainers. Let them know what changes you made and if you need any help. This pull request will now be the running dialogue between you and the maintainers as you work on the issue.\n", "\n", - "Continuous integration systems automatically determine the suitability of merging pull requests. They check the formatting, test coverage, and test success of your code. After you submit a pull request, you'll see these running (as comments in your pull request). If they fail, your code will not be merged until the failure is fixed. In STUMPY, locally passing `flake8`, `black`, and `./setup.sh && ./test.sh` should ensure your continuous integration tests pass.\n", + "Continuous integration systems automatically determine the suitability of merging pull requests. They check the formatting, test coverage, and test success of your code. After you submit a pull request, you'll see these running (as comments in your pull request). If they fail, your code will not be merged until the failure is fixed. 
In STUMPY, locally passing `flake8`, `black`, and `./setup.sh dev && ./test.sh` should ensure your continuous integration tests pass.\n", "\n", "Checklist: \n", "- [ ] Create a Pull Request \n", @@ -249,9 +249,9 @@ "- [ ] Install dependencies \n", "- [ ] Run the unit tests \n", "- [ ] Write/update any unit tests \n", - "- [ ] Run `black` to reformat any python files you changed \n", - "- [ ] Run `flake8` to identify any formatting errors before you submit your `pull request` \n", - "- [ ] Run the unit tests. In STUMPY, you'll run `./setup.sh && ./test.sh` \n", + "- [ ] Run `black --exclude=\".*\\.ipynb\" --extend-exclude=\".venv\" --diff ./` to reformat any python files you changed \n", + "- [ ] Run `flake8 --extend-exclude=.venv ./` to identify any formatting errors before you submit your `pull request` \n", + "- [ ] Run the unit tests. In STUMPY, you'll run `./setup.sh dev && ./test.sh` \n", "- [ ] `git add your_file` \n", "- [ ] `git commit -m 'Great Commit Message'` \n", "- [ ] `git push` \n", @@ -266,7 +266,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -280,7 +280,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.12.4" } }, "nbformat": 4, diff --git a/setup.sh b/setup.sh index 1125317c1..65f62a669 100755 --- a/setup.sh +++ b/setup.sh @@ -8,19 +8,16 @@ echo "y" | python -m pip uninstall stumpy # Parse command line arguments for var in "$@" do - if [[ $var == "dev" ]]; then - echo 'Installing stumpy locally in "--editable" mode' - mode="--editable" + if [[ $var == "dev" ]] || [[ $var == "ci" ]]; then + echo 'Installing stumpy locally with extra "ci" requirement' + mode="" + extra="[ci]" elif [[ $var == "edit" ]]; then echo 'Installing stumpy locally in "--editable" mode' mode="--editable" elif [[ $var == "-e" ]]; then echo 'Installing stumpy locally in "--editable" mode' 
mode="--editable" - elif [[ $var == "ci" ]]; then - echo 'Installing stumpy locally with extra "ci" requirement' - mode="" - extra="[ci]" else echo "Installing stumpy in site-packages" fi diff --git a/test.sh b/test.sh index bd2485712..06fe9819c 100755 --- a/test.sh +++ b/test.sh @@ -57,14 +57,14 @@ check_errs() check_black() { echo "Checking Black Code Formatting" - black --check --exclude=".*\.ipynb" --diff ./ + black --check --exclude=".*\.ipynb" --extend-exclude=".venv" --diff ./ check_errs $? } check_isort() { echo "Checking iSort Import Formatting" - isort --profile black --check-only ./ + isort --profile black --skip .venv --check-only ./ check_errs $? } @@ -78,7 +78,7 @@ check_docstrings() check_flake() { echo "Checking Flake8 Style Guide Enforcement" - flake8 ./ + flake8 --extend-exclude=.venv ./ check_errs $? } From 3077d0ddfb315464321dc86f8ec3bf2cab9ce3b1 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 18 Jul 2024 20:35:26 -0400 Subject: [PATCH 10/54] Updated contributing materials --- CONTRIBUTING.md | 2 +- docs/Contribute.ipynb | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 07f1228a1..dc9f6a4e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,6 +13,6 @@ Contributing to documentation is the easiest way to get started. Providing simpl ## Code -Code contributions are always welcome, from simple bug fixes, to new features. To contribute code please [fork the project](https://github.com/TDAmeritrade/stumpy/fork) into your own repository, make changes there, run [black](https://github.com/python/black) and [flake8](http://flake8.pycqa.org/en/latest/) on your code, add tests for bugs/new features, and then submit a pull request. If you are fixing a known issue please add the issue number to the PR message. If you are fixing a new issue feel free to file an issue and then reference it in the PR. 
You can [browse open issues](https://github.com/TDAmeritrade/stumpy/issues), or consult the [project roadmap](https://github.com/TDAmeritrade/stumpy/issues/1), for potential code contributions. Fixes for issues tagged with 'help wanted' are especially appreciated. +Code contributions are always welcome, from simple bug fixes, to new features. To contribute code please [fork the project](https://github.com/TDAmeritrade/stumpy/fork) into your own repository, make changes there, run [black](https://github.com/python/black) and [flake8](http://flake8.pycqa.org/en/latest/) on your code, add tests for bugs/new features, and then submit a pull request. If you are fixing a known issue please add the issue number to the PR message. If you are fixing a new issue feel free to file an issue and then reference it in the PR. You can [browse open issues](https://github.com/TDAmeritrade/stumpy/issues) for potential code contributions. Fixes for issues tagged with 'help wanted' are especially appreciated. diff --git a/docs/Contribute.ipynb b/docs/Contribute.ipynb index e50ff0788..038c435dd 100644 --- a/docs/Contribute.ipynb +++ b/docs/Contribute.ipynb @@ -15,7 +15,7 @@ "source": [ "## Git and GitHub\n", "\n", - "You don't need a [GitHub](https://github.com/) account to use STUMPY but you do need an account if you want to make a code or documentation contribution. This GitHub account will allow you to contribute to many of the most popular open source projects. Additionally, it will give you a place to store, track, and coordinate progress on your own projects. GitHub is a `remote repository` for the version control system `git`. `Git` is a tool that tracks changes in projects by constructing a directed acyclic graph (fancy way of allowing us to create `branches`, `revert` changes, and identify `conflicts`). You can use `git` on your local machine but when you want to save your work or collaborate with other team members, you `push` your work to a `remote` repository. 
In this case, that `remote` will be GitHub.\n", + "You don't need a [GitHub](https://github.com/) account to use STUMPY but you do need an account if you want to make a code or documentation contribution. This GitHub account will allow you to contribute to many of the most popular open source projects. Additionally, it will give you a place to store, track, and coordinate progress on your own projects. GitHub is a `remote repository` for the version control system `git`. `Git` is a versioning tool that tracks changes in projects by constructing a directed acyclic graph (fancy way of allowing us to create `branches`, `revert` changes, and identify `conflicts`). You can use `git` on your local machine but when you want to save your work or collaborate with other team members, you `push` your work to a `remote` repository. In this case, that `remote` will be GitHub.\n", "\n", "With all of that in mind, you'll also need to [install git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).\n", "\n", @@ -40,7 +40,7 @@ "source": [ "## Find your contribution\n", "\n", - "You've decided that you want to contribute but how do you approach a new project and figure out where you can help? This will feel like like trying to jump into a conversation that's been happening for months (or years) and can often be intimidating. If you've used the project before, you'll be more familiar with its structure and API but you probably haven't \"peeked behind the curtain\". The best place to get started is the list of [Issues](https://github.com/TDAmeritrade/stumpy/issues). These are requests/changes/bugs that other people have identified. Feel free to peruse the list to get a feel for all of the ongoing work in the project. Often, maintainers will have a label system to organize the issues. These labels may include things like `documentation` or `enhancement`. 
For new contributors, many projects have a [good first issue label](https://github.com/TDAmeritrade/stumpy/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22).\n", + "You've decided that you want to contribute but how do you approach a new project and figure out where you can help? This will feel like like trying to jump into a conversation that's been happening for months (or years) and can often be intimidating. If you've used the project before, you'll be more familiar with its structure and API but you probably haven't \"peeked behind the curtain\". The best place to get started is the list of [Issues](https://github.com/TDAmeritrade/stumpy/issues). These are feature requests/changes/bugs that other people have identified. Feel free to peruse the list to get a feel for all of the ongoing work in the project. Often, maintainers will have a labeling system to organize the issues. These labels may include things like `documentation` or `enhancement`. For new contributors, many projects have a [good first issue label](https://github.com/TDAmeritrade/stumpy/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22).\n", "\n", "Your next stop, should always be [CONTRIBUTING.md](https://github.com/TDAmeritrade/stumpy/blob/master/CONTRIBUTING.md). Here, the maintainers outline any guidance they have for contributors,\n", "\n", @@ -68,7 +68,7 @@ "\n", "First, you need to create a copy of the repository for you to work off of; this is called a `fork`. Here are instructions on [forking a repository](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo). Now you have your own copy associated with your GitHub account.\n", "\n", - "Next, you need to `clone` this copy of the repository. This simply downloads it to your computer so that you can work on it. Here are instructions on [cloning a repository](https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository). 
Now that you have a copy downloaded onto your computer. *remember* to clone your fork and not [STUMPY](https://github.com/TDAmeritrade/stumpy).\n", + "Next, you need to `clone` this copy of the repository. This simply downloads it to your computer so that you can work on it. Here are instructions on [cloning a repository](https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository). It's super important to *remember* to clone your fork and not [STUMPY](https://github.com/TDAmeritrade/stumpy).\n", "\n", "Then, you'll need to create a `branch`. Here's an overview of [how git branches work](https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging) but if you're working in the command line, then you probably need to type `git checkout -b branch_name`. In this case, `branch_name` should be replaced with something descriptive about the change that you are making like `change_incorrect_variable` or `document_x`.\n", "\n", @@ -105,11 +105,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Make your Changes\n", + "# Setting Up Your Environment\n", "\n", "When working on a new project, there are often going to be dependencies. In order to isolate dependencies between different projects, it's a good practice to use a virtual environment. STUMPY supports both [venv](https://docs.python.org/3/library/venv.html) and [conda](https://docs.conda.io/en/latest/). After creating and activating either of these virtual environments, any dependencies you install will be isolated (so they don't break anything else on your system).\n", "\n", - "First, install the dependencies using the [From Source Section](https://stumpy.readthedocs.io/en/latest/install.html#from-source) of the installation instructions. If you are building STUMPY within a conda environment, then you may consider using the `conda.sh` shell script which will automatically assist you in installing all of the required dependencies required for testing. 
\n", + "First, if you are using `venv` then you can install STUMPY and all of its the dependencies using the convenient `./setup.sh dev`. If you are building STUMPY within a `conda` environment, then you may consider using the `./conda.sh` shell script which will automatically assist you in installing all of the required dependencies required for STUMPY development. \n", "\n", "A good check to make sure everything is working 100% is to run the unit tests. For STUMPY, we have scripts to help you do that. You'll run `./setup.sh dev && ./test.sh`. In some cases, you may notice an uninstallation message for STUMPY but don't worry. This happens if you have had previously installed STUMPY as the `./setup.sh dev` command first uninstalls any existing version of STUMPY and then it will re-installs it from source (the local, cloned, development version). So, everything should be all set.\n", "\n", From 2770a88e2db731ceee6bed08f07b88248bda4cb5 Mon Sep 17 00:00:00 2001 From: ejorgensen-wl Date: Wed, 24 Jul 2024 17:48:19 -0400 Subject: [PATCH 11/54] Fixed #1014 Fix max_matches=None bug (#1015) * Fix max_matches=None bug * Alternate max_matches=None fix --- stumpy/aamp_motifs.py | 2 +- stumpy/motifs.py | 2 +- tests/test_motifs.py | 27 +++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/stumpy/aamp_motifs.py b/stumpy/aamp_motifs.py index 0d5f67dd1..c7a852f72 100644 --- a/stumpy/aamp_motifs.py +++ b/stumpy/aamp_motifs.py @@ -268,7 +268,7 @@ def aamp_motifs( m = T.shape[-1] - P.shape[-1] + 1 excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) if max_matches is None: # pragma: no cover - max_matches = np.inf + max_matches = P.shape[-1] if cutoff is None: # pragma: no cover P_copy = P.copy().astype(np.float64) P_copy[np.isinf(P_copy)] = np.nan diff --git a/stumpy/motifs.py b/stumpy/motifs.py index eaad4cfe1..8c560f659 100644 --- a/stumpy/motifs.py +++ b/stumpy/motifs.py @@ -334,7 +334,7 @@ def motifs( m = T.shape[-1] - P.shape[-1] + 1 
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) if max_matches is None: # pragma: no cover - max_matches = np.inf + max_matches = P.shape[-1] if cutoff is None: # pragma: no cover P_copy = P.copy().astype(np.float64) P_copy[np.isinf(P_copy)] = np.nan diff --git a/tests/test_motifs.py b/tests/test_motifs.py index e7d571f81..61e9dedf1 100644 --- a/tests/test_motifs.py +++ b/tests/test_motifs.py @@ -656,3 +656,30 @@ def test_motifs_with_isconstant(): npt.assert_almost_equal(ref_distances, comp_distance) npt.assert_almost_equal(ref_indices, comp_indices) + + +def test_motifs_with_max_matches_none(): + T = np.random.rand(16) + m = 3 + + max_motifs = 1 + max_matches = None + max_distance = np.inf + cutoff = np.inf + + # performant + mp = naive.stump(T, m, row_wise=True) + comp_distance, comp_indices = motifs( + T, + mp[:, 0].astype(np.float64), + min_neighbors=1, + max_distance=max_distance, + cutoff=cutoff, + max_matches=max_matches, + max_motifs=max_motifs, + ) + + ref_len = len(T) - m + 1 + + npt.assert_(ref_len >= comp_distance.shape[1]) + npt.assert_(ref_len >= comp_indices.shape[1]) From 4eb383e8c9964b7e5668e5fe8a3131a27a6a08f0 Mon Sep 17 00:00:00 2001 From: "Miguel G. Silva" Date: Thu, 25 Jul 2024 21:19:17 +0100 Subject: [PATCH 12/54] Fixed typo (#1017) Fixed typo in mmotifs docs --- stumpy/mmotifs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/mmotifs.py b/stumpy/mmotifs.py index a3d1a9b68..c495edbb3 100644 --- a/stumpy/mmotifs.py +++ b/stumpy/mmotifs.py @@ -55,7 +55,7 @@ def mmotifs( subsequence must have at least one similar match in order to be considered a motif. - max_distance : flaot, default None + max_distance : float, default None Maximal distance that is allowed between a query subsequence (a candidate motif) and all subsequences in ``T`` to be considered as a match. 
If ``None``, this defaults to From 121cf5d84a2c7e9b4f6f6faf74fb3c89273ec8d8 Mon Sep 17 00:00:00 2001 From: Joey <46200959+joehiggi1758@users.noreply.github.com> Date: Fri, 26 Jul 2024 23:19:02 -0400 Subject: [PATCH 13/54] =?UTF-8?q?Fixed=20#996=20updated=20min=20versions?= =?UTF-8?q?=20with=20test=20function=20to=20ensure=20our=20regex=20is?= =?UTF-8?q?=E2=80=A6=20(#1009)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [#996] updated min versions with test function to ensure our regex is caputring all edge cases * Fixed [#996] and simplified test_pkg_mismatch_regex() * Fixed [#996] and simplified test_pkg_mismatch_regex() * Fixed [#996] and updated value error to correctly callout mismatches --- min_versions.py | 62 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/min_versions.py b/min_versions.py index 93f25f13e..c7568ad69 100755 --- a/min_versions.py +++ b/min_versions.py @@ -143,6 +143,23 @@ def get_min_scipy_version(min_python, min_numpy): return df.SciPy_version +def match_pkg_version(line, pkg_name): + """ + Regular expression to match package versions + """ + matches = re.search( + rf""" + {pkg_name} # Package name + [\s=><:"\'\[\]]* # Zero or more spaces or special characters + (\d+\.\d+[\.0-9]*) # Capture "version" in `matches` + """, + line, + re.VERBOSE | re.IGNORECASE, # Ignores all whitespace and case in pattern + ) + + return matches + + def find_pkg_mismatches(pkg_name, pkg_version, fnames): """ Determine if any package version has mismatches @@ -153,15 +170,7 @@ def find_pkg_mismatches(pkg_name, pkg_version, fnames): with open(fname, "r") as file: for line_num, line in enumerate(file, start=1): l = line.strip().replace(" ", "").lower() - matches = re.search( - rf""" - {pkg_name} # Package name - [=><:"\'\[\]]+ # Zero or more special characters - (\d+\.\d+[\.0-9]*) # Capture "version" in `matches` - """, - l, - re.VERBOSE, # Ignores all 
whitespace in pattern - ) + matches = match_pkg_version(l, pkg_name) if matches is not None: version = matches.groups()[0] if version != pkg_version: @@ -170,6 +179,39 @@ def find_pkg_mismatches(pkg_name, pkg_version, fnames): return pkg_mismatches +def test_pkg_mismatch_regex(): + """ + Validation function for the package mismatch regex + """ + pkgs = { + "numpy": "0.0", + "scipy": "0.0", + "python": "2.7", + "python-version": "2.7", + "numba": "0.0", + } + + lines = [ + "Programming Language :: Python :: 3.8", + "STUMPY supports Python 3.8", + "python-version: ['3.8']", + 'requires-python = ">=3.8"', + "numba>=0.55.2", + ] + + for line in lines: + match_found = False + for pkg_name, pkg_version in pkgs.items(): + matches = match_pkg_version(line, pkg_name) + + if matches: + match_found = True + break + + if not match_found: + raise ValueError(f'Package mismatch regex fails to cover/match "{line}"') + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("min_python", nargs="?", default=None) @@ -205,6 +247,8 @@ def find_pkg_mismatches(pkg_name, pkg_version, fnames): "README.rst", ] + test_pkg_mismatch_regex() + for pkg_name, pkg_version in pkgs.items(): for name, version, fname, line_num in find_pkg_mismatches( pkg_name, pkg_version, fnames From fb9a1254f6fba773aa50e0bf99407b90eb98b39c Mon Sep 17 00:00:00 2001 From: Nima Sarajpoor Date: Fri, 26 Jul 2024 23:23:24 -0400 Subject: [PATCH 14/54] Fixed #1013 set param `T_subseq_isconstant ` to default `None` in `core.process_isconstant` (#1016) * set param to default None * test default mode of function --- stumpy/core.py | 2 +- tests/test_core.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index af3bf7a3b..9ba1b58ad 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -4221,7 +4221,7 @@ def _mpdist( return MPdist -def process_isconstant(T, m, T_subseq_isconstant, T_subseq_isfinite=None): +def 
process_isconstant(T, m, T_subseq_isconstant=None, T_subseq_isfinite=None): """ A convenience wrapper around the `rolling_isconstant` and `fix_isconstant_isfinite_conflicts`. diff --git a/tests/test_core.py b/tests/test_core.py index 0e84cf8c3..d35be0f89 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1753,3 +1753,28 @@ def test_process_isconstant_2d(): T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant) npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp) + + +def test_process_isconstant_1d_default(): + # test the default value of `T_subseq_isconstant` in `process_isconstant` + n = 64 + m = 8 + + # case 1: without nan + T = np.random.rand(n) + T[:m] = 0.5 # constant subsequence + + T_subseq_isconstant_ref = naive.rolling_isconstant(T, m, a_subseq_isconstant=None) + T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant=None) + + npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp) + + # case 2: with nan + T = np.random.rand(n) + T[:m] = 0.5 # constant subsequence + T[-m:] = np.nan # non-finite subsequence + + T_subseq_isconstant_ref = naive.rolling_isconstant(T, m, a_subseq_isconstant=None) + T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant=None) + + npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp) From 66420ee784e75bdd919281339f54699c246a5d5b Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sat, 3 Aug 2024 07:32:34 -0400 Subject: [PATCH 15/54] Turned off notebook execution timeout --- docs/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 9c8b1409b..6137db05d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -236,3 +236,6 @@ "dollarmath", "tasklist", ] + +# Notebook cell execution timeout; defaults to 30. 
+nb_execution_timeout = None From cb6ca4df9c24fe73a53a62e4f81780c7d1a54020 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sat, 3 Aug 2024 07:37:02 -0400 Subject: [PATCH 16/54] Fixed bad value --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 6137db05d..88b69bdae 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -238,4 +238,4 @@ ] # Notebook cell execution timeout; defaults to 30. -nb_execution_timeout = None +nb_execution_timeout = -1 From 79096a8f661fcb717bd91c067a4cf2d568897785 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Sun, 4 Aug 2024 20:39:53 +0900 Subject: [PATCH 17/54] Fixed Typo lenght -> length --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 9ba1b58ad..a237931a7 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -4270,7 +4270,7 @@ def process_isconstant(T, m, T_subseq_isconstant=None, T_subseq_isfinite=None): if len(T_subseq_isconstant) != T.shape[0]: # pragma: no cover msg = ( - "The lenght of the list `T_subseq_isconstant` must be " + "The length of the list `T_subseq_isconstant` must be " + "equal to the number of time series in `T`." ) raise ValueError(msg) From e27157032c69767ed977771a7366f34b431075a3 Mon Sep 17 00:00:00 2001 From: Joey <46200959+joehiggi1758@users.noreply.github.com> Date: Fri, 16 Aug 2024 10:16:04 -0400 Subject: [PATCH 18/54] Fixed #1010 Updated pull request checklist (#1026) --- docs/pull_request_template.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/pull_request_template.md b/docs/pull_request_template.md index acbb5327d..2d3d3d4aa 100644 --- a/docs/pull_request_template.md +++ b/docs/pull_request_template.md @@ -8,7 +8,7 @@ Below is a simple checklist but please do not hesitate to ask for assistance! 
- [ ] Install `black` (i.e., `python -m pip install black` or `conda install -c conda-forge black`) - [ ] Install `flake8` (i.e., `python -m pip install flake8` or `conda install -c conda-forge flake8`) - [ ] Install `pytest-cov` (i.e., `python -m pip install pytest-cov` or `conda install -c conda-forge pytest-cov`) -- [ ] Run `black .` in the root stumpy directory -- [ ] Run `flake8 .` in the root stumpy directory -- [ ] Run `./setup.sh && ./test.sh` in the root stumpy directory +- [ ] Run `black --exclude=".*\.ipynb" --extend-exclude=".venv" --diff ./` in the root stumpy directory +- [ ] Run `flake8 --extend-exclude=.venv ./` in the root stumpy directory +- [ ] Run `./setup.sh dev && ./test.sh` in the root stumpy directory - [ ] Reference a Github issue (and create one if one doesn't already exist) From 692c99cdade7b785097a46d71f42a4def4008d6d Mon Sep 17 00:00:00 2001 From: Nima Sarajpoor Date: Fri, 13 Sep 2024 16:47:50 -0400 Subject: [PATCH 19/54] Fixed #998: Speed up stumpi and aampi (#1001) * Average 4x faster performance! 
* wrap njit-decorated function around hot spot to improve performance * improve docstring * Move function to stumpy.core * Add test function * refactored aampi._update_egress * refactored stumpi._update * refactored using newly-created function * Rename function * test top-k feature in test function * use name of parameter when passing value to be more explicit * add comment and new case in the test function * revise test functions * minor enhancement * extra test that causes an error * Fixed error caused by loss of precision * revise test function and improve comments * Revised docstring and comment * Fixed flake8 format * Minor changes * Fixed black format * replace random with np.random --- stumpy/aampi.py | 48 +++---------- stumpy/core.py | 67 ++++++++++++++++++ stumpy/stumpi.py | 49 +++---------- tests/test_core.py | 170 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 256 insertions(+), 78 deletions(-) diff --git a/stumpy/aampi.py b/stumpy/aampi.py index 1a084f802..e5c2ee8a1 100644 --- a/stumpy/aampi.py +++ b/stumpy/aampi.py @@ -247,26 +247,9 @@ def _update_egress(self, t): if np.any(~self._T_isfinite[-self._m :]): D[:] = np.inf - core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) - - update_idx = np.argwhere(D < self._P[:, -1]).flatten() - for i in update_idx: - idx = np.searchsorted(self._P[i], D[i], side="right") - core._shift_insert_at_index(self._P[i], idx, D[i]) - core._shift_insert_at_index( - self._I[i], idx, D.shape[0] + self._n_appended - 1 - ) - # D.shape[0] is base-1 - - # Calculate the (top-k) matrix profile values/indices for the last subsequence - # by using its corresponding distance profile `D` - self._P[-1] = np.inf - self._I[-1] = -1 - for i, d in enumerate(D): - if d < self._P[-1, -1]: - idx = np.searchsorted(self._P[-1], d, side="right") - core._shift_insert_at_index(self._P[-1], idx, d) - core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended) + core._update_incremental_PI( + D, self._P, 
self._I, self._excl_zone, n_appended=self._n_appended + ) # All neighbors of the last subsequence are on its left. So, its (top-1) # matrix profile value/index and its left matrix profile value/index must @@ -322,30 +305,17 @@ def _update(self, t): if np.any(~self._T_isfinite[-self._m :]): D[:] = np.inf - core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) - - update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten() - for i in update_idx: - idx = np.searchsorted(self._P[i], D[i], side="right") - core._shift_insert_at_index(self._P[i], idx, D[i]) - core._shift_insert_at_index(self._I[i], idx, l) - - # Calculating top-k matrix profile and (top-1) left matrix profile (and their - # corresponding indices) for new subsequence whose distance profile is `D` P_new = np.full(self._k, np.inf, dtype=np.float64) I_new = np.full(self._k, -1, dtype=np.int64) - for i, d in enumerate(D): - if d < P_new[-1]: # maximum value in sorted array P_new - idx = np.searchsorted(P_new, d, side="right") - core._shift_insert_at_index(P_new, idx, d) - core._shift_insert_at_index(I_new, idx, i) + self._P = np.append(self._P, P_new.reshape(1, -1), axis=0) + self._I = np.append(self._I, I_new.reshape(1, -1), axis=0) + + core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0) - left_I_new = I_new[0] - left_P_new = P_new[0] + left_I_new = self._I[-1, 0] + left_P_new = self._P[-1, 0] self._T = T_new - self._P = np.append(self._P, P_new.reshape(1, -1), axis=0) - self._I = np.append(self._I, I_new.reshape(1, -1), axis=0) self._left_P = np.append(self._left_P, left_P_new) self._left_I = np.append(self._left_I, left_I_new) self._p_norm = p_norm_new diff --git a/stumpy/core.py b/stumpy/core.py index a237931a7..4cdaea02a 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -4368,3 +4368,70 @@ def get_ray_nworkers(ray_client): Total number of Ray workers """ return int(ray_client.cluster_resources().get("CPU")) + + +@njit +def _update_incremental_PI(D, P, I, 
excl_zone, n_appended=0): + """ + Given the 1D array distance profile, `D`, of the last subsequence of T, + update (in-place) the (top-k) matrix profile, `P`, and the matrix profile + index, I. + + Parameters + ---------- + D : numpy.ndarray + A 1D array (with dtype float) representing the distance profile of + the last subsequence of T + + P : numpy.ndarray + A 2D array representing the matrix profile of T, + with shape (len(T) - m + 1, k), where `m` is the window size. + P[-1, :] should be set to np.inf + + I : numpy.ndarray + A 2D array representing the matrix profile index of T, + with shape (len(T) - m + 1, k), where `m` is the window size + I[-1, :] should be set to -1. + + excl_zone : int + Size of the exclusion zone. + + n_appended : int + Number of times the timeseries start point is shifted one to the right. + See note below for more details. + + Returns + ------- + None + + Note + ----- + The `n_appended` parameter is used to indicate the number of times the timeseries + start point is shifted one to the right. When `egress=False` (see stumpy.stumpi), + the matrix profile and matrix profile index are updated in an incremental fashion + while considering all historical data. `n_appended` must be set to 0 in such + cases. However, when `egress=True`, the matrix profile and matrix profile index are + updated in an incremental fashion and they represent the matrix profile and matrix + profile index for the `l` most recent subsequences (where `l = len(T) - m + 1`). + In this case, each subsequence is only compared against upto `l-1` left neighbors + and upto `l-1` right neighbors. 
+ """ + _apply_exclusion_zone(D, D.shape[0] - 1, excl_zone, np.inf) + + update_idx = np.argwhere(D < P[:, -1]).flatten() + for i in update_idx: + idx = np.searchsorted(P[i], D[i], side="right") + _shift_insert_at_index(P[i], idx, D[i]) + _shift_insert_at_index(I[i], idx, D.shape[0] + n_appended - 1) + + # Calculate the (top-k) matrix profile values/indidces + # for the last subsequence + P[-1] = np.inf + I[-1] = -1 + for i, d in enumerate(D): + if d < P[-1, -1]: + idx = np.searchsorted(P[-1], d, side="right") + _shift_insert_at_index(P[-1], idx, d) + _shift_insert_at_index(I[-1], idx, i + n_appended) + + return diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index 1731b2808..feb8cb2af 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -354,26 +354,9 @@ def _update_egress(self, t): if np.any(~self._T_isfinite[-self._m :]): D[:] = np.inf - core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) - - update_idx = np.argwhere(D < self._P[:, -1]).flatten() - for i in update_idx: - idx = np.searchsorted(self._P[i], D[i], side="right") - core._shift_insert_at_index(self._P[i], idx, D[i]) - core._shift_insert_at_index( - self._I[i], idx, D.shape[0] + self._n_appended - 1 - ) - # D.shape[0] is base-1 - - # Calculate the (top-k) matrix profile values/indices for the last subsequence - # by using its corresponding distance profile `D` - self._P[-1] = np.inf - self._I[-1] = -1 - for i, d in enumerate(D): - if d < self._P[-1, -1]: - idx = np.searchsorted(self._P[-1], d, side="right") - core._shift_insert_at_index(self._P[-1], idx, d) - core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended) + core._update_incremental_PI( + D, self._P, self._I, self._excl_zone, n_appended=self._n_appended + ) # All neighbors of the last subsequence are on its left. 
So, its (top-1) # matrix profile value/index and its left matrix profile value/index must @@ -440,30 +423,18 @@ def _update(self, t): if np.any(~self._T_isfinite[-self._m :]): D[:] = np.inf - core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) - - update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten() - for i in update_idx: - idx = np.searchsorted(self._P[i], D[i], side="right") - core._shift_insert_at_index(self._P[i], idx, D[i]) - core._shift_insert_at_index(self._I[i], idx, l) - - # Calculating top-k matrix profile and (top-1) left matrix profile (and their - # corresponding indices) for new subsequence whose distance profile is `D` P_new = np.full(self._k, np.inf, dtype=np.float64) I_new = np.full(self._k, -1, dtype=np.int64) - for i, d in enumerate(D): - if d < P_new[-1]: # maximum value in sorted array P_new - idx = np.searchsorted(P_new, d, side="right") - core._shift_insert_at_index(P_new, idx, d) - core._shift_insert_at_index(I_new, idx, i) + self._P = np.append(self._P, P_new.reshape(1, -1), axis=0) + self._I = np.append(self._I, I_new.reshape(1, -1), axis=0) + + core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0) - left_I_new = I_new[0] - left_P_new = P_new[0] + left_I_new = self._I[-1, 0] + left_P_new = self._P[-1, 0] self._T = T_new - self._P = np.append(self._P, P_new.reshape(1, -1), axis=0) - self._I = np.append(self._I, I_new.reshape(1, -1), axis=0) + self._left_P = np.append(self._left_P, left_P_new) self._left_I = np.append(self._left_I, left_I_new) self._QT = QT_new diff --git a/tests/test_core.py b/tests/test_core.py index d35be0f89..8d0721979 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1778,3 +1778,173 @@ def test_process_isconstant_1d_default(): T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant=None) npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp) + + +def test_update_incremental_PI_egressFalse(): + # This tests the function 
`core._update_incremental_PI` + # when `egress` is False, meaning new data point is being + # appended to the historical data. + T = np.random.rand(64) + t = np.random.rand() # new datapoint + T_new = np.append(T, t) + + m = 3 + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + for k in range(1, 4): + # ref + mp_ref = naive.stump(T_new, m, row_wise=True, k=k) + P_ref = mp_ref[:, :k].astype(np.float64) + I_ref = mp_ref[:, k : 2 * k].astype(np.int64) + + # comp + mp = naive.stump(T, m, row_wise=True, k=k) + P_comp = mp[:, :k].astype(np.float64) + I_comp = mp[:, k : 2 * k].astype(np.int64) + + # Because of the new data point, the length of matrix profile + # and matrix profile indices should be increased by one. + P_comp = np.pad( + P_comp, + [(0, 1), (0, 0)], + mode="constant", + constant_values=np.inf, + ) + I_comp = np.pad( + I_comp, + [(0, 1), (0, 0)], + mode="constant", + constant_values=-1, + ) + + D = core.mass(T_new[-m:], T_new) + core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=0) + + # assertion + npt.assert_almost_equal(P_ref, P_comp) + npt.assert_almost_equal(I_ref, I_comp) + + +def test_update_incremental_PI_egressTrue(): + T = np.random.rand(64) + t = np.random.rand() # new data point + m = 3 + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + for k in range(1, 4): + # ref + # In egress=True mode, a new data point, t, is being appended + # to the historical data, T, while the oldest data point is + # being removed. Therefore, the first subsequence in T + # and the last subsequence does not get a chance to meet each + # other. Therefore, we need to exclude that distance. 
+ + T_with_t = np.append(T, t) + D = naive.distance_matrix(T_with_t, T_with_t, m) + D[-1, 0] = np.inf + D[0, -1] = np.inf + + l = len(T_with_t) - m + 1 + P = np.empty((l, k), dtype=np.float64) + I = np.empty((l, k), dtype=np.int64) + for i in range(l): + core.apply_exclusion_zone(D[i], i, excl_zone, np.inf) + IDX = np.argsort(D[i], kind="mergesort")[:k] + I[i] = IDX + P[i] = D[i, IDX] + + P_ref = P[1:].copy() + I_ref = I[1:].copy() + + # comp + mp = naive.stump(T, m, row_wise=True, k=k) + P_comp = mp[:, :k].astype(np.float64) + I_comp = mp[:, k : 2 * k].astype(np.int64) + + P_comp[:-1] = P_comp[1:] + P_comp[-1] = np.inf + I_comp[:-1] = I_comp[1:] + I_comp[-1] = -1 + + T_new = np.append(T[1:], t) + D = core.mass(T_new[-m:], T_new) + core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=1) + + # assertion + npt.assert_almost_equal(P_ref, P_comp) + npt.assert_almost_equal(I_ref, I_comp) + + +def test_update_incremental_PI_egressTrue_MemoryCheck(): + # This test function is to ensure that the function + # `core._update_incremental_PI` does not forget the + # nearest neighbors that were pointing to those old data + # points that are removed in the `egress=True` mode. + # This can be tested by inserting the same subsequence, s, in the beginning, + # middle, and end of the time series. This is to allow us to know which + # neighbor is the nearest neighbor to each of those three subsequences. + + # In the `egress=True` mode, the first element of the time series is removed and + # a new data point is appended. However, the updated matrix profile index for the + # middle subsequence `s` should still refer to the first subsequence in + # the historical data. 
+ seed = 0 + np.random.seed(seed) + + T = np.random.rand(64) + m = 3 + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + s = np.random.rand(m) + T[:m] = s + T[30 : 30 + m] = s + T[-m:] = s + + t = np.random.rand() # new data point + T_with_t = np.append(T, t) + + # In egress=True mode, a new data point, t, is being appended + # to the historical data, T, while the oldest data point is + # being removed. Therefore, the first subsequence in T + # and the last subsequence does not get a chance to meet each + # other. Therefore, their pairwise distances should be excluded + # from the distance matrix. + D = naive.distance_matrix(T_with_t, T_with_t, m) + D[-1, 0] = np.inf + D[0, -1] = np.inf + + l = len(T_with_t) - m + 1 + for i in range(l): + core.apply_exclusion_zone(D[i], i, excl_zone, np.inf) + + T_new = np.append(T[1:], t) + dist_profile = naive.distance_profile(T_new[-m:], T_new, m) + core.apply_exclusion_zone(dist_profile, len(dist_profile) - 1, excl_zone, np.inf) + + for k in range(1, 4): + # ref + P = np.empty((l, k), dtype=np.float64) + I = np.empty((l, k), dtype=np.int64) + for i in range(l): + IDX = np.argsort(D[i], kind="mergesort")[:k] + I[i] = IDX + P[i] = D[i, IDX] + + P_ref = P[1:].copy() + I_ref = I[1:].copy() + + # comp + mp = naive.stump(T, m, row_wise=True, k=k) + P_comp = mp[:, :k].astype(np.float64) + I_comp = mp[:, k : 2 * k].astype(np.int64) + + P_comp[:-1] = P_comp[1:] + P_comp[-1] = np.inf + I_comp[:-1] = I_comp[1:] + I_comp[-1] = -1 + core._update_incremental_PI( + dist_profile, P_comp, I_comp, excl_zone, n_appended=1 + ) + + npt.assert_almost_equal(P_ref, P_comp) + npt.assert_almost_equal(I_ref, I_comp) From b7b355ce4a9450357ad207dd4f04fc8e8b4db100 Mon Sep 17 00:00:00 2001 From: Joey <46200959+joehiggi1758@users.noreply.github.com> Date: Sun, 15 Sep 2024 21:55:17 -0400 Subject: [PATCH 20/54] Fixed #995 Added instructions for pulling main into dev branch (#1028) * [#995] added instructions for pulling main into dev branch * 
[#995] enabled dropdown with sphinx-togglebutton * [#995] added instructions for pulling main into dev branch * [#995] updated code cell section to render correctly --- docs/Contribute.ipynb | 37 +++++++++++++++++++++++++++++++++++++ docs/conf.py | 1 + docs/requirements.txt | 1 + 3 files changed, 39 insertions(+) diff --git a/docs/Contribute.ipynb b/docs/Contribute.ipynb index 038c435dd..f5f7e3c2a 100644 --- a/docs/Contribute.ipynb +++ b/docs/Contribute.ipynb @@ -170,6 +170,43 @@ "![Fetch Upstream Fork](images/fetch_upstream_fork.png) " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ":::{admonition} Instructions for Pulling Main into a Development Branch\n", + ":class: toggle\n", + "1. Open a terminal\n", + "2. Change the current working directory to your local development repository (e.g., cd Git/stumpy_dev.git)\n", + "3. Check out your local development branch (e.g., git switch some_new_feature)\n", + "4. Commit all changes in your local development branch (e.g., git add some_file.py and git commit)\n", + "5. Fetch the branches and their respective commits from the upstream repository (e.g., git fetch upstream)\n", + "6. Check out your fork's local default branch (e.g., git checkout main)\n", + "7. Merge the changes from the upstream default branch - in this case, upstream/main - into your local default branch (e.g., git merge upstream/main). This brings your fork's default branch into sync with the upstream repository, without losing your local changes.\n", + "8. If your local main branch didn't have any unique commits, Git will perform a fast-forward. Otherwise, if your local main branch had unique commits, you may need to resolve conflicts. Note that this does not affect your development branch!\n", + "9. Next, switch over to your development branch (e.g., git switch some_new_feature)\n", + "10. 
Finally, merge the main branch into your development branch (e.g., git merge main)\n", + " - You may see something like the following, if you do, you will need to open up the files tagged with CONFLICT and resolve the merge conflicts. Once that's done, you will need to commit the changes and push the commit (e.g., git push) back to Github.\n", + "\n", + "```\n", + "git merge main\n", + "Auto-merging stumpy/aamp_stimp.py\n", + "Auto-merging stumpy/core.py\n", + "Auto-merging stumpy/mpdist.py\n", + "CONFLICT (content): Merge conflict in stumpy/mpdist.py\n", + "Auto-merging stumpy/mstumped.py\n", + "CONFLICT (content): Merge conflict in stumpy/mstumped.py\n", + "Auto-merging stumpy/ostinato.py\n", + "Auto-merging stumpy/stimp.py\n", + "CONFLICT (content): Merge conflict in stumpy/stimp.py\n", + "Auto-merging stumpy/stumped.py\n", + "CONFLICT (content): Merge conflict in stumpy/stumped.py\n", + "Automatic merge failed; fix conflicts and then commit the result.\n", + "You will need to open up the files tagged with CONFLICT and resolve the merge conflicts. 
Once that's done, you will need to commit the changes and push the commit (e.g., git push) back to Github.\n", + "```\n", + ":::" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/docs/conf.py b/docs/conf.py index 88b69bdae..dcd2da20a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,6 +51,7 @@ "sphinx.ext.intersphinx", "sphinx.ext.mathjax", "sphinx.ext.viewcode", + "sphinx_togglebutton", "numpydoc", "myst_nb", ] diff --git a/docs/requirements.txt b/docs/requirements.txt index 67d3d3727..b1242ff4b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,3 +7,4 @@ scikit-learn numpydoc myst-nb jupyterlab-myst +sphinx-togglebutton From 3cb836ba88d942314035ac74410e6128b7dd33ab Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sun, 13 Oct 2024 21:14:53 -0400 Subject: [PATCH 21/54] Fixed #1036 Bumped minimum Python/dependency versions --- .github/workflows/github-actions.yml | 6 +++--- README.rst | 2 +- environment.yml | 6 +++--- pyproject.toml | 8 ++++---- requirements.txt | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 69d4e35f7..7939d29e1 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.8'] + python-version: ['3.9'] steps: - uses: actions/checkout@v4 - name: Set Up Python @@ -63,7 +63,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - name: Set Up Python @@ -110,7 +110,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - name: Set Up Python diff 
--git a/README.rst b/README.rst index a0ce5a419..16780a3cc 100644 --- a/README.rst +++ b/README.rst @@ -325,7 +325,7 @@ Tests are written in the ``tests`` directory and processed using `PyTest `__ and, due to the use of unicode variable names/identifiers, is not compatible with Python 2.x. Given the small dependencies, STUMPY may work on older versions of Python but this is beyond the scope of our support and we strongly recommend that you upgrade to the most recent version of Python. +STUMPY supports `Python 3.9+ `__ and, due to the use of unicode variable names/identifiers, is not compatible with Python 2.x. Given the small dependencies, STUMPY may work on older versions of Python but this is beyond the scope of our support and we strongly recommend that you upgrade to the most recent version of Python. ------------ Getting Help diff --git a/environment.yml b/environment.yml index 919cdec19..30648ec44 100644 --- a/environment.yml +++ b/environment.yml @@ -2,10 +2,10 @@ channels: - numba - conda-forge dependencies: - - python>=3.8 - - numpy>=1.21 + - python>=3.9 + - numpy>=1.22 - scipy>=1.10 - - numba>=0.57.1 + - numba>=0.59.1 - pandas>=0.20.0 - flake8>=3.7.7 - flake8-docstrings>=1.5.0 diff --git a/pyproject.toml b/pyproject.toml index 1b5731861..1d5c2ed7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "stumpy" version = "1.13.0" -requires-python = ">=3.8" +requires-python = ">=3.9" authors = [ {name = "Sean M. 
Law", email = "seanmylaw@gmail.com"} ] @@ -23,7 +23,7 @@ classifiers = [ "Operating System :: POSIX", "Operating System :: Unix", "Operating System :: MacOS", - "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ] keywords = ["time series", "matrix profile", "motif", "discord"] maintainers = [ @@ -32,9 +32,9 @@ maintainers = [ ] license = {text = "3-clause BSD License"} dependencies = [ - "numpy >= 1.21", + "numpy >= 1.22", "scipy >= 1.10", - "numba >= 0.57.1" + "numba >= 0.59.1" ] [tool.setuptools] diff --git a/requirements.txt b/requirements.txt index 61f3bd793..d08f49a4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -numpy>=1.21 +numpy>=1.22 scipy>=1.10 -numba>=0.57.1 +numba>=0.59.1 From 3b283714051401ada5679b56c2ec74fabbb10448 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sun, 3 Nov 2024 08:08:13 -0500 Subject: [PATCH 22/54] Added minor note on missing `mmatch` function --- docs/api.rst | 7 +++++++ stumpy/motifs.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index a05b8ecef..1f7f2c60a 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -34,6 +34,8 @@ Have A Question? stumpy.motifs stumpy.match stumpy.mmotifs + stumpy.mmatch + stumpy.mmatch stumpy.snippets stumpy.stimp stumpy.stimped @@ -152,6 +154,11 @@ mmotifs .. autofunction:: stumpy.mmotifs +mmatch +====== + +This function does not exist. See the :func:`stumpy.match` function, which natively supports multi-dimensional time series inputs. + snippets ======== diff --git a/stumpy/motifs.py b/stumpy/motifs.py index 8c560f659..de381f88b 100644 --- a/stumpy/motifs.py +++ b/stumpy/motifs.py @@ -414,13 +414,13 @@ def match( Find all matches of a query ``Q`` in a time series ``T`` The indices of subsequences whose distances to ``Q`` are less than or equal to - ``max_distance``, sorted by distance (lowest to highest). Around each occurrence an + ``max_distance``, sorted by distance (lowest to highest). 
Around each occurrence, an exclusion zone is applied before searching for the next. Parameters ---------- Q : numpy.ndarray - The query sequence. It doesn't have to be a subsequence of ``T``. + The query sequence. ``Q`` does not have to be a subsequence of ``T``. T : numpy.ndarray The time series of interest. From eadc0a271bbc5deeb92cef4bce137816aede60f1 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sun, 3 Nov 2024 08:58:23 -0500 Subject: [PATCH 23/54] Removed mmatch in API docs --- docs/api.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 1f7f2c60a..a05b8ecef 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -34,8 +34,6 @@ Have A Question? stumpy.motifs stumpy.match stumpy.mmotifs - stumpy.mmatch - stumpy.mmatch stumpy.snippets stumpy.stimp stumpy.stimped @@ -154,11 +152,6 @@ mmotifs .. autofunction:: stumpy.mmotifs -mmatch -====== - -This function does not exist. See the :func:`stumpy.match` function, which natively supports multi-dimensional time series inputs. - snippets ======== From 96166867d20edc46f7c8bb26b9cdc316568573d6 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sun, 3 Nov 2024 11:06:44 -0500 Subject: [PATCH 24/54] Updated talk video --- docs/motivation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/motivation.rst b/docs/motivation.rst index 97e268b3b..baccd2c4f 100644 --- a/docs/motivation.rst +++ b/docs/motivation.rst @@ -7,5 +7,5 @@ The following video provides the background and motivation for developing and op .. raw:: html
- +
From 3165d1ccbe505b0c5bd324db5e12979c392967d8 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sun, 17 Nov 2024 12:28:27 -0500 Subject: [PATCH 25/54] Fixed bad YouTube embedding link --- docs/motivation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/motivation.rst b/docs/motivation.rst index baccd2c4f..6ecf48496 100644 --- a/docs/motivation.rst +++ b/docs/motivation.rst @@ -7,5 +7,5 @@ The following video provides the background and motivation for developing and op .. raw:: html
- +
From 44168732e0c073cefdc208d1352bb4ae183a0eb5 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Fri, 27 Dec 2024 07:23:48 -0500 Subject: [PATCH 26/54] Changed to "is" when comparing types --- stumpy/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 4cdaea02a..fdbc9aefd 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -429,11 +429,11 @@ def check_dtype(a, dtype=np.float64): # pragma: no cover TypeError If the array type does not match `dtype` """ - if dtype == int: + if dtype is int: dtype = np.int64 - if dtype == float: + if dtype is float: dtype = np.float64 - if dtype == bool: + if dtype is bool: dtype = np.bool_ if not np.issubdtype(a.dtype, dtype): msg = f"{dtype} dtype expected but found {a.dtype} in input array\n" From ce0cd8c62be3d59a4cdc80c2870999357424f9d8 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Tue, 7 Jan 2025 14:21:37 -0500 Subject: [PATCH 27/54] Fixed #1058 Added support for Polars --- environment.yml | 3 ++- pyproject.toml | 3 ++- stumpy/core.py | 21 +++++++++++++++------ stumpy/maamp.py | 4 ++-- stumpy/maamped.py | 12 ++++++------ stumpy/mstump.py | 4 ++-- stumpy/mstumped.py | 12 ++++++------ tests/test_mstump.py | 7 +++++++ tests/test_stump.py | 9 +++++++++ tests/test_stumped.py | 5 +++++ 10 files changed, 56 insertions(+), 24 deletions(-) diff --git a/environment.yml b/environment.yml index 30648ec44..8cd13fe2d 100644 --- a/environment.yml +++ b/environment.yml @@ -21,8 +21,9 @@ dependencies: - pydata-sphinx-theme>=0.5.2 - scikit-learn>=0.21.3 - numpydoc>=1.1.0 - - build>=0.7.0 + - python-build>=0.7.0 - pytest-check-links>=0.7.1 - isort>=5.11.0 - jupyterlab-myst>=2.0.0 - myst-nb>=1.0.0 + - polars>=1.14.0 diff --git a/pyproject.toml b/pyproject.toml index 1d5c2ed7c..cb201d6cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,8 @@ ci = [ "black >= 22.1.0", "pytest >= 4.4.1", "isort >= 5.11.0", - 'tbb >= 2019.5 ; platform_system == "Linux"' + 'tbb >= 2019.5 ; 
platform_system == "Linux"', + "polars >= 1.14.0" ] [project.urls] diff --git a/stumpy/core.py b/stumpy/core.py index fdbc9aefd..a5bdfbdb8 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -445,17 +445,18 @@ def check_dtype(a, dtype=np.float64): # pragma: no cover def transpose_dataframe(df): # pragma: no cover """ - Check if the input is a column-wise Pandas `DataFrame`. If `True`, return a + Check if the input is a column-wise pandas/polars `DataFrame`. If `True`, return a transpose dataframe since stumpy assumes that each row represents data from a different dimension while each column represents data from the same dimension. - If `False`, return `a` unchanged. Pandas `Series` do not need to be transposed. + If `False`, return `a` unchanged. Pandas/polars `Series` do not need to be + transposed. Note that this function has zero dependency on Pandas (not even a soft dependency). Parameters ---------- - df : numpy.ndarray - Pandas dataframe + df : DataFrame + pandas/polars dataframe Returns ------- @@ -463,7 +464,7 @@ def transpose_dataframe(df): # pragma: no cover If `df` is a Pandas `DataFrame` then return `df.T`. Otherwise, return `df` """ if type(df).__name__ == "DataFrame": - return df.T + return df.transpose() return df @@ -2062,8 +2063,16 @@ def _preprocess(T, copy=True): Modified time series """ if copy: - T = T.copy() + try: + T = T.copy() + except AttributeError: # Polars copy + T = T.clone() + T = transpose_dataframe(T) + + if "polars" in str(type(T)): + T = T.to_numpy(writable=True) + T = np.asarray(T) check_dtype(T) diff --git a/stumpy/maamp.py b/stumpy/maamp.py index d142c8e40..f2aa5f6a4 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -879,8 +879,8 @@ def maamp(T, m, include=None, discords=False, p=2.0): ---------- T : numpy.ndarray The time series or sequence for which to compute the multi-dimensional - matrix profile. 
Each row in `T` represents data from a different - dimension while each column in `T` represents data from the same + matrix profile. Each row in `T` represents data from the same + dimension while each column in `T` represents data from a different dimension. m : int diff --git a/stumpy/maamped.py b/stumpy/maamped.py index b992b1ec1..0665e3e51 100644 --- a/stumpy/maamped.py +++ b/stumpy/maamped.py @@ -39,8 +39,8 @@ def _dask_maamped( T_A : numpy.ndarray The time series or sequence for which to compute the multi-dimensional - matrix profile. Each row in `T_A` represents data from a different - dimension while each column in `T_A` represents data from the same + matrix profile. Each row in `T_A` represents data from the same + dimension while each column in `T_A` represents data from a different dimension. T_B : numpy.ndarray @@ -194,8 +194,8 @@ def _ray_maamped( T_A : numpy.ndarray The time series or sequence for which to compute the multi-dimensional - matrix profile. Each row in `T_A` represents data from a different - dimension while each column in `T_A` represents data from the same + matrix profile. Each row in `T_A` represents data from the same + dimension while each column in `T_A` represents data from a different dimension. T_B : numpy.ndarray @@ -335,8 +335,8 @@ def maamped(client, T, m, include=None, discords=False, p=2.0): T : numpy.ndarray The time series or sequence for which to compute the multi-dimensional - matrix profile. Each row in `T` represents data from a different - dimension while each column in `T` represents data from the same + matrix profile. Each row in `T` represents data from the same + dimension while each column in `T` represents data from a different dimension. 
m : int diff --git a/stumpy/mstump.py b/stumpy/mstump.py index 9b2dab7d4..339c8bcb7 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -1126,8 +1126,8 @@ def mstump( ---------- T : numpy.ndarray The time series or sequence for which to compute the multi-dimensional - matrix profile. Each row in ``T`` represents data from a different - dimension while each column in ``T`` represents data from the same + matrix profile. Each row in ``T`` represents data from the same + dimension while each column in ``T`` represents data from a different dimension. m : int diff --git a/stumpy/mstumped.py b/stumpy/mstumped.py index 6f7c8dad3..aabb0b6ca 100644 --- a/stumpy/mstumped.py +++ b/stumpy/mstumped.py @@ -43,8 +43,8 @@ def _dask_mstumped( T_A : numpy.ndarray The time series or sequence for which to compute the multi-dimensional - matrix profile. Each row in `T_A` represents data from a different - dimension while each column in `T_A` represents data from the same + matrix profile. Each row in `T_A` represents data from the same + dimension while each column in `T_A` represents data from a different dimension. T_B : numpy.ndarray @@ -216,8 +216,8 @@ def _ray_mstumped( T_A : numpy.ndarray The time series or sequence for which to compute the multi-dimensional - matrix profile. Each row in `T_A` represents data from a different - dimension while each column in `T_A` represents data from the same + matrix profile. Each row in `T_A` represents data from the same + dimension while each column in `T_A` represents data from a different dimension. T_B : numpy.ndarray @@ -387,8 +387,8 @@ def mstumped( T : numpy.ndarray The time series or sequence for which to compute the multi-dimensional - matrix profile. Each row in ``T`` represents data from a different - dimension while each column in ``T`` represents data from the same + matrix profile. Each row in ``T`` represents data from the same + dimension while each column in ``T`` represents data from a different dimension. 
m : int diff --git a/tests/test_mstump.py b/tests/test_mstump.py index 3613653ea..6cb81b5d6 100644 --- a/tests/test_mstump.py +++ b/tests/test_mstump.py @@ -4,6 +4,7 @@ import numpy as np import numpy.testing as npt import pandas as pd +import polars as pl import pytest from stumpy import config, core, mdl, mstump, subspace @@ -305,6 +306,12 @@ def test_mstump_wrapper(T, m): npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + df = pl.DataFrame(T.T) + comp_P, comp_I = mstump(df, m) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + @pytest.mark.parametrize("T, m", test_data) def test_mstump_wrapper_include(T, m): diff --git a/tests/test_stump.py b/tests/test_stump.py index 2e1e88d00..fe35920cd 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -4,6 +4,7 @@ import numpy as np import numpy.testing as npt import pandas as pd +import polars as pl import pytest from stumpy import config, stump @@ -42,6 +43,10 @@ def test_stump_self_join(T_A, T_B): naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + comp_mp = stump(pl.Series(T_B), m, ignore_trivial=True) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + @pytest.mark.parametrize("T_A, T_B", test_data) def test_stump_A_B_join(T_A, T_B): @@ -56,6 +61,10 @@ def test_stump_A_B_join(T_A, T_B): naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + comp_mp = stump(pl.Series(T_A), m, pl.Series(T_B), ignore_trivial=False) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + def test_stump_constant_subsequence_self_join(): T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))) diff --git a/tests/test_stumped.py b/tests/test_stumped.py index db929a2b1..a7e9c3d2c 100644 --- a/tests/test_stumped.py +++ b/tests/test_stumped.py @@ -4,6 +4,7 @@ import numpy as np import numpy.testing as npt import pandas as pd +import polars as pl import pytest from 
dask.distributed import Client, LocalCluster @@ -75,6 +76,10 @@ def test_stumped_self_join_df(T_A, T_B, dask_cluster): naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + comp_mp = stumped(dask_client, pl.Series(T_B), m, ignore_trivial=True) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + @pytest.mark.filterwarnings("ignore:numpy.dtype size changed") @pytest.mark.filterwarnings("ignore:numpy.ufunc size changed") From 70e4e7065fa475b60d874da5f81e61d475c27f38 Mon Sep 17 00:00:00 2001 From: Nima Sarajpoor Date: Sun, 12 Jan 2025 15:58:21 -0500 Subject: [PATCH 28/54] Fixed #1059 missing fastmath (#1060) * add function to check fastmath * revise fastmath script and add support for reading arg from command line * minor fixes * rename function to improve readability * simplify code by passing boolean value * fix to catch njit functions with decorator * use regex to find njit functions * minor change * revise code to detect bare njit decorator * minor fix * fix path * add missing fastmath * revise fastmath flag * Improve ValueError msg * fix format * enable function to accept path as input * pass param via CLI, and some minor changes * adapt changes in test script * use type str for the param pkg_dir * minor changes * Revised string concatenation in error message --- fastmath.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ stumpy/cache.py | 22 ++++++++--- stumpy/core.py | 10 +++-- test.sh | 17 +++++++- 4 files changed, 137 insertions(+), 12 deletions(-) create mode 100755 fastmath.py diff --git a/fastmath.py b/fastmath.py new file mode 100755 index 000000000..b6fea39af --- /dev/null +++ b/fastmath.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +import argparse +import ast +import importlib +import pathlib + + +def get_njit_funcs(pkg_dir): + """ + Identify all njit functions + + Parameters + ---------- + pkg_dir : str + The path to the directory containing some .py files + + Returns + ------- + njit_funcs : list + A list 
of all njit functions, where each element is a tuple of the form + (module_name, func_name) + """ + ignore_py_files = ["__init__", "__pycache__"] + pkg_dir = pathlib.Path(pkg_dir) + + module_names = [] + for fname in pkg_dir.iterdir(): + if fname.stem not in ignore_py_files and not fname.stem.startswith("."): + module_names.append(fname.stem) + + njit_funcs = [] + for module_name in module_names: + filepath = pkg_dir / f"{module_name}.py" + file_contents = "" + with open(filepath, encoding="utf8") as f: + file_contents = f.read() + module = ast.parse(file_contents) + for node in module.body: + if isinstance(node, ast.FunctionDef): + func_name = node.name + for decorator in node.decorator_list: + decorator_name = None + if isinstance(decorator, ast.Name): + # Bare decorator + decorator_name = decorator.id + if isinstance(decorator, ast.Call) and isinstance( + decorator.func, ast.Name + ): + # Decorator is a function + decorator_name = decorator.func.id + + if decorator_name == "njit": + njit_funcs.append((module_name, func_name)) + + return njit_funcs + + +def check_fastmath(pkg_dir, pkg_name): + """ + Check if all njit functions have the `fastmath` flag set + + Parameters + ---------- + pkg_dir : str + The path to the directory containing some .py files + + pkg_name : str + The name of the package + + Returns + ------- + None + """ + missing_fastmath = [] # list of njit functions with missing fastmath flags + for module_name, func_name in get_njit_funcs(pkg_dir): + module = importlib.import_module(f".{module_name}", package=pkg_name) + func = getattr(module, func_name) + if "fastmath" not in func.targetoptions.keys(): + missing_fastmath.append(f"{module_name}.{func_name}") + + if len(missing_fastmath) > 0: + msg = ( + "Found one or more `@njit` functions that are missing the `fastmath` flag. 
" + + f"The functions are:\n {missing_fastmath}\n" + ) + raise ValueError(msg) + + return + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--check", dest="pkg_dir") + args = parser.parse_args() + + if args.pkg_dir: + pkg_dir = pathlib.Path(args.pkg_dir) + pkg_name = pkg_dir.name + check_fastmath(str(pkg_dir), pkg_name) diff --git a/stumpy/cache.py b/stumpy/cache.py index 398387724..76fb685bc 100644 --- a/stumpy/cache.py +++ b/stumpy/cache.py @@ -5,7 +5,6 @@ import ast import importlib import pathlib -import pkgutil import site import warnings @@ -28,13 +27,17 @@ def get_njit_funcs(): out : list A list of (`module_name`, `func_name`) pairs """ + ignore_py_files = ["__init__", "__pycache__"] + pkg_dir = pathlib.Path(__file__).parent - module_names = [name for _, name, _ in pkgutil.iter_modules([str(pkg_dir)])] + module_names = [] + for fname in pkg_dir.iterdir(): + if fname.stem not in ignore_py_files and not fname.stem.startswith("."): + module_names.append(fname.stem) njit_funcs = [] - for module_name in module_names: - filepath = pathlib.Path(__file__).parent / f"{module_name}.py" + filepath = pkg_dir / f"{module_name}.py" file_contents = "" with open(filepath, encoding="utf8") as f: file_contents = f.read() @@ -43,11 +46,18 @@ def get_njit_funcs(): if isinstance(node, ast.FunctionDef): func_name = node.name for decorator in node.decorator_list: + decorator_name = None + if isinstance(decorator, ast.Name): + # Bare decorator + decorator_name = decorator.id if isinstance(decorator, ast.Call) and isinstance( decorator.func, ast.Name ): - if decorator.func.id == "njit": - njit_funcs.append((module_name, func_name)) + # Decorator is a function + decorator_name = decorator.func.id + + if decorator_name == "njit": + njit_funcs.append((module_name, func_name)) return njit_funcs diff --git a/stumpy/core.py b/stumpy/core.py index a5bdfbdb8..51f0b1a18 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2356,6 +2356,7 @@ def 
_count_diagonal_ndist(diags, m, n_A, n_B): @njit( # "i8[:, :](i8[:], i8, b1)" + fastmath=True ) def _get_array_ranges(a, n_chunks, truncate): """ @@ -2404,6 +2405,7 @@ def _get_array_ranges(a, n_chunks, truncate): @njit( # "i8[:, :](i8, i8, b1)" + fastmath=True ) def _get_ranges(size, n_chunks, truncate): """ @@ -3256,7 +3258,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): return MPdist -@njit() +@njit(fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) def _merge_topk_PI(PA, PB, IA, IB): """ Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place). @@ -3329,7 +3331,7 @@ def _merge_topk_PI(PA, PB, IA, IB): IA[i] = tmp_I -@njit() +@njit(fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) def _merge_topk_ρI(ρA, ρB, IA, IB): """ Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place). @@ -3403,7 +3405,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): IA[i] = tmp_I -@njit() +@njit(fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) def _shift_insert_at_index(a, idx, v, shift="right"): """ If `shift=right` (default), all elements in `a[idx:]` are shifted to the right by @@ -4379,7 +4381,7 @@ def get_ray_nworkers(ray_client): return int(ray_client.cluster_resources().get("CPU")) -@njit +@njit(fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) def _update_incremental_PI(D, P, I, excl_zone, n_appended=0): """ Given the 1D array distance profile, `D`, of the last subsequence of T, diff --git a/test.sh b/test.sh index 06fe9819c..0136e16f5 100755 --- a/test.sh +++ b/test.sh @@ -93,6 +93,13 @@ check_print() fi } +check_fastmath() +{ + echo "Checking Missing fastmath flags in njit functions" + ./fastmath.py --check stumpy + check_errs $? 
+} + check_naive() { # Check if there are any naive implementations not at start of test file @@ -146,14 +153,14 @@ set_ray_coveragerc() show_coverage_report() { set_ray_coveragerc - coverage report -m --fail-under=100 --skip-covered --omit=docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py $fcoveragerc + coverage report -m --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py $fcoveragerc } gen_coverage_xml_report() { # This function saves the coverage report in Cobertura XML format, which is compatible with codecov set_ray_coveragerc - coverage xml -o $fcoveragexml --fail-under=100 --omit=docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py $fcoveragerc + coverage xml -o $fcoveragexml --fail-under=100 --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py $fcoveragerc } test_custom() @@ -333,6 +340,12 @@ check_print check_naive check_ray + +if [[ -z $NUMBA_DISABLE_JIT || $NUMBA_DISABLE_JIT -eq 0 ]]; then + check_fastmath +fi + + if [[ $test_mode == "notebooks" ]]; then echo "Executing Tutorial Notebooks Only" convert_notebooks From b6d9bcebcc278cccb7b1c131dae5b4ea4d972adc Mon Sep 17 00:00:00 2001 From: Sean Law Date: Tue, 21 Jan 2025 10:13:59 -0500 Subject: [PATCH 29/54] Added check for relative package imports --- stumpy/core.py | 2 +- test.sh | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 51f0b1a18..21d3f17a8 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -3689,7 +3689,7 @@ def check_ignore_trivial(T_A, T_B, ignore_trivial): Notes ----- - These warnings may be supresse by using a context manager + These warnings may be supressed by using a context manager ``` import stumpy import numpy as np diff --git a/test.sh b/test.sh index 0136e16f5..cfaaee95d 100755 --- a/test.sh +++ b/test.sh @@ -100,6 +100,16 @@ check_fastmath() check_errs $? 
} +check_pkg_imports() +{ + echo "Checking Package Imports" + if [[ `grep '^from stumpy' stumpy/*py | wc -l` -gt "0" ]]; then + grep '^from stumpy' stumpy/*py + echo 'Error: please change "from stumpy" to "from ." ' + exit 1 + fi +} + check_naive() { # Check if there are any naive implementations not at start of test file @@ -337,6 +347,7 @@ check_isort check_flake check_docstrings check_print +check_pkg_imports check_naive check_ray From dfe4f63b928a33a6dc8f16b40234893cfe538647 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Tue, 21 Jan 2025 16:17:37 -0500 Subject: [PATCH 30/54] Fix missing Sphinx configuration declaration --- .readthedocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.readthedocs.yml b/.readthedocs.yml index 0263c9ad7..3da6a4e46 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,4 +1,6 @@ version: 2 +sphinx: + configuration: docs/conf.py formats: [] build: os: ubuntu-22.04 From 80dc3e915d287a1d5368b04fd250d7ad60f4b535 Mon Sep 17 00:00:00 2001 From: Nima Sarajpoor Date: Thu, 23 Jan 2025 06:36:14 -0500 Subject: [PATCH 31/54] Fix fasthmath precision issue (#1048) * removed reassoc flag from fastmath * Add reset feature to config * Revised config value * replaced fastmath flags with config var * fixed format * Removed bad f-string * Replaced Raised with Returns in docstring * Add second attempt for assertion * minor change * Add condition to avoid revising fastmath when JIT is disabled * Removed support for input with type list to simplify function * Refactored the recompile process * removed blank lines * fixed typo * replaced hardcoded fastmath value with config var * revised function * renamed variable to improve readability * fixed bug * rename config to improve readability * revise func clear * revise func to recompile all njit functions * Adapt to changes in test function * add test * resolve coverage * resolve missing lines in coverage * Add test function to improve coverage * add fastmath module * revise test function to use 
fastmath module * fix minor issue * minor change to improve readability * Add fastmath default flags to config default * add reset function * rename function * adapt recent changes in test function * minor fixes * Check if DISABLE_JIT before getting fastmath * ignore lines for coverage check * Editorial fix * avoid .get(key) to get KeyError if it does not exist * add function to save cache * Add note to function * fix format * replace fastmath flag with config variable * add test function to check backward compatibility * skip test when JIT is disabled * rename test function * add conditional deprecation warning * add test function to check if cache can be saved after cache._clear() * remove old warning * add test for cache._clear * add wrapper around private functions * Raise OSError when NUMBA JIT is disabled during cache save * move warnings to public API * fix warning message * improved warning message * Add commit about addition config variables that are defined in __init__ * Revise test function to improve readability * Add test function for fastmath * Revised test functions * skip test if numba JIT is disabled * omit test functions that require NUMBA JIT * Removed the trivial test function * Raise warning instead of error to avoid interrupting the program * improve readability * remove intermediate variable * minor fixes * Add shell script code to check for harcoded fastmath flags * minor fix on indention --- fastmath.py | 2 +- stumpy/__init__.py | 15 +++++++ stumpy/aamp.py | 4 +- stumpy/cache.py | 98 ++++++++++++++++++++++++++++++++++++++++- stumpy/config.py | 77 +++++++++++++++++++++++++++----- stumpy/core.py | 38 ++++++++-------- stumpy/fastmath.py | 82 ++++++++++++++++++++++++++++++++++ stumpy/maamp.py | 2 +- stumpy/mstump.py | 2 +- stumpy/scraamp.py | 4 +- stumpy/scrump.py | 4 +- stumpy/stump.py | 4 +- test.sh | 11 ++++- tests/test_cache.py | 27 ++++++++++++ tests/test_config.py | 21 +++++++++ tests/test_fastmath.py | 44 ++++++++++++++++++ 
tests/test_precision.py | 30 ++++++++++++- 17 files changed, 420 insertions(+), 45 deletions(-) create mode 100644 stumpy/fastmath.py create mode 100644 tests/test_cache.py create mode 100644 tests/test_fastmath.py diff --git a/fastmath.py b/fastmath.py index b6fea39af..fe7dc0b56 100755 --- a/fastmath.py +++ b/fastmath.py @@ -13,7 +13,7 @@ def get_njit_funcs(pkg_dir): Parameters ---------- pkg_dir : str - The path to the directory containing some .py files + The path to the directory containing some .py files Returns ------- diff --git a/stumpy/__init__.py b/stumpy/__init__.py index 76a5cfbad..870912b15 100644 --- a/stumpy/__init__.py +++ b/stumpy/__init__.py @@ -1,9 +1,12 @@ +import importlib import os.path from importlib.metadata import distribution from site import getsitepackages +import numba from numba import cuda +from . import cache, config from .aamp import aamp # noqa: F401 from .aamp_mmotifs import aamp_mmotifs # noqa: F401 from .aamp_motifs import aamp_match, aamp_motifs # noqa: F401 @@ -32,6 +35,18 @@ from .stumped import stumped # noqa: F401 from .stumpi import stumpi # noqa: F401 +# Get the default fastmath flags for all njit functions +# and update the _STUMPY_DEFAULTS dictionary + +if not numba.config.DISABLE_JIT: # pragma: no cover + njit_funcs = cache.get_njit_funcs() + for module_name, func_name in njit_funcs: + module = importlib.import_module(f".{module_name}", package="stumpy") + func = getattr(module, func_name) + key = module_name + "." 
+ func_name # e.g., core._mass + key = "STUMPY_FASTMATH_" + key.upper() # e.g., STUMPY_FASTHMATH_CORE._MASS + config._STUMPY_DEFAULTS[key] = func.targetoptions["fastmath"] + if cuda.is_available(): from .gpu_aamp import gpu_aamp # noqa: F401 from .gpu_aamp_ostinato import gpu_aamp_ostinato # noqa: F401 diff --git a/stumpy/aamp.py b/stumpy/aamp.py index edc2da7af..1e4879bcc 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -13,7 +13,7 @@ @njit( # "(f8[:], f8[:], i8, b1[:], b1[:], f8, i8[:], i8, i8, i8, f8[:, :, :]," # "f8[:, :], f8[:, :], i8[:, :, :], i8[:, :], i8[:, :], b1)", - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _compute_diagonal( T_A, @@ -186,7 +186,7 @@ def _compute_diagonal( @njit( # "(f8[:], f8[:], i8, b1[:], b1[:], i8[:], b1, i8)", parallel=True, - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _aamp( T_A, diff --git a/stumpy/cache.py b/stumpy/cache.py index 76fb685bc..a08f00b36 100644 --- a/stumpy/cache.py +++ b/stumpy/cache.py @@ -4,10 +4,13 @@ import ast import importlib +import inspect import pathlib import site import warnings +import numba + CACHE_WARNING = "Caching `numba` functions is purely for experimental purposes " CACHE_WARNING += "and should never be used or depended upon as it is not supported! " CACHE_WARNING += "All caching capabilities are not tested and may be removed/changed " @@ -74,7 +77,15 @@ def _enable(): ------- None """ - warnings.warn(CACHE_WARNING) + frame = inspect.currentframe() + caller_name = inspect.getouterframes(frame)[1].function + if caller_name != "_save": + msg = ( + "The 'cache._enable()' function is deprecated and no longer supported. 
" + + "Please use 'cache.save()' instead" + ) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + njit_funcs = get_njit_funcs() for module_name, func_name in njit_funcs: module = importlib.import_module(f".{module_name}", package="stumpy") @@ -94,12 +105,29 @@ def _clear(): ------- None """ - warnings.warn(CACHE_WARNING) site_pkg_dir = site.getsitepackages()[0] numba_cache_dir = site_pkg_dir + "/stumpy/__pycache__" [f.unlink() for f in pathlib.Path(numba_cache_dir).glob("*nb*") if f.is_file()] +def clear(): + """ + Clear numba cache directory + + Parameters + ---------- + None + + Returns + ------- + None + """ + warnings.warn(CACHE_WARNING) + _clear() + + return + + def _get_cache(): """ Retrieve a list of cached numba functions @@ -117,3 +145,69 @@ def _get_cache(): site_pkg_dir = site.getsitepackages()[0] numba_cache_dir = site_pkg_dir + "/stumpy/__pycache__" return [f.name for f in pathlib.Path(numba_cache_dir).glob("*nb*") if f.is_file()] + + +def _recompile(): + """ + Recompile all njit functions + + Parameters + ---------- + None + + Returns + ------- + None + + Notes + ----- + If the `numba` cache is enabled, this results in saving (and/or overwriting) + the cached numba functions to disk. + """ + for module_name, func_name in get_njit_funcs(): + module = importlib.import_module(f".{module_name}", package="stumpy") + func = getattr(module, func_name) + func.recompile() + + return + + +def _save(): + """ + Save all njit functions + + Parameters + ---------- + None + + Returns + ------- + None + """ + _enable() + _recompile() + + return + + +def save(): + """ + Save/overwrite all the cache data files of + all-so-far compiled njit functions. 
+ + Parameters + ---------- + None + + Returns + ------- + None + """ + if numba.config.DISABLE_JIT: + msg = "Could not save/cache function because NUMBA JIT is disabled" + warnings.warn(msg) + else: + warnings.warn(CACHE_WARNING) + _save() + + return diff --git a/stumpy/config.py b/stumpy/config.py index dad017ec3..d66ff76eb 100644 --- a/stumpy/config.py +++ b/stumpy/config.py @@ -2,15 +2,72 @@ # Copyright 2019 TD Ameritrade. Released under the terms of the 3-Clause BSD license. # STUMPY is a trademark of TD Ameritrade IP Company, Inc. All rights reserved. +import warnings + import numpy as np -STUMPY_THREADS_PER_BLOCK = 512 -STUMPY_MEAN_STD_NUM_CHUNKS = 1 -STUMPY_MEAN_STD_MAX_ITER = 10 -STUMPY_DENOM_THRESHOLD = 1e-14 -STUMPY_STDDEV_THRESHOLD = 1e-7 -STUMPY_P_NORM_THRESHOLD = 1e-14 -STUMPY_TEST_PRECISION = 5 -STUMPY_MAX_P_NORM_DISTANCE = np.finfo(np.float64).max -STUMPY_MAX_DISTANCE = np.sqrt(STUMPY_MAX_P_NORM_DISTANCE) -STUMPY_EXCL_ZONE_DENOM = 4 +_STUMPY_DEFAULTS = { + "STUMPY_THREADS_PER_BLOCK": 512, + "STUMPY_MEAN_STD_NUM_CHUNKS": 1, + "STUMPY_MEAN_STD_MAX_ITER": 10, + "STUMPY_DENOM_THRESHOLD": 1e-14, + "STUMPY_STDDEV_THRESHOLD": 1e-7, + "STUMPY_P_NORM_THRESHOLD": 1e-14, + "STUMPY_TEST_PRECISION": 5, + "STUMPY_MAX_P_NORM_DISTANCE": np.finfo(np.float64).max, + "STUMPY_MAX_DISTANCE": np.sqrt(np.finfo(np.float64).max), + "STUMPY_EXCL_ZONE_DENOM": 4, + "STUMPY_FASTMATH_TRUE": True, + "STUMPY_FASTMATH_FLAGS": {"nsz", "arcp", "contract", "afn", "reassoc"}, +} + +# In addition to these configuration variables, there exist config variables +# that have the default value of the fastmath flag of the njit functions. The +# name of this config variable has the following format: +# STUMPY_FASTMATH_. 
+# See __init__.py for more details + +STUMPY_THREADS_PER_BLOCK = _STUMPY_DEFAULTS["STUMPY_THREADS_PER_BLOCK"] +STUMPY_MEAN_STD_NUM_CHUNKS = _STUMPY_DEFAULTS["STUMPY_MEAN_STD_NUM_CHUNKS"] +STUMPY_MEAN_STD_MAX_ITER = _STUMPY_DEFAULTS["STUMPY_MEAN_STD_MAX_ITER"] +STUMPY_DENOM_THRESHOLD = _STUMPY_DEFAULTS["STUMPY_DENOM_THRESHOLD"] +STUMPY_STDDEV_THRESHOLD = _STUMPY_DEFAULTS["STUMPY_STDDEV_THRESHOLD"] +STUMPY_P_NORM_THRESHOLD = _STUMPY_DEFAULTS["STUMPY_P_NORM_THRESHOLD"] +STUMPY_TEST_PRECISION = _STUMPY_DEFAULTS["STUMPY_TEST_PRECISION"] +STUMPY_MAX_P_NORM_DISTANCE = _STUMPY_DEFAULTS["STUMPY_MAX_P_NORM_DISTANCE"] +STUMPY_MAX_DISTANCE = _STUMPY_DEFAULTS["STUMPY_MAX_DISTANCE"] +STUMPY_EXCL_ZONE_DENOM = _STUMPY_DEFAULTS["STUMPY_EXCL_ZONE_DENOM"] +STUMPY_FASTMATH_TRUE = _STUMPY_DEFAULTS["STUMPY_FASTMATH_TRUE"] +STUMPY_FASTMATH_FLAGS = _STUMPY_DEFAULTS["STUMPY_FASTMATH_FLAGS"] + + +def _reset(var=None): + """ + Reset the value of a configuration variable(s) to their default value(s) + + Parameters + ---------- + var : str, default None + The name of the configuration variable. If None, then all + configuration variables are reset to their default values. 
+ + Returns + ------- + None + """ + config_vars = [ + k for k, _ in globals().items() if k.isupper() and k.startswith("STUMPY") + ] + + if var is None: + for config_var in config_vars: + globals()[config_var] = _STUMPY_DEFAULTS[config_var] + elif var in config_vars: + globals()[var] = _STUMPY_DEFAULTS[var] + else: # pragma: no cover + msg = ( + f"Configuration reset was skipped for unrecognized '_STUMPY_DEFAULT[{var}]'" + ) + warnings.warn(msg) + + return diff --git a/stumpy/core.py b/stumpy/core.py index 21d3f17a8..a7758c2fd 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -590,7 +590,7 @@ def check_window_size(m, max_size=None): raise ValueError(f"The window size must be less than or equal to {max_size}") -@njit(fastmath=True) +@njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _sliding_dot_product(Q, T): """ A Numba JIT-compiled implementation of the sliding window dot product. @@ -658,7 +658,7 @@ def sliding_dot_product(Q, T): @njit( # "f8[:](f8[:], i8, b1[:])", - fastmath={"nsz", "arcp", "contract", "afn", "reassoc"} + fastmath=config.STUMPY_FASTMATH_FLAGS ) def _welford_nanvar(a, w, a_subseq_isfinite): """ @@ -772,7 +772,7 @@ def welford_nanstd(a, w=None): return np.sqrt(np.clip(welford_nanvar(a, w), a_min=0, a_max=None)) -@njit(parallel=True, fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) +@njit(parallel=True, fastmath=config.STUMPY_FASTMATH_FLAGS) def _rolling_nanstd_1d(a, w): """ A Numba JIT-compiled and parallelized function for computing the rolling standard @@ -1043,7 +1043,7 @@ def compute_mean_std(T, m): @njit( # "f8(i8, f8, f8, f8, f8, f8)", - fastmath={"nsz", "arcp", "contract", "afn", "reassoc"} + fastmath=config.STUMPY_FASTMATH_FLAGS ) def _calculate_squared_distance( m, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconstant @@ -1111,7 +1111,7 @@ def _calculate_squared_distance( @njit( # "f8[:](i8, f8[:], f8, f8, f8[:], f8[:])", - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _calculate_squared_distance_profile( 
m, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconstant @@ -1177,7 +1177,7 @@ def _calculate_squared_distance_profile( @njit( # "f8[:](i8, f8[:], f8, f8, f8[:], f8[:])", - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def calculate_distance_profile( m, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconstant @@ -1230,7 +1230,7 @@ def calculate_distance_profile( return np.sqrt(D_squared) -@njit(fastmath=True) +@njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _p_norm_distance_profile(Q, T, p=2.0): """ A Numba JIT-compiled and parallelized function for computing the p-normalized @@ -1506,7 +1506,7 @@ def mueen_calculate_distance_profile(Q, T): @njit( # "f8[:](f8[:], f8[:], f8[:], f8, f8, f8[:], f8[:])", - fastmath=True + fastmath=config.STUMPY_FASTMATH_TRUE ) def _mass(Q, T, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconstant): """ @@ -1979,7 +1979,7 @@ def _get_QT(start, T_A, T_B, m): @njit( # ["(f8[:], i8, i8)", "(f8[:, :], i8, i8)"], - fastmath=True + fastmath=config.STUMPY_FASTMATH_TRUE ) def _apply_exclusion_zone(a, idx, excl_zone, val): """ @@ -2317,7 +2317,7 @@ def array_to_temp_file(a): @njit( # "i8[:](i8[:], i8, i8, i8)", - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _count_diagonal_ndist(diags, m, n_A, n_B): """ @@ -2356,7 +2356,7 @@ def _count_diagonal_ndist(diags, m, n_A, n_B): @njit( # "i8[:, :](i8[:], i8, b1)" - fastmath=True + fastmath=config.STUMPY_FASTMATH_TRUE ) def _get_array_ranges(a, n_chunks, truncate): """ @@ -2405,7 +2405,7 @@ def _get_array_ranges(a, n_chunks, truncate): @njit( # "i8[:, :](i8, i8, b1)" - fastmath=True + fastmath=config.STUMPY_FASTMATH_TRUE ) def _get_ranges(size, n_chunks, truncate): """ @@ -2516,7 +2516,7 @@ def rolling_isfinite(a, w): ) -@njit(parallel=True, fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) +@njit(parallel=True, fastmath=config.STUMPY_FASTMATH_FLAGS) def _rolling_isconstant(a, w): """ Compute the rolling isconstant for 1-D array. 
@@ -2853,7 +2853,7 @@ def _idx_to_mp( return P -@njit(fastmath=True) +@njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_width): """ Count the total number of distances covered by a range of diagonals @@ -3258,7 +3258,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): return MPdist -@njit(fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) +@njit(fastmath=config.STUMPY_FASTMATH_FLAGS) def _merge_topk_PI(PA, PB, IA, IB): """ Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place). @@ -3331,7 +3331,7 @@ def _merge_topk_PI(PA, PB, IA, IB): IA[i] = tmp_I -@njit(fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) +@njit(fastmath=config.STUMPY_FASTMATH_FLAGS) def _merge_topk_ρI(ρA, ρB, IA, IB): """ Merge two top-k pearson profiles `ρA` and `ρB`, and update `ρA` (in place). @@ -3405,7 +3405,7 @@ def _merge_topk_ρI(ρA, ρB, IA, IB): IA[i] = tmp_I -@njit(fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) +@njit(fastmath=config.STUMPY_FASTMATH_FLAGS) def _shift_insert_at_index(a, idx, v, shift="right"): """ If `shift=right` (default), all elements in `a[idx:]` are shifted to the right by @@ -3981,7 +3981,7 @@ def _mdl(disc_subseqs, disc_neighbors, S, n_bit=8): @njit( # "(i8, i8, f8[:, :], f8[:], i8, f8[:, :], i8[:, :], f8)", - fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _compute_multi_PI(d, idx, D, D_prime, range_start, P, I, p=2.0): """ @@ -4381,7 +4381,7 @@ def get_ray_nworkers(ray_client): return int(ray_client.cluster_resources().get("CPU")) -@njit(fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) +@njit(fastmath=config.STUMPY_FASTMATH_FLAGS) def _update_incremental_PI(D, P, I, excl_zone, n_appended=0): """ Given the 1D array distance profile, `D`, of the last subsequence of T, diff --git a/stumpy/fastmath.py b/stumpy/fastmath.py new file mode 100644 index 000000000..4e124026e --- /dev/null +++ 
b/stumpy/fastmath.py @@ -0,0 +1,82 @@ +import importlib + +from numba import njit + +from . import config + + +@njit(fastmath=config.STUMPY_FASTMATH_TRUE) +def _add_assoc(x, y): # pragma: no cover + """ + A dummy function to test the fastmath module + + Parameters + ---------- + x : float + A float value + + y : floatf + A float value + + Returns + ------- + out : float + The ouput valus + + Notes + ----- + This is taken from the following link: + https://numba.pydata.org/numba-doc/dev/user/performance-tips.html#fastmath + """ + return (x - y) + y + + +def _set(module_name, func_name, flag): + """ + Set fastmath flag for a given function + + Parameters + ---------- + module_name : str + The module name + + func_name : str + The function name + + flag : set or bool + The fastmath flag + + Returns + ------- + None + """ + module = importlib.import_module(f".{module_name}", package="stumpy") + func = getattr(module, func_name) + func.targetoptions["fastmath"] = flag + func.recompile() + + return + + +def _reset(module_name, func_name): + """ + Reset the value of fastmath to its default value + + Parameters + ---------- + module_name : str + The module name + + func_name : str + The function name + + Returns + ------- + None + """ + key = module_name + "." 
+ func_name + key = "STUMPY_FASTMATH_" + key.upper() + default_flag = config._STUMPY_DEFAULTS[key] + _set(module_name, func_name, default_flag) + + return diff --git a/stumpy/maamp.py b/stumpy/maamp.py index f2aa5f6a4..dad6748c3 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -592,7 +592,7 @@ def _get_multi_p_norm(start, T, m, p=2.0): # "(i8, i8, i8, f8[:, :], f8[:, :], i8, i8, b1[:, :], b1[:, :], f8," # "f8[:, :], f8[:, :], f8[:, :])", parallel=True, - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _compute_multi_p_norm( d, diff --git a/stumpy/mstump.py b/stumpy/mstump.py index 339c8bcb7..c4b7ed2c9 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -811,7 +811,7 @@ def _get_multi_QT(start, T, m): # "(i8, i8, i8, f8[:, :], f8[:, :], i8, i8, f8[:, :], f8[:, :], f8[:, :]," # "f8[:, :], f8[:, :], f8[:, :], f8[:, :])", parallel=True, - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _compute_multi_D( d, diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index 5c62606e3..56d83f6b6 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -83,7 +83,7 @@ def _preprocess_prescraamp(T_A, m, T_B=None, s=None): return (T_A, T_B, T_A_subseq_isfinite, T_B_subseq_isfinite, indices, s, excl_zone) -@njit(fastmath=True) +@njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _compute_PI( T_A, T_B, @@ -286,7 +286,7 @@ def _compute_PI( # "(f8[:], f8[:], i8, b1[:], b1[:], f8, i8, i8, f8[:], f8[:]," # "i8[:], optional(i8))", parallel=True, - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _prescraamp( T_A, diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 7367a8885..dd5617480 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -133,7 +133,7 @@ def _preprocess_prescrump( ) -@njit(fastmath=True) +@njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _compute_PI( T_A, T_B, @@ -384,7 +384,7 @@ def _compute_PI( # "(f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:], i8, i8, f8[:], f8[:]," # "i8[:], optional(i8))", parallel=True, - 
fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _prescrump( T_A, diff --git a/stumpy/stump.py b/stumpy/stump.py index 6c3a10721..18409c6e1 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -15,7 +15,7 @@ # "(f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], f8[:]," # "b1[:], b1[:], b1[:], b1[:], i8[:], i8, i8, i8, f8[:, :, :], f8[:, :]," # "f8[:, :], i8[:, :, :], i8[:, :], i8[:, :], b1)", - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _compute_diagonal( T_A, @@ -247,7 +247,7 @@ def _compute_diagonal( # "(f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], b1[:], b1[:]," # "b1[:], b1[:], i8[:], b1, i8)", parallel=True, - fastmath=True, + fastmath=config.STUMPY_FASTMATH_TRUE, ) def _stump( T_A, diff --git a/test.sh b/test.sh index cfaaee95d..049ef1b5b 100755 --- a/test.sh +++ b/test.sh @@ -98,6 +98,13 @@ check_fastmath() echo "Checking Missing fastmath flags in njit functions" ./fastmath.py --check stumpy check_errs $? + + echo "Checking hardcoded fastmath flags in njit functions" + if [[ $(grep -n fastmath= stumpy/*py | grep -vE 'fastmath=config' | wc -l) -gt "0" ]]; then + grep -n fastmath= stumpy/*py | grep -vE 'fastmath=config' + echo "Found one or more \`@njit()\` functions with a hardcoded \`fastmath\` flag." 
+ exit 1 + fi } check_pkg_imports() @@ -163,14 +170,14 @@ set_ray_coveragerc() show_coverage_report() { set_ray_coveragerc - coverage report -m --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py $fcoveragerc + coverage report -m --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py,tests/test_cache.py,tests/test_fastmath.py $fcoveragerc } gen_coverage_xml_report() { # This function saves the coverage report in Cobertura XML format, which is compatible with codecov set_ray_coveragerc - coverage xml -o $fcoveragexml --fail-under=100 --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py $fcoveragerc + coverage xml -o $fcoveragexml --fail-under=100 --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py,tests/test_cache.py,tests/test_fastmath.py $fcoveragerc } test_custom() diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 000000000..655ca49b4 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,27 @@ +import numba +import numpy as np +import pytest + +from stumpy import cache, stump + +if numba.config.DISABLE_JIT: + pytest.skip("Skipping Tests JIT is disabled", allow_module_level=True) + + +def test_cache_save_after_clear(): + T = np.random.rand(10) + m = 3 + stump(T, m) + + cache.save() + ref_cache = cache._get_cache() + + cache.clear() + # testing cache._clear() + assert len(cache._get_cache()) == 0 + + cache.save() + comp_cache = cache._get_cache() + + # testing cache._save() after cache._clear() + assert sorted(ref_cache) == sorted(comp_cache) diff --git a/tests/test_config.py b/tests/test_config.py index 2ee09d996..e2e2412fc 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -9,3 +9,24 @@ def test_change_excl_zone_denom(): config.STUMPY_EXCL_ZONE_DENOM = 4 assert core.get_max_window_size(10) == 7 + + +def test_reset_one_var(): + 
ref = config.STUMPY_EXCL_ZONE_DENOM + + config.STUMPY_EXCL_ZONE_DENOM += 1 + config._reset("STUMPY_EXCL_ZONE_DENOM") + + assert config.STUMPY_EXCL_ZONE_DENOM == ref + + +def test_reset_all_vars(): + ref_fastmath = config.STUMPY_FASTMATH_TRUE + ref_excl_zone_denom = config.STUMPY_EXCL_ZONE_DENOM + + config.STUMPY_FASTMATH_TRUE = not config.STUMPY_FASTMATH_TRUE + config.STUMPY_EXCL_ZONE_DENOM += 1 + + config._reset() + assert config.STUMPY_FASTMATH_TRUE == ref_fastmath + assert config.STUMPY_EXCL_ZONE_DENOM == ref_excl_zone_denom diff --git a/tests/test_fastmath.py b/tests/test_fastmath.py new file mode 100644 index 000000000..b80e0c78a --- /dev/null +++ b/tests/test_fastmath.py @@ -0,0 +1,44 @@ +import numba +import numpy as np +import pytest + +from stumpy import fastmath + +if numba.config.DISABLE_JIT: + pytest.skip("Skipping Tests JIT is disabled", allow_module_level=True) + + +def test_set(): + # The test is done by changing the value of fastmath flag for + # the fastmath._add_assoc function, taken from the following link: + # https://numba.pydata.org/numba-doc/dev/user/performance-tips.html#fastmath + + # case1: flag=False + fastmath._set("fastmath", "_add_assoc", flag=False) + out = fastmath._add_assoc(0, np.inf) + assert np.isnan(out) + + # case2: flag={'reassoc', 'nsz'} + fastmath._set("fastmath", "_add_assoc", flag={"reassoc", "nsz"}) + out = fastmath._add_assoc(0, np.inf) + assert out == 0.0 + + # case3: flag={'reassoc'} + fastmath._set("fastmath", "_add_assoc", flag={"reassoc"}) + out = fastmath._add_assoc(0, np.inf) + assert np.isnan(out) + + # case4: flag={'nsz'} + fastmath._set("fastmath", "_add_assoc", flag={"nsz"}) + out = fastmath._add_assoc(0, np.inf) + assert np.isnan(out) + + +def test_reset(): + # The test is done by changing the value of fastmath flag for + # the fastmath._add_assoc function, taken from the following link: + # https://numba.pydata.org/numba-doc/dev/user/performance-tips.html#fastmath + # and then reset it to the default value, 
i.e. `True` + fastmath._set("fastmath", "_add_assoc", False) + fastmath._reset("fastmath", "_add_assoc") + assert fastmath._add_assoc(0.0, np.inf) == 0.0 diff --git a/tests/test_precision.py b/tests/test_precision.py index c87985092..12a4481f8 100644 --- a/tests/test_precision.py +++ b/tests/test_precision.py @@ -2,13 +2,14 @@ from unittest.mock import patch import naive +import numba import numpy as np import numpy.testing as npt import pytest from numba import cuda import stumpy -from stumpy import config, core +from stumpy import cache, config, core, fastmath try: from numba.errors import NumbaPerformanceWarning @@ -137,6 +138,7 @@ def test_snippets(): ) = naive.mpdist_snippets( T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func ) + ( cmp_snippets, cmp_indices, @@ -146,6 +148,27 @@ def test_snippets(): cmp_regimes, ) = stumpy.snippets(T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func) + if ( + not np.allclose(ref_snippets, cmp_snippets) and not numba.config.DISABLE_JIT + ): # pragma: no cover + # Revise fastmath flags by removing reassoc (to improve precision), + # recompile njit functions, and re-compute snippets. 
+ fastmath._set( + "core", "_calculate_squared_distance", {"nsz", "arcp", "contract", "afn"} + ) + cache._recompile() + + ( + cmp_snippets, + cmp_indices, + cmp_profiles, + cmp_fractions, + cmp_areas, + cmp_regimes, + ) = stumpy.snippets( + T, m, k, s=s, mpdist_T_subseq_isconstant=isconstant_custom_func + ) + npt.assert_almost_equal( ref_snippets, cmp_snippets, decimal=config.STUMPY_TEST_PRECISION ) @@ -161,6 +184,11 @@ def test_snippets(): npt.assert_almost_equal(ref_areas, cmp_areas, decimal=config.STUMPY_TEST_PRECISION) npt.assert_almost_equal(ref_regimes, cmp_regimes) + if not numba.config.DISABLE_JIT: # pragma: no cover + # Revert fastmath flag back to their default values + fastmath._reset("core", "_calculate_squared_distance") + cache._recompile() + @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) @patch("stumpy.config.STUMPY_THREADS_PER_BLOCK", TEST_THREADS_PER_BLOCK) From 3676545d9d7622e7cbf79141b05f040d28a5954f Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 23 Jan 2025 06:46:32 -0500 Subject: [PATCH 32/54] Added comment that exclusion zones are applied to query motif --- stumpy/aamp_motifs.py | 2 ++ stumpy/motifs.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/stumpy/aamp_motifs.py b/stumpy/aamp_motifs.py index c7a852f72..f49c11c90 100644 --- a/stumpy/aamp_motifs.py +++ b/stumpy/aamp_motifs.py @@ -133,6 +133,8 @@ def _aamp_motifs( query_matches = np.array([[np.nan, candidate_idx]]) for idx in query_matches[:, 1]: + # Since the query motif is also included as the first item in the list of + # `query_matches`, the exclusion zone is also applied to the query motif! 
core.apply_exclusion_zone(P, int(idx), excl_zone, np.inf) candidate_idx = np.argmin(P[-1]) diff --git a/stumpy/motifs.py b/stumpy/motifs.py index de381f88b..5f049bcd1 100644 --- a/stumpy/motifs.py +++ b/stumpy/motifs.py @@ -138,6 +138,8 @@ def _motifs( query_matches = np.array([[np.nan, candidate_idx]]) for idx in query_matches[:, 1]: + # Since the query motif is also included as the first item in the list of + # `query_matches`, the exclusion zone is also applied to the query motif! core.apply_exclusion_zone(P, int(idx), excl_zone, np.inf) candidate_idx = np.argmin(P[-1]) From db1958a25337eb1a4839b6153421875778383093 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sat, 25 Jan 2025 11:55:26 -0500 Subject: [PATCH 33/54] Enabled coverage testing of cache.py module --- stumpy/cache.py | 31 +++++++++++++++++++++++++------ test.sh | 4 ++-- tests/test_cache.py | 8 ++++---- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/stumpy/cache.py b/stumpy/cache.py index a08f00b36..fbaf35230 100644 --- a/stumpy/cache.py +++ b/stumpy/cache.py @@ -50,7 +50,7 @@ def get_njit_funcs(): func_name = node.name for decorator in node.decorator_list: decorator_name = None - if isinstance(decorator, ast.Name): + if isinstance(decorator, ast.Name): # pragma: no cover # Bare decorator decorator_name = decorator.id if isinstance(decorator, ast.Call) and isinstance( @@ -79,7 +79,7 @@ def _enable(): """ frame = inspect.currentframe() caller_name = inspect.getouterframes(frame)[1].function - if caller_name != "_save": + if caller_name != "_save": # pragma: no cover msg = ( "The 'cache._enable()' function is deprecated and no longer supported. 
" + "Please use 'cache.save()' instead" @@ -90,7 +90,16 @@ def _enable(): for module_name, func_name in njit_funcs: module = importlib.import_module(f".{module_name}", package="stumpy") func = getattr(module, func_name) - func.enable_caching() + try: + func.enable_caching() + except AttributeError as e: + if ( + numba.config.DISABLE_JIT + and str(e) == "'function' object has no attribute 'enable_caching'" + ): + pass + else: # pragma: no cover + raise def _clear(): @@ -167,7 +176,16 @@ def _recompile(): for module_name, func_name in get_njit_funcs(): module = importlib.import_module(f".{module_name}", package="stumpy") func = getattr(module, func_name) - func.recompile() + try: + func.recompile() + except AttributeError as e: + if ( + numba.config.DISABLE_JIT + and str(e) == "'function' object has no attribute 'recompile'" + ): + pass + else: # pragma: no cover + raise return @@ -206,8 +224,9 @@ def save(): if numba.config.DISABLE_JIT: msg = "Could not save/cache function because NUMBA JIT is disabled" warnings.warn(msg) - else: + else: # pragma: no cover warnings.warn(CACHE_WARNING) - _save() + + _save() return diff --git a/test.sh b/test.sh index 049ef1b5b..6696c0b32 100755 --- a/test.sh +++ b/test.sh @@ -170,14 +170,14 @@ set_ray_coveragerc() show_coverage_report() { set_ray_coveragerc - coverage report -m --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py,tests/test_cache.py,tests/test_fastmath.py $fcoveragerc + coverage report -m --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,tests/test_fastmath.py $fcoveragerc } gen_coverage_xml_report() { # This function saves the coverage report in Cobertura XML format, which is compatible with codecov set_ray_coveragerc - coverage xml -o $fcoveragexml --fail-under=100 --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,stumpy/cache.py,tests/test_cache.py,tests/test_fastmath.py 
$fcoveragerc + coverage xml -o $fcoveragexml --fail-under=100 --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,tests/test_fastmath.py $fcoveragerc } test_custom() diff --git a/tests/test_cache.py b/tests/test_cache.py index 655ca49b4..4b8af788d 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,11 +1,11 @@ -import numba import numpy as np -import pytest from stumpy import cache, stump -if numba.config.DISABLE_JIT: - pytest.skip("Skipping Tests JIT is disabled", allow_module_level=True) + +def test_cache_get_njit_funcs(): + njit_funcs = cache.get_njit_funcs() + assert len(njit_funcs) > 0 def test_cache_save_after_clear(): From 92a2b4134cd2fd5d6f9cbc394d70ae204c333263 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sat, 25 Jan 2025 19:18:40 -0500 Subject: [PATCH 34/54] Added coverage testing for fastmath.py --- stumpy/config.py | 1 + stumpy/fastmath.py | 14 ++++++++++++-- test.sh | 4 ++-- tests/test_fastmath.py | 14 ++++++++------ 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/stumpy/config.py b/stumpy/config.py index d66ff76eb..f4378fbe6 100644 --- a/stumpy/config.py +++ b/stumpy/config.py @@ -19,6 +19,7 @@ "STUMPY_EXCL_ZONE_DENOM": 4, "STUMPY_FASTMATH_TRUE": True, "STUMPY_FASTMATH_FLAGS": {"nsz", "arcp", "contract", "afn", "reassoc"}, + "STUMPY_FASTMATH_FASTMATH._ADD_ASSOC": True, } # In addition to these configuration variables, there exist config variables diff --git a/stumpy/fastmath.py b/stumpy/fastmath.py index 4e124026e..43a46ec5a 100644 --- a/stumpy/fastmath.py +++ b/stumpy/fastmath.py @@ -1,5 +1,6 @@ import importlib +import numba from numba import njit from . 
import config @@ -52,8 +53,17 @@ def _set(module_name, func_name, flag): """ module = importlib.import_module(f".{module_name}", package="stumpy") func = getattr(module, func_name) - func.targetoptions["fastmath"] = flag - func.recompile() + try: + func.targetoptions["fastmath"] = flag + func.recompile() + except AttributeError as e: + if numba.config.DISABLE_JIT and ( + str(e) == "'function' object has no attribute 'targetoptions'" + or str(e) == "'function' object has no attribute 'recompile'" + ): + pass + else: # pragma: no cover + raise return diff --git a/test.sh b/test.sh index 6696c0b32..06070c7c6 100755 --- a/test.sh +++ b/test.sh @@ -170,14 +170,14 @@ set_ray_coveragerc() show_coverage_report() { set_ray_coveragerc - coverage report -m --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,tests/test_fastmath.py $fcoveragerc + coverage report -m --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py $fcoveragerc } gen_coverage_xml_report() { # This function saves the coverage report in Cobertura XML format, which is compatible with codecov set_ray_coveragerc - coverage xml -o $fcoveragexml --fail-under=100 --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py,tests/test_fastmath.py $fcoveragerc + coverage xml -o $fcoveragexml --fail-under=100 --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py $fcoveragerc } test_custom() diff --git a/tests/test_fastmath.py b/tests/test_fastmath.py index b80e0c78a..d2a993069 100644 --- a/tests/test_fastmath.py +++ b/tests/test_fastmath.py @@ -1,12 +1,8 @@ import numba import numpy as np -import pytest from stumpy import fastmath -if numba.config.DISABLE_JIT: - pytest.skip("Skipping Tests JIT is disabled", allow_module_level=True) - def test_set(): # The test is done by changing the value of fastmath flag for @@ -21,7 +17,10 @@ def test_set(): # case2: flag={'reassoc', 'nsz'} 
fastmath._set("fastmath", "_add_assoc", flag={"reassoc", "nsz"}) out = fastmath._add_assoc(0, np.inf) - assert out == 0.0 + if numba.config.DISABLE_JIT: + assert np.isnan(out) + else: # pragma: no cover + assert out == 0.0 # case3: flag={'reassoc'} fastmath._set("fastmath", "_add_assoc", flag={"reassoc"}) @@ -41,4 +40,7 @@ def test_reset(): # and then reset it to the default value, i.e. `True` fastmath._set("fastmath", "_add_assoc", False) fastmath._reset("fastmath", "_add_assoc") - assert fastmath._add_assoc(0.0, np.inf) == 0.0 + if numba.config.DISABLE_JIT: + assert np.isnan(fastmath._add_assoc(0.0, np.inf)) + else: # pragma: no cover + assert fastmath._add_assoc(0.0, np.inf) == 0.0 From 20094cc9881153fa61745087c7ece3d0f1a07fdc Mon Sep 17 00:00:00 2001 From: "Sean M. Law" <7473521+seanlaw@users.noreply.github.com> Date: Sun, 26 Jan 2025 12:13:07 -0500 Subject: [PATCH 35/54] Update fastmath.py to use proper default config --- stumpy/fastmath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/fastmath.py b/stumpy/fastmath.py index 43a46ec5a..acc65eee0 100644 --- a/stumpy/fastmath.py +++ b/stumpy/fastmath.py @@ -6,7 +6,7 @@ from . import config -@njit(fastmath=config.STUMPY_FASTMATH_TRUE) +@njit(fastmath=config.STUMPY_FASTMATH_FASTMATH._ADD_ASSOC) def _add_assoc(x, y): # pragma: no cover """ A dummy function to test the fastmath module From 3fd881632f61a2bc2baf77e53ad574dd784accc4 Mon Sep 17 00:00:00 2001 From: "Sean M. Law" <7473521+seanlaw@users.noreply.github.com> Date: Sun, 26 Jan 2025 12:17:54 -0500 Subject: [PATCH 36/54] Fixed typo --- stumpy/fastmath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/fastmath.py b/stumpy/fastmath.py index acc65eee0..1bfc1d4ec 100644 --- a/stumpy/fastmath.py +++ b/stumpy/fastmath.py @@ -6,7 +6,7 @@ from . 
import config -@njit(fastmath=config.STUMPY_FASTMATH_FASTMATH._ADD_ASSOC) +@njit(fastmath=config._STUMPY_DEFAULTS['STUMPY_FASTMATH_FASTMATH._ADD_ASSOC']) def _add_assoc(x, y): # pragma: no cover """ A dummy function to test the fastmath module From bbc97e4c8ba7a4cb4976d0afaf7ab0cacefbaf87 Mon Sep 17 00:00:00 2001 From: "Sean M. Law" <7473521+seanlaw@users.noreply.github.com> Date: Sun, 26 Jan 2025 12:20:52 -0500 Subject: [PATCH 37/54] Fixed bad black parentheses formatting --- stumpy/fastmath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/fastmath.py b/stumpy/fastmath.py index 1bfc1d4ec..5aac4ee0a 100644 --- a/stumpy/fastmath.py +++ b/stumpy/fastmath.py @@ -6,7 +6,7 @@ from . import config -@njit(fastmath=config._STUMPY_DEFAULTS['STUMPY_FASTMATH_FASTMATH._ADD_ASSOC']) +@njit(fastmath=config._STUMPY_DEFAULTS["STUMPY_FASTMATH_FASTMATH._ADD_ASSOC"]) def _add_assoc(x, y): # pragma: no cover """ A dummy function to test the fastmath module From 82ecd5110a39e36c9dfbc6bb8698c6381e790c6f Mon Sep 17 00:00:00 2001 From: "Sean M. Law" <7473521+seanlaw@users.noreply.github.com> Date: Wed, 29 Jan 2025 11:00:15 -0500 Subject: [PATCH 38/54] Fixed black formatting in test_floss.py --- tests/test_floss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_floss.py b/tests/test_floss.py index 3a11a5320..74735d1af 100644 --- a/tests/test_floss.py +++ b/tests/test_floss.py @@ -92,7 +92,7 @@ def naive_rea(cac, n_regimes, L, excl_factor): return np.array(loc_regimes, dtype=np.int64) -test_data = [(np.random.randint(0, 50, size=50, dtype=np.int64))] +test_data = [np.random.randint(0, 50, size=50, dtype=np.int64)] substitution_locations = [(slice(0, 0), 0, -1, slice(1, 3), [0, 3])] substitution_values = [np.nan, np.inf] From d6bfb4ac2e0cc7b281ba948132d03a271567d53e Mon Sep 17 00:00:00 2001 From: "Sean M. 
Law" <7473521+seanlaw@users.noreply.github.com> Date: Sat, 1 Feb 2025 12:20:45 -0500 Subject: [PATCH 39/54] Fixed #1069 Added numba cache dir for pytest (#1070) * Added numba cache dir for pytest * Added cache._clear() to cache._save() * Removed recompile from fastmath * Added ref cache length check * Improved coverage * Fixed black formatting * Fixed if to elif * Made get_cache more verbose * Added warning * Fixed typo * Refactored code * Cleaned up from comments * Added warning to clear before save * Reset CACHE_CLEARED after cache._save() is called * Cleaned up code * Added detailed cache note * Fixed black formatting * Added example * Updated test and added more comments --- stumpy/cache.py | 84 +++++++++++++++++++++++++++++++++++------- stumpy/fastmath.py | 8 +++- tests/test_cache.py | 28 ++++++++++---- tests/test_fastmath.py | 8 +++- 4 files changed, 103 insertions(+), 25 deletions(-) diff --git a/stumpy/cache.py b/stumpy/cache.py index fbaf35230..6c3552079 100644 --- a/stumpy/cache.py +++ b/stumpy/cache.py @@ -15,6 +15,7 @@ CACHE_WARNING += "and should never be used or depended upon as it is not supported! " CACHE_WARNING += "All caching capabilities are not tested and may be removed/changed " CACHE_WARNING += "without prior notice. Please proceed with caution!" 
+CACHE_CLEARED = True def get_njit_funcs(): @@ -102,48 +103,60 @@ def _enable(): raise -def _clear(): +def _clear(cache_dir=None): """ Clear numba cache Parameters ---------- - None + cache_dir : str, default None + The path to the numba cache directory Returns ------- None """ - site_pkg_dir = site.getsitepackages()[0] - numba_cache_dir = site_pkg_dir + "/stumpy/__pycache__" + global CACHE_CLEARED + + if cache_dir is not None: + numba_cache_dir = str(cache_dir) + else: # pragma: no cover + site_pkg_dir = site.getsitepackages()[0] + numba_cache_dir = site_pkg_dir + "/stumpy/__pycache__" + [f.unlink() for f in pathlib.Path(numba_cache_dir).glob("*nb*") if f.is_file()] + CACHE_CLEARED = True + -def clear(): +def clear(cache_dir=None): """ Clear numba cache directory Parameters ---------- - None + cache_dir : str, default None + The path to the numba cache directory. When `cache_dir` is `None`, then this + defaults to `site-packages/stumpy/__pycache__`. Returns ------- None """ warnings.warn(CACHE_WARNING) - _clear() + _clear(cache_dir) return -def _get_cache(): +def _get_cache(cache_dir=None): """ Retrieve a list of cached numba functions Parameters ---------- - None + cache_dir : str + The path to the numba cache directory Returns ------- @@ -151,9 +164,17 @@ def _get_cache(): A list of cached numba functions """ warnings.warn(CACHE_WARNING) - site_pkg_dir = site.getsitepackages()[0] - numba_cache_dir = site_pkg_dir + "/stumpy/__pycache__" - return [f.name for f in pathlib.Path(numba_cache_dir).glob("*nb*") if f.is_file()] + if cache_dir is not None: + numba_cache_dir = str(cache_dir) + else: # pragma: no cover + site_pkg_dir = site.getsitepackages()[0] + numba_cache_dir = site_pkg_dir + "/stumpy/__pycache__" + + return [ + f"{numba_cache_dir}/{f.name}" + for f in pathlib.Path(numba_cache_dir).glob("*nb*") + if f.is_file() + ] def _recompile(): @@ -202,16 +223,24 @@ def _save(): ------- None """ + global CACHE_CLEARED + + if not CACHE_CLEARED: # pragma: no cover + 
msg = "Numba njit cached files are not cleared before saving/overwriting. " + msg = "You may need to call `cache.clear()` before calling `cache.save()`." + warnings.warn(msg) + _enable() _recompile() + CACHE_CLEARED = False + return def save(): """ - Save/overwrite all the cache data files of - all-so-far compiled njit functions. + Save/overwrite all of the cached njit functions. Parameters ---------- @@ -220,6 +249,28 @@ def save(): Returns ------- None + + Notes + ----- + The cache is never cleared before saving/overwriting and may be explicitly cleared + by calling `cache.clear()` before saving. It is best practice to call `cache.save()` + only after calling all of your `njit` functions. If `cache.save()` is called for the + first time (before any `njit` function is called) then only the `.nbi` files (i.e., + the "cache index") for all `njit` functions are saved. As each `njit` function (and + sub-functions) is called then their corresponding `.nbc` file (i.e., "object code") + is saved. Each `.nbc` file will only be saved after its `njit` function is called + at least once. However, subsequent calls to `cache.save()` (after clearing the cache + via `cache.clear()`) will automatically save BOTH the `.nbi` files as well as the + `.nbc` files as long as their `njit` function has been called at least once. + + Examples + -------- + >>> import stumpy + >>> from stumpy import cache + >>> import numpy as np + >>> cache.clear() + >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) + >>> cache.save() """ if numba.config.DISABLE_JIT: msg = "Could not save/cache function because NUMBA JIT is disabled" @@ -227,6 +278,11 @@ def save(): else: # pragma: no cover warnings.warn(CACHE_WARNING) + if numba.config.CACHE_DIR != "": # pragma: no cover + msg = "Found user specified `NUMBA_CACHE_DIR`/`numba.config.CACHE_DIR`. " + msg += "The `stumpy` cache files may not be saved/cleared correctly!" 
+ warnings.warn(msg) + _save() return diff --git a/stumpy/fastmath.py b/stumpy/fastmath.py index 5aac4ee0a..de99694c1 100644 --- a/stumpy/fastmath.py +++ b/stumpy/fastmath.py @@ -1,4 +1,5 @@ import importlib +import warnings import numba from numba import njit @@ -55,12 +56,15 @@ def _set(module_name, func_name, flag): func = getattr(module, func_name) try: func.targetoptions["fastmath"] = flag - func.recompile() + msg = "One or more fastmath flags have been set/reset. " + msg += "Please call `cache._recompile()` to ensure that all njit functions " + msg += "are properly recompiled." + warnings.warn(msg) except AttributeError as e: if numba.config.DISABLE_JIT and ( str(e) == "'function' object has no attribute 'targetoptions'" - or str(e) == "'function' object has no attribute 'recompile'" ): + warnings.warn("Fastmath flags could not be set as Numba JIT is disabled") pass else: # pragma: no cover raise diff --git a/tests/test_cache.py b/tests/test_cache.py index 4b8af788d..2127c8ed2 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,3 +1,4 @@ +import numba import numpy as np from stumpy import cache, stump @@ -11,17 +12,28 @@ def test_cache_get_njit_funcs(): def test_cache_save_after_clear(): T = np.random.rand(10) m = 3 + + cache_dir = "stumpy/__pycache__" + + cache.clear(cache_dir) stump(T, m) + cache.save() # Enable and save both `.nbi` and `.nbc` cache files - cache.save() - ref_cache = cache._get_cache() + ref_cache = cache._get_cache(cache_dir) - cache.clear() - # testing cache._clear() - assert len(cache._get_cache()) == 0 + if numba.config.DISABLE_JIT: + assert len(ref_cache) == 0 + else: # pragma: no cover + assert len(ref_cache) > 0 - cache.save() - comp_cache = cache._get_cache() + cache.clear(cache_dir) + assert len(cache._get_cache(cache_dir)) == 0 + # Note that `stump(T, m)` has already been called once above and any subsequent + # calls to `cache.save()` will automatically save both `.nbi` and `.nbc` cache files + cache.save() # Save 
both `.nbi` and `.nbc` cache files + + comp_cache = cache._get_cache(cache_dir) - # testing cache._save() after cache._clear() assert sorted(ref_cache) == sorted(comp_cache) + + cache.clear(cache_dir) diff --git a/tests/test_fastmath.py b/tests/test_fastmath.py index d2a993069..a16bb6898 100644 --- a/tests/test_fastmath.py +++ b/tests/test_fastmath.py @@ -1,7 +1,7 @@ import numba import numpy as np -from stumpy import fastmath +from stumpy import cache, fastmath def test_set(): @@ -11,11 +11,13 @@ def test_set(): # case1: flag=False fastmath._set("fastmath", "_add_assoc", flag=False) + cache._recompile() out = fastmath._add_assoc(0, np.inf) assert np.isnan(out) # case2: flag={'reassoc', 'nsz'} fastmath._set("fastmath", "_add_assoc", flag={"reassoc", "nsz"}) + cache._recompile() out = fastmath._add_assoc(0, np.inf) if numba.config.DISABLE_JIT: assert np.isnan(out) @@ -24,11 +26,13 @@ def test_set(): # case3: flag={'reassoc'} fastmath._set("fastmath", "_add_assoc", flag={"reassoc"}) + cache._recompile() out = fastmath._add_assoc(0, np.inf) assert np.isnan(out) # case4: flag={'nsz'} fastmath._set("fastmath", "_add_assoc", flag={"nsz"}) + cache._recompile() out = fastmath._add_assoc(0, np.inf) assert np.isnan(out) @@ -39,7 +43,9 @@ def test_reset(): # https://numba.pydata.org/numba-doc/dev/user/performance-tips.html#fastmath # and then reset it to the default value, i.e. 
`True` fastmath._set("fastmath", "_add_assoc", False) + cache._recompile() fastmath._reset("fastmath", "_add_assoc") + cache._recompile() if numba.config.DISABLE_JIT: assert np.isnan(fastmath._add_assoc(0.0, np.inf)) else: # pragma: no cover From 3c4a00a3f387b4dddcca622a7fe4e6f3015ac05a Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 13 Mar 2025 09:15:51 -0400 Subject: [PATCH 40/54] Updated docs on `max_motifs` parameter to return all valid motifs --- stumpy/mmotifs.py | 4 +++- stumpy/motifs.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/stumpy/mmotifs.py b/stumpy/mmotifs.py index c495edbb3..df6ee9be7 100644 --- a/stumpy/mmotifs.py +++ b/stumpy/mmotifs.py @@ -73,7 +73,9 @@ def mmotifs( motif. The first match is always the self/trivial-match for each motif. max_motifs : int, default 1 - The maximum number of motifs to return. + The maximum number of motifs to return. To consider returning all possible + valid motifs, try setting `max_motifs` to the length of your input matrix + profile (i.e., `max_motifs=len(P)` atol : float, default 1e-8 The absolute tolerance parameter. This value will be added to ``max_distance`` diff --git a/stumpy/motifs.py b/stumpy/motifs.py index 5f049bcd1..0a6ceb9b7 100644 --- a/stumpy/motifs.py +++ b/stumpy/motifs.py @@ -72,7 +72,9 @@ def _motifs( the first match is always the self-match/trivial-match for each motif. max_motifs : int - The maximum number of motifs to return. + The maximum number of motifs to return. To consider returning all possible + valid motifs, try setting `max_motifs` to the length of your input matrix + profile (i.e., `max_motifs=len(P)`) atol : float, default 1e-8 The absolute tolerance parameter. 
This value will be added to `max_distance` From ab9f4892dc5501d351e38f5131adae6daf7ffbcf Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 13 Mar 2025 09:32:54 -0400 Subject: [PATCH 41/54] Updated docs --- stumpy/motifs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stumpy/motifs.py b/stumpy/motifs.py index 0a6ceb9b7..03edf103f 100644 --- a/stumpy/motifs.py +++ b/stumpy/motifs.py @@ -243,7 +243,9 @@ def motifs( self-match/trivial-match for each motif. max_motifs : int, default 1 - The maximum number of motifs to return. + The maximum number of motifs to return. To consider returning all possible + valid motifs, try setting `max_motifs` to the length of your input matrix + profile (i.e., `max_motifs=len(P)`) atol : float, default 1e-8 The absolute tolerance parameter. This value will be added to ``max_distance`` From 9504301f3a2bbf2eb912443e622f7375e4bb83a4 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Thu, 13 Mar 2025 09:54:44 -0400 Subject: [PATCH 42/54] Fixed typo --- stumpy/mmotifs.py | 2 +- stumpy/motifs.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/mmotifs.py b/stumpy/mmotifs.py index df6ee9be7..8787a855d 100644 --- a/stumpy/mmotifs.py +++ b/stumpy/mmotifs.py @@ -75,7 +75,7 @@ def mmotifs( max_motifs : int, default 1 The maximum number of motifs to return. To consider returning all possible valid motifs, try setting `max_motifs` to the length of your input matrix - profile (i.e., `max_motifs=len(P)` + profile (i.e., ``max_motifs=len(P)``) atol : float, default 1e-8 The absolute tolerance parameter. This value will be added to ``max_distance`` diff --git a/stumpy/motifs.py b/stumpy/motifs.py index 03edf103f..ab0ef03f0 100644 --- a/stumpy/motifs.py +++ b/stumpy/motifs.py @@ -74,7 +74,7 @@ def _motifs( max_motifs : int The maximum number of motifs to return. 
To consider returning all possible valid motifs, try setting `max_motifs` to the length of your input matrix - profile (i.e., `max_motifs=len(P)`) + profile (i.e., ``max_motifs=len(P)``) atol : float, default 1e-8 The absolute tolerance parameter. This value will be added to `max_distance` @@ -245,7 +245,7 @@ def motifs( max_motifs : int, default 1 The maximum number of motifs to return. To consider returning all possible valid motifs, try setting `max_motifs` to the length of your input matrix - profile (i.e., `max_motifs=len(P)`) + profile (i.e., ``max_motifs=len(P)``) atol : float, default 1e-8 The absolute tolerance parameter. This value will be added to ``max_distance`` From d5e76074e9afd822e93fa5af6f9cb9c2dd6c180c Mon Sep 17 00:00:00 2001 From: Nima Sarajpoor Date: Sat, 5 Apr 2025 22:14:07 -0400 Subject: [PATCH 43/54] Fixed #1077 Add extra check for window size (#1078) * Add extra check for window size * update module to include extra check for self join * add tests for warning * revise comment * ignore coverage * minor improvement in docstring * fix flake8 * Revised test function using expected signature * fixed format * Revise function to pass test * Update stumpy/core.py * improve comments * improve readability of function * minor improvement in the description of param * remove redundant test function * Revise logic and the comment * improving comments * minor change * minor change in comment * minor change in comment * update aamp for checking window size * improve docstring and comments * improve docstring * use smaller input to make test function more understandable * updated stumped and aamped * updated maamp and maamped modules * update different modules to consider the change in core.check_window_size * minor fix * improve comments * improve comments * improved the explanations * minor change in the description of function * improve the clarity of the logic * improve comment * improve description of function * minor change * improve readability and 
consistency * minor change * minor changes --- stumpy/aamp.py | 10 +++---- stumpy/aamped.py | 6 ++-- stumpy/aampi.py | 2 +- stumpy/core.py | 71 +++++++++++++++++++++++++++++++++++++++++---- stumpy/gpu_aamp.py | 7 ++++- stumpy/gpu_stump.py | 7 ++++- stumpy/maamp.py | 8 ++--- stumpy/maamped.py | 2 +- stumpy/mstump.py | 8 +++-- stumpy/mstumped.py | 4 ++- stumpy/scraamp.py | 7 ++++- stumpy/scrump.py | 7 ++++- stumpy/stamp.py | 6 ++-- stumpy/stomp.py | 7 ++++- stumpy/stump.py | 11 ++++--- stumpy/stumped.py | 6 ++-- stumpy/stumpi.py | 2 +- tests/test_core.py | 15 ++++++++++ 18 files changed, 145 insertions(+), 41 deletions(-) diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 1e4879bcc..6d6664537 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -407,17 +407,17 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): if T_B.ndim != 1: # pragma: no cover raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ") - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - n_A = T_A.shape[0] n_B = T_B.shape[0] l = n_A - m + 1 + ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - if ignore_trivial: + if ignore_trivial: # self-join + core.check_window_size(m, max_size=min(n_A, n_B), n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) - else: + else: # AB-join + core.check_window_size(m, max_size=min(n_A, n_B)) diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) P, PL, PR, I, IL, IR = _aamp( diff --git a/stumpy/aamped.py b/stumpy/aamped.py index 21132f281..14e6d237e 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -386,17 +386,17 @@ def aamped(client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): if T_B.ndim != 1: # pragma: no cover raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. 
") - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - n_A = T_A.shape[0] n_B = T_B.shape[0] + ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) if ignore_trivial: + core.check_window_size(m, max_size=min(n_A, n_B), n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) else: + core.check_window_size(m, max_size=min(n_A, n_B)) diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) _aamped = core._client_to_func(client) diff --git a/stumpy/aampi.py b/stumpy/aampi.py index e5c2ee8a1..7674243e5 100644 --- a/stumpy/aampi.py +++ b/stumpy/aampi.py @@ -111,7 +111,7 @@ def __init__(self, T, m, egress=True, p=2.0, k=1, mp=None): computed internally using `stumpy.aamp`. """ self._T = core._preprocess(T) - core.check_window_size(m, max_size=self._T.shape[-1]) + core.check_window_size(m, max_size=self._T.shape[0]) self._m = m self._n = self._T.shape[0] self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) diff --git a/stumpy/core.py b/stumpy/core.py index a7758c2fd..ea9b93afd 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -554,11 +554,12 @@ def get_max_window_size(n): return max_m -def check_window_size(m, max_size=None): +def check_window_size(m, max_size=None, n=None): """ Check the window size and ensure that it is greater than or equal to 3 and, if - `max_size` is provided, ensure that the window size is less than or equal to the - `max_size` + ``max_size`` is provided, ensure that the window size is less than or equal to + the ``max_size``. Furthermore, if ``n`` is provided, then a self-join is assumed + and it checks whether all subsequences have at least one non-trivial neighbor. 
Parameters ---------- @@ -568,6 +569,10 @@ def check_window_size(m, max_size=None): max_size : int, default None The maximum window size allowed + n : int, default None + The length of the time series in the case of a self-join. + ``n`` should not be supplied (or set to ``None``) in the case of an AB-join. + Returns ------- None @@ -589,6 +594,60 @@ def check_window_size(m, max_size=None): if max_size is not None and m > max_size: raise ValueError(f"The window size must be less than or equal to {max_size}") + if n is not None: + # Raise warning if there is at least one subsequence with no eligible + # (non-trivial) neighbor in the case of a self-join. + + # For any time series `T`, an "eligible nearest neighbor" subsequence for + # the central-most subsequence must be located outside the `excl_zone`, + # and the central-most subsequence will ALWAYS have the smallest relative + # (index-wise) distance to its farthest neighbor amongst all other subsequences. + # Therefore, we only need to check whether the `excl_zone` eliminates all + # "neighbors" for the central-most subsequence in `T`. In fact, we just need to + # verify whether the `excl_zone` eliminates the "neighbor" that is farthest + # away (index-wise) from the central-most subsequence. If it does not, this + # implies that all subsequences in `T` will have at least one "eligible nearest + # neighbor" that is located outside of their respective excl_zone. + + excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + l = n - m + 1 + # The start index of subsequences are: 0, 1, ..., l-1 + + # If `l` is odd + # Suppose `l == 5`. So, the start index of the subsequences + # are: 0, 1, 2, 3, 4 + # The central subsequence is located at index position c=2, with two + # farthest neighbors, one located at index 0, and the other is located + # at index 4. In both cases, the relative (index-wise) distance is 2, + # which is simply `5 // 2`. 
In general, it can be shown that the + # (index-wise) distance from the central subsequence to its farthest + # neighbor is `l // 2`. + + # If `l` is even + # Suppose `l == 6`. So, the start index of the subsequences + # are: 0, 1, 2, 3, 4, 5 + # There are two central-most subsequences, located at the index + # positions c=2 and c=3. For the central-most subsequence at index + # position c=2, its farthest neighbor will be located at index 5 (to the + # right of c=2) and, for the central-most subsequence at index position + # c=3, its farthest neighbor will be located at index 0 (to the left of + # c=3). In both cases, the relative (index-wise) distance is 3, + # which is simply `6 // 2`. In general, it can be shown that the + # (index-wise) distance from the central-most subsequence to its + # farthest neighbor is `l // 2`. + + # Therefore, regardless if `l` is even or odd, for the central + # subsequence for any time series, the index location of its + # farthest neighbor will always be `l // 2` index positions away. + diff_to_farthest_idx = l // 2 + if diff_to_farthest_idx <= excl_zone: + msg = ( + f"The window size, 'm = {m}', may be too large and could lead to " + + "meaningless results. Consider reducing 'm' where necessary" + ) + warnings.warn(msg) + @njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _sliding_dot_product(Q, T): @@ -1354,7 +1413,7 @@ def mass_absolute(Q, T, T_subseq_isfinite=None, p=2.0, query_idx=None): raise ValueError(f"`Q` is {Q.ndim}-dimensional and must be 1-dimensional. ") Q_isfinite = np.isfinite(Q) - check_window_size(m, max_size=Q.shape[-1]) + check_window_size(m, max_size=Q.shape[0]) if query_idx is not None: # pragma: no cover query_idx = int(query_idx) @@ -1701,7 +1760,7 @@ def mass( raise ValueError(f"Q is {Q.ndim}-dimensional and must be 1-dimensional. 
") Q_isfinite = np.isfinite(Q) - check_window_size(m, max_size=Q.shape[-1]) + check_window_size(m, max_size=Q.shape[0]) if query_idx is not None: query_idx = int(query_idx) @@ -1926,7 +1985,7 @@ def mass_distance_matrix( T_subseq_isconstant=T_subseq_isconstant, ) - check_window_size(m, max_size=min(Q.shape[-1], T.shape[-1])) + check_window_size(m, max_size=min(Q.shape[0], T.shape[0])) return _mass_distance_matrix( Q, diff --git a/stumpy/gpu_aamp.py b/stumpy/gpu_aamp.py index a4708a5fc..fc0c74068 100644 --- a/stumpy/gpu_aamp.py +++ b/stumpy/gpu_aamp.py @@ -536,8 +536,13 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1): "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) + if ignore_trivial: # self-join + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: # AB-join + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) n = T_B.shape[0] w = T_A.shape[0] - m + 1 diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 98e7ebd78..16166ffb2 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -666,8 +666,13 @@ def gpu_stump( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) + if ignore_trivial: # self-join + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: # AB-join + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) n = T_B.shape[0] w = T_A.shape[0] - m + 1 diff --git a/stumpy/maamp.py b/stumpy/maamp.py index dad6748c3..c30deab6f 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -140,7 +140,7 @@ def maamp_subspace( returned. 
""" T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) subseqs, _ = core.preprocess_non_normalized(T[:, subseq_idx : subseq_idx + m], m) neighbors, _ = core.preprocess_non_normalized(T[:, nn_idx : nn_idx + m], m) @@ -269,7 +269,7 @@ def maamp_mdl( A list of numpy.ndarrays that contains the `k`th-dimensional subspaces """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) if discretize_func is None: T_isfinite = np.isfinite(T) @@ -441,7 +441,7 @@ def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False, err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=T.shape[1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) if include is not None: # pragma: no cover include = core._preprocess_include(include) @@ -933,7 +933,7 @@ def maamp(T, m, include=None, discords=False, p=2.0): err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1])) + core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: include = core._preprocess_include(include) diff --git a/stumpy/maamped.py b/stumpy/maamped.py index 0665e3e51..70bdf7e66 100644 --- a/stumpy/maamped.py +++ b/stumpy/maamped.py @@ -389,7 +389,7 @@ def maamped(client, T, m, include=None, discords=False, p=2.0): err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1])) + core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: include = core._preprocess_include(include) diff --git a/stumpy/mstump.py b/stumpy/mstump.py index 
c4b7ed2c9..6ea97edb7 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -217,7 +217,7 @@ def subspace( array([0, 1]) """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant) if discretize_func is None: @@ -409,7 +409,7 @@ def mdl( (array([ 80. , 111.509775]), [array([1]), array([0, 1])]) """ T = core._preprocess(T) - core.check_window_size(m, max_size=T.shape[-1]) + core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant) if discretize_func is None: @@ -1228,7 +1228,9 @@ def mstump( err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1])) + # mstump currently only supports self-join. Therefore, the argument `n=T_A.shape[1]` + # must be passed to the function `core.check_window_size`. + core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: include = core._preprocess_include(include) diff --git a/stumpy/mstumped.py b/stumpy/mstumped.py index aabb0b6ca..d8ce4c3a5 100644 --- a/stumpy/mstumped.py +++ b/stumpy/mstumped.py @@ -505,7 +505,9 @@ def mstumped( err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" raise ValueError(f"{err}") - core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1])) + # mstump currently only supports self-join. Therefore, the argument `n=T_A.shape[1]` + # must be passed to the function `core.check_window_size`. 
+ core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) if include is not None: include = core._preprocess_include(include) diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index 56d83f6b6..7d8e9bd24 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -646,10 +646,15 @@ def __init__( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) self._ignore_trivial = core.check_ignore_trivial( self._T_A, self._T_B, self._ignore_trivial ) + if self._ignore_trivial: # self-join + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: # AB-join + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) self._n_A = self._T_A.shape[0] self._n_B = self._T_B.shape[0] diff --git a/stumpy/scrump.py b/stumpy/scrump.py index dd5617480..5e0d212ab 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -905,10 +905,15 @@ def __init__( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) self._ignore_trivial = core.check_ignore_trivial( self._T_A, self._T_B, self._ignore_trivial ) + if self._ignore_trivial: + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) self._n_A = self._T_A.shape[0] self._n_B = self._T_B.shape[0] diff --git a/stumpy/stamp.py b/stumpy/stamp.py index 855db1f26..5726803c7 100644 --- a/stumpy/stamp.py +++ b/stumpy/stamp.py @@ -208,13 +208,14 @@ def stamp( if T_B.ndim != 1: # pragma: no cover raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. 
") - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - subseq_T_A = core.rolling_window(T_A, m) excl_zone = int(np.ceil(m / 2)) # Add exclusionary zone if ignore_trivial: + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) out = [ _mass_PI( subseq, @@ -229,6 +230,7 @@ def stamp( for i, subseq in enumerate(subseq_T_A) ] else: + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) out = [ _mass_PI( subseq, diff --git a/stumpy/stomp.py b/stumpy/stomp.py index 608ce7770..251d0fdc9 100644 --- a/stumpy/stomp.py +++ b/stumpy/stomp.py @@ -81,8 +81,13 @@ def _stomp(T_A, m, T_B=None, ignore_trivial=True): if T_B.ndim != 1: # pragma: no cover raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ") - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) + if ignore_trivial: # self-join + core.check_window_size( + m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0] + ) + else: # AB-join + core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) n = T_A.shape[0] l = n - m + 1 diff --git a/stumpy/stump.py b/stumpy/stump.py index 18409c6e1..2b68fb56a 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -711,18 +711,17 @@ def stump( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - n_A = T_A.shape[0] n_B = T_B.shape[0] l = n_A - m + 1 + ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - - if ignore_trivial: + if ignore_trivial: # self-join + core.check_window_size(m, max_size=min(n_A, n_B), n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) - else: + else: # AB-join + core.check_window_size(m, max_size=min(n_A, n_B)) diags = 
np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) P, PL, PR, I, IL, IR = _stump( diff --git a/stumpy/stumped.py b/stumpy/stumped.py index e5abc75c1..0cfa331b2 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -618,17 +618,17 @@ def stumped( "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`" ) - core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0])) - ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) - n_A = T_A.shape[0] n_B = T_B.shape[0] + ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial) excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) if ignore_trivial: + core.check_window_size(m, max_size=min(n_A, n_B), n=n_A) diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64) else: + core.check_window_size(m, max_size=min(n_A, n_B)) diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64) _stumped = core._client_to_func(client) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index feb8cb2af..bf6d40661 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -179,7 +179,7 @@ def __init__( array. """ self._T = core._preprocess(T) - core.check_window_size(m, max_size=self._T.shape[-1]) + core.check_window_size(m, max_size=self._T.shape[0]) self._m = m self._k = k diff --git a/tests/test_core.py b/tests/test_core.py index 8d0721979..17059d5dc 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -192,6 +192,21 @@ def test_check_max_window_size(): core.check_window_size(m, max_size=3) +def test_check_window_size_excl_zone(): + # To ensure warning is raised if there is at least one subsequence + # that has no non-trivial neighbor + T = np.random.rand(10) + m = 7 + + # For `len(T) == 10` and `m == 7`, the `excl_zone` is ceil(m / 4) = 2. + # In this case, there are `10 - 7 + 1 = 4` subsequences of length 7, + # starting at indices 0, 1, 2, and 3. For a subsequence that starts at + # index 1, there are no non-trivial neighbors. So, a warning should be + # raised. 
+ with pytest.warns(UserWarning): + core.check_window_size(m, max_size=len(T), n=len(T)) + + @pytest.mark.parametrize("Q, T", test_data) def test_njit_sliding_dot_product(Q, T): ref_mp = naive_rolling_window_dot_product(Q, T) From c707ad98321ac58f96935b34b6f3a24d29b78cac Mon Sep 17 00:00:00 2001 From: Nima Sarajpoor Date: Mon, 7 Apr 2025 20:44:06 -0400 Subject: [PATCH 44/54] Fixed #708 Check fastmath flags (#1068) * empty commit * fix fastmath for aamp._compute_diagonal * fix fastmath for aamp._aamp * fix fastmath for core._calculate_squared_distance_profile * fix fastmath for core.calculate_distance_profile * fix fastmath for core._apply_exclusion_zone * fix fastmath for mstump._compute_multi_D * fix fastmath for scraamp._compute_PI * fix fastmath for scraamp._prescraamp * fix fastmath for scrump._compute_PI * fix fastmath for scrump._prescrump * fix fastmath for stump._compute_diagonal * fix fastmath for stump._stump * temp commit * fix fastmath for maamp._compute_multi_p_norm * Add note to docstring for case p=np.inf * deleted wrong file * Add check for fastmath flags of callstacks * minor changes * minor changes and fixes * fix black and flake8 * minor changes * fixed typo and add comment * minor changes --- fastmath.py | 386 ++++++++++++++++++++++++++++++++++++++++++++++ stumpy/aamp.py | 4 +- stumpy/core.py | 10 +- stumpy/maamp.py | 2 +- stumpy/mstump.py | 2 +- stumpy/scraamp.py | 4 +- stumpy/scrump.py | 4 +- stumpy/stump.py | 4 +- 8 files changed, 403 insertions(+), 13 deletions(-) diff --git a/fastmath.py b/fastmath.py index fe7dc0b56..b46fd5e37 100755 --- a/fastmath.py +++ b/fastmath.py @@ -89,6 +89,391 @@ def check_fastmath(pkg_dir, pkg_name): return +class FunctionCallVisitor(ast.NodeVisitor): + """ + A class to traverse the AST of the modules of a package to collect + the call stacks of njit functions. + + Parameters + ---------- + pkg_dir : str + The path to the package directory containing some .py files. + + pkg_name : str + The name of the package. 
+ + Attributes + ---------- + module_names : list + A list of module names to track the modules as the visitor traverses them. + + call_stack : list + A list of njit functions, representing a chain of function calls, + where each element is a string of the form "module_name.func_name". + + out : list + A list of unique `call_stack`s. + + njit_funcs : list + A list of all njit functions in `pkg_dir`'s modules. Each element is a tuple + of the form `(module_name, func_name)`. + + njit_modules : set + A set that contains the names of all modules, each of which contains at least + one njit function. + + njit_nodes : dict + A dictionary mapping njit function names to their corresponding AST nodes. + A key is a string, and it is of the form "module_name.func_name", and its + corresponding value is the AST node- with type ast.FunctionDef- of that + function. + + ast_modules : dict + A dictionary mapping module names to their corresponding AST objects. A key + is the name of a module, and its corresponding value is the content of that + module as an AST object. + + Methods + ------- + push_module(module_name) + Push the name of a module onto the stack `module_names`. + + pop_module() + Pop the last module name from the stack `module_names`. + + push_call_stack(module_name, func_name) + Push a function call onto the stack of function calls, `call_stack`. + + pop_call_stack() + Pop the last function call from the stack of function calls, `call_stack` + + goto_deeper_func(node) + Calls the visit method from class `ast.NodeVisitor` on all children of + the `node`. + + goto_next_func(node) + Calls the visit method from class `ast.NodeVisitor` on all children of + the `node`. + + push_out() + Push the current function call stack, `call_stack`, onto the output list, `out`, + unless it is already included in one of the so-far-collected call stacks. + + visit_Call(node) + This method is called when the visitor encounters a function call in the AST. 
It + checks if the called function is a njit function and, if so, traverses its AST + to collect its call stack. + """ + + def __init__(self, pkg_dir, pkg_name): + """ + Initialize the FunctionCallVisitor class. This method sets up the necessary + attributes and prepares the visitor for traversing the AST of STUMPY's modules. + + Parameters + ---------- + pkg_dir : str + The path to the package directory containing some .py files. + + pkg_name : str + The name of the package. + + Returns + ------- + None + """ + super().__init__() + self.module_names = [] + self.call_stack = [] + self.out = [] + + # Setup lists, dicts, and ast objects + self.njit_funcs = get_njit_funcs(pkg_dir) + self.njit_modules = set(mod_name for mod_name, func_name in self.njit_funcs) + self.njit_nodes = {} + self.ast_modules = {} + + filepaths = sorted(f for f in pathlib.Path(pkg_dir).iterdir() if f.is_file()) + ignore = ["__init__.py", "__pycache__"] + + for filepath in filepaths: + file_name = filepath.name + if ( + file_name not in ignore + and not file_name.startswith("gpu") + and str(filepath).endswith(".py") + ): + module_name = file_name.replace(".py", "") + file_contents = "" + with open(filepath, encoding="utf8") as f: + file_contents = f.read() + self.ast_modules[module_name] = ast.parse(file_contents) + + for node in self.ast_modules[module_name].body: + if isinstance(node, ast.FunctionDef): + func_name = node.name + if (module_name, func_name) in self.njit_funcs: + self.njit_nodes[f"{module_name}.{func_name}"] = node + + def push_module(self, module_name): + """ + Push a module name onto the stack of module names. + + Parameters + ---------- + module_name : str + The name of the module to be pushed onto the stack. + + Returns + ------- + None + """ + self.module_names.append(module_name) + + return + + def pop_module(self): + """ + Pop the last module name from the stack of module names. 
+ + Parameters + ---------- + None + + Returns + ------- + None + """ + if self.module_names: + self.module_names.pop() + + return + + def push_call_stack(self, module_name, func_name): + """ + Push a function call onto the stack of function calls. + + Parameters + ---------- + module_name : str + A module's name + + func_name : str + A function's name + + Returns + ------- + None + """ + self.call_stack.append(f"{module_name}.{func_name}") + + return + + def pop_call_stack(self): + """ + Pop the last function call from the stack of function calls. + + Parameters + ---------- + None + + Returns + ------- + None + """ + if self.call_stack: + self.call_stack.pop() + + return + + def goto_deeper_func(self, node): + """ + Calls the visit method from class `ast.NodeVisitor` on + all children of the `node`. + + Parameters + ---------- + node : ast.AST + The AST node to be visited. + + Returns + ------- + None + """ + self.generic_visit(node) + + return + + def goto_next_func(self, node): + """ + Calls the visit method from class `ast.NodeVisitor` on + all children of the node. + + Parameters + ---------- + node : ast.AST + The AST node to be visited. + + Returns + ------- + None + """ + self.generic_visit(node) + + return + + def push_out(self): + """ + Push the current function call stack onto the output list unless it + is already included in one of the so-far-collected call stacks. + + + Parameters + ---------- + None + + Returns + ------- + None + """ + unique = True + for cs in self.out: + if " ".join(self.call_stack) in " ".join(cs): + unique = False + break + + if unique: + self.out.append(self.call_stack.copy()) + + return + + def visit_Call(self, node): + """ + Called when visiting an AST node of type `ast.Call`. + + Parameters + ---------- + node : ast.Call + The AST node representing a function call. + + Returns + ------- + None + """ + callee_name = ast.unparse(node.func) + + module_changed = False + if "." 
in callee_name: + new_module_name, new_func_name = callee_name.split(".")[:2] + + if new_module_name in self.njit_modules: + self.push_module(new_module_name) + module_changed = True + else: + if self.module_names: + new_module_name = self.module_names[-1] + new_func_name = callee_name + callee_name = f"{new_module_name}.{new_func_name}" + + if callee_name in self.njit_nodes.keys(): + callee_node = self.njit_nodes[callee_name] + self.push_call_stack(new_module_name, new_func_name) + self.goto_deeper_func(callee_node) + self.push_out() + self.pop_call_stack() + if module_changed: + self.pop_module() + + self.goto_next_func(node) + + return + + +def get_njit_call_stacks(pkg_dir, pkg_name): + """ + Get the call stacks of all njit functions in `pkg_dir` + + Parameters + ---------- + pkg_dir : str + The path to the package directory containing some .py files + + pkg_name : str + The name of the package + + Returns + ------- + out : list + A list of unique function call stacks. Each item is of type list, + representing a chain of function calls. + """ + visitor = FunctionCallVisitor(pkg_dir, pkg_name) + + for module_name in visitor.njit_modules: + visitor.push_module(module_name) + + for node in visitor.ast_modules[module_name].body: + if isinstance(node, ast.FunctionDef): + func_name = node.name + if (module_name, func_name) in visitor.njit_funcs: + visitor.push_call_stack(module_name, func_name) + visitor.visit(node) + visitor.pop_call_stack() + + visitor.pop_module() + + return visitor.out + + +def check_call_stack_fastmath(pkg_dir, pkg_name): + """ + Check if all njit functions in a call stack have the same `fastmath` flag. + This function raises a ValueError if it finds any inconsistencies in the + `fastmath` flags in at least one call stack of njit functions.
+ + Parameters + ---------- + pkg_dir : str + The path to the directory containing some .py files + + pkg_name : str + The name of the package + + Returns + ------- + None + """ + # List of call stacks with inconsistent fastmath flags + inconsistent_call_stacks = [] + + njit_call_stacks = get_njit_call_stacks(pkg_dir, pkg_name) + for cs in njit_call_stacks: + # Set the fastmath flag of the first function in the call stack + # as the reference flag + module_name, func_name = cs[0].split(".") + module = importlib.import_module(f".{module_name}", package="stumpy") + func = getattr(module, func_name) + flag_ref = func.targetoptions["fastmath"] + + for item in cs[1:]: + module_name, func_name = item.split(".") + module = importlib.import_module(f".{module_name}", package="stumpy") + func = getattr(module, func_name) + flag = func.targetoptions["fastmath"] + if flag != flag_ref: + inconsistent_call_stacks.append(cs) + break + + if len(inconsistent_call_stacks) > 0: + msg = ( + "Found at least one call stack that has inconsistent `fastmath` flags. 
" + + f"Those call stacks are:\n {inconsistent_call_stacks}\n" + ) + raise ValueError(msg) + + return + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--check", dest="pkg_dir") @@ -98,3 +483,4 @@ def check_fastmath(pkg_dir, pkg_name): pkg_dir = pathlib.Path(args.pkg_dir) pkg_name = pkg_dir.name check_fastmath(str(pkg_dir), pkg_name) + check_call_stack_fastmath(str(pkg_dir), pkg_name) diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 6d6664537..940a20b2b 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -13,7 +13,7 @@ @njit( # "(f8[:], f8[:], i8, b1[:], b1[:], f8, i8[:], i8, i8, i8, f8[:, :, :]," # "f8[:, :], f8[:, :], i8[:, :, :], i8[:, :], i8[:, :], b1)", - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _compute_diagonal( T_A, @@ -186,7 +186,7 @@ def _compute_diagonal( @njit( # "(f8[:], f8[:], i8, b1[:], b1[:], i8[:], b1, i8)", parallel=True, - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _aamp( T_A, diff --git a/stumpy/core.py b/stumpy/core.py index ea9b93afd..507a516db 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1170,7 +1170,7 @@ def _calculate_squared_distance( @njit( # "f8[:](i8, f8[:], f8, f8, f8[:], f8[:])", - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _calculate_squared_distance_profile( m, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconstant @@ -1236,7 +1236,7 @@ def _calculate_squared_distance_profile( @njit( # "f8[:](i8, f8[:], f8, f8, f8[:], f8[:])", - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def calculate_distance_profile( m, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconstant @@ -1310,6 +1310,10 @@ def _p_norm_distance_profile(Q, T, p=2.0): ------- output : numpy.ndarray p-normalized distance profile between `Q` and `T` + + Notes + ----- + The special case `p==inf` is not supported. 
""" m = Q.shape[0] l = T.shape[0] - m + 1 @@ -2038,7 +2042,7 @@ def _get_QT(start, T_A, T_B, m): @njit( # ["(f8[:], i8, i8)", "(f8[:, :], i8, i8)"], - fastmath=config.STUMPY_FASTMATH_TRUE + fastmath=config.STUMPY_FASTMATH_FLAGS ) def _apply_exclusion_zone(a, idx, excl_zone, val): """ diff --git a/stumpy/maamp.py b/stumpy/maamp.py index c30deab6f..d944819c0 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -592,7 +592,7 @@ def _get_multi_p_norm(start, T, m, p=2.0): # "(i8, i8, i8, f8[:, :], f8[:, :], i8, i8, b1[:, :], b1[:, :], f8," # "f8[:, :], f8[:, :], f8[:, :])", parallel=True, - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _compute_multi_p_norm( d, diff --git a/stumpy/mstump.py b/stumpy/mstump.py index 6ea97edb7..2945b9d98 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -811,7 +811,7 @@ def _get_multi_QT(start, T, m): # "(i8, i8, i8, f8[:, :], f8[:, :], i8, i8, f8[:, :], f8[:, :], f8[:, :]," # "f8[:, :], f8[:, :], f8[:, :], f8[:, :])", parallel=True, - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _compute_multi_D( d, diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index 7d8e9bd24..c0d9b5ee1 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -83,7 +83,7 @@ def _preprocess_prescraamp(T_A, m, T_B=None, s=None): return (T_A, T_B, T_A_subseq_isfinite, T_B_subseq_isfinite, indices, s, excl_zone) -@njit(fastmath=config.STUMPY_FASTMATH_TRUE) +@njit(fastmath=config.STUMPY_FASTMATH_FLAGS) def _compute_PI( T_A, T_B, @@ -286,7 +286,7 @@ def _compute_PI( # "(f8[:], f8[:], i8, b1[:], b1[:], f8, i8, i8, f8[:], f8[:]," # "i8[:], optional(i8))", parallel=True, - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _prescraamp( T_A, diff --git a/stumpy/scrump.py b/stumpy/scrump.py index 5e0d212ab..55e15ca6d 100644 --- a/stumpy/scrump.py +++ b/stumpy/scrump.py @@ -133,7 +133,7 @@ def _preprocess_prescrump( ) 
-@njit(fastmath=config.STUMPY_FASTMATH_TRUE) +@njit(fastmath=config.STUMPY_FASTMATH_FLAGS) def _compute_PI( T_A, T_B, @@ -384,7 +384,7 @@ def _compute_PI( # "(f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:], i8, i8, f8[:], f8[:]," # "i8[:], optional(i8))", parallel=True, - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _prescrump( T_A, diff --git a/stumpy/stump.py b/stumpy/stump.py index 2b68fb56a..b70a776aa 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -15,7 +15,7 @@ # "(f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], f8[:]," # "b1[:], b1[:], b1[:], b1[:], i8[:], i8, i8, i8, f8[:, :, :], f8[:, :]," # "f8[:, :], i8[:, :, :], i8[:, :], i8[:, :], b1)", - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _compute_diagonal( T_A, @@ -247,7 +247,7 @@ def _compute_diagonal( # "(f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], b1[:], b1[:]," # "b1[:], b1[:], i8[:], b1, i8)", parallel=True, - fastmath=config.STUMPY_FASTMATH_TRUE, + fastmath=config.STUMPY_FASTMATH_FLAGS, ) def _stump( T_A, From 423c679fdfcdd547744d2b9a2aed7565124fd6e1 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Mon, 7 Apr 2025 20:46:33 -0400 Subject: [PATCH 45/54] Fixed #1079 Bad error message in T.ndim --- stumpy/maamp.py | 2 +- stumpy/maamped.py | 2 +- stumpy/mstump.py | 2 +- stumpy/mstumped.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/maamp.py b/stumpy/maamp.py index d944819c0..480147b9f 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -438,7 +438,7 @@ def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False, T, T_subseq_isfinite = core.preprocess_non_normalized(T, m) if T.ndim <= 1: # pragma: no cover - err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional" + err = f"T is {T.ndim}-dimensional and must be at least 2-dimensional" raise ValueError(f"{err}") core.check_window_size(m, max_size=T.shape[1], n=T.shape[1]) 
diff --git a/stumpy/maamped.py b/stumpy/maamped.py index 70bdf7e66..ce285aa89 100644 --- a/stumpy/maamped.py +++ b/stumpy/maamped.py @@ -386,7 +386,7 @@ def maamped(client, T, m, include=None, discords=False, p=2.0): T_B, T_B_subseq_isfinite = core.preprocess_non_normalized(T_B, m) if T_A.ndim <= 1: # pragma: no cover - err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" + err = f"T is {T_A.ndim}-dimensional and must be at least 2-dimensional" raise ValueError(f"{err}") core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1]) diff --git a/stumpy/mstump.py b/stumpy/mstump.py index 2945b9d98..bc5737aa7 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -624,7 +624,7 @@ def multi_distance_profile( ) if T.ndim <= 1: # pragma: no cover - err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional" + err = f"T is {T.ndim}-dimensional and must be at least 2-dimensional" raise ValueError(f"{err}") core.check_window_size(m, max_size=T.shape[1]) diff --git a/stumpy/mstumped.py b/stumpy/mstumped.py index d8ce4c3a5..afabdbafa 100644 --- a/stumpy/mstumped.py +++ b/stumpy/mstumped.py @@ -502,7 +502,7 @@ def mstumped( ) if T_A.ndim <= 1: # pragma: no cover - err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional" + err = f"T is {T_A.ndim}-dimensional and must be at least 2-dimensional" raise ValueError(f"{err}") # mstump currently only supports self-join. 
Therefore, the argument `n=T_A.shape[1]` From 1efb51a93e47000c39d74aa5d3c6cf9d0e66e3a8 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Fri, 13 Jun 2025 07:43:43 -0400 Subject: [PATCH 46/54] Replaced references to stumpy-dev post-org-migration --- .fossa.yml | 2 +- CONTRIBUTING.md | 6 +++--- README.rst | 27 ++++++++++++-------------- docs/Contribute.ipynb | 8 ++++---- docs/Tutorial_The_Matrix_Profile.ipynb | 4 ++-- docs/Tutorial_Time_Series_Chains.ipynb | 6 +++--- docs/WIP/Tutorial_meter_swapping.ipynb | 4 ++-- docs/api.rst | 2 +- docs/conf.py | 2 +- docs/help.rst | 2 +- docs/images/Performance.ipynb | 2 +- docs/install.rst | 2 +- docs/tutorials.rst | 2 +- pypi.sh | 4 ++-- pyproject.toml | 4 ++-- 15 files changed, 37 insertions(+), 40 deletions(-) diff --git a/.fossa.yml b/.fossa.yml index 3baaa7a56..aae18833e 100644 --- a/.fossa.yml +++ b/.fossa.yml @@ -5,7 +5,7 @@ version: 2 cli: server: https://app.fossa.com fetcher: custom - project: https://github.com/TDAmeritrade/stumpy + project: https://github.com/stumpy-dev/stumpy analyze: modules: - name: . diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dc9f6a4e9..85bca91b7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,15 +4,15 @@ Contributions of all kinds are welcome. In particular pull requests are apprecia ## Issues -The easiest contribution to make is to [file an issue](https://github.com/TDAmeritrade/stumpy/issues/new). It is beneficial if you perform a cursory search of [existing issues](https://github.com/TDAmeritrade/stumpy/issues?q=is%3Aissue) and it is also helpful, but not necessary, if you can provide clear instruction for how to reproduce a problem. If you have resolved an issue yourself please consider contributing to this repository so others can benefit from your work. +The easiest contribution to make is to [file an issue](https://github.com/stumpy-dev/stumpy/issues/new). 
It is beneficial if you perform a cursory search of [existing issues](https://github.com/stumpy-dev/stumpy/issues?q=is%3Aissue) and it is also helpful, but not necessary, if you can provide clear instruction for how to reproduce a problem. If you have resolved an issue yourself please consider contributing to this repository so others can benefit from your work. ## Documentation Contributing to documentation is the easiest way to get started. Providing simple clear or helpful documentation for new users is critical. Anything that *you* as a new user found hard to understand, or difficult to work out, are excellent places to begin. Contributions to more detailed and descriptive error messages is especially appreciated. To contribute to the documentation please -[fork the project](https://github.com/TDAmeritrade/stumpy/fork) into your own repository, make changes there, and then submit a pull request. +[fork the project](https://github.com/stumpy-dev/stumpy/fork) into your own repository, make changes there, and then submit a pull request. ## Code -Code contributions are always welcome, from simple bug fixes, to new features. To contribute code please [fork the project](https://github.com/TDAmeritrade/stumpy/fork) into your own repository, make changes there, run [black](https://github.com/python/black) and [flake8](http://flake8.pycqa.org/en/latest/) on your code, add tests for bugs/new features, and then submit a pull request. If you are fixing a known issue please add the issue number to the PR message. If you are fixing a new issue feel free to file an issue and then reference it in the PR. You can [browse open issues](https://github.com/TDAmeritrade/stumpy/issues) for potential code contributions. Fixes for issues tagged with 'help wanted' are especially appreciated. +Code contributions are always welcome, from simple bug fixes, to new features. 
To contribute code please [fork the project](https://github.com/stumpy-dev/stumpy/fork) into your own repository, make changes there, run [black](https://github.com/python/black) and [flake8](http://flake8.pycqa.org/en/latest/) on your code, add tests for bugs/new features, and then submit a pull request. If you are fixing a known issue please add the issue number to the PR message. If you are fixing a new issue feel free to file an issue and then reference it in the PR. You can [browse open issues](https://github.com/stumpy-dev/stumpy/issues) for potential code contributions. Fixes for issues tagged with 'help wanted' are especially appreciated. diff --git a/README.rst b/README.rst index 16780a3cc..ba7be48da 100644 --- a/README.rst +++ b/README.rst @@ -12,19 +12,19 @@ :target: https://pepy.tech/project/stumpy :alt: PyPI Downloads .. |License| image:: https://img.shields.io/pypi/l/stumpy.svg - :target: https://github.com/TDAmeritrade/stumpy/blob/master/LICENSE.txt + :target: https://github.com/stumpy-dev/stumpy/blob/master/LICENSE.txt :alt: License -.. |Test Status| image:: https://github.com/TDAmeritrade/stumpy/workflows/Tests/badge.svg - :target: https://github.com/TDAmeritrade/stumpy/actions?query=workflow%3ATests+branch%3Amain +.. |Test Status| image:: https://github.com/stumpy-dev/stumpy/workflows/Tests/badge.svg + :target: https://github.com/stumpy-dev/stumpy/actions?query=workflow%3ATests+branch%3Amain :alt: Test Status -.. |Code Coverage| image:: https://codecov.io/gh/TDAmeritrade/stumpy/branch/master/graph/badge.svg - :target: https://codecov.io/gh/TDAmeritrade/stumpy +.. |Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/branch/master/graph/badge.svg + :target: https://codecov.io/gh/stumpy-dev/stumpy :alt: Code Coverage .. |RTD Status| image:: https://readthedocs.org/projects/stumpy/badge/?version=latest :target: https://stumpy.readthedocs.io/ :alt: ReadTheDocs Status .. 
|Binder| image:: https://mybinder.org/badge_logo.svg - :target: https://mybinder.org/v2/gh/TDAmeritrade/stumpy/main?filepath=notebooks + :target: https://mybinder.org/v2/gh/stumpy-dev/stumpy/main?filepath=notebooks :alt: Binder .. |JOSS| image:: http://joss.theoj.org/papers/10.21105/joss.01504/status.svg :target: https://doi.org/10.21105/joss.01504 @@ -35,17 +35,14 @@ .. |NumFOCUS| image:: https://img.shields.io/badge/NumFOCUS-Affiliated%20Project-orange.svg?style=flat&colorA=E1523D&colorB=007D8A :target: https://numfocus.org/sponsored-projects/affiliated-projects :alt: NumFOCUS Affiliated Project -.. |FOSSA| image:: https://app.fossa.com/api/projects/custom%2B9056%2Fgithub.com%2FTDAmeritrade%2Fstumpy.svg?type=shield - :target: https://app.fossa.io/projects/custom%2B9056%2Fgithub.com%2FTDAmeritrade%2Fstumpy?ref=badge_shield - :alt: FOSSA .. |Twitter| image:: https://img.shields.io/twitter/follow/stumpy_dev.svg?style=social :target: https://twitter.com/stumpy_dev :alt: Twitter | -.. image:: https://raw.githubusercontent.com/TDAmeritrade/stumpy/master/docs/images/stumpy_logo_small.png - :target: https://github.com/TDAmeritrade/stumpy +.. image:: https://raw.githubusercontent.com/stumpy-dev/stumpy/master/docs/images/stumpy_logo_small.png + :target: https://github.com/stumpy-dev/stumpy :alt: STUMPY Logo ====== @@ -54,7 +51,7 @@ STUMPY STUMPY is a powerful and scalable Python library that efficiently computes something called the `matrix profile `__, which is just an academic way of saying "for every (green) subsequence within your time series, automatically identify its corresponding nearest-neighbor (grey)": -.. image:: https://github.com/TDAmeritrade/stumpy/blob/main/docs/images/stumpy_demo.gif?raw=true +.. 
image:: https://github.com/stumpy-dev/stumpy/blob/main/docs/images/stumpy_demo.gif?raw=true :alt: STUMPY Animated GIF What's important is that once you've computed your matrix profile (middle panel above) it can then be used for a variety of time series data mining tasks such as: @@ -230,7 +227,7 @@ Performance We tested the performance of computing the exact matrix profile using the Numba JIT compiled version of the code on randomly generated time series data with various lengths (i.e., ``np.random.rand(n)``) along with different `CPU and GPU hardware resources `_. -.. image:: https://raw.githubusercontent.com/TDAmeritrade/stumpy/master/docs/images/performance.png +.. image:: https://raw.githubusercontent.com/stumpy-dev/stumpy/master/docs/images/performance.png :alt: STUMPY Performance Plot The raw results are displayed in the table below as Hours:Minutes:Seconds.Milliseconds and with a constant window size of `m = 50`. Note that these reported runtimes include the time that it takes to move the data from the host to all of the GPU device(s). You may need to scroll to the right side of the table in order to see all of the runtimes. @@ -331,13 +328,13 @@ STUMPY supports `Python 3.9+ `__ and, due to the Getting Help ------------ -First, please check the `discussions `__ and `issues `__ on Github to see if your question has already been answered there. If no solution is available there feel free to open a new discussion or issue and the authors will attempt to respond in a reasonably timely fashion. +First, please check the `discussions `__ and `issues `__ on Github to see if your question has already been answered there. If no solution is available there feel free to open a new discussion or issue and the authors will attempt to respond in a reasonably timely fashion. ------------ Contributing ------------ -We welcome `contributions `__ in any form! Assistance with documentation, particularly expanding tutorials, is always welcome. 
To contribute please `fork the project `__, make your changes, and submit a pull request. We will do our best to work through any issues with you and get your code merged into the main branch. +We welcome `contributions `__ in any form! Assistance with documentation, particularly expanding tutorials, is always welcome. To contribute please `fork the project `__, make your changes, and submit a pull request. We will do our best to work through any issues with you and get your code merged into the main branch. ------ Citing diff --git a/docs/Contribute.ipynb b/docs/Contribute.ipynb index f5f7e3c2a..458cd0f30 100644 --- a/docs/Contribute.ipynb +++ b/docs/Contribute.ipynb @@ -40,9 +40,9 @@ "source": [ "## Find your contribution\n", "\n", - "You've decided that you want to contribute but how do you approach a new project and figure out where you can help? This will feel like like trying to jump into a conversation that's been happening for months (or years) and can often be intimidating. If you've used the project before, you'll be more familiar with its structure and API but you probably haven't \"peeked behind the curtain\". The best place to get started is the list of [Issues](https://github.com/TDAmeritrade/stumpy/issues). These are feature requests/changes/bugs that other people have identified. Feel free to peruse the list to get a feel for all of the ongoing work in the project. Often, maintainers will have a labeling system to organize the issues. These labels may include things like `documentation` or `enhancement`. For new contributors, many projects have a [good first issue label](https://github.com/TDAmeritrade/stumpy/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22).\n", + "You've decided that you want to contribute but how do you approach a new project and figure out where you can help? This will feel like like trying to jump into a conversation that's been happening for months (or years) and can often be intimidating. 
If you've used the project before, you'll be more familiar with its structure and API but you probably haven't \"peeked behind the curtain\". The best place to get started is the list of [Issues](https://github.com/stumpy-dev/stumpy/issues). These are feature requests/changes/bugs that other people have identified. Feel free to peruse the list to get a feel for all of the ongoing work in the project. Often, maintainers will have a labeling system to organize the issues. These labels may include things like `documentation` or `enhancement`. For new contributors, many projects have a [good first issue label](https://github.com/stumpy-dev/stumpy/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22).\n", "\n", - "Your next stop, should always be [CONTRIBUTING.md](https://github.com/TDAmeritrade/stumpy/blob/master/CONTRIBUTING.md). Here, the maintainers outline any guidance they have for contributors,\n", + "Your next stop, should always be [CONTRIBUTING.md](https://github.com/stumpy-dev/stumpy/blob/master/CONTRIBUTING.md). Here, the maintainers outline any guidance they have for contributors,\n", "\n", "If you click on any issue, you'll see a running history of discussions. This serves as the record of thoughts around that specific issue. For some issues, you may see an ongoing conversation. In others, you may just see the initial issue. This is your chance to have a dialogue with the maintainers. If you've found an open issue that interests you and you think you may be able to solve it, feel free to leave a message. Remember that maintainers are people too and at STUMPY, they're excited to help new contributors. Here's an example of a potential message:\n", "\n", @@ -68,7 +68,7 @@ "\n", "First, you need to create a copy of the repository for you to work off of; this is called a `fork`. Here are instructions on [forking a repository](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo). 
Now you have your own copy associated with your GitHub account.\n", "\n", - "Next, you need to `clone` this copy of the repository. This simply downloads it to your computer so that you can work on it. Here are instructions on [cloning a repository](https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository). It's super important to *remember* to clone your fork and not [STUMPY](https://github.com/TDAmeritrade/stumpy).\n", + "Next, you need to `clone` this copy of the repository. This simply downloads it to your computer so that you can work on it. Here are instructions on [cloning a repository](https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository). It's super important to *remember* to clone your fork and not [STUMPY](https://github.com/stumpy-dev/stumpy).\n", "\n", "Then, you'll need to create a `branch`. Here's an overview of [how git branches work](https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging) but if you're working in the command line, then you probably need to type `git checkout -b branch_name`. In this case, `branch_name` should be replaced with something descriptive about the change that you are making like `change_incorrect_variable` or `document_x`.\n", "\n", @@ -88,7 +88,7 @@ "source": [ "## Adhere to CONTRIBUTING.md Guidance\n", "\n", - "One of the great benefits of open source is the ability to collaborate with developers from around the world. However, you can imagine that combining their contributions into one coherent codebase while maintaining consistency can be challenging. Luckily, recent gains in automated tooling have made this a lot easier. Remember [CONTRIBUTING.md](https://github.com/TDAmeritrade/stumpy/blob/master/CONTRIBUTING.md)? 
There are a couple of things we want to make sure we do before we submit a `pull request`.\n", + "One of the great benefits of open source is the ability to collaborate with developers from around the world. However, you can imagine that combining their contributions into one coherent codebase while maintaining consistency can be challenging. Luckily, recent gains in automated tooling have made this a lot easier. Remember [CONTRIBUTING.md](https://github.com/stumpy-dev/stumpy/blob/master/CONTRIBUTING.md)? There are a couple of things we want to make sure we do before we submit a `pull request`.\n", "\n", "First, if you implemented a new feature or changed an existing feature, then you are also responsible for providing the unit test. This can often be just as much work as the feature, so make sure you account for it.\n", "\n", diff --git a/docs/Tutorial_The_Matrix_Profile.ipynb b/docs/Tutorial_The_Matrix_Profile.ipynb index 301fcab60..1bfd4cd94 100644 --- a/docs/Tutorial_The_Matrix_Profile.ipynb +++ b/docs/Tutorial_The_Matrix_Profile.ipynb @@ -298,7 +298,7 @@ "\n", "In the fall of 2016, researchers from the [University of California, Riverside](https://www.cs.ucr.edu/~eamonn) and the [University of New Mexico](https://www.cs.unm.edu/~mueen/) published a beautiful set of [back-to-back papers](https://www.cs.ucr.edu/~eamonn/MatrixProfile.html) that described an exact method called STOMP for computing the matrix profile for any time series with a computational complexity of O(n2)! They also further demonstrated this using GPUs and they called this faster approach GPU-STOMP.\n", "\n", - "With the academics, data scientists, and developers in mind, we have taken these concepts and have open sourced STUMPY, a powerful and scalable library that efficiently computes the matrix profile according to this published research. 
And, thanks to other open source software such as [Numba](http://numba.pydata.org/) and [Dask](https://dask.org/), our implementation is highly parallelized (for a single server with multiple CPUs or, alternatively, multiple GPUs), highly distributed (with multiple CPUs across multiple servers). We've tested STUMPY on as many as 256 CPU cores (spread across 32 servers) or 16 NVIDIA GPU devices (on the same DGX-2 server) and have achieved similar [performance](https://github.com/TDAmeritrade/stumpy#performance) to the published GPU-STOMP work." + "With the academics, data scientists, and developers in mind, we have taken these concepts and have open sourced STUMPY, a powerful and scalable library that efficiently computes the matrix profile according to this published research. And, thanks to other open source software such as [Numba](http://numba.pydata.org/) and [Dask](https://dask.org/), our implementation is highly parallelized (for a single server with multiple CPUs or, alternatively, multiple GPUs), highly distributed (with multiple CPUs across multiple servers). We've tested STUMPY on as many as 256 CPU cores (spread across 32 servers) or 16 NVIDIA GPU devices (on the same DGX-2 server) and have achieved similar [performance](https://github.com/stumpy-dev/stumpy#performance) to the published GPU-STOMP work." 
] }, { @@ -349,7 +349,7 @@ "\n", "[STUMPY Documentation](https://stumpy.readthedocs.io/en/latest/)\n", "\n", - "[STUMPY Matrix Profile Github Code Repository](https://github.com/TDAmeritrade/stumpy)" + "[STUMPY Matrix Profile Github Code Repository](https://github.com/stumpy-dev/stumpy)" ] } ], diff --git a/docs/Tutorial_Time_Series_Chains.ipynb b/docs/Tutorial_Time_Series_Chains.ipynb index 0163b3492..143e2a6f0 100644 --- a/docs/Tutorial_Time_Series_Chains.ipynb +++ b/docs/Tutorial_Time_Series_Chains.ipynb @@ -6,7 +6,7 @@ "source": [ "# Time Series Chains\n", "\n", - "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/TDAmeritrade/stumpy/main?filepath=notebooks/Tutorial_Time_Series_Chains.ipynb)\n", + "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/stumpy-dev/stumpy/main?filepath=notebooks/Tutorial_Time_Series_Chains.ipynb)\n", "\n", "## Forecasting Web Query Data with Anchored Time Series Chains (ATSC)\n", "\n", @@ -38,7 +38,7 @@ "from matplotlib.patches import Rectangle, FancyArrowPatch\n", "import itertools\n", "\n", - "plt.style.use('https://raw.githubusercontent.com/TDAmeritrade/stumpy/main/docs/stumpy.mplstyle')" + "plt.style.use('https://raw.githubusercontent.com/stumpy-dev/stumpy/main/docs/stumpy.mplstyle')" ] }, { @@ -585,7 +585,7 @@ "\n", "[STUMPY Documentation](https://stumpy.readthedocs.io/en/latest/)\n", "\n", - "[STUMPY Matrix Profile Github Code Repository](https://github.com/TDAmeritrade/stumpy)" + "[STUMPY Matrix Profile Github Code Repository](https://github.com/stumpy-dev/stumpy)" ] } ], diff --git a/docs/WIP/Tutorial_meter_swapping.ipynb b/docs/WIP/Tutorial_meter_swapping.ipynb index 3091aa25f..c3912b37b 100644 --- a/docs/WIP/Tutorial_meter_swapping.ipynb +++ b/docs/WIP/Tutorial_meter_swapping.ipynb @@ -99,7 +99,7 @@ "import matplotlib.pyplot as plt\n", "import datetime\n", "\n", - "plt.style.use('https://raw.githubusercontent.com/TDAmeritrade/stumpy/main/docs/stumpy.mplstyle')\n", + 
"plt.style.use('https://raw.githubusercontent.com/stumpy-dev/stumpy/main/docs/stumpy.mplstyle')\n", "\n", "np.random.seed(1337) # Random seed for reproducibility" ] @@ -916,7 +916,7 @@ "\n", "[STUMPY Documentation](https://stumpy.readthedocs.io/en/latest/)\n", "\n", - "[STUMPY Matrix Profile Github Code Repository](https://github.com/TDAmeritrade/stumpy)" + "[STUMPY Matrix Profile Github Code Repository](https://github.com/stumpy-dev/stumpy)" ] } ], diff --git a/docs/api.rst b/docs/api.rst index a05b8ecef..4abf12123 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -4,7 +4,7 @@ STUMPY API Have A Question? ================ -`Ask Here `_ +`Ask Here `_ .. rubric:: Overview diff --git a/docs/conf.py b/docs/conf.py index dcd2da20a..841347e69 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -112,7 +112,7 @@ "icon_links": [ { "name": "GitHub", - "url": "https://github.com/TDAmeritrade/stumpy", + "url": "https://github.com/stumpy-dev/stumpy", "icon": "fab fa-github-square", }, { diff --git a/docs/help.rst b/docs/help.rst index a37740536..3a23c66f6 100644 --- a/docs/help.rst +++ b/docs/help.rst @@ -2,4 +2,4 @@ Getting Help ------------ -First, please check the `discussions `__ and `issues `__ on Github to see if your question has already been answered there. If no solution is available there feel free to open a new discussion or issue and the authors will attempt to respond in a reasonably timely fashion. +First, please check the `discussions `__ and `issues `__ on Github to see if your question has already been answered there. If no solution is available there feel free to open a new discussion or issue and the authors will attempt to respond in a reasonably timely fashion. 
diff --git a/docs/images/Performance.ipynb b/docs/images/Performance.ipynb index f996e2846..88f507a29 100644 --- a/docs/images/Performance.ipynb +++ b/docs/images/Performance.ipynb @@ -179,7 +179,7 @@ } ], "source": [ - "df = pd.read_html('https://github.com/TDAmeritrade/stumpy', match='STUMPED.256')[0]\n", + "df = pd.read_html('https://github.com/stumpy-dev/stumpy', match='STUMPED.256')[0]\n", "df = df.rename(columns={'n = 2i': 'n'})\n", "df['GPU-STOMP'] = pd.to_timedelta(df['GPU-STOMP'])\n", "df['STUMP.2'] = pd.to_timedelta(df['STUMP.2'])\n", diff --git a/docs/install.rst b/docs/install.rst index 138e6c80b..f24e6e378 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -27,7 +27,7 @@ To install stumpy from source, first clone the source repository: .. code:: bash - git clone https://github.com/TDAmeritrade/stumpy.git + git clone https://github.com/stumpy-dev/stumpy.git cd stumpy Next, you'll need to install the necessary dependencies. For maximum performance (or if you are installing stumpy for the Apple M1 ARM-based chip), it is recommended that you install all dependencies using `conda`: diff --git a/docs/tutorials.rst b/docs/tutorials.rst index 8812c782f..9f9e2ce83 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -3,7 +3,7 @@ Tutorials ========= .. image:: https://mybinder.org/badge_logo.svg - :target: https://mybinder.org/v2/gh/TDAmeritrade/stumpy/main?filepath=notebooks + :target: https://mybinder.org/v2/gh/stumpy-dev/stumpy/main?filepath=notebooks :alt: Binder .. diff --git a/pypi.sh b/pypi.sh index c18c67260..6af3db613 100755 --- a/pypi.sh +++ b/pypi.sh @@ -40,8 +40,8 @@ # twine check dist/* # # Github Release -# 1. Navigate to the Github release page: https://github.com/TDAmeritrade/stumpy/releases -# 2. Click "Draft a new release": https://github.com/TDAmeritrade/stumpy/releases/new +# 1. Navigate to the Github release page: https://github.com/stumpy-dev/stumpy/releases +# 2. 
Click "Draft a new release": https://github.com/stumpy-dev/stumpy/releases/new # 3. In the "Tag version" box, add the version number i.e., "v1.0.0" # 4. In the Release title" box, add the version number i.e., "v1.0.0" # 5. In the "Describe this release" box, add the description i.e., "Version 1.1.0 Release" diff --git a/pyproject.toml b/pyproject.toml index cb201d6cb..fe04d8260 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ ci = [ ] [project.urls] -Homepage = "https://github.com/TDAmeritrade/stumpy" +Homepage = "https://github.com/stumpy-dev/stumpy" Documentation = "https://stumpy.readthedocs.io/en/latest/" -Repository = "https://github.com/TDAmeritrade/stumpy" +Repository = "https://github.com/stumpy-dev/stumpy" From 5b98dba251bcaa6b2cf643f9f4a2aaeb84aad675 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Fri, 13 Jun 2025 07:54:04 -0400 Subject: [PATCH 47/54] Removed FOSSA references --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index ba7be48da..23e9e388a 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ |PyPI Version| |Conda Forge Version| |PyPI Downloads| |License| |Test Status| |Code Coverage| -|RTD Status| |Binder| |JOSS| |NumFOCUS| |FOSSA| +|RTD Status| |Binder| |JOSS| |NumFOCUS| .. |PyPI Version| image:: https://img.shields.io/pypi/v/stumpy.svg :target: https://pypi.org/project/stumpy/ From 5d0ca0f3aa5fdaf1da7bcb6e698b58c1a0598120 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Fri, 13 Jun 2025 07:57:18 -0400 Subject: [PATCH 48/54] Replaced references to master branch --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 23e9e388a..b8c16b30a 100644 --- a/README.rst +++ b/README.rst @@ -12,12 +12,12 @@ :target: https://pepy.tech/project/stumpy :alt: PyPI Downloads .. 
|License| image:: https://img.shields.io/pypi/l/stumpy.svg - :target: https://github.com/stumpy-dev/stumpy/blob/master/LICENSE.txt + :target: https://github.com/stumpy-dev/stumpy/blob/main/LICENSE.txt :alt: License .. |Test Status| image:: https://github.com/stumpy-dev/stumpy/workflows/Tests/badge.svg :target: https://github.com/stumpy-dev/stumpy/actions?query=workflow%3ATests+branch%3Amain :alt: Test Status -.. |Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/branch/master/graph/badge.svg +.. |Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/branch/main/graph/badge.svg :target: https://codecov.io/gh/stumpy-dev/stumpy :alt: Code Coverage .. |RTD Status| image:: https://readthedocs.org/projects/stumpy/badge/?version=latest @@ -41,7 +41,7 @@ | -.. image:: https://raw.githubusercontent.com/stumpy-dev/stumpy/master/docs/images/stumpy_logo_small.png +.. image:: https://raw.githubusercontent.com/stumpy-dev/stumpy/main/docs/images/stumpy_logo_small.png :target: https://github.com/stumpy-dev/stumpy :alt: STUMPY Logo @@ -227,7 +227,7 @@ Performance We tested the performance of computing the exact matrix profile using the Numba JIT compiled version of the code on randomly generated time series data with various lengths (i.e., ``np.random.rand(n)``) along with different `CPU and GPU hardware resources `_. -.. image:: https://raw.githubusercontent.com/stumpy-dev/stumpy/master/docs/images/performance.png +.. image:: https://raw.githubusercontent.com/stumpy-dev/stumpy/main/docs/images/performance.png :alt: STUMPY Performance Plot The raw results are displayed in the table below as Hours:Minutes:Seconds.Milliseconds and with a constant window size of `m = 50`. Note that these reported runtimes include the time that it takes to move the data from the host to all of the GPU device(s). You may need to scroll to the right side of the table in order to see all of the runtimes. @@ -334,7 +334,7 @@ First, please check the `discussions `__ in any form! 
Assistance with documentation, particularly expanding tutorials, is always welcome. To contribute please `fork the project `__, make your changes, and submit a pull request. We will do our best to work through any issues with you and get your code merged into the main branch. +We welcome `contributions `__ in any form! Assistance with documentation, particularly expanding tutorials, is always welcome. To contribute please `fork the project `__, make your changes, and submit a pull request. We will do our best to work through any issues with you and get your code merged into the main branch. ------ Citing From 1fa25d17e78602155dfb904809b38b8a8446d55e Mon Sep 17 00:00:00 2001 From: Sean Law Date: Fri, 13 Jun 2025 15:42:42 -0400 Subject: [PATCH 49/54] Updated coverage badge --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index b8c16b30a..4d0ffd490 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ :target: https://github.com/stumpy-dev/stumpy/actions?query=workflow%3ATests+branch%3Amain :alt: Test Status .. |Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/branch/main/graph/badge.svg - :target: https://codecov.io/gh/stumpy-dev/stumpy + :target: https://codecov.io/gh/stumpy-dev/stumpy/tree/main :alt: Code Coverage .. |RTD Status| image:: https://readthedocs.org/projects/stumpy/badge/?version=latest :target: https://stumpy.readthedocs.io/ From 7beb9689980835671f40e0b7a45a05f8042edba3 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Fri, 13 Jun 2025 15:48:55 -0400 Subject: [PATCH 50/54] Fixed codecov badge default branch --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 4d0ffd490..b8c16b30a 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ :target: https://github.com/stumpy-dev/stumpy/actions?query=workflow%3ATests+branch%3Amain :alt: Test Status .. 
|Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/branch/main/graph/badge.svg - :target: https://codecov.io/gh/stumpy-dev/stumpy/tree/main + :target: https://codecov.io/gh/stumpy-dev/stumpy :alt: Code Coverage .. |RTD Status| image:: https://readthedocs.org/projects/stumpy/badge/?version=latest :target: https://stumpy.readthedocs.io/ From b6c3af2bce7a8c18a683422af323193fff02dde6 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Fri, 13 Jun 2025 21:05:51 -0400 Subject: [PATCH 51/54] Fixed codecov badge --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index b8c16b30a..639e9a0e5 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ .. |Test Status| image:: https://github.com/stumpy-dev/stumpy/workflows/Tests/badge.svg :target: https://github.com/stumpy-dev/stumpy/actions?query=workflow%3ATests+branch%3Amain :alt: Test Status -.. |Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/branch/main/graph/badge.svg +.. |Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/graph/badge.svg?token=u0DooAbGji :target: https://codecov.io/gh/stumpy-dev/stumpy :alt: Code Coverage .. |RTD Status| image:: https://readthedocs.org/projects/stumpy/badge/?version=latest From 61a2d41f70ae6b0a400ea7b2288396d14da43090 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sat, 14 Jun 2025 07:22:42 -0400 Subject: [PATCH 52/54] Removed space at end of line --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 639e9a0e5..b4c8e0bc8 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ .. |Test Status| image:: https://github.com/stumpy-dev/stumpy/workflows/Tests/badge.svg :target: https://github.com/stumpy-dev/stumpy/actions?query=workflow%3ATests+branch%3Amain :alt: Test Status -.. |Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/graph/badge.svg?token=u0DooAbGji +.. 
|Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/graph/badge.svg?token=u0DooAbGji :target: https://codecov.io/gh/stumpy-dev/stumpy :alt: Code Coverage .. |RTD Status| image:: https://readthedocs.org/projects/stumpy/badge/?version=latest From 60283ff60ecc0ed8ac0a98fab2aa4b06e3eb1987 Mon Sep 17 00:00:00 2001 From: "Sean M. Law" <7473521+seanlaw@users.noreply.github.com> Date: Tue, 17 Jun 2025 20:57:46 -0400 Subject: [PATCH 53/54] Fixed #1086 Remove Codecov (#1084) * Minor change * Minor change * Initiate migration away from codecov * Minor change * Added parallel-mode to coverage.py * Added upload artifacts * Minor change * Removed paralle-mode from coverage.py * Minor change * Minor change * Minor change * Fixed combine function * Minor change * Minor change * Minor change * Minor change * Minor change * Minor change * Minor change * Minor change * Minor change * Minor change * Minor change * Added __init__.py to be omitted * Removed aggregation step * Minor change * Removed explicit show report --- .github/workflows/github-actions.yml | 9 --------- .gitignore | 4 ++-- README.rst | 3 +-- test.sh | 14 ++------------ 4 files changed, 5 insertions(+), 25 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 7939d29e1..9d0978d30 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -152,12 +152,3 @@ jobs: - name: Run Coverage Tests run: ./test.sh coverage shell: bash - - name: Generate Coverage Report - run: ./test.sh report coverage.stumpy.xml - shell: bash - - name: Upload Coverage Tests Results - uses: codecov/codecov-action@v4 - with: - file: ./coverage.stumpy.xml - verbose: true - token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.gitignore b/.gitignore index 4ba903a59..c1a36735c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,8 +8,8 @@ LOG* PID .coverage* -coverage.xml -stumpy.coverage.xml +coverage.json +coverage.stumpy.json dask-worker-space 
stumpy.egg-info build diff --git a/README.rst b/README.rst index b4c8e0bc8..d34b0cc8b 100644 --- a/README.rst +++ b/README.rst @@ -17,8 +17,7 @@ .. |Test Status| image:: https://github.com/stumpy-dev/stumpy/workflows/Tests/badge.svg :target: https://github.com/stumpy-dev/stumpy/actions?query=workflow%3ATests+branch%3Amain :alt: Test Status -.. |Code Coverage| image:: https://codecov.io/gh/stumpy-dev/stumpy/graph/badge.svg?token=u0DooAbGji - :target: https://codecov.io/gh/stumpy-dev/stumpy +.. |Code Coverage| image:: https://img.shields.io/badge/Coverage-100%25-green :alt: Code Coverage .. |RTD Status| image:: https://readthedocs.org/projects/stumpy/badge/?version=latest :target: https://stumpy.readthedocs.io/ diff --git a/test.sh b/test.sh index 06070c7c6..796024489 100755 --- a/test.sh +++ b/test.sh @@ -5,7 +5,6 @@ print_mode="verbose" custom_testfiles=() max_iter=10 site_pkgs=$(python -c 'import site; print(site.getsitepackages()[0])') -fcoveragexml="coverage.stumpy.xml" # Parse command line arguments for var in "$@" do @@ -31,8 +30,6 @@ do custom_testfiles+=("$var") elif [[ $var =~ ^[\-0-9]+$ ]]; then max_iter=$var - elif [[ "$var" == *".xml" ]]; then - fcoveragexml=$var elif [[ "$var" == "links" ]]; then test_mode="links" else @@ -170,14 +167,7 @@ set_ray_coveragerc() show_coverage_report() { set_ray_coveragerc - coverage report -m --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py $fcoveragerc -} - -gen_coverage_xml_report() -{ - # This function saves the coverage report in Cobertura XML format, which is compatible with codecov - set_ray_coveragerc - coverage xml -o $fcoveragexml --fail-under=100 --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py $fcoveragerc + coverage report --show-missing --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,min_versions.py,ray_python_version.py $fcoveragerc } test_custom() @@ -384,7 +374,7 @@ elif [[ $test_mode == "report" ]]; then echo 
"Generate Coverage Report Only" # Assume coverage tests have already been executed # and a coverage file exists - gen_coverage_xml_report + show_coverage_report elif [[ $test_mode == "gpu" ]]; then echo "Executing GPU Unit Tests Only" test_gpu From 534488d0b84f2bc20d529e6c46daf62c497f5f2b Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sun, 13 Jul 2025 08:45:58 -0400 Subject: [PATCH 54/54] Fixed #1045 Added raw P_ attr to stimp --- stumpy/aamp_stimp.py | 20 ++++++++++++++++++++ stumpy/stimp.py | 20 ++++++++++++++++++++ tests/test_aamp_stimp.py | 33 ++++++++++++++++++++++++++++++++- tests/test_stimp.py | 34 +++++++++++++++++++++++++++++++++- 4 files changed, 105 insertions(+), 2 deletions(-) diff --git a/stumpy/aamp_stimp.py b/stumpy/aamp_stimp.py index cf15725a8..99ffd2718 100644 --- a/stumpy/aamp_stimp.py +++ b/stumpy/aamp_stimp.py @@ -348,6 +348,26 @@ def M_(self): """ return self._M.astype(np.int64) + @property + def P_(self): + """ + Get all of the raw (i.e., non-transformed) matrix profiles in + breadth first searched (level) order + + Parameters + ---------- + None + + Returns + ------- + P : list + """ + P = [] + for i, idx in enumerate(self._bfs_indices): + P.append(self._PAN[idx][: len(self._T) - self._M[i] + 1]) + + return P + # @property # def bfs_indices_(self): # """ diff --git a/stumpy/stimp.py b/stumpy/stimp.py index 446b28a2a..84452cd39 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -360,6 +360,26 @@ def M_(self): """ return self._M.astype(np.int64) + @property + def P_(self): + """ + Get all of the raw (i.e., non-transformed) matrix profiles in + breadth first searched (level) order + + Parameters + ---------- + None + + Returns + ------- + P : list + """ + P = [] + for i, idx in enumerate(self._bfs_indices): + P.append(self._PAN[idx][: len(self._T) - self._M[i] + 1]) + + return P + # @property # def bfs_indices_(self): # """ diff --git a/tests/test_aamp_stimp.py b/tests/test_aamp_stimp.py index 
f21634813..621dbb1af 100644 --- a/tests/test_aamp_stimp.py +++ b/tests/test_aamp_stimp.py @@ -158,7 +158,7 @@ def test_aamp_stimp_100_percent(T): max_m=None, step=1, percentage=percentage, - pre_scraamp=True, + pre_scraamp=False, ) for i in range(n): @@ -197,6 +197,37 @@ def test_aamp_stimp_100_percent(T): npt.assert_almost_equal(ref_pan, cmp_pan) +@pytest.mark.parametrize("T", T) +def test_stimp_raw_mp(T): + """ + Check pan.P_ attribute for raw matrix profile + """ + percentage = 1.0 + min_m = 3 + n = 5 + + pan = aamp_stimp( + T, + min_m=min_m, + max_m=None, + step=1, + percentage=percentage, + pre_scraamp=False, + ) + + for i in range(n): + pan.update() + + for idx, m in enumerate(pan.M_[:n]): + zone = int(np.ceil(m / 4)) + ref_P_ = naive.aamp(T, m, T_B=None, exclusion_zone=zone)[:, 0] + cmp_P_ = pan.P_[idx] + + naive.replace_inf(ref_P_) + naive.replace_inf(cmp_P_) + npt.assert_almost_equal(ref_P_, cmp_P_) + + @pytest.mark.filterwarnings("ignore:numpy.dtype size changed") @pytest.mark.filterwarnings("ignore:numpy.ufunc size changed") @pytest.mark.filterwarnings("ignore:numpy.ndarray size changed") diff --git a/tests/test_stimp.py b/tests/test_stimp.py index 2f1fe334e..608c03ccc 100644 --- a/tests/test_stimp.py +++ b/tests/test_stimp.py @@ -148,7 +148,7 @@ def test_stimp_100_percent(T): max_m=None, step=1, percentage=percentage, - pre_scrump=True, + pre_scrump=False, # normalize=True, ) @@ -182,6 +182,38 @@ def test_stimp_100_percent(T): npt.assert_almost_equal(ref_pan, cmp_pan) +@pytest.mark.parametrize("T", T) +def test_stimp_raw_mp(T): + """ + Check pan.P_ attribute for raw matrix profile + """ + percentage = 1.0 + min_m = 3 + n = 5 + + pan = stimp( + T, + min_m=min_m, + max_m=None, + step=1, + percentage=percentage, + pre_scrump=False, + # normalize=True, + ) + + for i in range(n): + pan.update() + + for idx, m in enumerate(pan.M_[:n]): + zone = int(np.ceil(m / 4)) + ref_P_ = naive.stump(T, m, T_B=None, exclusion_zone=zone)[:, 0] + cmp_P_ = pan.P_[idx] + + 
naive.replace_inf(ref_P_) + naive.replace_inf(cmp_P_) + npt.assert_almost_equal(ref_P_, cmp_P_) + + @pytest.mark.filterwarnings("ignore:numpy.dtype size changed") @pytest.mark.filterwarnings("ignore:numpy.ufunc size changed") @pytest.mark.filterwarnings("ignore:numpy.ndarray size changed")