Skip to content

BLD, SIMD: The meson CPU dispatcher implementation #24405

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/meson_actions/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Composite action: install the build requirements, build with spin,
# sanity-check the build tree and the installed files, then run the tests.
name: MesonBuildTest
description: 'checkout repo, build, and test numpy'
runs:
  using: composite
  steps:
    - name: Install dependencies
      shell: bash
      run: pip install -r build_requirements.txt

    # Build and Test are executed through `script` with TERM set --
    # presumably so the tools see a terminal and keep colored output;
    # confirm before simplifying the shell line.
    - name: Build
      env:
        TERM: xterm-256color
      shell: 'script -q -e -c "bash --noprofile --norc -eo pipefail {0}"'
      # MESON_ARGS is not defined in this action; it is read from the
      # environment the action runs in.
      run: spin build -- ${MESON_ARGS[@]}

    - name: Check build-internal dependencies
      shell: bash
      run: ninja -C build -t missingdeps

    - name: Check installed test and stub files
      shell: bash
      # Folded scalar: the two lines below are joined with a single space.
      run: >-
        python tools/check_installed_files.py
        $(find ./build-install -path '*/site-packages/numpy')

    - name: Test
      env:
        TERM: xterm-256color
      shell: 'script -q -e -c "bash --noprofile --norc -eo pipefail {0}"'
      run: |
        pip install pytest pytest-xdist hypothesis typing_extensions
        spin test -j auto
20 changes: 10 additions & 10 deletions .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
if: "github.repository == 'numpy/numpy'"
runs-on: ubuntu-latest
env:
WITHOUT_SIMD: 1
MESON_ARGS: "-Dallow-noblas=true -Dcpu-baseline=none -Dcpu-dispatch=none"
steps:
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
with:
Expand All @@ -58,7 +58,7 @@ jobs:
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: ./.github/actions
- uses: ./.github/meson_actions

basic:
needs: [smoke_test]
Expand Down Expand Up @@ -122,7 +122,7 @@ jobs:
runs-on: ubuntu-latest
if: github.event_name != 'push'
env:
WITHOUT_OPTIMIZATIONS: 1
MESON_ARGS: "-Dallow-noblas=true -Ddisable-optimization=true"
steps:
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
with:
Expand All @@ -131,14 +131,14 @@ jobs:
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: ./.github/actions
- uses: ./.github/meson_actions

with_baseline_only:
needs: [smoke_test]
runs-on: ubuntu-latest
if: github.event_name != 'push'
env:
CPU_DISPATCH: "none"
MESON_ARGS: "-Dallow-noblas=true -Dcpu-dispatch=none"
steps:
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
with:
Expand All @@ -147,14 +147,14 @@ jobs:
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: ./.github/actions
- uses: ./.github/meson_actions

without_avx512:
needs: [smoke_test]
runs-on: ubuntu-latest
if: github.event_name != 'push'
env:
CPU_DISPATCH: "max -xop -fma4 -avx512f -avx512cd -avx512_knl -avx512_knm -avx512_skx -avx512_clx -avx512_cnl -avx512_icl"
MESON_ARGS: "-Dallow-noblas=true -Dcpu-dispatch=SSSE3,SSE41,POPCNT,SSE42,AVX,F16C,AVX2,FMA3"
steps:
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
with:
Expand All @@ -163,14 +163,14 @@ jobs:
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: ./.github/actions
- uses: ./.github/meson_actions

without_avx512_avx2_fma3:
needs: [smoke_test]
runs-on: ubuntu-latest
if: github.event_name != 'push'
env:
CPU_DISPATCH: "SSSE3 SSE41 POPCNT SSE42 AVX F16C"
MESON_ARGS: "-Dallow-noblas=true -Dcpu-dispatch=SSSE3,SSE41,POPCNT,SSE42,AVX,F16C"
steps:
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
with:
Expand All @@ -179,7 +179,7 @@ jobs:
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: ./.github/actions
- uses: ./.github/meson_actions

debug:
needs: [smoke_test]
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ recursive-include numpy/random *.pyx *.pxd *.pyx.in *.pxd.in
include numpy/py.typed
include numpy/random/include/*
include numpy/*.pxd
# Meson CPU Dispatcher
recursive-include meson_cpu *.build *.in
# Add build support that should go in sdist, but not go in bdist/be installed
# Note that sub-directories that don't have __init__ are apparently not
# included by 'recursive-include', so list those separately
Expand Down
4 changes: 2 additions & 2 deletions build_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
meson-python>=0.10.0
Cython
meson-python>=0.13.1
Cython>=3.0
wheel==0.38.1
ninja
spin==0.4
2 changes: 1 addition & 1 deletion doc/source/user/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ and other Python sequences.
>>> for i in a:
... print(i**(1 / 3.))
...
9.999999999999998
9.999999999999998 # may vary
1.0
9.999999999999998
3.0
Expand Down
3 changes: 2 additions & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ project(
# See `numpy/__init__.py`
version: '1.26.0.dev0',
license: 'BSD-3',
meson_version: '>= 1.1.0',
meson_version: '>=1.2.99', # version in vendored-meson is 1.2.99
default_options: [
'buildtype=debugoptimized',
'b_ndebug=if-release',
Expand Down Expand Up @@ -80,4 +80,5 @@ else
meson.add_dist_script(py, versioneer, '-o', '_version_meson.py')
endif

subdir('meson_cpu')
subdir('numpy')
58 changes: 58 additions & 0 deletions meson_cpu/arm/meson.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Root of the project source tree; the C probe files passed as `test_code`
# below are addressed relative to it.
source_root = meson.project_source_root()
# Module whose `new()` declares a CPU feature object.
mod_features = import('features')

# The features below form a chain: each one names its predecessor in
# `implies`. The integer after the feature name grows along the chain
# (presumably a priority/interest level -- confirm against the module).
NEON = mod_features.new(
  'NEON',
  1,
  test_code: files(source_root + '/numpy/distutils/checks/cpu_neon.c')[0]
)

NEON_FP16 = mod_features.new(
  'NEON_FP16',
  2,
  implies: NEON,
  test_code: files(source_root + '/numpy/distutils/checks/cpu_neon_fp16.c')[0]
)

# FMA
NEON_VFPV4 = mod_features.new(
  'NEON_VFPV4',
  3,
  implies: NEON_FP16,
  test_code: files(source_root + '/numpy/distutils/checks/cpu_neon_vfpv4.c')[0]
)

# Advanced SIMD
# NOTE(review): the `detect` entry pairs the value 'ASIMD' with the pattern
# 'NEON.*'; it looks like ASIMD replaces matching NEON names in the detect
# list -- confirm against the features module.
ASIMD = mod_features.new(
  'ASIMD',
  4,
  implies: NEON_VFPV4,
  detect: {'val': 'ASIMD', 'match': 'NEON.*'},
  test_code: files(source_root + '/numpy/distutils/checks/cpu_asimd.c')[0]
)
# Adjust the features declared above according to the host CPU family.
cpu_family = host_machine.cpu_family()
if cpu_family == 'aarch64'
  # Hardware baseline: NEON, NEON_FP16 and NEON_VFPV4 are each updated to
  # imply the other two plus ASIMD; no compiler arguments are attached.
  NEON.update(
    implies: [NEON_FP16, NEON_VFPV4, ASIMD]
  )
  NEON_FP16.update(
    implies: [NEON, NEON_VFPV4, ASIMD]
  )
  NEON_VFPV4.update(
    implies: [NEON, NEON_FP16, ASIMD]
  )
elif cpu_family == 'arm'
  # 32-bit ARM: every feature gets explicit compiler flags.
  # NOTE(review): in the dict-form arguments, 'match' presumably is a
  # pattern of earlier flags that 'val' supersedes -- confirm against the
  # features module.
  NEON.update(
    args: '-mfpu=neon'
  )
  NEON_FP16.update(
    args: [
      '-mfp16-format=ieee',
      {'val': '-mfpu=neon-fp16', 'match': '-mfpu=.*'}
    ]
  )
  NEON_VFPV4.update(
    args: [
      {'val': '-mfpu=neon-vfpv4', 'match': '-mfpu=.*'}
    ]
  )
  ASIMD.update(
    args: [
      {'val': '-mfpu=neon-fp-armv8', 'match': '-mfpu=.*'},
      '-march=armv8-a+simd'
    ]
  )
endif
# ARMv8.2 extensions. Each one carries a single `-march=armv8.2-a+<ext>`
# argument in dict form.
# NOTE(review): 'match' / 'mfilter' look like patterns controlling how this
# flag is merged with previously collected `-march=` flags -- confirm
# against the features module before editing them.

# ARMv8.2 half-precision & vector arithmetic
ASIMDHP = mod_features.new(
  'ASIMDHP',
  5,
  implies: ASIMD,
  args: {
    'val': '-march=armv8.2-a+fp16',
    'match': '-march=.*',
    'mfilter': '\+.*'
  },
  test_code: files(source_root + '/numpy/distutils/checks/cpu_asimdhp.c')[0]
)

# ARMv8.2 dot product
ASIMDDP = mod_features.new(
  'ASIMDDP',
  6,
  implies: ASIMD,
  args: {
    'val': '-march=armv8.2-a+dotprod',
    'match': '-march=.*',
    'mfilter': '\+.*'
  },
  test_code: files(source_root + '/numpy/distutils/checks/cpu_asimddp.c')[0]
)

# ARMv8.2 single & half-precision multiply
ASIMDFHM = mod_features.new(
  'ASIMDFHM',
  7,
  implies: ASIMDHP,
  args: {
    'val': '-march=armv8.2-a+fp16fml',
    'match': '-march=.*',
    'mfilter': '\+.*'
  },
  test_code: files(source_root + '/numpy/distutils/checks/cpu_asimdfhm.c')[0]
)
# TODO: Add support for MSVC
# Name -> feature-object table for the ARM features declared in this file.
# ASIMDDP is listed as well: it is declared in this file like the others,
# and leaving it out would make it the only feature that cannot be looked
# up by name through this table.
ARM_FEATURES = {
  'NEON': NEON,
  'NEON_FP16': NEON_FP16,
  'NEON_VFPV4': NEON_VFPV4,
  'ASIMD': ASIMD,
  'ASIMDHP': ASIMDHP,
  'ASIMDDP': ASIMDDP,
  'ASIMDFHM': ASIMDFHM
}
Loading