Skip to content

Commit 2698b9f

Browse files
committed
Merge branch 'main' into glm_newton_cholesky
2 parents d4a8b4e + 2303c89 commit 2698b9f

File tree

194 files changed

+6249
-2226
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

194 files changed

+6249
-2226
lines changed

.git-blame-ignore-revs

+3
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@
2222

2323
# PR 22474: Update to Black 22.1.0
2424
1fc86b6aacd89da44a3b4e8abf7c3e2ba4336ffe
25+
26+
# PR 22983: Update to Black 22.3.0
27+
d4aad64b1eb2e42e76f49db2ccfbe4b4660d092b

.github/workflows/update_tracking_issue.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ on:
2424
jobs:
2525
update_tracking_issue:
2626
runs-on: ubuntu-latest
27-
if: github.repository == 'scikit-learn/scikit-learn'
27+
if: github.repository == 'scikit-learn/scikit-learn' && github.event_name == 'schedule'
2828
steps:
2929
- uses: actions/checkout@v2
3030
- uses: actions/setup-python@v2

.github/workflows/wheels.yml

+9-12
Original file line numberDiff line numberDiff line change
@@ -86,18 +86,6 @@ jobs:
8686
platform_id: manylinux_x86_64
8787
manylinux_image: manylinux2014
8888

89-
# Linux 64 bit manylinux2010
90-
- os: ubuntu-latest
91-
python: 38
92-
bitness: 64
93-
platform_id: manylinux_x86_64
94-
manylinux_image: manylinux2010
95-
- os: ubuntu-latest
96-
python: 39
97-
bitness: 64
98-
platform_id: manylinux_x86_64
99-
manylinux_image: manylinux2010
100-
10189
# NumPy on Python 3.10 only supports 64bit and is only available with manylinux2014
10290
- os: ubuntu-latest
10391
python: 310
@@ -169,6 +157,15 @@ jobs:
169157
with:
170158
path: wheelhouse/*.whl
171159

160+
update-tracker:
161+
uses: ./.github/workflows/update_tracking_issue.yml
162+
if: ${{ always() }}
163+
needs: [build_wheels]
164+
with:
165+
job_status: ${{ needs.build_wheels.result }}
166+
secrets:
167+
BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
168+
172169
# Build the source distribution under Linux
173170
build_sdist:
174171
name: Source distribution

asv_benchmarks/benchmarks/linear_model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def skip(self, params):
110110

111111
class LinearRegressionBenchmark(Predictor, Estimator, Benchmark):
112112
"""
113-
Benchmarks for Linear Reagression.
113+
Benchmarks for Linear Regression.
114114
"""
115115

116116
param_names = ["representation"]

azure-pipelines.yml

+40-27
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,7 @@ jobs:
1313
pool:
1414
vmImage: ubuntu-20.04
1515
steps:
16-
- bash: |
17-
set -ex
18-
if [[ $BUILD_REASON == "PullRequest" ]]; then
19-
# By default pull requests use refs/pull/PULL_ID/merge as the source branch
20-
# which has a "Merge ID into ID" as a commit message. The latest commit
21-
# message is the second to last commit
22-
COMMIT_ID=$(echo $BUILD_SOURCEVERSIONMESSAGE | awk '{print $2}')
23-
message=$(git log $COMMIT_ID -1 --pretty=%B)
24-
else
25-
message=$BUILD_SOURCEVERSIONMESSAGE
26-
fi
27-
echo "##vso[task.setvariable variable=message;isOutput=true]$message"
16+
- bash: python build_tools/azure/get_commit_message.py
2817
name: commit
2918
displayName: Get source version message
3019

@@ -81,7 +70,40 @@ jobs:
8170
# Tests that require large downloads over the networks are skipped in CI.
8271
# Here we make sure, that they are still run on a regular basis.
8372
SKLEARN_SKIP_NETWORK_TESTS: '0'
84-
CREATE_ISSUE_ON_TRACKER: 'true'
73+
74+
- template: build_tools/azure/posix.yml
75+
# Experimental CPython branch without the Global Interpreter Lock:
76+
# https://github.com/colesbury/nogil/
77+
#
78+
# The nogil build relies on a dedicated PyPI-style index to install patched
79+
# versions of NumPy, SciPy and Cython maintained by @colesbury and that
80+
# include specific fixes to make them run correctly without relying on the GIL.
81+
#
82+
# The goal of this CI entry is to make sure that we do not introduce any
83+
# dependency on the GIL in scikit-learn itself. An auxiliary goal is to detect
84+
# any regression in the patched build dependencies early to report them
85+
# upstream. The long-term goal is to be able to stop having to maintain
86+
# multiprocessing-based workarounds / hacks in joblib / loky to make multi-CPU
87+
# computing in scikit-learn efficient by default using regular threads.
88+
#
89+
# If this experimental entry becomes too unstable, feel free to disable it.
90+
parameters:
91+
name: Linux_nogil
92+
vmImage: ubuntu-20.04
93+
dependsOn: [git_commit, linting]
94+
condition: |
95+
and(
96+
succeeded(),
97+
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
98+
or(eq(variables['Build.Reason'], 'Schedule'),
99+
contains(dependencies['git_commit']['outputs']['commit.message'], '[nogil]'
100+
)
101+
)
102+
)
103+
matrix:
104+
pylatest_pip_nogil:
105+
DISTRIB: 'pip-nogil'
106+
COVERAGE: 'false'
85107

86108
# Check compilation with intel C++ compiler (ICC)
87109
- template: build_tools/azure/posix.yml
@@ -126,7 +148,6 @@ jobs:
126148
DOCKER_CONTAINER: 'condaforge/mambaforge-pypy3:4.10.3-5'
127149
PILLOW_VERSION: 'none'
128150
PANDAS_VERSION: 'none'
129-
CREATE_ISSUE_ON_TRACKER: 'true'
130151

131152
# Will run all the time regardless of linting outcome.
132153
- template: build_tools/azure/posix.yml
@@ -158,8 +179,7 @@ jobs:
158179
condition: |
159180
and(
160181
succeeded(),
161-
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
162-
ne(variables['Build.Reason'], 'Schedule')
182+
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]'))
163183
)
164184
matrix:
165185
py38_conda_forge_openblas_ubuntu_1804:
@@ -179,8 +199,7 @@ jobs:
179199
condition: |
180200
and(
181201
succeeded(),
182-
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
183-
ne(variables['Build.Reason'], 'Schedule')
202+
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]'))
184203
)
185204
matrix:
186205
# Linux environment to test that scikit-learn can be built against
@@ -225,8 +244,7 @@ jobs:
225244
condition: |
226245
and(
227246
succeeded(),
228-
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
229-
ne(variables['Build.Reason'], 'Schedule')
247+
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]'))
230248
)
231249
matrix:
232250
debian_atlas_32bit:
@@ -247,8 +265,7 @@ jobs:
247265
condition: |
248266
and(
249267
succeeded(),
250-
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
251-
ne(variables['Build.Reason'], 'Schedule')
268+
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]'))
252269
)
253270
matrix:
254271
pylatest_conda_forge_mkl:
@@ -271,8 +288,7 @@ jobs:
271288
condition: |
272289
and(
273290
succeeded(),
274-
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
275-
ne(variables['Build.Reason'], 'Schedule')
291+
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]'))
276292
)
277293
matrix:
278294
py38_conda_forge_mkl:
@@ -284,9 +300,6 @@ jobs:
284300
# Unpin when pytest stalling issue is fixed
285301
PYTEST_VERSION: '6.2.5'
286302
COVERAGE: 'true'
287-
# Temporary fix for setuptools to use distutils from standard lib
288-
# https://github.com/numpy/numpy/issues/17216
289-
SETUPTOOLS_USE_DISTUTILS: 'stdlib'
290303
SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '7' # non-default seed
291304
py38_pip_openblas_32bit:
292305
PYTHON_VERSION: '3.8'

benchmarks/bench_multilabel_metrics.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@
3434
FORMATS = {
3535
"sequences": lambda y: [list(np.flatnonzero(s)) for s in y],
3636
"dense": lambda y: y,
37-
"csr": lambda y: sp.csr_matrix(y),
38-
"csc": lambda y: sp.csc_matrix(y),
37+
"csr": sp.csr_matrix,
38+
"csc": sp.csc_matrix,
3939
}
4040

4141

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import os
2+
import subprocess
3+
4+
5+
def get_commit_message():
6+
"""Retrieve the commit message."""
7+
build_source_version_message = os.environ["BUILD_SOURCEVERSIONMESSAGE"]
8+
9+
if os.environ["BUILD_REASON"] == "PullRequest":
10+
# By default pull requests use refs/pull/PULL_ID/merge as the source branch
11+
# which has a "Merge ID into ID" as a commit message. The latest commit
12+
# message is the second to last commit
13+
commit_id = build_source_version_message.split()[1]
14+
git_cmd = ["git", "log", commit_id, "-1", "--pretty=%B"]
15+
commit_message = subprocess.run(
16+
git_cmd, capture_output=True, text=True
17+
).stdout.strip()
18+
else:
19+
commit_message = build_source_version_message
20+
21+
return commit_message
22+
23+
24+
if __name__ == "__main__":
25+
# set the environment variable to be propagated to other steps
26+
commit_message = get_commit_message()
27+
print(f"##vso[task.setvariable variable=message;isOutput=true]{commit_message}")
28+
29+
print(f"commit message: {commit_message}") # helps debugging
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from get_commit_message import get_commit_message
2+
3+
4+
def get_selected_tests():
5+
"""Parse the commit message to check if pytest should run only specific tests.
6+
7+
If so, selected tests will be run with SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all".
8+
9+
The commit message must take the form:
10+
<title> [all random seeds]
11+
<test_name_1>
12+
<test_name_2>
13+
...
14+
"""
15+
commit_message = get_commit_message()
16+
17+
if "[all random seeds]" in commit_message:
18+
selected_tests = commit_message.split("[all random seeds]")[1].strip()
19+
selected_tests = selected_tests.replace("\n", " or ")
20+
else:
21+
selected_tests = ""
22+
23+
return selected_tests
24+
25+
26+
if __name__ == "__main__":
27+
# set the environment variable to be propagated to other steps
28+
selected_tests = get_selected_tests()
29+
30+
if selected_tests:
31+
print(f"##vso[task.setvariable variable=SELECTED_TESTS]'{selected_tests}'")
32+
print(f"selected tests: {selected_tests}") # helps debugging
33+
else:
34+
print("no selected tests")

build_tools/azure/install.sh

+45-8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ source build_tools/shared.sh
88

99
UNAMESTR=`uname`
1010

11+
CCACHE_LINKS_DIR="/tmp/ccache"
12+
13+
1114
make_conda() {
1215
TO_INSTALL="$@"
1316
if [[ "$DISTRIB" == *"mamba"* ]]; then
@@ -20,14 +23,21 @@ make_conda() {
2023
}
2124

2225
setup_ccache() {
23-
echo "Setting up ccache with CCACHE_DIR=${CCACHE_DIR}"
24-
mkdir /tmp/ccache/
25-
which ccache
26-
for name in gcc g++ cc c++ clang clang++ i686-linux-gnu-gcc i686-linux-gnu-c++ x86_64-linux-gnu-gcc x86_64-linux-gnu-c++ x86_64-apple-darwin13.4.0-clang x86_64-apple-darwin13.4.0-clang++; do
27-
ln -s $(which ccache) "/tmp/ccache/${name}"
28-
done
29-
export PATH="/tmp/ccache/:${PATH}"
30-
ccache -M 256M
26+
CCACHE_BIN=`which ccache || echo ""`
27+
if [[ "${CCACHE_BIN}" == "" ]]; then
28+
echo "ccache not found, skipping..."
29+
elif [[ -d "${CCACHE_LINKS_DIR}" ]]; then
30+
echo "ccache already configured, skipping..."
31+
else
32+
echo "Setting up ccache with CCACHE_DIR=${CCACHE_DIR}"
33+
mkdir ${CCACHE_LINKS_DIR}
34+
which ccache
35+
for name in gcc g++ cc c++ clang clang++ i686-linux-gnu-gcc i686-linux-gnu-c++ x86_64-linux-gnu-gcc x86_64-linux-gnu-c++ x86_64-apple-darwin13.4.0-clang x86_64-apple-darwin13.4.0-clang++; do
36+
ln -s ${CCACHE_BIN} "${CCACHE_LINKS_DIR}/${name}"
37+
done
38+
export PATH="${CCACHE_LINKS_DIR}:${PATH}"
39+
ccache -M 256M
40+
fi
3141
}
3242

3343
pre_python_environment_install() {
@@ -48,6 +58,12 @@ pre_python_environment_install() {
4858
apt-get -yq update
4959
apt-get -yq install build-essential
5060

61+
elif [[ "$DISTRIB" == "pip-nogil" ]]; then
62+
echo "deb-src http://archive.ubuntu.com/ubuntu/ focal main" | sudo tee -a /etc/apt/sources.list
63+
sudo apt-get -yq update
64+
sudo apt-get install -yq ccache
65+
sudo apt-get build-dep -yq python3 python3-dev
66+
5167
elif [[ "$BUILD_WITH_ICC" == "true" ]]; then
5268
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
5369
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
@@ -56,6 +72,7 @@ pre_python_environment_install() {
5672
sudo apt-get update
5773
sudo apt-get install intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
5874
source /opt/intel/oneapi/setvars.sh
75+
5976
fi
6077
}
6178

@@ -120,6 +137,26 @@ python_environment_install() {
120137
pip install https://github.com/joblib/joblib/archive/master.zip
121138
echo "Installing pillow master"
122139
pip install https://github.com/python-pillow/Pillow/archive/main.zip
140+
141+
elif [[ "$DISTRIB" == "pip-nogil" ]]; then
142+
setup_ccache # speed up the build of CPython itself
143+
ORIGINAL_FOLDER=`pwd`
144+
cd ..
145+
git clone --depth 1 https://github.com/colesbury/nogil
146+
cd nogil
147+
./configure && make -j 2
148+
./python -m venv $ORIGINAL_FOLDER/$VIRTUALENV
149+
cd $ORIGINAL_FOLDER
150+
source $VIRTUALENV/bin/activate
151+
152+
python -m pip install -U pip
153+
# The pip version that comes with the nogil branch of CPython
154+
# automatically uses the custom nogil index as its highest priority
155+
# index to fetch patched versions of libraries with native code that
156+
# would otherwise depend on the GIL.
157+
echo "Installing build dependencies with pip from the nogil repository: https://d1yxz45j0ypngg.cloudfront.net/"
158+
pip install numpy scipy cython joblib threadpoolctl
159+
123160
fi
124161

125162
python -m pip install $(get_dep threadpoolctl $THREADPOOLCTL_VERSION) \

0 commit comments

Comments
 (0)