From 5357f5538f6fafaa0cf9e0d4bf50050dc410e857 Mon Sep 17 00:00:00 2001
From: Benjamin Moody
Date: Tue, 17 May 2022 13:33:42 -0400
Subject: [PATCH] dl_files, dl_database: avoid using multiple processes

The standard multiprocessing module is used to distribute a task to
multiple processes, which is useful when doing heavy computation due
to the limitations of CPython; however, making this work depends on
the ability to fork processes, or else to kludgily emulate forking on
systems that don't support it. In particular, it tends to cause
problems on Windows unless you are very scrupulous about how you
write your program. Therefore, as a rule, the multiprocessing module
shouldn't be used by general-purpose libraries, and should only be
invoked by application programmers themselves (who are in a position
to guarantee that imports have no side effects, that the main script
uses 'if __name__ == "__main__"', etc.).

However, downloading a file isn't a CPU-bound task, it's an I/O-bound
task, and for this purpose parallel threads should work as well as,
or even better than, parallel processes. The multiprocessing.dummy
module provides the same API as the multiprocessing module but uses
threads instead of processes, so it should be safe to use in a
general-purpose library.
---
 wfdb/io/download.py | 4 ++--
 wfdb/io/record.py   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/wfdb/io/download.py b/wfdb/io/download.py
index 06953495..57560e36 100644
--- a/wfdb/io/download.py
+++ b/wfdb/io/download.py
@@ -1,5 +1,5 @@
 import json
-import multiprocessing
+import multiprocessing.dummy
 import os
 import posixpath
@@ -566,7 +566,7 @@ def dl_files(db, dl_dir, files, keep_subdirs=True, overwrite=False):
     print("Downloading files...")
     # Create multiple processes to download files.
     # Limit to 2 connections to avoid overloading the server
-    pool = multiprocessing.Pool(processes=2)
+    pool = multiprocessing.dummy.Pool(processes=2)
     pool.map(dl_pn_file, dl_inputs)
     print("Finished downloading files")

diff --git a/wfdb/io/record.py b/wfdb/io/record.py
index 290987a4..e44eed30 100644
--- a/wfdb/io/record.py
+++ b/wfdb/io/record.py
@@ -1,5 +1,5 @@
 import datetime
-import multiprocessing
+import multiprocessing.dummy
 import posixpath
 import re
@@ -3090,7 +3090,7 @@ def dl_database(
     print("Downloading files...")
     # Create multiple processes to download files.
     # Limit to 2 connections to avoid overloading the server
-    pool = multiprocessing.Pool(processes=2)
+    pool = multiprocessing.dummy.Pool(processes=2)
     pool.map(download.dl_pn_file, dl_inputs)
     print("Finished downloading files")
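
For illustration, here is a minimal, self-contained sketch of the
threads-versus-processes point made in the commit message. It is not
part of the patch or of the wfdb codebase; the fetch() helper and the
URLs are placeholders. It shows that multiprocessing.dummy.Pool is a
drop-in, thread-backed replacement for multiprocessing.Pool, which is
sufficient for I/O-bound work because CPython releases the GIL while
a thread is blocked on the network.

    import multiprocessing.dummy
    import urllib.request

    def fetch(url):
        # I/O-bound task: the GIL is released while blocked on the
        # network, so threads download concurrently even in CPython.
        with urllib.request.urlopen(url) as response:
            return url, len(response.read())

    if __name__ == "__main__":
        # Placeholder URLs; substitute the files you actually want.
        urls = ["https://example.com/", "https://www.example.com/"]
        # Two worker threads, mirroring the patch's two-connection cap.
        pool = multiprocessing.dummy.Pool(processes=2)
        try:
            for url, nbytes in pool.map(fetch, urls):
                print(url, nbytes)
        finally:
            pool.close()
            pool.join()

Because multiprocessing.dummy mirrors the multiprocessing API, the
patch itself only needs to change the import and the Pool constructor;
the pool.map() call sites are untouched.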