From 8e814cc3d8f762cbb8643d9abe3ef782d97a0a19 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Mon, 6 Jan 2025 11:38:11 -0500 Subject: [PATCH 1/2] add fsspec to rdheader --- pyproject.toml | 2 ++ wfdb/io/download.py | 9 +++++++-- wfdb/io/record.py | 8 ++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 09fbdad9..5faaa00e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,8 @@ dependencies = [ "soundfile >= 0.10.0", "matplotlib >= 3.2.2", "requests >= 2.8.1", + "fsspec >= 2023.10.0", + "aiohttp >= 3.11.11", ] dynamic = ["version"] diff --git a/wfdb/io/download.py b/wfdb/io/download.py index 338d8b97..2d23a3eb 100644 --- a/wfdb/io/download.py +++ b/wfdb/io/download.py @@ -3,6 +3,7 @@ import os import posixpath +import fsspec import numpy as np from wfdb.io import _url @@ -102,10 +103,14 @@ def _stream_header(file_name: str, pn_dir: str) -> str: """ # Full url of header location - url = posixpath.join(config.db_index_url, pn_dir, file_name) + cloud_protocols = ["azureml:", "s3://", "gs://"] + if any(pn_dir.startswith(proto) for proto in cloud_protocols): + url = posixpath.join(pn_dir, file_name) + else: + url = posixpath.join(config.db_index_url, pn_dir, file_name) # Get the content of the remote file - with _url.openurl(url, "rb") as f: + with fsspec.open(url, "rb") as f: content = f.read() return content.decode("iso-8859-1") diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 1a8855ed..1cab87a3 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -4,6 +4,7 @@ import os import re +import fsspec import numpy as np import pandas as pd @@ -1827,7 +1828,10 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): dir_name = os.path.abspath(dir_name) # Construct the download path using the database version - if (pn_dir is not None) and ("." not in pn_dir): + cloud_protocols = ["azureml:", "s3://", "gs://"] + if (pn_dir is not None) and any(pn_dir.startswith(proto) for proto in cloud_protocols): + pass + elif (pn_dir is not None) and ("." not in pn_dir): dir_list = pn_dir.split("/") pn_dir = posixpath.join( dir_list[0], download.get_version(dir_list[0]), *dir_list[1:] @@ -1836,7 +1840,7 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): # Read the local or remote header file. file_name = f"{base_record_name}.hea" if pn_dir is None: - with open( + with fsspec.open( os.path.join(dir_name, file_name), "r", encoding="ascii", From 52a146bbc42d59a2a0028a44a6c450c5f51b1e08 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Mon, 6 Jan 2025 12:08:58 -0500 Subject: [PATCH 2/2] remove fsspec open --- wfdb/io/download.py | 3 ++- wfdb/io/record.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/wfdb/io/download.py b/wfdb/io/download.py index 2d23a3eb..c75c2408 100644 --- a/wfdb/io/download.py +++ b/wfdb/io/download.py @@ -110,7 +110,8 @@ def _stream_header(file_name: str, pn_dir: str) -> str: url = posixpath.join(config.db_index_url, pn_dir, file_name) # Get the content of the remote file - with fsspec.open(url, "rb") as f: + # with fsspec.open(url, "rb") as f: + with open(url, "rb") as f: content = f.read() return content.decode("iso-8859-1") diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 1cab87a3..237fb33c 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1840,7 +1840,8 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): # Read the local or remote header file. file_name = f"{base_record_name}.hea" if pn_dir is None: - with fsspec.open( + # with fsspec.open( + with open( os.path.join(dir_name, file_name), "r", encoding="ascii",