Skip to content

Commit 14248d1

Browse files
authored
Refactor header helper functions and stop parsing comments on non-comment lines (MIT-LCP#393)
1 parent 0cb5638 commit 14248d1

File tree

3 files changed

+46
-87
lines changed

3 files changed

+46
-87
lines changed

wfdb/io/_header.py

Lines changed: 23 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
import datetime
22
import os
33
import re
4+
from typing import List, Tuple
45

56
import numpy as np
67
import pandas as pd
78

8-
from wfdb.io import download
99
from wfdb.io import _signal
1010

1111

@@ -872,7 +872,7 @@ def get_sig_name(self):
872872
return sig_name
873873

874874

875-
def wfdb_strptime(time_string):
875+
def wfdb_strptime(time_string: str) -> datetime.time:
876876
"""
877877
Given a time string in an acceptable WFDB format, return
878878
a datetime.time object.
@@ -905,73 +905,45 @@ def wfdb_strptime(time_string):
905905
return datetime.datetime.strptime(time_string, time_fmt).time()
906906

907907

908-
def _read_header_lines(base_record_name, dir_name, pn_dir):
908+
def parse_header_content(
909+
header_content: str,
910+
) -> Tuple[List[str], List[str]]:
909911
"""
910-
Read the lines in a local or remote header file.
912+
Parse the text of a header file.
911913
912914
Parameters
913915
----------
914-
base_record_name : str
915-
The base name of the WFDB record to be read, without any file
916-
extensions.
917-
dir_name : str
918-
The local directory location of the header file. This parameter
919-
is ignored if `pn_dir` is set.
920-
pn_dir : str
921-
Option used to stream data from Physionet. The Physionet
922-
database directory from which to find the required record files.
923-
eg. For record '100' in 'http://physionet.org/content/mitdb'
924-
pn_dir='mitdb'.
916+
header_content: str
917+
The string content of the full header file
925918
926919
Returns
927920
-------
928-
header_lines : list
929-
List of strings corresponding to the header lines.
930-
comment_lines : list
931-
List of strings corresponding to the comment lines.
932-
921+
header_lines : List[str]
922+
A list of all the non-comment lines
923+
comment_lines : List[str]
924+
A list of all the comment lines
933925
"""
934-
file_name = base_record_name + ".hea"
935-
936-
# Read local file
937-
if pn_dir is None:
938-
with open(
939-
os.path.join(dir_name, file_name), "r", errors="ignore"
940-
) as fp:
941-
# Record line followed by signal/segment lines if any
942-
header_lines = []
943-
# Comment lines
944-
comment_lines = []
945-
for line in fp:
946-
line = line.strip()
947-
# Comment line
948-
if line.startswith("#"):
949-
comment_lines.append(line)
950-
# Non-empty non-comment line = header line.
951-
elif line:
952-
# Look for a comment in the line
953-
ci = line.find("#")
954-
if ci > 0:
955-
header_lines.append(line[:ci])
956-
# comment on same line as header line
957-
comment_lines.append(line[ci:])
958-
else:
959-
header_lines.append(line)
960-
# Read online header file
961-
else:
962-
header_lines, comment_lines = download._stream_header(file_name, pn_dir)
926+
header_lines, comment_lines = [], []
927+
for line in header_content.splitlines():
928+
line = line.strip()
929+
# Comment line
930+
if line.startswith("#"):
931+
comment_lines.append(line)
932+
# Non-empty non-comment line = header line.
933+
elif line:
934+
header_lines.append(line)
963935

964936
return header_lines, comment_lines
965937

966938

967-
def _parse_record_line(record_line):
939+
def _parse_record_line(record_line: str) -> dict:
968940
"""
969941
Extract fields from a record line string into a dictionary.
970942
971943
Parameters
972944
----------
973945
record_line : str
974-
The name of the record line that will be used to extact fields.
946+
The record line contained in the header file
975947
976948
Returns
977949
-------

wfdb/io/download.py

Lines changed: 5 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -82,9 +82,9 @@ def _remote_file_size(url=None, file_name=None, pn_dir=None):
8282
return remote_file_size
8383

8484

85-
def _stream_header(file_name, pn_dir):
85+
def _stream_header(file_name: str, pn_dir: str) -> str:
8686
"""
87-
Stream the lines of a remote header file.
87+
Stream the text of a remote header file.
8888
8989
Parameters
9090
----------
@@ -97,10 +97,8 @@ def _stream_header(file_name, pn_dir):
9797
9898
Returns
9999
-------
100-
header_lines : list
101-
All of the traditional header lines.
102-
comment_lines : list
103-
All of the comment header lines.
100+
N/A : str
101+
The text contained in the header file
104102
105103
"""
106104
# Full url of header location
@@ -110,30 +108,7 @@ def _stream_header(file_name, pn_dir):
110108
with _url.openurl(url, "rb") as f:
111109
content = f.read()
112110

113-
# Get each line as a string
114-
filelines = content.decode("iso-8859-1").splitlines()
115-
116-
# Separate content into header and comment lines
117-
header_lines = []
118-
comment_lines = []
119-
120-
for line in filelines:
121-
line = str(line.strip())
122-
# Comment line
123-
if line.startswith("#"):
124-
comment_lines.append(line)
125-
# Non-empty non-comment line = header line.
126-
elif line:
127-
# Look for a comment in the line
128-
ci = line.find("#")
129-
if ci > 0:
130-
header_lines.append(line[:ci])
131-
# comment on same line as header line
132-
comment_lines.append(line[ci:])
133-
else:
134-
header_lines.append(line)
135-
136-
return (header_lines, comment_lines)
111+
return content.decode("iso-8859-1")
137112

138113

139114
def _stream_dat(file_name, pn_dir, byte_count, start_byte, dtype):

wfdb/io/record.py

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
import datetime
22
import multiprocessing.dummy
33
import posixpath
4+
import os
45
import re
56

67
import numpy as np
7-
import os
88
import pandas as pd
99

1010
from wfdb.io import _header
@@ -639,7 +639,7 @@ def check_read_inputs(
639639
"return_res must be one of the following when physical is True: 64, 32, 16"
640640
)
641641

642-
def _adjust_datetime(self, sampfrom):
642+
def _adjust_datetime(self, sampfrom: int):
643643
"""
644644
Adjust date and time fields to reflect user input if possible.
645645
@@ -1778,16 +1778,28 @@ def rdheader(record_name, pn_dir=None, rd_segments=False):
17781778
dir_name, base_record_name = os.path.split(record_name)
17791779
dir_name = os.path.abspath(dir_name)
17801780

1781+
# Construct the download path using the database version
17811782
if (pn_dir is not None) and ("." not in pn_dir):
17821783
dir_list = pn_dir.split("/")
17831784
pn_dir = posixpath.join(
17841785
dir_list[0], download.get_version(dir_list[0]), *dir_list[1:]
17851786
)
17861787

1787-
# Read the header file. Separate comment and non-comment lines
1788-
header_lines, comment_lines = _header._read_header_lines(
1789-
base_record_name, dir_name, pn_dir
1790-
)
1788+
# Read the local or remote header file.
1789+
file_name = f"{base_record_name}.hea"
1790+
if pn_dir is None:
1791+
with open(
1792+
os.path.join(dir_name, file_name),
1793+
"r",
1794+
encoding="ascii",
1795+
errors="ignore",
1796+
) as f:
1797+
header_content = f.read()
1798+
else:
1799+
header_content = download._stream_header(file_name, pn_dir)
1800+
1801+
# Separate comment and non-comment lines
1802+
header_lines, comment_lines = _header.parse_header_content(header_content)
17911803

17921804
# Get fields from record line
17931805
record_fields = _header._parse_record_line(header_lines[0])

0 commit comments

Comments
 (0)