Skip to content

Commit d77d906

Browse files
committed
When reading EDF files, add an 'encoding' parameter for strings in the file (defaulting to iso8859-1 rather than utf-8). Although the EDF specification requires pure ASCII strings, files in practice do not consistently follow it and may use another encoding.
1 parent 3047c17 commit d77d906

File tree

1 file changed

+29
-21
lines changed

1 file changed

+29
-21
lines changed

wfdb/io/convert/edf.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def read_edf(
2020
header_only=False,
2121
verbose=False,
2222
rdedfann_flag=False,
23+
encoding="iso8859-1",
2324
):
2425
"""
2526
Read a EDF format file into a WFDB Record.
@@ -61,6 +62,9 @@ def read_edf(
6162
is being called by the user and the file has annotations, then warn
6263
them that the EDF file has annotations and that they should use
6364
`rdedfann` instead.
65+
encoding : str, optional
66+
The encoding to use for strings in the header. Although the edf
67+
specification requires ascii strings, some files do not adhere to it.
6468
6569
Returns
6670
-------
@@ -139,7 +143,7 @@ def read_edf(
139143
edf_file = open(record_name, mode="rb")
140144

141145
# Version of this data format (8 bytes)
142-
version = struct.unpack("<8s", edf_file.read(8))[0].decode()
146+
version = struct.unpack("<8s", edf_file.read(8))[0].decode(encoding)
143147

144148
# Check to see that the input is an EDF file. (This check will detect
145149
# most but not all other types of files.)
@@ -152,7 +156,7 @@ def read_edf(
152156
print("EDF version number: {}".format(version.strip()))
153157

154158
# Local patient identification (80 bytes)
155-
patient_id = struct.unpack("<80s", edf_file.read(80))[0].decode()
159+
patient_id = struct.unpack("<80s", edf_file.read(80))[0].decode(encoding)
156160
if verbose:
157161
print("Patient ID: {}".format(patient_id))
158162

@@ -161,12 +165,12 @@ def read_edf(
161165
# including an abbreviated month name in English and a full (4-digit)
162166
# year, as is done here if this information is available in the input
163167
# record. EDF+ requires this.
164-
record_id = struct.unpack("<80s", edf_file.read(80))[0].decode()
168+
record_id = struct.unpack("<80s", edf_file.read(80))[0].decode(encoding)
165169
if verbose:
166170
print("Recording ID: {}".format(record_id))
167171

168172
# Start date of recording (dd.mm.yy) (8 bytes)
169-
start_date = struct.unpack("<8s", edf_file.read(8))[0].decode()
173+
start_date = struct.unpack("<8s", edf_file.read(8))[0].decode(encoding)
170174
if verbose:
171175
print("Recording Date: {}".format(start_date))
172176
start_day, start_month, start_year = [int(i) for i in start_date.split(".")]
@@ -177,21 +181,21 @@ def read_edf(
177181
start_year += 100
178182

179183
# Start time of recording (hh.mm.ss) (8 bytes)
180-
start_time = struct.unpack("<8s", edf_file.read(8))[0].decode()
184+
start_time = struct.unpack("<8s", edf_file.read(8))[0].decode(encoding)
181185
if verbose:
182186
print("Recording Time: {}".format(start_time))
183187
start_hour, start_minute, start_second = [
184188
int(i) for i in start_time.split(".")
185189
]
186190

187191
# Number of bytes in header (8 bytes)
188-
header_bytes = int(struct.unpack("<8s", edf_file.read(8))[0].decode())
192+
header_bytes = int(struct.unpack("<8s", edf_file.read(8))[0].decode(encoding))
189193
if verbose:
190194
print("Number of bytes in header record: {}".format(header_bytes))
191195

192196
# Reserved (44 bytes)
193197
reserved_notes = (
194-
struct.unpack("<44s", edf_file.read(44))[0].decode().strip()
198+
struct.unpack("<44s", edf_file.read(44))[0].decode(encoding).strip()
195199
)
196200
if reserved_notes[:5] == "EDF+C":
197201
# The file is EDF compatible and will work without issue
@@ -209,7 +213,7 @@ def read_edf(
209213
print("Free Space: {}".format(reserved_notes))
210214

211215
# Number of blocks (-1 if unknown) (8 bytes)
212-
num_blocks = int(struct.unpack("<8s", edf_file.read(8))[0].decode())
216+
num_blocks = int(struct.unpack("<8s", edf_file.read(8))[0].decode(encoding))
213217
if verbose:
214218
print("Number of data records: {}".format(num_blocks))
215219
if num_blocks == -1:
@@ -218,7 +222,7 @@ def read_edf(
218222
)
219223

220224
# Duration of a block, in seconds (8 bytes)
221-
block_duration = float(struct.unpack("<8s", edf_file.read(8))[0].decode())
225+
block_duration = float(struct.unpack("<8s", edf_file.read(8))[0].decode(encoding))
222226
if verbose:
223227
print(
224228
"Duration of each data record in seconds: {}".format(block_duration)
@@ -227,7 +231,7 @@ def read_edf(
227231
block_duration = 1.0
228232

229233
# Number of signals (4 bytes)
230-
n_sig = int(struct.unpack("<4s", edf_file.read(4))[0].decode())
234+
n_sig = int(struct.unpack("<4s", edf_file.read(4))[0].decode(encoding))
231235
if verbose:
232236
print("Number of signals: {}".format(n_sig))
233237
if n_sig < 1:
@@ -236,7 +240,7 @@ def read_edf(
236240
# Label (e.g., EEG FpzCz or Body temp) (16 bytes each)
237241
sig_name = []
238242
for _ in range(n_sig):
239-
temp_sig = struct.unpack("<16s", edf_file.read(16))[0].decode().strip()
243+
temp_sig = struct.unpack("<16s", edf_file.read(16))[0].decode(encoding).strip()
240244
if temp_sig == "EDF Annotations" and not rdedfann_flag:
241245
print(
242246
"*** This may be an EDF+ Annotation file instead, please see "
@@ -250,7 +254,7 @@ def read_edf(
250254
transducer_types = []
251255
for _ in range(n_sig):
252256
transducer_types.append(
253-
struct.unpack("<80s", edf_file.read(80))[0].decode().strip()
257+
struct.unpack("<80s", edf_file.read(80))[0].decode(encoding).strip()
254258
)
255259
if verbose:
256260
print("Transducer Types: {}".format(transducer_types))
@@ -259,7 +263,7 @@ def read_edf(
259263
physical_dims = []
260264
for _ in range(n_sig):
261265
physical_dims.append(
262-
struct.unpack("<8s", edf_file.read(8))[0].decode().strip()
266+
struct.unpack("<8s", edf_file.read(8))[0].decode(encoding).strip()
263267
)
264268
if verbose:
265269
print("Physical Dimensions: {}".format(physical_dims))
@@ -269,7 +273,7 @@ def read_edf(
269273
for _ in range(n_sig):
270274
physical_min = np.append(
271275
physical_min,
272-
float(struct.unpack("<8s", edf_file.read(8))[0].decode()),
276+
float(struct.unpack("<8s", edf_file.read(8))[0].decode(encoding)),
273277
)
274278
if verbose:
275279
print("Physical Minimums: {}".format(physical_min))
@@ -279,7 +283,7 @@ def read_edf(
279283
for _ in range(n_sig):
280284
physical_max = np.append(
281285
physical_max,
282-
float(struct.unpack("<8s", edf_file.read(8))[0].decode()),
286+
float(struct.unpack("<8s", edf_file.read(8))[0].decode(encoding)),
283287
)
284288
if verbose:
285289
print("Physical Maximums: {}".format(physical_max))
@@ -289,7 +293,7 @@ def read_edf(
289293
for _ in range(n_sig):
290294
digital_min = np.append(
291295
digital_min,
292-
float(struct.unpack("<8s", edf_file.read(8))[0].decode()),
296+
float(struct.unpack("<8s", edf_file.read(8))[0].decode(encoding)),
293297
)
294298
if verbose:
295299
print("Digital Minimums: {}".format(digital_min))
@@ -299,7 +303,7 @@ def read_edf(
299303
for _ in range(n_sig):
300304
digital_max = np.append(
301305
digital_max,
302-
float(struct.unpack("<8s", edf_file.read(8))[0].decode()),
306+
float(struct.unpack("<8s", edf_file.read(8))[0].decode(encoding)),
303307
)
304308
if verbose:
305309
print("Digital Maximums: {}".format(digital_max))
@@ -308,7 +312,7 @@ def read_edf(
308312
prefilter_info = []
309313
for _ in range(n_sig):
310314
prefilter_info.append(
311-
struct.unpack("<80s", edf_file.read(80))[0].decode().strip()
315+
struct.unpack("<80s", edf_file.read(80))[0].decode(encoding).strip()
312316
)
313317
if verbose:
314318
print("Prefiltering Information: {}".format(prefilter_info))
@@ -317,14 +321,14 @@ def read_edf(
317321
samps_per_block = []
318322
for _ in range(n_sig):
319323
samps_per_block.append(
320-
int(struct.unpack("<8s", edf_file.read(8))[0].decode())
324+
int(struct.unpack("<8s", edf_file.read(8))[0].decode(encoding))
321325
)
322326
if verbose:
323327
print("Number of Samples per Record: {}".format(samps_per_block))
324328

325329
# The last 32*nsig bytes in the header are unused
326330
for _ in range(n_sig):
327-
struct.unpack("<32s", edf_file.read(32))[0].decode()
331+
struct.unpack("<32s", edf_file.read(32))[0].decode(encoding)
328332

329333
# Pre-process the acquired data before creating the record
330334
record_name_out = (
@@ -997,6 +1001,7 @@ def rdedfann(
9971001
info_only=True,
9981002
record_only=False,
9991003
verbose=False,
1004+
encoding="iso8859-1",
10001005
):
10011006
"""
10021007
This program returns the annotation information from an EDF+ file
@@ -1038,6 +1043,9 @@ def rdedfann(
10381043
verbose : bool, optional
10391044
Whether to print all the information read about the file (True) or
10401045
not (False).
1046+
encoding : str, optional
1047+
The encoding to use for strings in the header. Although the edf
1048+
specification requires ascii strings, some files do not adhere to it.
10411049
10421050
Returns
10431051
-------
@@ -1110,7 +1118,7 @@ def rdedfann(
11101118
adjusted_hex = hex(
11111119
struct.unpack("<H", struct.pack(">H", chunk + 1))[0]
11121120
)
1113-
annotation_string += bytes.fromhex(adjusted_hex[2:]).decode("ascii")
1121+
annotation_string += bytes.fromhex(adjusted_hex[2:]).decode(encoding)
11141122
# Remove all of the whitespace
11151123
for rep in ["\x00", "\x14", "\x15"]:
11161124
annotation_string = annotation_string.replace(rep, " ")

0 commit comments

Comments
 (0)