Skip to content

Commit 6318b12

Browse files
author
Benjamin Moody
committed
Merge pull request MIT-LCP#397 into main
_rd_compressed_file fails when the total number of samples to read (summed across all channels) exceeds 2**24 and the number of channels isn't a power of two. This is a bug in libsndfile; although it should be fixed there, and could perhaps be worked around in python-soundfile, it's most expedient to work around it here for now.
2 parents 0457e2d + 7868503 commit 6318b12

File tree

4 files changed

+43
-1
lines changed

4 files changed

+43
-1
lines changed

sample-data/flac_3_constant.dat

20 KB
Binary file not shown.

sample-data/flac_3_constant.hea

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
flac_3_constant 3 250 5600000
2+
flac_3_constant.dat 508 200 8 0 25 15104 0 col 0
3+
flac_3_constant.dat 508 200 8 0 50 30208 0 col 1
4+
flac_3_constant.dat 508 200 8 0 75 -20224 0 col 2

tests/test_record.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,28 @@ def test_read_flac(self):
249249
f"Mismatch in {name}",
250250
)
251251

252+
def test_read_flac_longduration(self):
253+
"""
254+
Three signals multiplexed in a FLAC file, over 2**24 samples.
255+
256+
Input file created with:
257+
yes 25 50 75 | head -5600000 |
258+
wrsamp -O 508 -o flac_3_constant 0 1 2
259+
260+
Note that the total number of samples (across the three
261+
channels) exceeds 2**24. There is a bug in libsndfile that
262+
causes it to break if we try to read more than 2**24 total
263+
samples at a time, when the number of channels is not a power
264+
of two.
265+
"""
266+
record = wfdb.rdrecord("sample-data/flac_3_constant")
267+
sig_target = np.repeat(
268+
np.array([[0.125, 0.25, 0.375]], dtype="float64"),
269+
5600000,
270+
axis=0,
271+
)
272+
np.testing.assert_array_equal(record.p_signal, sig_target)
273+
252274
# ------------------ 2. Special format records ------------------ #
253275

254276
def test_2a(self):

wfdb/io/_signal.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1875,7 +1875,23 @@ def _rd_compressed_file(
18751875
start_samp = start_frame * samps_per_frame[0]
18761876
end_samp = end_frame * samps_per_frame[0]
18771877
sf.seek(start_samp + sample_offset)
1878-
sig_data = sf.read(end_samp - start_samp, dtype=read_dtype)
1878+
1879+
# We could do this:
1880+
# sig_data = sf.read(end_samp - start_samp, dtype=read_dtype)
1881+
# However, sf.read fails for huge blocks (over 2**24 total
1882+
# samples) due to a bug in libsndfile:
1883+
# https://github.com/libsndfile/libsndfile/issues/431
1884+
# So read the data in chunks instead.
1885+
n_samp = end_samp - start_samp
1886+
sig_data = np.empty((n_samp, n_sig), dtype=read_dtype)
1887+
CHUNK_SIZE = 1024 * 1024
1888+
for chunk_start in range(0, n_samp, CHUNK_SIZE):
1889+
chunk_end = chunk_start + CHUNK_SIZE
1890+
chunk_data = sf.read(out=sig_data[chunk_start:chunk_end])
1891+
samples_read = chunk_data.shape[0]
1892+
if samples_read != CHUNK_SIZE:
1893+
sig_data = sig_data[: chunk_start + samples_read]
1894+
break
18791895

18801896
# If we read an 8-bit stream as int16 or a 24-bit stream as
18811897
# int32, soundfile shifts each sample left by 8 bits. We

0 commit comments

Comments
 (0)