Skip to content

Commit 3393836

Browse files
committed
dev
1 parent 55153c9 commit 3393836

File tree

2 files changed

+130
-92
lines changed

2 files changed

+130
-92
lines changed

wfdb/_signals.py

Lines changed: 130 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -398,30 +398,31 @@ def rddat(filename, dirname, pbdir, fmt, nsig,
398398
readlen = sampto - sampfrom
399399

400400
# Calculate parameters used to read and process the dat file
401-
startbyte, nsampread, extralen, nanreplace, floorsamp = calc_read_params(fmt, siglen, byteoffset,
402-
skew, tsampsperframe,
403-
sampfrom, sampto)
401+
startbyte, nreadsamples, extrapadsamples, nanreplace, blockfloorsamples, blockceilsamples = calc_read_params(fmt, siglen, byteoffset,
402+
skew, tsampsperframe,
403+
sampfrom, sampto)
404+
405+
print(startbyte, nreadsamples, extrapadsamples, nanreplace, blockfloorsamples, blockceilsamples)
404406

405407
# Read the required bytes from the dat file.
406408
# Pad the end if necessary for skewed signals beyond the entire file.
407-
if extralen >0:
409+
if extrapadsamples >0:
408410
# Non-special formats already load samples.
409-
# Special formats load uint8 which are not samples. Make sure their extra padded samples come in complete blocks.
410-
411+
# Special formats load uint8 which are not samples. The extra element count should reflect that.
412+
413+
# Do we need to upround (extrapadsamples * <>) ?
414+
411415
if fmt == '212':
412-
sigbytes = np.concatenate((getdatbytes(filename, dirname, pbdir, fmt, startbyte, nsampread),
413-
np.empty(upround(extralen*tsampsperframe*bytespersample[fmt], 3),
414-
dtype=np.dtype(dataloadtypes[fmt]))))
416+
sigbytes = np.concatenate((getdatbytes(filename, dirname, pbdir, fmt, startbyte, nreadsamples),
417+
np.empty(int(extrapadsamples*3/2), dtype=np.dtype(dataloadtypes[fmt]))))
415418
elif fmt in ['310', '311']:
416-
sigbytes = np.concatenate((getdatbytes(filename, dirname, pbdir, fmt, startbyte, nsampread),
417-
np.empty(upround(extralen*tsampsperframe*bytespersample[fmt], 4),
418-
dtype=np.dtype(dataloadtypes[fmt]))))
419+
sigbytes = np.concatenate((getdatbytes(filename, dirname, pbdir, fmt, startbyte, nreadsamples),
420+
np.empty(int(extrapadsamples*4/3), dtype=np.dtype(dataloadtypes[fmt]))))
419421
else:
420-
sigbytes = np.concatenate((getdatbytes(filename, dirname, pbdir, fmt, startbyte, nsampread),
421-
np.empty(extralen*tsampsperframe,
422-
dtype=np.dtype(dataloadtypes[fmt]))))
422+
sigbytes = np.concatenate((getdatbytes(filename, dirname, pbdir, fmt, startbyte, nreadsamples),
423+
np.empty(extrapadsamples, dtype=np.dtype(dataloadtypes[fmt]))))
423424
else:
424-
sigbytes = getdatbytes(filename, dirname, pbdir, fmt, startbyte, nsampread)
425+
sigbytes = getdatbytes(filename, dirname, pbdir, fmt, startbyte, nreadsamples)
425426

426427

427428
# Continue to process the read values into proper samples
@@ -431,19 +432,20 @@ def rddat(filename, dirname, pbdir, fmt, nsig,
431432
if tsampsperframe==nsig:
432433

433434
# Intermediate number of samples to process. (like readlen)
434-
#processnsamp = readlen * tsampsperframe + floorsamp
435+
# sigbytes always loads byte blocks, so no need to account for floorsamp here when allocating processnsamp.
435436
# Now has to take skew into account, which sigbytes already has.
436-
processnsamp = int(sigbytes.shape[0]*2/3)
437437

438-
# For odd sampled records, imagine an extra sample and add an extra byte
439-
# to simplify the processing step and remove the extra sample at the end.
438+
processnsamp = int(sigbytes.shape[0]*2/3)
439+
# processnsamp always comes in whole block bytes
440440

441-
# Now how will this work with the new processnsamp?
442-
# This cannot go here, if we have to expand samples, we have to do it to sigbytes too. Maybe.
443-
if processnsamp % 2:
444-
sigbytes = np.append(sigbytes, np.zeros(1, dtype='uint8'))
445-
processnsamp+=1
441+
446442

443+
#if processnsamp % 2:
444+
# padone = True
445+
# sigbytes = np.append(sigbytes, np.zeros(1, dtype='uint8'))
446+
# processnsamp+=1
447+
#else:
448+
# padone = False
447449

448450

449451
# No extra samples/frame
@@ -457,23 +459,31 @@ def rddat(filename, dirname, pbdir, fmt, nsig,
457459

458460
#pdb.set_trace()
459461

460-
461462
# One sample pair is stored in one byte triplet.
462463
# Even numbered samples
463464
sig[0::2] = sigbytes[0::3] + 256 * np.bitwise_and(sigbytes[1::3], 0x0f)
464-
if len(sig > 1):
465-
# Odd numbered samples
466-
sig[1::2] = sigbytes[2::3] + 256*np.bitwise_and(sigbytes[1::3] >> 4, 0x0f)
465+
466+
# Odd numbered samples (len(sig) always >1 due to enforcement of whole blocks)
467+
sig[1::2] = sigbytes[2::3] + 256*np.bitwise_and(sigbytes[1::3] >> 4, 0x0f)
468+
469+
# Wait... is this done here?
467470
# Remove extra leading sample read within the byte block
468-
if floorsamp:
469-
sig = sig[floorsamp:]
471+
if blockfloorsamples:
472+
sig = sig[blockfloorsamples:]
470473

471474
# Remove extra trailing sample read within the byte block if originally odd sampled
472-
if (sigbytes.shape[0]/3) % 2:
473-
sig = sig[:-1]
475+
if blockceilsamples:
476+
sig = sig[:-blockceilsamples]
477+
478+
#pdb.set_trace()
479+
480+
#print(sig)
481+
#print(sig.shape)
482+
474483

475484
# Reshape into final array of samples
476-
sig = sig.reshape(-1, nsig)
485+
sig= sig.reshape(-1, nsig)
486+
477487

478488
# Loaded values as unsigned. Convert to 2's complement form:
479489
# values > 2^11-1 are negative.
@@ -658,11 +668,20 @@ def calc_read_params(fmt, siglen, byteoffset, skew, tsampsperframe, sampfrom, sa
658668
Calculate parameters used to read and process the dat file
659669
660670
Output arguments:
661-
- startbyte
662-
- nsampread
671+
- startbyte - The starting byte to read the dat file from. Always points to the start of a
672+
byte block for special formats.
673+
- blockfloorsamples - The extra samples read prior to the first desired sample, for special
674+
formats in order to ensure entire byte blocks are read.
675+
- blockceilsamples - The extra samples read after the last desired sample, for special
676+
formats in order to ensure entire byte blocks are read.
677+
678+
- blockfloorbytes - This should be useful... just have to figure out exactly how to use
679+
680+
681+
- nsampread - The number of samples to read from the dat file. Takes skew into account.
682+
Also
663683
- extralen
664684
- nanreplace
665-
- floorsamp
666685
667686
Example Parameters:
668687
siglen=100, t = 4 (total samples/frame), skew = [0, 2, 4, 5]
@@ -672,87 +691,110 @@ def calc_read_params(fmt, siglen, byteoffset, skew, tsampsperframe, sampfrom, sa
672691
sampfrom=95, sampto=99 --> readlen = 4, nsampread = 5*t, extralen = 4, nanreplace = [0, 1, 3, 4]
673692
"""
674693

675-
# 1. Calculate the starting byte to read the dat file from.
676-
startbyte = int(sampfrom*tsampsperframe*bytespersample[fmt]) + int(byteoffset)
694+
# The starting sample number (if all channels were flattened)
695+
startflatsample = sampfrom*tsampsperframe
696+
# The last theoretical sample number (if all channels were flattened).
697+
# May pass siglen*tsampsperframe at this point.
698+
endflatsample = (sampto + max(skew)-sampfrom) * tsampsperframe
677699

678-
# The above formula needs to be adjusted for special fmts.
679-
# Special formats store samples in specific byte blocks.
680-
# The starting byte should be at the start of a block of 3 or 4.
700+
# Adjust these desired sample numbers for special formats.
681701
if fmt == '212':
682-
# Extra samples to read
683-
floorsamp = (startbyte - byteoffset) % 3
684-
startbyte = startbyte - floorsamp
702+
# Samples come in groups of 2, in 3 byte blocks
703+
blockfloorsamples = startflatsample % 2
704+
blockceilsamples = 1-blockfloorsamples
705+
706+
startflatsample = startflatsample - blockfloorsamples
707+
endflatsample = endflatsample + blockceilsamples
708+
685709
elif fmt in ['310', '311']:
686-
floorsamp = (startbyte - byteoffset) % 4
687-
startbyte = startbyte - floorsamp
688-
else:
689-
floorsamp=0
690-
# Question: Why do we need floorsamp to collect 'extra bytes'?
691-
# Because nsampread may bring us partial way into a byte triplet or quartet for special formats.
692-
# Sometimes you need to stretch bytes.
710+
# Samples come in groups of 3, in 4 byte blocks
711+
floorsamples = startflatsample % 3
712+
blockceilsamples = 1-blockfloorsamples
693713

694-
# Question: Why did startbyte go back with floorsamp?
695-
# Because we have to read from the start of a block.
714+
startflatsample = startflatsample - floorsamples
715+
endflatsample = endflatsample + blockceilsamples
696716

697-
# Question: Why are these values the same?
717+
else:
718+
blockfloorsamples = 0
719+
blockceilsamples = 0
698720

721+
desiredflatsamples = endflatsample - startflatsample
699722

723+
# The starting byte to read from in the file
724+
startbyte = int(startflatsample*bytespersample[fmt]) + byteoffset
700725

701-
# 2. Total number of samples to be read from the dat file (including discarded ones)
702-
# Have to read extra samples if there is a skew, but can't read beyond the limits of the
703-
# dat file.
704-
nsampread = (min(sampto+max(skew), siglen) - sampfrom )*tsampsperframe
705726

706-
# If the skew requires samples beyond the dat file, pad the bytes with
707-
# zeros, and keep track of channels insert nans into.
727+
# nreadsamples: Number of flat samples to be read from the dat file.
728+
# - Have to read extra samples if there is a skew, but can't read beyond file limit.
729+
# If the skew requires samples beyond the dat file, pad the bytes with
730+
# zeros, and keep track of which channels to insert nans into.
731+
# - Have to read extra samples for special formats to capture entire blocks.
708732

709-
# 3. The extra signal length desired beyond the dat file
710-
extralen = max(0, sampto + max(skew) - siglen)
733+
#nsampread = (min(sampto+max(skew), siglen) - sampfrom )*tsampsperframe
734+
735+
# The number of theoretical samples desired that go past the file limit (due to skew).
736+
# Will pad the samples read from the dat file with this number of dummy samples.
737+
# For special formats, if >0, this should request whole blocks. TODO: verify that it actually does.
738+
739+
# desiredflatsamples should always come in whole blocks. siglen*tsampsperframe doesn't...
740+
# siglen*tsampsperframe is not bound to complete blocks, but the file size is. Should we take
741+
# advantage of the filesize to read in more? .... But we already know desiredflatsamples is block whole,
742+
# and since desiredflatsamples = nreadsamples + extrapadsamples, it will be fine.
743+
744+
extrapadsamples = max(0, desiredflatsamples - siglen*tsampsperframe)
745+
746+
# The final number of samples to read from the dat file
747+
nreadsamples = desiredflatsamples - extrapadsamples
711748

712-
# 4. The number of samples at the end of each signal to replace with nans
749+
# The number of samples of each signal desired that go beyond the file due to skew.
750+
# The number of samples at the end of each signal to replace with nans
713751
nanreplace = [max(0, sampto + s - siglen) for s in skew]
714752

715-
return (startbyte, nsampread, extralen, nanreplace, floorsamp)
716-
753+
return (startbyte, nreadsamples, extrapadsamples, nanreplace, blockfloorsamples, blockceilsamples)
717754

718755
def getdatbytes(filename, dirname, pbdir, fmt, startbyte, nsamp):
719756
"""
720757
Read bytes from a dat file, either local or remote
721758
722759
Input arguments:
723760
- nsamp: The total number of samples to read
724-
- startbyte: The starting byte to read
761+
- startbyte: The starting byte to read from
762+
763+
This function SHOULD NOT be responsible for ensuring entire blocks
764+
are read for special formats. That should be done in prev functions
765+
passing in parameters.
766+
767+
The input argument *startbyte* should point to the start of a block.
768+
The input argument *nsamp* should read all samples to the end of a block.
769+
Remember, special fmt dat file sizes should come in whole blocks (padded if necessary).
770+
771+
This function should just read stuff.
725772
"""
726773

727-
# count is the number of elements to read using np.fromfile
728-
# bytecount is the number of bytes to read
774+
# elementcount is the number of elements to read using np.fromfile (for local files)
775+
# bytecount is the number of bytes to read (for streaming files)
729776
if fmt == '212':
730-
bytecount = int(np.ceil((nsamp) * 1.5))
731-
count = bytecount
732-
elif fmt == '310':
733-
bytecount = int(((nsamp) + 2) / 3.) * 4
734-
if (nsamp - 1) % 3 == 0:
735-
bytecount -= 2
736-
count = bytecount
737-
elif fmt == '311':
738-
bytecount = int((nsamp - 1) / 3.) + nsamp + 1
739-
count = bytecount
777+
# These int() statements should not be doing any rounding.
778+
if nsamp % 2:
779+
raise ValueError('nsamp should be an integer multiple of 2 to read entire blocks')
780+
bytecount = int(nsamp * 1.5)
781+
elementcount = bytecount
782+
elif fmt in ['310', '311']:
783+
if nsamp % 3:
784+
raise ValueError('nsamp should be an integer multiple of 3 to read entire blocks')
785+
bytecount = int(nsamp * 4/3)
786+
elementcount = bytecount
740787
else:
741-
count = nsamp
742-
bytecount = nsamp*bytespersample[fmt]
788+
elementcount = nsamp
789+
bytecount = nsamp*bytespersample[fmt]
743790

744791
# Local dat file
745792
if pbdir is None:
746793
fp = open(os.path.join(dirname, filename), 'rb')
747794
fp.seek(startbyte)
748795

749796
# Read file using corresponding dtype
750-
sigbytes = np.fromfile(fp, dtype=np.dtype(dataloadtypes[fmt]), count=count)
751-
752-
# For special formats that were read as unsigned 1 byte blocks to be further processed,
753-
# convert dtype from uint8 to uint64. Why? We are not reshaping these. We are sampling from them.
754-
#if fmt in ['212', '310', '311']:
755-
# sigbytes = sigbytes.astype('uint')
797+
sigbytes = np.fromfile(fp, dtype=np.dtype(dataloadtypes[fmt]), count=elementcount)
756798

757799
fp.close()
758800

@@ -764,6 +806,7 @@ def getdatbytes(filename, dirname, pbdir, fmt, startbyte, nsamp):
764806

765807

766808
def bytes2sig(sigbytes):
809+
# This will contain stuff from rddat! Soon!
767810
pass
768811

769812

wfdb/downloads.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,6 @@ def streamdat(filename, pbdir, fmt, bytecount, startbyte, datatypes):
6060
# Convert to numpy array
6161
sigbytes = np.fromstring(sigbytes, dtype = np.dtype(datatypes[fmt]))
6262

63-
# For special formats that were read as unsigned 1 byte blocks to be further processed,
64-
# convert dtype from uint8 to uint64. Why?
65-
#if fmt in ['212', '310', '311']:
66-
# sigbytes = sigbytes.astype('uint')
67-
6863
return sigbytes
6964

7065
# Read an entire annotation file from physiobank

0 commit comments

Comments
 (0)