@@ -398,30 +398,31 @@ def rddat(filename, dirname, pbdir, fmt, nsig,
398
398
readlen = sampto - sampfrom
399
399
400
400
# Calculate parameters used to read and process the dat file
401
- startbyte , nsampread , extralen , nanreplace , floorsamp = calc_read_params (fmt , siglen , byteoffset ,
402
- skew , tsampsperframe ,
403
- sampfrom , sampto )
401
+ startbyte , nreadsamples , extrapadsamples , nanreplace , blockfloorsamples , blockceilsamples = calc_read_params (fmt , siglen , byteoffset ,
402
+ skew , tsampsperframe ,
403
+ sampfrom , sampto )
404
+
405
+ print (startbyte , nreadsamples , extrapadsamples , nanreplace , blockfloorsamples , blockceilsamples )
404
406
405
407
# Read the required bytes from the dat file.
406
408
# Pad the end if necessary for skewed signals beyond the entire file.
407
- if extralen > 0 :
409
+ if extrapadsamples > 0 :
408
410
# Non-special formats already load samples.
409
- # Special formats load uint8 which are not samples. Make sure their extra padded samples come in complete blocks.
410
-
411
+ # Special formats load uint8 which are not samples. The extra element count should reflect that.
412
+
413
+ # TODO: Do we need to upround the padding size (extrapadsamples * <>) to a whole byte block?
414
+
411
415
if fmt == '212' :
412
- sigbytes = np .concatenate ((getdatbytes (filename , dirname , pbdir , fmt , startbyte , nsampread ),
413
- np .empty (upround (extralen * tsampsperframe * bytespersample [fmt ], 3 ),
414
- dtype = np .dtype (dataloadtypes [fmt ]))))
416
+ sigbytes = np .concatenate ((getdatbytes (filename , dirname , pbdir , fmt , startbyte , nreadsamples ),
417
+ np .empty (int (extrapadsamples * 3 / 2 ), dtype = np .dtype (dataloadtypes [fmt ]))))
415
418
elif fmt in ['310' , '311' ]:
416
- sigbytes = np .concatenate ((getdatbytes (filename , dirname , pbdir , fmt , startbyte , nsampread ),
417
- np .empty (upround (extralen * tsampsperframe * bytespersample [fmt ], 4 ),
418
- dtype = np .dtype (dataloadtypes [fmt ]))))
419
+ sigbytes = np .concatenate ((getdatbytes (filename , dirname , pbdir , fmt , startbyte , nreadsamples ),
420
+ np .empty (int (extrapadsamples * 4 / 3 ), dtype = np .dtype (dataloadtypes [fmt ]))))
419
421
else :
420
- sigbytes = np .concatenate ((getdatbytes (filename , dirname , pbdir , fmt , startbyte , nsampread ),
421
- np .empty (extralen * tsampsperframe ,
422
- dtype = np .dtype (dataloadtypes [fmt ]))))
422
+ sigbytes = np .concatenate ((getdatbytes (filename , dirname , pbdir , fmt , startbyte , nreadsamples ),
423
+ np .empty (extrapadsamples , dtype = np .dtype (dataloadtypes [fmt ]))))
423
424
else :
424
- sigbytes = getdatbytes (filename , dirname , pbdir , fmt , startbyte , nsampread )
425
+ sigbytes = getdatbytes (filename , dirname , pbdir , fmt , startbyte , nreadsamples )
425
426
426
427
427
428
# Continue to process the read values into proper samples
@@ -431,19 +432,20 @@ def rddat(filename, dirname, pbdir, fmt, nsig,
431
432
if tsampsperframe == nsig :
432
433
433
434
# Intermediate number of samples to process. (like readlen)
434
- #processnsamp = readlen * tsampsperframe + floorsamp
435
+ # sigbytes always loads byte blocks, so no need to account for floorsamp here when allocating processnsamp.
435
436
# Now has to take skew into account, which sigbytes already has.
436
- processnsamp = int (sigbytes .shape [0 ]* 2 / 3 )
437
437
438
- # For odd sampled records, imagine an extra sample and add an extra byte
439
- # to simplify the processing step and remove the extra sample at the end.
438
+ processnsamp = int ( sigbytes . shape [ 0 ] * 2 / 3 )
439
+ # processnsamp always corresponds to a whole number of byte blocks
440
440
441
- # Now how will this work with the new processnsamp?
442
- # This cannot go here, if we have to expand samples, we have to do it to sigbytes too. Maybe.
443
- if processnsamp % 2 :
444
- sigbytes = np .append (sigbytes , np .zeros (1 , dtype = 'uint8' ))
445
- processnsamp += 1
441
+
446
442
443
+ #if processnsamp % 2:
444
+ # padone = True
445
+ # sigbytes = np.append(sigbytes, np.zeros(1, dtype='uint8'))
446
+ # processnsamp+=1
447
+ #else:
448
+ # padone = False
447
449
448
450
449
451
# No extra samples/frame
@@ -457,23 +459,31 @@ def rddat(filename, dirname, pbdir, fmt, nsig,
457
459
458
460
#pdb.set_trace()
459
461
460
-
461
462
# One sample pair is stored in one byte triplet.
462
463
# Even numbered samples
463
464
sig [0 ::2 ] = sigbytes [0 ::3 ] + 256 * np .bitwise_and (sigbytes [1 ::3 ], 0x0f )
464
- if len (sig > 1 ):
465
- # Odd numbered samples
466
- sig [1 ::2 ] = sigbytes [2 ::3 ] + 256 * np .bitwise_and (sigbytes [1 ::3 ] >> 4 , 0x0f )
465
+
466
+ # Odd numbered samples (len(sig) always >1 due to enforcement of whole blocks)
467
+ sig [1 ::2 ] = sigbytes [2 ::3 ] + 256 * np .bitwise_and (sigbytes [1 ::3 ] >> 4 , 0x0f )
468
+
469
+ # Wait... is this done here?
467
470
# Remove extra leading sample read within the byte block
468
- if floorsamp :
469
- sig = sig [floorsamp :]
471
+ if blockfloorsamples :
472
+ sig = sig [blockfloorsamples :]
470
473
471
474
# Remove extra trailing sample read within the byte block if originally odd sampled
472
- if (sigbytes .shape [0 ]/ 3 ) % 2 :
473
- sig = sig [:- 1 ]
475
+ if blockceilsamples :
476
+ sig = sig [:- blockceilsamples ]
477
+
478
+ #pdb.set_trace()
479
+
480
+ #print(sig)
481
+ #print(sig.shape)
482
+
474
483
475
484
# Reshape into final array of samples
476
- sig = sig .reshape (- 1 , nsig )
485
+ sig = sig .reshape (- 1 , nsig )
486
+
477
487
478
488
# Loaded values as unsigned. Convert to 2's complement form:
479
489
# values > 2^11-1 are negative.
@@ -658,11 +668,20 @@ def calc_read_params(fmt, siglen, byteoffset, skew, tsampsperframe, sampfrom, sa
658
668
Calculate parameters used to read and process the dat file
659
669
660
670
Output arguments:
661
- - startbyte
662
- - nsampread
671
+ - startbyte - The starting byte to read the dat file from. Always points to the start of a
672
+ byte block for special formats.
673
+ - blockfloorsamples - The extra samples read prior to the first desired sample, for special
674
+ formats in order to ensure entire byte blocks are read.
675
+ - blockceilsamples - The extra samples read after the last desired sample, for special
676
+ formats in order to ensure entire byte blocks are read.
677
+
678
+ - blockfloorbytes - This should be useful... just have to figure out exactly how to use it.
679
+
680
+
681
+ - nsampread - The number of samples to read from the dat file. Takes skew into account.
682
+ Also
663
683
- extralen
664
684
- nanreplace
665
- - floorsamp
666
685
667
686
Example Parameters:
668
687
siglen=100, t = 4 (total samples/frame), skew = [0, 2, 4, 5]
@@ -672,87 +691,110 @@ def calc_read_params(fmt, siglen, byteoffset, skew, tsampsperframe, sampfrom, sa
672
691
sampfrom=95, sampto=99 --> readlen = 4, nsampread = 5*t, extralen = 4, nanreplace = [0, 1, 3, 4]
673
692
"""
674
693
675
- # 1. Calculate the starting byte to read the dat file from.
676
- startbyte = int (sampfrom * tsampsperframe * bytespersample [fmt ]) + int (byteoffset )
694
+ # The starting sample number (if all channels were flattened)
695
+ startflatsample = sampfrom * tsampsperframe
696
+ # The last theoretical sample number (if all channels were flattened).
697
+ # May pass siglen*tsampsperframe at this point.
698
+ endflatsample = (sampto + max (skew )- sampfrom ) * tsampsperframe
677
699
678
- # The above formula needs to be adjusted for special fmts.
679
- # Special formats store samples in specific byte blocks.
680
- # The starting byte should be at the start of a block of 3 or 4.
700
+ # Adjust these desired sample numbers for special formats.
681
701
if fmt == '212' :
682
- # Extra samples to read
683
- floorsamp = (startbyte - byteoffset ) % 3
684
- startbyte = startbyte - floorsamp
702
+ # Samples come in groups of 2, in 3 byte blocks
703
+ blockfloorsamples = startflatsample % 2
704
+ blockceilsamples = 1 - blockfloorsamples
705
+
706
+ startflatsample = startflatsample - blockfloorsamples
707
+ endflatsample = endflatsample + blockceilsamples
708
+
685
709
elif fmt in ['310' , '311' ]:
686
- floorsamp = (startbyte - byteoffset ) % 4
687
- startbyte = startbyte - floorsamp
688
- else :
689
- floorsamp = 0
690
- # Question: Why do we need floorsamp to collect 'extra bytes'?
691
- # Because nsampread may bring us partial way into a byte triplet or quartet for special formats.
692
- # Sometimes you need to stretch bytes.
710
+ # Samples come in groups of 3, in 4 byte blocks
711
+ floorsamples = startflatsample % 3
712
+ blockceilsamples = 1 - blockfloorsamples
693
713
694
- # Question: Why did startbyte go back with floorsamp?
695
- # Because we have to read from the start of a block.
714
+ startflatsample = startflatsample - floorsamples
715
+ endflatsample = endflatsample + blockceilsamples
696
716
697
- # Question: Why are these values the same?
717
+ else :
718
+ blockfloorsamples = 0
719
+ blockceilsamples = 0
698
720
721
+ desiredflatsamples = endflatsample - startflatsample
699
722
723
+ # The starting byte to read from in the file
724
+ startbyte = int (startflatsample * bytespersample [fmt ]) + byteoffset
700
725
701
- # 2. Total number of samples to be read from the dat file (including discarded ones)
702
- # Have to read extra samples if there is a skew, but can't read beyond the limits of the
703
- # dat file.
704
- nsampread = (min (sampto + max (skew ), siglen ) - sampfrom )* tsampsperframe
705
726
706
- # If the skew requires samples beyond the dat file, pad the bytes with
707
- # zeros, and keep track of channels insert nans into.
727
+ # nreadsamples: Number of flat samples to be read from the dat file.
728
+ # - Have to read extra samples if there is a skew, but can't read beyond file limit.
729
+ # If the skew requires samples beyond the dat file, pad the bytes with
730
+ # zeros, and keep track of which channels to insert nans into.
731
+ # - Have to read extra samples for special formats to capture entire blocks.
708
732
709
- # 3. The extra signal length desired beyond the dat file
710
- extralen = max (0 , sampto + max (skew ) - siglen )
733
+ #nsampread = (min(sampto+max(skew), siglen) - sampfrom )*tsampsperframe
734
+
735
+ # The number of theoretical samples desired that go past the file limit (due to skew).
736
+ # Will pad the samples read from the dat file with this number of dummy samples.
737
+ # For special formats, if >0, this should request whole blocks. TODO: confirm that it actually does.
738
+
739
+ # desiredflatsamples should always come in whole blocks. siglen*tsampsperframe doesn't...
740
+ # siglen*tsampsperframe is not bound to complete blocks, but the file size is. Should we take
741
+ # advantage of the filesize to read in more? .... But we already know desiredflatsamples is block whole,
742
+ # and since dfs = nrs + extrapad, it will be fine.
743
+
744
+ extrapadsamples = max (0 , desiredflatsamples - siglen * tsampsperframe )
745
+
746
+ # The final number of samples to read from the dat file
747
+ nreadsamples = desiredflatsamples - extrapadsamples
711
748
712
- # 4. The number of samples at the end of each signal to replace with nans
749
+ # The number of samples of each signal desired that go beyond the file due to skew.
750
+ # The number of samples at the end of each signal to replace with nans
713
751
nanreplace = [max (0 , sampto + s - siglen ) for s in skew ]
714
752
715
- return (startbyte , nsampread , extralen , nanreplace , floorsamp )
716
-
753
+ return (startbyte , nreadsamples , extrapadsamples , nanreplace , blockfloorsamples , blockceilsamples )
717
754
718
755
def getdatbytes (filename , dirname , pbdir , fmt , startbyte , nsamp ):
719
756
"""
720
757
Read bytes from a dat file, either local or remote
721
758
722
759
Input arguments:
723
760
- nsamp: The total number of samples to read
724
- - startbyte: The starting byte to read
761
+ - startbyte: The starting byte to read from
762
+
763
+ This function SHOULD NOT be responsible for ensuring entire blocks
764
+ are read for special formats. That should be done in prev functions
765
+ passing in parameters.
766
+
767
+ The input argument *startbyte* should point to the start of a block.
768
+ The input argument *nsamp* should read all samples to the end of a block.
769
+ Remember, special fmt dat file sizes should come in whole blocks (padded if necessary).
770
+
771
+ This function should just read stuff.
725
772
"""
726
773
727
- # count is the number of elements to read using np.fromfile
728
- # bytecount is the number of bytes to read
774
+ # elementcount is the number of elements to read using np.fromfile (for local files)
775
+ # bytecount is the number of bytes to read (for streaming files)
729
776
if fmt == '212' :
730
- bytecount = int (np . ceil (( nsamp ) * 1.5 ))
731
- count = bytecount
732
- elif fmt == '310' :
733
- bytecount = int ((( nsamp ) + 2 ) / 3. ) * 4
734
- if ( nsamp - 1 ) % 3 == 0 :
735
- bytecount -= 2
736
- count = bytecount
737
- elif fmt == '311' :
738
- bytecount = int (( nsamp - 1 ) / 3. ) + nsamp + 1
739
- count = bytecount
777
+ # These int() statements should not be doing any rounding.
778
+ if nsamp % 2 :
779
+ raise ValueError ( 'nsamp should be an integer multiple of 2 to read entire blocks' )
780
+ bytecount = int (nsamp * 1.5 )
781
+ elementcount = bytecount
782
+ elif fmt in [ '310' , '311' ]:
783
+ if nsamp % 3 :
784
+ raise ValueError ( 'nsamp should be an integer multiple of 3 to read entire blocks' )
785
+ bytecount = int (nsamp * 4 / 3 )
786
+ elementcount = bytecount
740
787
else :
741
- count = nsamp
742
- bytecount = nsamp * bytespersample [fmt ]
788
+ elementcount = nsamp
789
+ bytecount = nsamp * bytespersample [fmt ]
743
790
744
791
# Local dat file
745
792
if pbdir is None :
746
793
fp = open (os .path .join (dirname , filename ), 'rb' )
747
794
fp .seek (startbyte )
748
795
749
796
# Read file using corresponding dtype
750
- sigbytes = np .fromfile (fp , dtype = np .dtype (dataloadtypes [fmt ]), count = count )
751
-
752
- # For special formats that were read as unsigned 1 byte blocks to be further processed,
753
- # convert dtype from uint8 to uint64. Why? We are not reshaping these. We are sampling from them.
754
- #if fmt in ['212', '310', '311']:
755
- # sigbytes = sigbytes.astype('uint')
797
+ sigbytes = np .fromfile (fp , dtype = np .dtype (dataloadtypes [fmt ]), count = elementcount )
756
798
757
799
fp .close ()
758
800
@@ -764,6 +806,7 @@ def getdatbytes(filename, dirname, pbdir, fmt, startbyte, nsamp):
764
806
765
807
766
808
def bytes2sig (sigbytes ):
809
+ # This will contain stuff from rddat! Soon!
767
810
pass
768
811
769
812
0 commit comments