@@ -2354,6 +2354,198 @@ def rr2ann(rr_array, record_name, extension, fs=250, as_time=False):
2354
2354
wrann (record_name , extension , ann_sample , symbol = ann_symbol )
2355
2355
2356
2356
2357
def csv2ann(file_name, extension='atr', fs=None, record_only=False,
            time_onset=True, header=True, delimiter=',', verbose=False):
    r"""
    Read a CSV/TSV/etc. file and either return an `Annotation` object with
    the annotation descriptors as attributes or write an annotation file.

    Parameters
    ----------
    file_name : str
        The name of the CSV file to be read, including the '.csv' file
        extension. If the argument contains any path delimiter characters, the
        argument will be interpreted as PATH/BASE_RECORD. Both relative and
        absolute paths are accepted. The file name with its final extension
        removed is used as the record name of the annotation.
    extension : str, optional
        The string annotation file extension.
    fs : float, optional
        The sampling frequency. It is required (and must be non-zero) when
        annotation onsets are given as times (`time_onset` = True), since
        onsets must be converted to sample numbers in order for `wrann` to
        work. This number can be expressed in any format legal for a Python
        floating point number (thus 360, 360., 360.0, and 3.6e2 are all
        legal and equivalent). There is no implicit default value.
    record_only : bool, optional
        Whether to only return the annotation information (True) or not
        (False). If False, this function will generate the annotation file.
    time_onset : bool, optional
        Whether to assume the values provided in the 'onset' column are in
        units of time (True) or samples (False). If True, the onset times
        are converted to samples (truncating towards zero) by using the, now
        required, `fs` input.
    header : bool, optional
        Whether to assume the CSV has a first line header (True) or not
        (False). If True, the header must name the columns 'onset',
        'description' and (for the three-column format) 'duration'. If
        False, the columns are interpreted by position.
    delimiter : str, optional
        What to use as the delimiter for the file to separate data. The
        default is a comma (','). Other common delimiters are tabs ('\t'),
        spaces (' '), pipes ('|'), and colons (':').
    verbose : bool, optional
        Whether to print progress information while the file is processed
        (True) or not (False).

    Returns
    -------
    N/A : Annotation, optional
        The WFDB Annotation object representing the contents of the CSV file
        read. Only returned if `record_only` is True; otherwise the
        annotation file is written and nothing is returned.

    Raises
    ------
    ValueError
        If the CSV does not have exactly two or three columns, or if
        `time_onset` is True and `fs` is not provided.

    Notes
    -----
    CSVs should be in one of the two possible following formats:

    1) All events are single time events (no duration).

    onset,description
    onset_1,description_1
    onset_2,description_2
    ...,...

    Or this format if `header=False` is defined:

    onset_1,description_1
    onset_2,description_2
    ...,...

    2) A duration is specified for some events.

    onset,duration,description
    onset_1,duration_1,description_1
    onset_2,duration_2,description_2
    ...,...,...

    Or this format if `header=False` is defined:

    onset_1,duration_1,description_1
    onset_2,duration_2,description_2
    ...,...,...

    The 'onset' is interpreted as a time if `time_onset` is True and as a
    sample number otherwise. The 'duration' is added to the 'onset' as-is,
    so it must be expressed in the same units as the 'onset'. Events with a
    positive duration are split into a start annotation, whose description
    is prefixed with '(', and an end annotation, whose description is
    suffixed with ')'. Events with a zero or missing duration are kept as a
    single annotation. The 'description' is used as the `aux_note` for the
    annotation and the `symbol` is automatically set to " which defines a
    comment. Future additions will allow the user to customize such
    attributes.

    Examples
    --------
    1) Write WFDB annotation file from CSV with time onsets:
    ======= start example.csv =======
    onset,description
    0.2,p-wave
    0.8,qrs
    ======== end example.csv ========
    >>> wfdb.csv2ann('example.csv', fs=360)
    * Creates a WFDB annotation file called: 'example.atr'

    2) Write WFDB annotation file from CSV with sample onsets:
    ======= start example.csv =======
    onset,description
    5,p-wave
    13,qrs
    ======== end example.csv ========
    >>> wfdb.csv2ann('example.csv', fs=10, time_onset=False)
    * Creates a WFDB annotation file called: 'example.atr'
    * 5,13 samples -> 0.5,1.3 seconds for onset

    3) Write WFDB annotation file from CSV with time onsets, durations, and no
    header:
    ======= start example.csv =======
    0.2,0.1,qrs
    0.8,0.4,qrs
    ======== end example.csv ========
    >>> wfdb.csv2ann('example.csv', extension='qrs', fs=360, header=False)
    * Creates a WFDB annotation file called: 'example.qrs'

    """
    # Imported locally so this function does not rely on a module-level
    # import that may not be present
    import os

    # NOTE: No need to write input checks here since the Annotation class
    # should handle them (except verifying the CSV input format which is for
    # Pandas)
    if header:
        df_csv = pd.read_csv(file_name, delimiter=delimiter)
    else:
        df_csv = pd.read_csv(file_name, delimiter=delimiter, header=None)
    if verbose:
        print('Successfully read CSV')

    if verbose:
        print('Creating Pandas dataframe from CSV')
    n_columns = df_csv.shape[1]
    if n_columns == 2:
        if verbose:
            print('onset,description format detected')
        if not header:
            # Without a header Pandas labels the columns 0, 1, ... so name
            # them by position to allow the label-based access below
            df_csv.columns = ['onset', 'description']
        df_out = df_csv
    elif n_columns == 3:
        if verbose:
            print('onset,duration,description format detected')
            print('Converting durations to single time-point events')
        if not header:
            df_csv.columns = ['onset', 'duration', 'description']
        duration = df_csv['duration']
        has_duration = duration > 0
        # A missing duration means the event is a single time-point event,
        # exactly like an explicit duration of zero
        # NOTE(review): rows with a negative duration match neither mask and
        # are therefore left out of the output - confirm this is intended
        is_single_point = duration.isna() | (duration == 0)
        # Create two separate dataframes for the start and end annotation.
        # Explicit copies are required because both are modified below and
        # must not be views on the dataframe that was read
        df_start = df_csv[has_duration].copy()
        df_end = df_csv[has_duration].copy()
        df_trunc = df_csv[is_single_point]
        # Append parentheses at the start for annotation start and end for
        # annotation end
        df_start['description'] = '(' + df_start['description'].astype(str)
        df_end['description'] = df_end['description'].astype(str) + ')'
        # Add the duration to the onset for the end annotation to convert
        # to single time annotations only
        # NOTE(review): this assumes the duration uses the same units as the
        # onset; with `time_onset=False` a duration given in seconds would
        # be added to a sample number - confirm the intended units
        df_end['onset'] = df_end['onset'] + df_end['duration']
        # Concatenate all of the dataframes
        df_out = pd.concat([df_trunc, df_start, df_end], ignore_index=True)
        # Sort by onset, using the concatenation order as the tie-breaker
        df_out['col_index'] = df_out.index
        df_out = df_out.sort_values(['onset', 'col_index'])
    else:
        raise ValueError(
            'The number of columns in the CSV was not recognized.')

    # Remove only the final extension from the input file name; splitting on
    # the first '.' would also cut off paths such as './example.csv'
    file_name = os.path.splitext(file_name)[0]
    if time_onset:
        if not fs:
            raise ValueError('`fs` must be provided if `time_onset` is True '
                             'since it is required to convert time onsets '
                             'to samples')
        sample = (df_out['onset'].to_numpy() * fs).astype(np.int64)
    else:
        sample = df_out['onset'].to_numpy()
    n_annotations = len(df_out.index)
    # Assume each annotation is a comment
    symbol = ['"'] * n_annotations
    subtype = np.array([22] * n_annotations)
    # Assume each annotation belongs with the 1st channel
    chan = np.array([0] * n_annotations)
    num = np.array([0] * n_annotations)
    aux_note = df_out['description'].tolist()

    if verbose:
        print('Finished CSV parsing... writing to Annotation object')

    if record_only:
        annotation = Annotation(record_name=file_name, extension=extension,
                                sample=sample, symbol=symbol, subtype=subtype,
                                chan=chan, num=num, aux_note=aux_note, fs=fs)
        # Report before returning; a print placed after the return would
        # never run
        if verbose:
            print('Finished creating Annotation object')
        return annotation

    wrann(file_name, extension, sample=sample, symbol=symbol,
          subtype=subtype, chan=chan, num=num, aux_note=aux_note, fs=fs)
    if verbose:
        print('Finished writing Annotation file')
2547
+
2548
+
2357
2549
## ------------- Annotation Field Specifications ------------- ##
2358
2550
2359
2551
0 commit comments