Skip to content

Commit 43ac2b1

Browse files
committed
Adds CSV to WFDB-Annotation function
1 parent b026b30 commit 43ac2b1

File tree

3 files changed

+194
-2
lines changed

3 files changed

+194
-2
lines changed

wfdb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
wrsamp, dl_database, edf2mit, mit2edf, wav2mit, mit2wav,
33
wfdb2mat, csv2mit, sampfreq, signame, wfdbdesc, wfdbtime)
44
from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
5-
show_ann_classes, ann2rr, rr2ann)
5+
show_ann_classes, ann2rr, rr2ann, csv2ann)
66
from wfdb.io.download import get_dbs, get_record_list, dl_files, set_db_index_url
77
from wfdb.plot.plot import plot_items, plot_wfdb, plot_all_records
88

wfdb/io/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
csv2mit, sampfreq, signame, wfdbdesc, wfdbtime, SIGNAL_CLASSES)
44
from wfdb.io._signal import est_res, wr_dat_file
55
from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
6-
show_ann_classes, ann2rr, rr2ann)
6+
show_ann_classes, ann2rr, rr2ann, csv2ann)
77
from wfdb.io.download import get_dbs, get_record_list, dl_files, set_db_index_url
88
from wfdb.io.tff import rdtff

wfdb/io/annotation.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2354,6 +2354,198 @@ def rr2ann(rr_array, record_name, extension, fs=250, as_time=False):
23542354
wrann(record_name, extension, ann_sample, symbol=ann_symbol)
23552355

23562356

2357+
def csv2ann(file_name, extension='atr', fs=None, record_only=False,
            time_onset=True, header=True, delimiter=',', verbose=False):
    """
    Read a CSV/TSV/etc. file and either return an `Annotation` object built
    from its contents or write a WFDB annotation file.

    Parameters
    ----------
    file_name : str
        The name of the CSV file to be read, including the '.csv' file
        extension. Relative and absolute paths are accepted. The file name
        with its final extension removed is used as the record name of the
        annotation (so 'PATH/BASE_RECORD.csv' becomes 'PATH/BASE_RECORD').
    extension : str, optional
        The string annotation file extension.
    fs : float, optional
        The sampling frequency. Required (and must be non-zero) when
        `time_onset` is True, since onsets must then be converted from time
        to sample numbers. It is also passed on to the annotation.
    record_only : bool, optional
        Whether to only return the `Annotation` object (True) or to write
        the annotation file instead (False).
    time_onset : bool, optional
        Whether to assume the values provided in the 'onset' column are in
        units of time (True) or samples (False). If True, the onsets are
        multiplied by `fs` and truncated to integer sample numbers.
    header : bool, optional
        Whether to assume the CSV has a first line header (True) or not
        (False). If True, the header must name the columns 'onset',
        'description' and, for the three-column format, 'duration'. If
        False, the columns are assigned those names by position.
    delimiter : str, optional
        What to use as the delimiter for the file to separate data. The
        default is a comma (','). Other common delimiters are tabs ('\\t'),
        spaces (' '), pipes ('|'), and colons (':').
    verbose : bool, optional
        Whether to print progress information (True) or not (False).

    Returns
    -------
    Annotation or None
        The WFDB Annotation object representing the contents of the CSV
        file if `record_only` is True. Otherwise the annotation file is
        written and nothing is returned.

    Raises
    ------
    ValueError
        If the CSV does not have exactly two or three columns, or if
        `time_onset` is True and `fs` is not provided.

    Notes
    -----
    CSVs should be in one of the two following formats:

    1) All events are single time events (no duration).

        onset,description
        onset_1,description_1
        onset_2,description_2
        ...,...

       Or this format if `header=False` is defined:

        onset_1,description_1
        onset_2,description_2
        ...,...

    2) A duration is specified for some events.

        onset,duration,description
        onset_1,duration_1,description_1
        onset_2,duration_2,description_2
        ...,...,...

       Or this format if `header=False` is defined:

        onset_1,duration_1,description_1
        onset_2,duration_2,description_2
        ...,...,...

    Each event with a positive duration is converted into two single
    time-point annotations: '(description' at the onset and 'description)'
    at onset + duration. Events whose duration is zero, negative or missing
    are kept as a single annotation at their onset. The 'duration' is
    expressed in the same units as the 'onset'. The 'description' is used as
    the `aux_note` of the annotation and the `symbol` is always set to "
    which defines a comment.

    Examples
    --------
    1) Write WFDB annotation file from CSV with time onsets:
        ======= start example.csv =======
        onset,description
        0.2,p-wave
        0.8,qrs
        ======== end example.csv ========
        >>> wfdb.csv2ann('example.csv', fs=360)
        * Creates a WFDB annotation file called: 'example.atr'

    2) Write WFDB annotation file from CSV with sample onsets:
        ======= start example.csv =======
        onset,description
        5,p-wave
        13,qrs
        ======== end example.csv ========
        >>> wfdb.csv2ann('example.csv', fs=10, time_onset=False)
        * Creates a WFDB annotation file called: 'example.atr'

    3) Write WFDB annotation file from CSV with time onsets, durations, and
       no header:
        ======= start example.csv =======
        0.2,0.1,qrs
        0.8,0.4,qrs
        ======== end example.csv ========
        >>> wfdb.csv2ann('example.csv', extension='qrs', fs=360, header=False)
        * Creates a WFDB annotation file called: 'example.qrs'

    """
    # Imported locally so the function does not rely on the module-level
    # import block providing `os`.
    import os

    # NOTE: No need to write input checks here since the Annotation class
    # should handle them (except verifying the CSV input format which is for
    # Pandas)
    df_CSV = pd.read_csv(file_name, delimiter=delimiter,
                         header='infer' if header else None)
    if verbose:
        print('Successfully read CSV')
        print('Creating Pandas dataframe from CSV')

    n_columns = df_CSV.shape[1]
    if n_columns == 2:
        if verbose:
            print('onset,description format detected')
        if not header:
            # Without a header Pandas labels the columns 0, 1, ...; name
            # them by position so the lookups below work
            df_CSV.columns = ['onset', 'description']
        df_out = df_CSV
    elif n_columns == 3:
        if verbose:
            print('onset,duration,description format detected')
            print('Converting durations to single time-point events')
        if not header:
            df_CSV.columns = ['onset', 'duration', 'description']
        # Only strictly positive durations are expanded into a start/end
        # pair. Everything else (zero, negative or missing duration) stays a
        # single event instead of being dropped.
        has_duration = df_CSV['duration'] > 0
        df_single = df_CSV[~has_duration]
        # Copy the slices since they are modified below
        df_start = df_CSV[has_duration].copy()
        df_end = df_CSV[has_duration].copy()
        # Append parentheses at the start for annotation start and end for
        # annotation end
        df_start['description'] = '(' + df_start['description'].astype(str)
        df_end['description'] = df_end['description'].astype(str) + ')'
        # Add the duration time to the onset for the end annotation to
        # convert to single time annotations only
        df_end['onset'] = df_end['onset'] + df_end['duration']
        # Concatenate all of the dataframes
        df_out = pd.concat([df_single, df_start, df_end], ignore_index=True)
        # A stable sort keeps the concatenation order (single, start, end)
        # for events which share the same onset
        df_out = df_out.sort_values('onset', kind='mergesort')
    else:
        raise ValueError('The number of columns in the CSV was not '
                         'recognized.')

    # Remove only the final extension from the input file name so that dots
    # elsewhere in the path (e.g. './example.csv') are preserved
    record_name = os.path.splitext(file_name)[0]

    if time_onset:
        if not fs:
            raise ValueError('`fs` must be provided if `time_onset` is True '
                             'since it is required to convert time onsets to '
                             'samples')
        sample = (df_out['onset'].to_numpy() * fs).astype(np.int64)
    else:
        sample = df_out['onset'].to_numpy()

    n_annotations = len(df_out.index)
    # Assume each annotation is a comment
    symbol = ['"'] * n_annotations
    subtype = np.full(n_annotations, 22)
    # Assume each annotation belongs with the 1st channel
    chan = np.zeros(n_annotations, dtype=int)
    num = np.zeros(n_annotations, dtype=int)
    aux_note = df_out['description'].tolist()

    if verbose:
        print('Finished CSV parsing... writing to Annotation object')

    if record_only:
        annotation = Annotation(record_name=record_name, extension=extension,
                                sample=sample, symbol=symbol, subtype=subtype,
                                chan=chan, num=num, aux_note=aux_note, fs=fs)
        if verbose:
            print('Finished creating Annotation object')
        return annotation

    wrann(record_name, extension, sample=sample, symbol=symbol,
          subtype=subtype, chan=chan, num=num, aux_note=aux_note, fs=fs)
    if verbose:
        print('Finished writing Annotation file')
2547+
2548+
23572549
## ------------- Annotation Field Specifications ------------- ##
23582550

23592551

0 commit comments

Comments
 (0)