Avoid ambiguous backslashes in regular expression strings.

Benjamin Moody · Benjamin Moody · commit 9b2d6c366eab · 2022-08-18T12:47:08.000-04:00
Sequences such as "\s" should be avoided in ordinary Python
strings due to the potential for confusion, and will result in
warnings in some Python versions.

For regular expressions, it's better to use raw strings (r"\s")
instead, which make parsing unambiguous without needing to double
backslashes.

(Since re.compile understands all the ordinary Python backslash
sequences, it should be safe to change non-raw regexp strings to
raw strings as long as they don't contain any escaped backslashes
or quotes.)
diff --git a/tests/test_annotation.py b/tests/test_annotation.py
@@ -44,7 +44,7 @@ def test_1(self):
         target_aux_note = [None] * nannot
 
         RXannot = re.compile(
-            "[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"
+            r"[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"
         )
 
         for i in range(0, nannot):
@@ -117,7 +117,7 @@ def test_2(self):
         target_aux_note = [None] * nannot
 
         RXannot = re.compile(
-            "[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"
+            r"[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"
         )
 
         for i in range(0, nannot):
@@ -188,7 +188,7 @@ def test_3(self):
         target_aux_note = [None] * nannot
 
         RXannot = re.compile(
-            "[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"
+            r"[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"
         )
 
         for i in range(0, nannot):
diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
@@ -385,7 +385,7 @@ def check_field(self, field):
 
         # Field specific checks
         if field == "record_name":
-            if bool(re.search("[^-\w]", self.record_name)):
+            if bool(re.search(r"[^-\w]", self.record_name)):
                 raise ValueError(
                     "record_name must only comprise of letters, digits, hyphens, and underscores."
                 )
@@ -2385,9 +2385,9 @@ def update_extra_fields(subtype, chan, num, aux_note, update):
     return subtype, chan, num, aux_note
 
 
-rx_fs = re.compile("## time resolution: (?P<fs>\d+\.?\d*)")
+rx_fs = re.compile(r"## time resolution: (?P<fs>\d+\.?\d*)")
 rx_custom_label = re.compile(
-    "(?P<label_store>\d+) (?P<symbol>\S+) (?P<description>.+)"
+    r"(?P<label_store>\d+) (?P<symbol>\S+) (?P<description>.+)"
 )
 
 
diff --git a/wfdb/io/record.py b/wfdb/io/record.py
@@ -421,7 +421,7 @@ def check_field(self, field, required_channels="all"):
         # Record specification fields
         elif field == "record_name":
             # Allow letters, digits, hyphens, and underscores.
-            accepted_string = re.match("[-\w]+", self.record_name)
+            accepted_string = re.match(r"[-\w]+", self.record_name)
             if (
                 not accepted_string
                 or accepted_string.string != self.record_name
@@ -461,7 +461,7 @@ def check_field(self, field, required_channels="all"):
 
                 if field == "file_name":
                     # Check for file_name characters
-                    accepted_string = re.match("[-\w]+\.?[\w]+", item[ch])
+                    accepted_string = re.match(r"[-\w]+\.?[\w]+", item[ch])
                     if (
                         not accepted_string
                         or accepted_string.string != item[ch]
@@ -505,7 +505,7 @@ def check_field(self, field, required_channels="all"):
                             "baseline values must be between -2147483648 (-2^31) and 2147483647 (2^31 -1)"
                         )
                 elif field == "units":
-                    if re.search("\s", item[ch]):
+                    if re.search(r"\s", item[ch]):
                         raise ValueError(
                             "units strings may not contain whitespaces."
                         )
@@ -520,7 +520,7 @@ def check_field(self, field, required_channels="all"):
                             "block_size values must be non-negative integers"
                         )
                 elif field == "sig_name":
-                    if re.search("\s", item[ch]):
+                    if re.search(r"\s", item[ch]):
                         raise ValueError(
                             "sig_name strings may not contain whitespaces."
                         )
@@ -534,7 +534,7 @@ def check_field(self, field, required_channels="all"):
                     # Segment names must be alphanumerics or just a single '~'
                     if item[ch] == "~":
                         continue
-                    accepted_string = re.match("[-\w]+", item[ch])
+                    accepted_string = re.match(r"[-\w]+", item[ch])
                     if (
                         not accepted_string
                         or accepted_string.string != item[ch]

Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ def test_1(self):`
`44`	`44`	`target_aux_note = [None] * nannot`
`45`	`45`
`46`	`46`	`RXannot = re.compile(`
`47`		`- "[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"`
	`47`	`+ r"[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"`
`48`	`48`	`)`
`49`	`49`
`50`	`50`	`for i in range(0, nannot):`
`@@ -117,7 +117,7 @@ def test_2(self):`
`117`	`117`	`target_aux_note = [None] * nannot`
`118`	`118`
`119`	`119`	`RXannot = re.compile(`
`120`		`- "[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"`
	`120`	`+ r"[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"`
`121`	`121`	`)`
`122`	`122`
`123`	`123`	`for i in range(0, nannot):`
`@@ -188,7 +188,7 @@ def test_3(self):`
`188`	`188`	`target_aux_note = [None] * nannot`
`189`	`189`
`190`	`190`	`RXannot = re.compile(`
`191`		`- "[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"`
	`191`	`+ r"[ \t]*(?P<time>[\[\]\w\.:]+) +(?P<sample>\d+) +(?P<symbol>.) +(?P<subtype>\d+) +(?P<chan>\d+) +(?P<num>\d+)\t?(?P<aux_note>.*)"`
`192`	`192`	`)`
`193`	`193`
`194`	`194`	`for i in range(0, nannot):`