python · maurycy · Aug 11, 2025 · Aug 11, 2025 · Aug 11, 2025 · Aug 11, 2025
@@ -214,6 +214,13 @@ New modules
 Improved modules
 ================
 
+csv
+---
+
+* The :meth:`csv.Sniffer.sniff` delimiter detection has been optimized,
+  and is now up to 1.5x faster.
+  (Contributed by Maurycy Pawłowski-Wieroński in :gh:`137627`.)
+
 dbm
 ---
 

diff --git a/Lib/csv.py b/Lib/csv.py
@@ -364,31 +364,35 @@ def _guess_delimiter(self, data, delimiters):
         try and evaluate the smallest portion of the data possible, evaluating
         additional chunks as necessary.
         """
+        from collections import Counter, defaultdict
 
         data = list(filter(None, data.split('\n')))
 
-        ascii = [chr(c) for c in range(127)] # 7-bit ASCII
-
         # build frequency tables
         chunkLength = min(10, len(data))
         iteration = 0
-        charFrequency = {}
+        seen = 0
+        # {char -> {count_per_line -> num_lines_with_that_count}}
+        charFrequency = defaultdict(Counter)
         modes = {}
         delims = {}
         start, end = 0, chunkLength
         while start < len(data):
             iteration += 1
             for line in data[start:end]:
-                for char in ascii:
-                    metaFrequency = charFrequency.get(char, {})
-                    # must count even if frequency is 0
-                    freq = line.count(char)
-                    # value is the mode
-                    metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
-                    charFrequency[char] = metaFrequency
-
-            for char in charFrequency.keys():
-                items = list(charFrequency[char].items())
+                seen += 1
+                charCounts = Counter(line)
+                for char, count in charCounts.items():
+                    if ord(char) < 127:
+                        charFrequency[char][count] += 1
+
+            for char, counts in charFrequency.items():
+                presentCount = sum(counts.values())
+                zeroCount = seen - presentCount
+                if zeroCount > 0:
+                    items = list(counts.items()) + [(0, zeroCount)]
+                else:
+                    items = list(counts.items())
                 if len(items) == 1 and items[0][0] == 0:
                     continue
                 # get the mode of the frequencies

diff --git a/Misc/NEWS.d/next/Library/2025-08-11-04-52-18.gh-issue-137627.Ku5Yi2.rst b/Misc/NEWS.d/next/Library/2025-08-11-04-52-18.gh-issue-137627.Ku5Yi2.rst
@@ -0,0 +1 @@
+Speed up :meth:`csv.Sniffer.sniff` delimiter detection by up to 1.5x.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Speed up :meth:`csv.Sniffer.sniff` delimiter detection by up to 1.5x.