Skip to content

Commit e80a7a1

Browse files
committed
unaccent: Remove Python 2 support from Python script
This is a maintainer-only script, but since we're removing Python 2 support elsewhere, we might as well clean this one up too.
1 parent e3df32b commit e80a7a1

File tree

1 file changed

+5
-24
lines changed

1 file changed

+5
-24
lines changed

contrib/unaccent/generate_unaccent_rules.py

Lines changed: 5 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -26,32 +26,13 @@
2626
# [1] https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/UnicodeData.txt
2727
# [2] https://raw.githubusercontent.com/unicode-org/cldr/${TAG}/common/transforms/Latin-ASCII.xml
2828

29-
# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped
30-
# The approach is to be Python3 compatible with Python2 "backports".
31-
from __future__ import print_function
32-
from __future__ import unicode_literals
33-
# END: Python 2/3 compatibility - remove when Python 2 compatibility dropped
34-
3529
import argparse
3630
import codecs
3731
import re
3832
import sys
3933
import xml.etree.ElementTree as ET
4034

41-
# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped
42-
if sys.version_info[0] <= 2:
43-
# Encode stdout as UTF-8, so we can just print to it
44-
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
45-
46-
# Map Python 2's chr to unichr
47-
chr = unichr
48-
49-
# Python 2 and 3 compatible bytes call
50-
def bytes(source, encoding='ascii', errors='strict'):
51-
return source.encode(encoding=encoding, errors=errors)
52-
else:
53-
# END: Python 2/3 compatibility - remove when Python 2 compatibility dropped
54-
sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
35+
sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
5536

5637
# The ranges of Unicode characters that we consider to be "plain letters".
5738
# For now we are being conservative by including only Latin and Greek. This
@@ -213,12 +194,12 @@ def special_cases():
213194
charactersSet = set()
214195

215196
# Cyrillic
216-
charactersSet.add((0x0401, u"\u0415")) # CYRILLIC CAPITAL LETTER IO
217-
charactersSet.add((0x0451, u"\u0435")) # CYRILLIC SMALL LETTER IO
197+
charactersSet.add((0x0401, "\u0415")) # CYRILLIC CAPITAL LETTER IO
198+
charactersSet.add((0x0451, "\u0435")) # CYRILLIC SMALL LETTER IO
218199

219200
# Symbols of "Letterlike Symbols" Unicode Block (U+2100 to U+214F)
220-
charactersSet.add((0x2103, u"\xb0C")) # DEGREE CELSIUS
221-
charactersSet.add((0x2109, u"\xb0F")) # DEGREE FAHRENHEIT
201+
charactersSet.add((0x2103, "\xb0C")) # DEGREE CELSIUS
202+
charactersSet.add((0x2109, "\xb0F")) # DEGREE FAHRENHEIT
222203
charactersSet.add((0x2117, "(P)")) # SOUND RECORDING COPYRIGHT
223204

224205
return charactersSet

0 commit comments

Comments
 (0)