@@ -24,6 +24,14 @@ class Str(str):
24
24
pass
25
25
26
26
27
# Native format identifiers used by the unicode_asnativeformat /
# unicode_fromnativeformat tests in this file.
# NOTE(review): presumably these mirror the C-level PyUnicode_NATIVE_*
# values -- confirm against the C header.

# Invalid native format
PyUnicode_NATIVE_INVALID = 0

PyUnicode_NATIVE_ASCII = 1
PyUnicode_NATIVE_UCS1 = 2
PyUnicode_NATIVE_UCS2 = 3
PyUnicode_NATIVE_UCS4 = 4
PyUnicode_NATIVE_UTF8 = 5
34
+
27
35
class CAPITest (unittest .TestCase ):
28
36
29
37
@support .cpython_only
@@ -1675,6 +1683,75 @@ def test_pep393_utf8_caching_bug(self):
1675
1683
# Check that the second call returns the same result
1676
1684
self .assertEqual (getargs_s_hash (s ), chr (k ).encode () * (i + 1 ))
1677
1685
1678
-
1679
- if __name__ == "__main__" :
1686
def test_unicode_asnativeformat(self):
    # Test PyUnicode_AsNativeFormat()
    #
    # The wrapper is expected to return a (bytes, native_format) pair
    # for each input string.
    to_native = _testlimitedcapi.unicode_asnativeformat

    # UCS2/UCS4 buffers use the byte order of the host.
    little_endian = (sys.byteorder == 'little')
    ucs2_enc = 'utf-16le' if little_endian else 'utf-16be'
    ucs4_enc = 'utf-32le' if little_endian else 'utf-32be'

    # (input string, expected (bytes, native format) result)
    expectations = [
        ("abc",
         (b'abc', PyUnicode_NATIVE_ASCII)),
        ("latin1:\xe9 ",
         (b'latin1:\xe9 ', PyUnicode_NATIVE_UCS1)),
        ('ucs2:\u20ac ',
         ('ucs2:\u20ac '.encode(ucs2_enc), PyUnicode_NATIVE_UCS2)),
        ('ucs4:\U0010ffff ',
         ('ucs4:\U0010ffff '.encode(ucs4_enc), PyUnicode_NATIVE_UCS4)),
    ]
    # Deliberately no subTest(): stop at the first failing case.
    for text, expected in expectations:
        self.assertEqual(to_native(text), expected)
1703
+
1704
def test_unicode_fromnativeformat(self):
    # Test PyUnicode_FromNativeFormat()
    from_native = _testlimitedcapi.unicode_fromnativeformat

    # UCS2/UCS4 buffers use the byte order of the host.
    little_endian = (sys.byteorder == 'little')
    ucs2_enc = 'utf-16le' if little_endian else 'utf-16be'
    ucs4_enc = 'utf-32le' if little_endian else 'utf-32be'

    text = "abc\xe9 \U0010ffff "

    # (raw buffer, native format, expected string)
    valid_cases = [
        (b'abc', PyUnicode_NATIVE_ASCII, "abc"),
        (b'latin1:\xe9 ', PyUnicode_NATIVE_UCS1, "latin1:\xe9 "),
        ('ucs2:\u20ac '.encode(ucs2_enc), PyUnicode_NATIVE_UCS2,
         'ucs2:\u20ac '),
        ('ucs4:\U0010ffff '.encode(ucs4_enc), PyUnicode_NATIVE_UCS4,
         'ucs4:\U0010ffff '),
        (text.encode('utf8'), PyUnicode_NATIVE_UTF8, text),
    ]
    # Deliberately no subTest(): stop at the first failing case.
    for raw, fmt, expected in valid_cases:
        self.assertEqual(from_native(raw, fmt), expected)

    # Empty string
    every_valid_format = (
        PyUnicode_NATIVE_ASCII,
        PyUnicode_NATIVE_UCS1,
        PyUnicode_NATIVE_UCS2,
        PyUnicode_NATIVE_UCS4,
        PyUnicode_NATIVE_UTF8,
    )
    for native_format in every_valid_format:
        with self.subTest(native_format=native_format):
            self.assertEqual(from_native(b'', native_format), '')

    # Invalid format
    with self.assertRaises(ValueError):
        from_native(b'', PyUnicode_NATIVE_INVALID)

    # Invalid size: buffers whose length is not a multiple of the
    # code unit size (2 bytes for UCS2, 4 bytes for UCS4).
    ucs2 = 'ucs2:\u20ac '.encode(ucs2_enc)
    ucs4 = 'ucs4:\U0010ffff '.encode(ucs4_enc)
    truncated_cases = [
        (ucs2[:-1], PyUnicode_NATIVE_UCS2),
        (ucs4[:-1], PyUnicode_NATIVE_UCS4),
        (ucs4[:-2], PyUnicode_NATIVE_UCS4),
        (ucs4[:-3], PyUnicode_NATIVE_UCS4),
    ]
    for raw, fmt in truncated_cases:
        with self.assertRaises(ValueError):
            from_native(raw, fmt)
1754
+
1755
+
1756
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
0 commit comments