RustPython · coolreader18 · Apr 16, 2020 · Apr 14, 2020 · Apr 14, 2020 · Apr 14, 2020
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Lib/test/test_json/test_unicode.py b/Lib/test/test_json/test_unicode.py
@@ -8,25 +8,21 @@ class TestUnicode:
     # test_encoding1 and test_encoding2 from 2.x are irrelevant (only str
     # is supported as input, not bytes).
 
-    @unittest.skip("TODO: RUSTPYTHON")
     def test_encoding3(self):
         u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
         j = self.dumps(u)
         self.assertEqual(j, '"\\u03b1\\u03a9"')
 
-    @unittest.skip("TODO: RUSTPYTHON")
     def test_encoding4(self):
         u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
         j = self.dumps([u])
         self.assertEqual(j, '["\\u03b1\\u03a9"]')
 
-    @unittest.skip("TODO: RUSTPYTHON")
     def test_encoding5(self):
         u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
         j = self.dumps(u, ensure_ascii=False)
         self.assertEqual(j, '"{0}"'.format(u))
 
-    @unittest.skip("TODO: RUSTPYTHON")
     def test_encoding6(self):
         u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
         j = self.dumps([u], ensure_ascii=False)

diff --git a/parser/Cargo.toml b/parser/Cargo.toml
@@ -17,4 +17,5 @@ log="0.4.1"
 num-bigint = "0.2"
 num-traits = "0.2"
 unic-emoji-char = "0.9"
-unic-ucd-ident = "0.9"
+unic-ucd-ident  = "0.9"
+unicode_names2 = "0.4"
diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs
@@ -475,6 +475,37 @@ where
         u8::from_str_radix(&octet_content, 8).unwrap() as char
     }
 
+    fn parse_unicode_name(&mut self) -> Result<char, LexicalError> { // reads `{NAME}` and looks NAME up as a character name
+        let start_pos = self.get_pos(); // recorded before reading the expected '{'
+        match self.next_char() {
+            Some('{') => {}
+            _ => { // anything other than '{' (including no character at all) is a StringError
+                return Err(LexicalError {
+                    error: LexicalErrorType::StringError,
+                    location: start_pos,
+                })
+            }
+        }
+        let start_pos = self.get_pos(); // shadows the earlier value: taken after the '{', used for the lookup error
+        let mut name = String::new();
+        loop {
+            match self.next_char() {
+                Some('}') => break, // closing brace ends the name and is not pushed into `name`
+                Some(c) => name.push(c), // every other character is accumulated unchanged
+                None => { // no more characters before a '}' was seen
+                    return Err(LexicalError {
+                        error: LexicalErrorType::StringError,
+                        location: self.get_pos(),
+                    })
+                }
+            }
+        }
+        unicode_names2::character(&name).ok_or(LexicalError { // a failed lookup (`None`) becomes a UnicodeError
+            error: LexicalErrorType::UnicodeError,
+            location: start_pos,
+        })
+    }
+
     fn lex_string(
         &mut self,
         is_bytes: bool,
@@ -532,11 +563,14 @@ where
                             Some('t') => {
                                 string_content.push('\t');
                             }
-                            Some('u') => string_content.push(self.unicode_literal(4)?),
-                            Some('U') => string_content.push(self.unicode_literal(8)?),
-                            Some('x') => string_content.push(self.unicode_literal(2)?),
                             Some('v') => string_content.push('\x0b'),
                             Some(o @ '0'..='7') => string_content.push(self.parse_octet(o)),
+                            Some('x') => string_content.push(self.unicode_literal(2)?),
+                            Some('u') if !is_bytes => string_content.push(self.unicode_literal(4)?),
+                            Some('U') if !is_bytes => string_content.push(self.unicode_literal(8)?),
+                            Some('N') if !is_bytes => {
+                                string_content.push(self.parse_unicode_name()?)
+                            }
                             Some(c) => {
                                 string_content.push('\\');
                                 string_content.push(c);
@@ -1687,4 +1721,20 @@ mod tests {
             ]
         )
     }
+
+    #[test]
+    fn test_escape_unicode_name() { // checks that a `\N{...}` escape lexes to a single named character
+        let source = r#""\N{EN SPACE}""#;
+        let tokens = lex_source(source);
+        assert_eq!(
+            tokens,
+            vec![ // expected stream: one string token followed by a Newline token
+                Tok::String { // the escape is expected to be replaced by U+2002
+                    value: "\u{2002}".to_owned(),
+                    is_fstring: false,
+                },
+                Tok::Newline
+            ]
+        )
+    }
 }