/**
 * Decodes Unicode escape sequences in the raw source text of an identifier.
 *
 * Two escape forms are recognised:
 * - `\uXXXX`   — exactly four hexadecimal digits
 * - `\u{X...}` — one or more hexadecimal digits wrapped in braces
 *
 * Any text that is not a well-formed escape is returned untouched. An escape
 * whose value lies above U+10FFFF (the largest Unicode code point) is also
 * left as written instead of throwing, so malformed source text cannot crash
 * the caller.
 *
 * @param text - Raw identifier text, possibly containing escapes.
 * @returns The text with every decodable escape replaced by its character.
 *
 * @example
 * unescapeUnicodeIdentifier('\\u0061\\u{62}c'); // 'abc'
 */
export function unescapeUnicodeIdentifier(text: string): string {
  // Largest value accepted by String.fromCodePoint; anything above it makes
  // that call throw a RangeError.
  const MAX_CODE_POINT = 0x10ffff;

  return text.replaceAll(
    /\\u\{([0-9a-fA-F]+)\}|\\u([0-9a-fA-F]{4})/g,
    (
      match: string,
      curlyHex: string | undefined,
      shortHex: string | undefined,
    ): string => {
      // Exactly one of the two capture groups participates in a given match;
      // the other one is undefined.
      const codePoint = parseInt(curlyHex ?? shortHex ?? '', 16);

      // NaN and out-of-range values both fail this comparison, in which case
      // the escape is kept verbatim.
      return codePoint <= MAX_CODE_POINT
        ? String.fromCodePoint(codePoint)
        : match;
    },
  );
}
// Test suite for unescapeUnicodeIdentifier. The suite title is given as the
// function reference, matching the unescapeStringLiteralText suite in this
// file.
describe(unescapeUnicodeIdentifier, () => {
  it('should decode simple \\uXXXX escape', () => {
    expect(unescapeUnicodeIdentifier('\\u0061')).toBe('a');
    expect(unescapeUnicodeIdentifier('\\u0042')).toBe('B');
  });

  it('should decode \\u{X} escape', () => {
    expect(unescapeUnicodeIdentifier('\\u{61}')).toBe('a');
    expect(unescapeUnicodeIdentifier('\\u{1F602}')).toBe('😂');
  });

  it('should decode multiple escapes in one string', () => {
    expect(unescapeUnicodeIdentifier('\\u0061\\u0062')).toBe('ab');
    expect(unescapeUnicodeIdentifier('\\u{61}\\u{62}')).toBe('ab');
  });

  it('should leave non-escape text unchanged', () => {
    expect(unescapeUnicodeIdentifier('foo')).toBe('foo');
    expect(unescapeUnicodeIdentifier('a\\u0062c')).toBe('abc');
  });

  it('should not decode invalid escapes', () => {
    expect(unescapeUnicodeIdentifier('\\u00ZZ')).toBe('\\u00ZZ');
    expect(unescapeUnicodeIdentifier('\\u{ZZ}')).toBe('\\u{ZZ}');
  });

  it('should accept upper- and lower-case hex digits', () => {
    expect(unescapeUnicodeIdentifier('\\u004A')).toBe('J');
    expect(unescapeUnicodeIdentifier('\\u006a')).toBe('j');
    expect(unescapeUnicodeIdentifier('\\u{6A}')).toBe('j');
  });

  it('should consume exactly four digits for \\uXXXX', () => {
    // The fifth digit is ordinary text following the escape.
    expect(unescapeUnicodeIdentifier('\\u00611')).toBe('a1');
  });

  it('should not decode truncated or empty escapes', () => {
    expect(unescapeUnicodeIdentifier('\\u006')).toBe('\\u006');
    expect(unescapeUnicodeIdentifier('\\u{}')).toBe('\\u{}');
    expect(unescapeUnicodeIdentifier('\\u{61')).toBe('\\u{61');
  });

  it('should return an empty string unchanged', () => {
    expect(unescapeUnicodeIdentifier('')).toBe('');
  });
});