Skip to content

Commit 9d212e8

Browse files
author
Jonathan Coe
committed
[clang-format] Handle quotes and escaped braces in C# interpolated strings
Summary: This addresses issues raised in https://bugs.llvm.org/show_bug.cgi?id=44454. There are outstanding issues with multi-line verbatim strings in C# that will be addressed in a follow-up PR. Reviewers: krasimir, MyDeveloperDay Reviewed By: krasimir, MyDeveloperDay Subscribers: MyDeveloperDay Tags: #clang-format Differential Revision: https://reviews.llvm.org/D73492
1 parent e916c8d commit 9d212e8

File tree

3 files changed

+65
-7
lines changed

3 files changed

+65
-7
lines changed

clang/lib/Format/FormatTokenLexer.cpp

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ void FormatTokenLexer::tryMergePreviousTokens() {
7474
if (Style.isCSharp()) {
7575
if (tryMergeCSharpKeywordVariables())
7676
return;
77-
if (tryMergeCSharpVerbatimStringLiteral())
77+
if (tryMergeCSharpStringLiteral())
7878
return;
7979
if (tryMergeCSharpDoubleQuestion())
8080
return;
@@ -181,18 +181,68 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
181181
// Search for verbatim or interpolated string literals @"ABC" or
182182
// $"aaaaa{abc}aaaaa" and mark the token as TT_CSharpStringLiteral, and to
183183
// prevent splitting of @, $ and ".
184-
bool FormatTokenLexer::tryMergeCSharpVerbatimStringLiteral() {
184+
bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
185185
if (Tokens.size() < 2)
186186
return false;
187187

188-
auto &String = *(Tokens.end() - 1);
189-
if (!String->is(tok::string_literal))
190-
return false;
188+
auto &CSharpStringLiteral = *(Tokens.end() - 2);
189+
190+
// Interpolated strings could contain { } with " characters inside.
191+
// $"{x ?? "null"}"
192+
// should not be split into $"{x ?? ", null, "}" but should be treated as a
193+
// single string-literal.
194+
//
195+
// We opt not to try and format expressions inside {} within a C#
196+
// interpolated string. Formatting expressions within an interpolated string
197+
// would require similar work as that done for JavaScript template strings
198+
// in `handleTemplateStrings()`.
199+
auto &CSharpInterpolatedString = *(Tokens.end() - 2);
200+
if (CSharpInterpolatedString->Type == TT_CSharpStringLiteral &&
201+
(CSharpInterpolatedString->TokenText.startswith(R"($")") ||
202+
CSharpInterpolatedString->TokenText.startswith(R"($@")"))) {
203+
int UnmatchedOpeningBraceCount = 0;
204+
205+
auto TokenTextSize = CSharpInterpolatedString->TokenText.size();
206+
for (size_t Index = 0; Index < TokenTextSize; ++Index) {
207+
char C = CSharpInterpolatedString->TokenText[Index];
208+
if (C == '{') {
209+
// "{{" inside an interpolated string is an escaped '{' so skip it.
210+
if (Index + 1 < TokenTextSize &&
211+
CSharpInterpolatedString->TokenText[Index + 1] == '{') {
212+
++Index;
213+
continue;
214+
}
215+
++UnmatchedOpeningBraceCount;
216+
} else if (C == '}') {
217+
// "}}" inside an interpolated string is an escaped '}' so skip it.
218+
if (Index + 1 < TokenTextSize &&
219+
CSharpInterpolatedString->TokenText[Index + 1] == '}') {
220+
++Index;
221+
continue;
222+
}
223+
--UnmatchedOpeningBraceCount;
224+
}
225+
}
226+
227+
if (UnmatchedOpeningBraceCount > 0) {
228+
auto &NextToken = *(Tokens.end() - 1);
229+
CSharpInterpolatedString->TokenText =
230+
StringRef(CSharpInterpolatedString->TokenText.begin(),
231+
NextToken->TokenText.end() -
232+
CSharpInterpolatedString->TokenText.begin());
233+
CSharpInterpolatedString->ColumnWidth += NextToken->ColumnWidth;
234+
Tokens.erase(Tokens.end() - 1);
235+
return true;
236+
}
237+
}
191238

192239
// verbatim strings could contain "" which C# sees as an escaped ".
193240
// @"""Hello""" will have been tokenized as @"" "Hello" "" and needs
194241
// merging into a single string literal.
195-
auto &CSharpStringLiteral = *(Tokens.end() - 2);
242+
auto &String = *(Tokens.end() - 1);
243+
if (!String->is(tok::string_literal))
244+
return false;
245+
196246
if (CSharpStringLiteral->Type == TT_CSharpStringLiteral &&
197247
(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
198248
CSharpStringLiteral->TokenText.startswith(R"($@")"))) {

clang/lib/Format/FormatTokenLexer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class FormatTokenLexer {
4949
bool tryMergeLessLess();
5050
bool tryMergeNSStringLiteral();
5151
bool tryMergeJSPrivateIdentifier();
52-
bool tryMergeCSharpVerbatimStringLiteral();
52+
bool tryMergeCSharpStringLiteral();
5353
bool tryMergeCSharpKeywordVariables();
5454
bool tryMergeCSharpNullConditionals();
5555
bool tryMergeCSharpDoubleQuestion();

clang/unittests/Format/FormatTestCSharp.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,5 +417,13 @@ TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
417417
verifyFormat(R"(string str = $@"""Hello {friend}""")", Style);
418418
}
419419

420+
TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
421+
FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
422+
423+
verifyFormat(R"(string str1 = $"{null ?? "null"}";)", Style);
424+
verifyFormat(R"(string str2 = $"{{{braceCount} braces";)", Style);
425+
verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style);
426+
}
427+
420428
} // namespace format
421429
} // end namespace clang

0 commit comments

Comments
 (0)