clang 22.0.0git
Parsing.cpp
Go to the documentation of this file.
1//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "llvm/ADT/StringMap.h"
13#include "llvm/ADT/StringRef.h"
14#include "llvm/Support/Error.h"
15#include <optional>
16#include <string>
17#include <utility>
18
19using namespace clang;
20using namespace transformer;
21
22// FIXME: This implementation is entirely separate from that of the AST
23// matchers. Given the similarity of the languages and uses of the two parsers,
24// the two should share a common parsing infrastructure, as should other
25// Transformer types. We intend to unify this implementation soon to share as
26// much as possible with the AST Matchers parsing.
27
28namespace {
29using llvm::Expected;
30
31template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
32
33struct ParseState {
34 // The remaining input to be processed.
35 StringRef Input;
36 // The original input. Not modified during parsing; only for reference in
37 // error reporting.
38 StringRef OriginalInput;
39};
40
41// Represents an intermediate result returned by a parsing function. Functions
42// that don't generate values should use `std::nullopt`
43template <typename ResultType> struct ParseProgress {
44 ParseState State;
45 // Intermediate result generated by the Parser.
46 ResultType Value;
47};
48
49template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
50template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
51
52class ParseError : public llvm::ErrorInfo<ParseError> {
53public:
54 // Required field for all ErrorInfo derivatives.
55 static char ID;
56
57 ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
58 : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
59 Excerpt(std::move(InputExcerpt)) {}
60
61 void log(llvm::raw_ostream &OS) const override {
62 OS << "parse error at position (" << Pos << "): " << ErrorMsg
63 << ": " + Excerpt;
64 }
65
66 std::error_code convertToErrorCode() const override {
67 return llvm::inconvertibleErrorCode();
68 }
69
70 // Position of the error in the input string.
71 size_t Pos;
72 std::string ErrorMsg;
73 // Excerpt of the input starting at the error position.
74 std::string Excerpt;
75};
76
77char ParseError::ID;
78} // namespace
79
80static const llvm::StringMap<RangeSelectorOp<std::string>> &
82 static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
83 {"name", name},
84 {"node", node},
85 {"statement", statement},
86 {"statements", statements},
87 {"member", member},
88 {"callArgs", callArgs},
89 {"elseBranch", elseBranch},
90 {"initListElements", initListElements}};
91 return M;
92}
93
94static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
96 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
97 {"before", before}, {"after", after}, {"expansion", expansion}};
98 return M;
99}
100
101static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
103 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
104 {"encloseNodes", encloseNodes}};
105 return M;
106}
107
108static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
110 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
111 M = {{"enclose", enclose}, {"between", between}};
112 return M;
113}
114
115template <typename Element>
116std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
117 llvm::StringRef Key) {
118 auto it = Map.find(Key);
119 if (it == Map.end())
120 return std::nullopt;
121 return it->second;
122}
123
124template <typename ResultType>
125ParseProgress<ResultType> makeParseProgress(ParseState State,
126 ResultType Result) {
127 return ParseProgress<ResultType>{State, std::move(Result)};
128}
129
130static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
131 size_t Pos = S.OriginalInput.size() - S.Input.size();
132 return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
133 S.OriginalInput.substr(Pos, 20).str());
134}
135
136// Returns a new ParseState that advances \c S by \c N characters.
137static ParseState advance(ParseState S, size_t N) {
138 S.Input = S.Input.drop_front(N);
139 return S;
140}
141
142static StringRef consumeWhitespace(StringRef S) {
143 return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
144}
145
146// Parses a single expected character \c c from \c State, skipping preceding
147// whitespace. Error if the expected character isn't found.
148static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
149 State.Input = consumeWhitespace(State.Input);
150 if (State.Input.empty() || State.Input.front() != c)
151 return makeParseError(State,
152 ("expected char not found: " + llvm::Twine(c)).str());
153 return makeParseProgress(advance(State, 1), std::nullopt);
154}
155
156// Parses an identitifer "token" -- handles preceding whitespace.
157static ExpectedProgress<std::string> parseId(ParseState State) {
158 State.Input = consumeWhitespace(State.Input);
159 auto Id = State.Input.take_while(
160 [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
161 if (Id.empty())
162 return makeParseError(State, "failed to parse name");
163 return makeParseProgress(advance(State, Id.size()), Id.str());
164}
165
166// For consistency with the AST matcher parser and C++ code, node ids are
167// written as strings. However, we do not support escaping in the string.
168static ExpectedProgress<std::string> parseStringId(ParseState State) {
169 State.Input = consumeWhitespace(State.Input);
170 if (State.Input.empty())
171 return makeParseError(State, "unexpected end of input");
172 if (!State.Input.consume_front("\""))
173 return makeParseError(
174 State,
175 "expecting string, but encountered other character or end of input");
176
177 StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
178 if (State.Input.size() == Id.size())
179 return makeParseError(State, "unterminated string");
180 // Advance past the trailing quote as well.
181 return makeParseProgress(advance(State, Id.size() + 1), Id.str());
182}
183
184// Parses a single element surrounded by parens. `Op` is applied to the parsed
185// result to create the result of this function call.
186template <typename T>
187ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
188 RangeSelectorOp<T> Op,
189 ParseState State) {
190 auto P = parseChar('(', State);
191 if (!P)
192 return P.takeError();
193
194 auto E = ParseElement(P->State);
195 if (!E)
196 return E.takeError();
197
198 P = parseChar(')', E->State);
199 if (!P)
200 return P.takeError();
201
202 return makeParseProgress(P->State, Op(std::move(E->Value)));
203}
204
205// Parses a pair of elements surrounded by parens and separated by comma. `Op`
206// is applied to the parsed results to create the result of this function call.
207template <typename T>
208ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
209 RangeSelectorOp<T, T> Op,
210 ParseState State) {
211 auto P = parseChar('(', State);
212 if (!P)
213 return P.takeError();
214
215 auto Left = ParseElement(P->State);
216 if (!Left)
217 return Left.takeError();
218
219 P = parseChar(',', Left->State);
220 if (!P)
221 return P.takeError();
222
223 auto Right = ParseElement(P->State);
224 if (!Right)
225 return Right.takeError();
226
227 P = parseChar(')', Right->State);
228 if (!P)
229 return P.takeError();
230
231 return makeParseProgress(P->State,
232 Op(std::move(Left->Value), std::move(Right->Value)));
233}
234
235// Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
236// Id operator). Returns StencilType representing the operator on success and
237// error if it fails to parse input for an operator.
238static ExpectedProgress<RangeSelector>
239parseRangeSelectorImpl(ParseState State) {
240 auto Id = parseId(State);
241 if (!Id)
242 return Id.takeError();
243
244 std::string OpName = std::move(Id->Value);
245 if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
246 return parseSingle(parseStringId, *Op, Id->State);
247
248 if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
249 return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
250
251 if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
252 return parsePair(parseStringId, *Op, Id->State);
253
254 if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
255 return parsePair(parseRangeSelectorImpl, *Op, Id->State);
256
257 return makeParseError(State, "unknown selector name: " + OpName);
258}
259
261 ParseState State = {Input, Input};
262 ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
263 if (!Result)
264 return Result.takeError();
265 State = Result->State;
266 // Discard any potentially trailing whitespace.
267 State.Input = consumeWhitespace(State.Input);
268 if (State.Input.empty())
269 return Result->Value;
270 return makeParseError(State, "unexpected input after selector");
271}
StringRef P
Expr * E
static ExpectedProgress< std::string > parseStringId(ParseState State)
Definition: Parsing.cpp:168
static ExpectedProgress< std::nullopt_t > parseChar(char c, ParseState State)
Definition: Parsing.cpp:148
ParseProgress< ResultType > makeParseProgress(ParseState State, ResultType Result)
Definition: Parsing.cpp:125
static StringRef consumeWhitespace(StringRef S)
Definition: Parsing.cpp:142
ExpectedProgress< RangeSelector > parseSingle(ParseFunction< T > ParseElement, RangeSelectorOp< T > Op, ParseState State)
Definition: Parsing.cpp:187
ExpectedProgress< RangeSelector > parsePair(ParseFunction< T > ParseElement, RangeSelectorOp< T, T > Op, ParseState State)
Definition: Parsing.cpp:208
std::optional< Element > findOptional(const llvm::StringMap< Element > &Map, llvm::StringRef Key)
Definition: Parsing.cpp:116
static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg)
Definition: Parsing.cpp:130
static const llvm::StringMap< RangeSelectorOp< RangeSelector > > & getUnaryRangeSelectors()
Definition: Parsing.cpp:95
static ExpectedProgress< RangeSelector > parseRangeSelectorImpl(ParseState State)
Definition: Parsing.cpp:239
static ExpectedProgress< std::string > parseId(ParseState State)
Definition: Parsing.cpp:157
static ParseState advance(ParseState S, size_t N)
Definition: Parsing.cpp:137
static const llvm::StringMap< RangeSelectorOp< RangeSelector, RangeSelector > > & getBinaryRangeSelectors()
Definition: Parsing.cpp:109
static const llvm::StringMap< RangeSelectorOp< std::string, std::string > > & getBinaryStringSelectors()
Definition: Parsing.cpp:102
static const llvm::StringMap< RangeSelectorOp< std::string > > & getUnaryStringSelectors()
Definition: Parsing.cpp:81
Defines parsing functions for Transformer types.
Defines a combinator library supporting the definition of selectors, which select source ranges based...
uint32_t Id
Definition: SemaARM.cpp:1179
__device__ __2f16 float c
RangeSelector initListElements(std::string ID)
RangeSelector enclose(RangeSelector Begin, RangeSelector End)
Selects from the start of Begin and to the end of End.
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token.
RangeSelector between(RangeSelector R1, RangeSelector R2)
Selects the range between R1 and R2.
Definition: RangeSelector.h:60
RangeSelector elseBranch(std::string ID)
Given an IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword.
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon, if any (for declarations and non-expression statements)...
MatchConsumer< CharSourceRange > RangeSelector
Definition: RangeSelector.h:27
RangeSelector encloseNodes(std::string BeginID, std::string EndID)
Convenience version of range where end-points are bound nodes.
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
RangeSelector callArgs(std::string ID)
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
llvm::Expected< RangeSelector > parseRangeSelector(llvm::StringRef Input)
Parses a string representation of a RangeSelector.
Definition: Parsing.cpp:260
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always).
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion,...
RangeSelector statements(std::string ID)
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc) selects th...
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
Definition: CharInfo.h:41
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
Definition: CharInfo.h:61
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
Definition: CharInfo.h:108
#define log(__x)
Definition: tgmath.h:460