clang 22.0.0git
DataflowAnalysisContext.h
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17
18#include "clang/AST/Decl.h"
19#include "clang/AST/Expr.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/DenseSet.h"
29#include "llvm/ADT/SetVector.h"
30#include "llvm/Support/Compiler.h"
31#include <cassert>
32#include <memory>
33#include <optional>
34
35namespace clang {
36namespace dataflow {
37class Logger;
38
40 /// The maximum depth to analyze. A value of zero is equivalent to disabling
41 /// context-sensitive analysis entirely.
42 unsigned Depth = 2;
43};
44
45/// A simple representation of essential elements of the logical context used in
46/// environments. Designed for import/export for applications requiring
47/// serialization support.
49 // Global invariant that applies for all definitions in the context.
51 // Flow-condition tokens in the context.
52 llvm::DenseMap<Atom, const Formula *> TokenDefs;
53 // Dependencies between flow-condition definitions.
54 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> TokenDeps;
55};
56
57/// Owns objects that encompass the state of a program and stores context that
58/// is used during dataflow analysis.
60public:
61 struct Options {
62 /// Options for analyzing function bodies when present in the translation
63 /// unit, or empty to disable context-sensitive analysis. Note that this is
64 /// fundamentally limited: some constructs, such as recursion, are
65 /// explicitly unsupported.
66 std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
67
68 /// If provided, analysis details will be recorded here.
69 /// (This is always non-null within an AnalysisContext, the framework
70 /// provides a fallback no-op logger).
71 Logger *Log = nullptr;
72 };
73
74 /// Constructs a dataflow analysis context.
75 ///
76 /// Requirements:
77 ///
78 /// `S` must not be null.
79 DataflowAnalysisContext(std::unique_ptr<Solver> S,
80 Options Opts = Options{
81 /*ContextSensitiveOpts=*/std::nullopt,
82 /*Logger=*/nullptr})
83 : DataflowAnalysisContext(*S, std::move(S), Opts) {}
84
85 /// Constructs a dataflow analysis context.
86 ///
87 /// Requirements:
88 ///
89 /// `S` must outlive the `DataflowAnalysisContext`.
91 /*ContextSensitiveOpts=*/std::nullopt,
92 /*Logger=*/nullptr})
93 : DataflowAnalysisContext(S, nullptr, Opts) {}
94
96
97 /// Sets a callback that returns the names and types of the synthetic fields
98 /// to add to a `RecordStorageLocation` of a given type.
99 /// Typically, this is called from the constructor of a `DataflowAnalysis`
100 ///
101 /// The field types returned by the callback may not have reference type.
102 ///
103 /// To maintain the invariant that all `RecordStorageLocation`s of a given
104 /// type have the same fields:
105 /// * The callback must always return the same result for a given type
106 /// * `setSyntheticFieldCallback()` must be called before any
107 // `RecordStorageLocation`s are created.
109 std::function<llvm::StringMap<QualType>(QualType)> CB) {
110 assert(!RecordStorageLocationCreated);
111 SyntheticFieldCallback = CB;
112 }
113
114 /// Returns a new storage location appropriate for `Type`.
115 ///
116 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
118
119 /// Creates a `RecordStorageLocation` for the given type and with the given
120 /// fields.
121 ///
122 /// Requirements:
123 ///
124 /// `FieldLocs` must contain exactly the fields returned by
125 /// `getModeledFields(Type)`.
126 /// `SyntheticFields` must contain exactly the fields returned by
127 /// `getSyntheticFields(Type)`.
131
132 /// Returns a stable storage location for `D`.
134
135 /// Returns a stable storage location for `E`.
137
138 /// Returns a pointer value that represents a null pointer. Calls with
139 /// `PointeeType` that are canonically equivalent will return the same result.
140 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
142
143 /// Adds `Constraint` to current and future flow conditions in this context.
144 ///
145 /// Invariants must contain only flow-insensitive information, i.e. facts that
146 /// are true on all paths through the program.
147 /// Information can be added eagerly (when analysis begins), or lazily (e.g.
148 /// when values are first used). The analysis must be careful that the same
149 /// information is added regardless of which order blocks are analyzed in.
150 void addInvariant(const Formula &Constraint);
151
152 /// Adds `Constraint` to the flow condition identified by `Token`.
153 void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
154
155 /// Adds `Deps` to the dependencies of the flow condition identified by
156 /// `Token`. Intended for use in deserializing contexts. The formula alone
157 /// doesn't have enough information to indicate its deps.
158 void addFlowConditionDeps(Atom Token, const llvm::DenseSet<Atom> &Deps) {
159 // Avoid creating an entry for `Token` with an empty set.
160 if (!Deps.empty())
161 FlowConditionDeps[Token].insert(Deps.begin(), Deps.end());
162 }
163
164 /// Creates a new flow condition with the same constraints as the flow
165 /// condition identified by `Token` and returns its token.
167
168 /// Creates a new flow condition that represents the disjunction of the flow
169 /// conditions identified by `FirstToken` and `SecondToken`, and returns its
170 /// token.
171 Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
172
173 /// Returns true if the constraints of the flow condition identified by
174 /// `Token` imply that `F` is true.
175 /// Returns false if the flow condition does not imply `F` or if the solver
176 /// times out.
177 bool flowConditionImplies(Atom Token, const Formula &F);
178
179 /// Returns true if the constraints of the flow condition identified by
180 /// `Token` still allow `F` to be true.
181 /// Returns false if the flow condition implies that `F` is false or if the
182 /// solver times out.
183 bool flowConditionAllows(Atom Token, const Formula &F);
184
185 /// Returns true if `Val1` is equivalent to `Val2`.
186 /// Note: This function doesn't take into account constraints on `Val1` and
187 /// `Val2` imposed by the flow condition.
188 bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
189
190 LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
191 llvm::raw_ostream &OS = llvm::dbgs());
192
193 /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise,
194 /// returns null.
195 const AdornedCFG *getAdornedCFG(const FunctionDecl *F);
196
197 const Options &getOptions() { return Opts; }
198
199 Arena &arena() { return *A; }
200
201 /// Returns the outcome of satisfiability checking on `Constraints`.
202 ///
203 /// Flow conditions are not incorporated, so they may need to be manually
204 /// included in `Constraints` to provide contextually-accurate results, e.g.
205 /// if any definitions or relationships of the values in `Constraints` have
206 /// been stored in flow conditions.
207 Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
208
209 /// Returns the fields of `Type`, limited to the set of fields modeled by this
210 /// context.
212
213 /// Returns the names and types of the synthetic fields for the given record
214 /// type.
215 llvm::StringMap<QualType> getSyntheticFields(QualType Type) {
216 assert(Type->isRecordType());
217 if (SyntheticFieldCallback) {
218 llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type);
219 // Synthetic fields are not allowed to have reference type.
220 assert([&Result] {
221 for (const auto &Entry : Result)
222 if (Entry.getValue()->isReferenceType())
223 return false;
224 return true;
225 }());
226 return Result;
227 }
228 return {};
229 }
230
231 /// Export the logical-context portions of `AC`, limited to the given target
232 /// flow-condition tokens.
234 exportLogicalContext(llvm::DenseSet<dataflow::Atom> TargetTokens) const;
235
236 /// Initializes this context's "logical" components with `LC`.
238
239private:
240 friend class Environment;
241
242 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
243 static QualType getEmptyKey() {
244 // Allow a NULL `QualType` by using a different value as the empty key.
245 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
246 }
247
248 using DenseMapInfo::getHashValue;
249 using DenseMapInfo::getTombstoneKey;
250 using DenseMapInfo::isEqual;
251 };
252
253 /// `S` is the solver to use. `OwnedSolver` may be:
254 /// * Null (in which case `S` is non-onwed and must outlive this object), or
255 /// * Non-null (in which case it must refer to `S`, and the
256 /// `DataflowAnalysisContext will take ownership of `OwnedSolver`).
257 DataflowAnalysisContext(Solver &S, std::unique_ptr<Solver> &&OwnedSolver,
258 Options Opts);
259
260 /// Computes the transitive closure of dependencies of (flow-condition)
261 /// `Tokens`. That is, the set of flow-condition tokens reachable from
262 /// `Tokens` in the dependency graph.
263 llvm::DenseSet<Atom> collectDependencies(llvm::DenseSet<Atom> Tokens) const;
264
265 // Extends the set of modeled field declarations.
266 void addModeledFields(const FieldSet &Fields);
267
268 /// Adds all constraints of the flow condition identified by `Token` and all
269 /// of its transitive dependencies to `Constraints`.
270 void
271 addTransitiveFlowConditionConstraints(Atom Token,
272 llvm::SetVector<const Formula *> &Out);
273
274 /// Returns true if the solver is able to prove that there is a satisfying
275 /// assignment for `Constraints`.
276 bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) {
277 return querySolver(std::move(Constraints)).getStatus() ==
279 }
280
281 /// Returns true if the solver is able to prove that there is no satisfying
282 /// assignment for `Constraints`
283 bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
284 return querySolver(std::move(Constraints)).getStatus() ==
286 }
287
288 Solver &S;
289 std::unique_ptr<Solver> OwnedSolver;
290 std::unique_ptr<Arena> A;
291
292 // Maps from program declarations and statements to storage locations that are
293 // assigned to them. These assignments are global (aggregated across all basic
294 // blocks) and are used to produce stable storage locations when the same
295 // basic blocks are evaluated multiple times. The storage locations that are
296 // in scope for a particular basic block are stored in `Environment`.
297 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
298 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
299
300 // Null pointer values, keyed by the canonical pointee type.
301 //
302 // FIXME: The pointer values are indexed by the pointee types which are
303 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
304 // creating a type-independent `NullPointerValue` without a `PointeeLoc`
305 // field.
306 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
307 NullPointerVals;
308
309 Options Opts;
310
311 // Flow conditions are tracked symbolically: each unique flow condition is
312 // associated with a fresh symbolic variable (token), bound to the clause that
313 // defines the flow condition. Conceptually, each binding corresponds to an
314 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
315 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
316 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
317 // the `FlowConditionConstraints` map, keyed by the token of the flow
318 // condition.
319 //
320 // Flow conditions depend on other flow conditions if they are created using
321 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
322 // dependencies is stored in the `FlowConditionDeps` map.
323 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
324 llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
325 const Formula *Invariant = nullptr;
326
327 llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts;
328
329 // Fields modeled by environments covered by this context.
330 FieldSet ModeledFields;
331
332 std::unique_ptr<Logger> LogOwner; // If created via flags.
333
334 std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback;
335
336 /// Has any `RecordStorageLocation` been created yet?
337 bool RecordStorageLocationCreated = false;
338};
339
340} // namespace dataflow
341} // namespace clang
342
343#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
const Decl * D
Expr * E
Allows QualTypes to be sorted and hence used in maps and sets.
This represents one expression.
Definition: Expr.h:112
Represents a function declaration or definition.
Definition: Decl.h:1999
A (possibly-)qualified type.
Definition: TypeBase.h:937
static QualType getFromOpaquePtr(const void *Ptr)
Definition: TypeBase.h:986
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: TypeBase.h:1833
bool isRecordType() const
Definition: TypeBase.h:8707
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:711
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:47
The Arena owns the objects that model data within an analysis.
Definition: Arena.h:21
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
void setSyntheticFieldCallback(std::function< llvm::StringMap< QualType >(QualType)> CB)
Sets a callback that returns the names and types of the synthetic fields to add to a RecordStorageLoc...
const AdornedCFG * getAdornedCFG(const FunctionDecl *F)
Returns the AdornedCFG registered for F, if any.
DataflowAnalysisContext(std::unique_ptr< Solver > S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
Atom joinFlowConditions(Atom FirstToken, Atom SecondToken)
Creates a new flow condition that represents the disjunction of the flow conditions identified by Fir...
void addFlowConditionConstraint(Atom Token, const Formula &Constraint)
Adds Constraint to the flow condition identified by Token.
Atom forkFlowCondition(Atom Token)
Creates a new flow condition with the same constraints as the flow condition identified by Token and ...
bool equivalentFormulas(const Formula &Val1, const Formula &Val2)
Returns true if Val1 is equivalent to Val2.
StorageLocation & getStableStorageLocation(const ValueDecl &D)
Returns a stable storage location for D.
bool flowConditionImplies(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token imply that F is true.
Solver::Result querySolver(llvm::SetVector< const Formula * > Constraints)
Returns the outcome of satisfiability checking on Constraints.
bool flowConditionAllows(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token still allow F to be true.
PointerValue & getOrCreateNullPointerValue(QualType PointeeType)
Returns a pointer value that represents a null pointer.
void addInvariant(const Formula &Constraint)
Adds Constraint to current and future flow conditions in this context.
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
void addFlowConditionDeps(Atom Token, const llvm::DenseSet< Atom > &Deps)
Adds Deps to the dependencies of the flow condition identified by Token.
SimpleLogicalContext exportLogicalContext(llvm::DenseSet< dataflow::Atom > TargetTokens) const
Export the logical-context portions of AC, limited to the given target flow-condition tokens.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, llvm::raw_ostream &OS=llvm::dbgs())
void initLogicalContext(SimpleLogicalContext LC)
Initializes this context's "logical" components with LC.
DataflowAnalysisContext(Solver &S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
Holds the state of the program (store and heap) at a given program point.
A logger is notified as the analysis progresses.
Definition: Logger.h:27
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:170
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
An interface for a SAT solver that can be used by dataflow analyses.
Definition: Solver.h:28
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
llvm::SmallSetVector< const FieldDecl *, 4 > FieldSet
A set of FieldDecl *.
Definition: ASTOps.h:43
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
int const char * function
Definition: c++config.h:31
unsigned Depth
The maximum depth to analyze.
Logger * Log
If provided, analysis details will be recorded here.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
A simple representation of essential elements of the logical context used in environments.
llvm::DenseMap< Atom, const Formula * > TokenDefs
llvm::DenseMap< Atom, llvm::DenseSet< Atom > > TokenDeps
Status getStatus() const
Returns the status of satisfiability checking on the queried boolean formula.
Definition: Solver.h:64
@ Unsatisfiable
Indicates that there is no satisfying assignment for a boolean formula.
@ Satisfiable
Indicates that there exists a satisfying assignment for a boolean formula.