clang 22.0.0git
ExplodedGraph.cpp
Go to the documentation of this file.
1//===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the template classes ExplodedNode and ExplodedGraph,
10// which represent a path-sensitive, intra-procedural "exploded graph."
11//
12//===----------------------------------------------------------------------===//
13
15#include "clang/AST/Expr.h"
16#include "clang/AST/ExprObjC.h"
17#include "clang/AST/ParentMap.h"
18#include "clang/AST/Stmt.h"
22#include "clang/Basic/LLVM.h"
26#include "llvm/ADT/DenseSet.h"
27#include "llvm/ADT/FoldingSet.h"
28#include "llvm/ADT/PointerUnion.h"
29#include <cassert>
30#include <memory>
31#include <optional>
32
33using namespace clang;
34using namespace ento;
35
36//===----------------------------------------------------------------------===//
37// Cleanup.
38//===----------------------------------------------------------------------===//
39
41
43
44//===----------------------------------------------------------------------===//
45// Node reclamation.
46//===----------------------------------------------------------------------===//
47
49 if (!Ex->isLValue())
50 return false;
51 return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Ex);
52}
53
54bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
55 // First, we only consider nodes for reclamation of the following
56 // conditions apply:
57 //
58 // (1) 1 predecessor (that has one successor)
59 // (2) 1 successor (that has one predecessor)
60 //
61 // If a node has no successor it is on the "frontier", while a node
62 // with no predecessor is a root.
63 //
64 // After these prerequisites, we discard all "filler" nodes that
65 // are used only for intermediate processing, and are not essential
66 // for analyzer history:
67 //
68 // (a) PreStmtPurgeDeadSymbols
69 //
70 // We then discard all other nodes where *all* of the following conditions
71 // apply:
72 //
73 // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
74 // (4) There is no 'tag' for the ProgramPoint.
75 // (5) The 'store' is the same as the predecessor.
76 // (6) The 'GDM' is the same as the predecessor.
77 // (7) The LocationContext is the same as the predecessor.
78 // (8) Expressions that are *not* lvalue expressions.
79 // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
80 // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
81 // PreImplicitCall (so that we would be able to find it when retrying a
82 // call with no inlining).
83 // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
84
85 // Conditions 1 and 2.
86 if (node->pred_size() != 1 || node->succ_size() != 1)
87 return false;
88
89 const ExplodedNode *pred = *(node->pred_begin());
90 if (pred->succ_size() != 1)
91 return false;
92
93 const ExplodedNode *succ = *(node->succ_begin());
94 if (succ->pred_size() != 1)
95 return false;
96
97 // Now reclaim any nodes that are (by definition) not essential to
98 // analysis history and are not consulted by any client code.
99 ProgramPoint progPoint = node->getLocation();
100 if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
101 return !progPoint.getTag();
102
103 // Condition 3.
104 if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
105 return false;
106
107 // Condition 4.
108 if (progPoint.getTag())
109 return false;
110
111 // Conditions 5, 6, and 7.
112 ProgramStateRef state = node->getState();
113 ProgramStateRef pred_state = pred->getState();
114 if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
115 progPoint.getLocationContext() != pred->getLocationContext())
116 return false;
117
118 // All further checks require expressions. As per #3, we know that we have
119 // a PostStmt.
120 const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt());
121 if (!Ex)
122 return false;
123
124 // Condition 8.
125 // Do not collect nodes for "interesting" lvalue expressions since they are
126 // used extensively for generating path diagnostics.
128 return false;
129
130 // Condition 9.
131 // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
132 // diagnostic generation; specifically, so that we could anchor arrows
133 // pointing to the beginning of statements (as written in code).
134 const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
135 if (!PM.isConsumedExpr(Ex))
136 return false;
137
138 // Condition 10.
139 const ProgramPoint SuccLoc = succ->getLocation();
140 if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
141 if (CallEvent::isCallStmt(SP->getStmt()))
142 return false;
143
144 // Condition 10, continuation.
145 if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
146 return false;
147
148 return true;
149}
150
151void ExplodedGraph::collectNode(ExplodedNode *node) {
152 // Removing a node means:
153 // (a) changing the predecessors successor to the successor of this node
154 // (b) changing the successors predecessor to the predecessor of this node
155 // (c) Putting 'node' onto freeNodes.
156 assert(node->pred_size() == 1 || node->succ_size() == 1);
157 ExplodedNode *pred = *(node->pred_begin());
158 ExplodedNode *succ = *(node->succ_begin());
159 pred->replaceSuccessor(succ);
160 succ->replacePredecessor(pred);
161 FreeNodes.push_back(node);
162 Nodes.RemoveNode(node);
163 --NumNodes;
164 node->~ExplodedNode();
165}
166
168 if (ChangedNodes.empty())
169 return;
170
171 // Only periodically reclaim nodes so that we can build up a set of
172 // nodes that meet the reclamation criteria. Freshly created nodes
173 // by definition have no successor, and thus cannot be reclaimed (see below).
174 assert(ReclaimCounter > 0);
175 if (--ReclaimCounter != 0)
176 return;
178
179 for (const auto node : ChangedNodes)
180 if (shouldCollect(node))
181 collectNode(node);
182 ChangedNodes.clear();
183}
184
185//===----------------------------------------------------------------------===//
186// ExplodedNode.
187//===----------------------------------------------------------------------===//
188
189// An NodeGroup's storage type is actually very much like a TinyPtrVector:
190// it can be either a pointer to a single ExplodedNode, or a pointer to a
191// BumpVector allocated with the ExplodedGraph's allocator. This allows the
192// common case of single-node NodeGroups to be implemented with no extra memory.
193//
194// Consequently, each of the NodeGroup methods have up to four cases to handle:
195// 1. The flag is set and this group does not actually contain any nodes.
196// 2. The group is empty, in which case the storage value is null.
197// 3. The group contains a single node.
198// 4. The group contains more than one node.
200using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
201
203 assert(!V->isSink());
204 Preds.addNode(V, G);
205 V->Succs.addNode(this, G);
206}
207
208void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
209 assert(!getFlag());
210
211 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
212 assert(isa<ExplodedNode *>(Storage));
213 Storage = node;
214 assert(isa<ExplodedNode *>(Storage));
215}
216
217void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
218 assert(!getFlag());
219
220 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
221 if (Storage.isNull()) {
222 Storage = N;
223 assert(isa<ExplodedNode *>(Storage));
224 return;
225 }
226
227 ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage);
228
229 if (!V) {
230 // Switch from single-node to multi-node representation.
231 auto *Old = cast<ExplodedNode *>(Storage);
232
234 V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4);
235 V->push_back(Old, Ctx);
236
237 Storage = V;
238 assert(!getFlag());
239 assert(isa<ExplodedNodeVector *>(Storage));
240 }
241
242 V->push_back(N, G.getNodeAllocator());
243}
244
245unsigned ExplodedNode::NodeGroup::size() const {
246 if (getFlag())
247 return 0;
248
249 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
250 if (Storage.isNull())
251 return 0;
252 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
253 return V->size();
254 return 1;
255}
256
257ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
258 if (getFlag())
259 return nullptr;
260
261 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
262 if (Storage.isNull())
263 return nullptr;
264 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
265 return V->begin();
266 return Storage.getAddrOfPtr1();
267}
268
269ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
270 if (getFlag())
271 return nullptr;
272
273 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
274 if (Storage.isNull())
275 return nullptr;
276 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
277 return V->end();
278 return Storage.getAddrOfPtr1() + 1;
279}
280
282 return pred_size() == 1 && succ_size() == 1 &&
283 getFirstPred()->getState()->getID() == getState()->getID() &&
284 getFirstPred()->succ_size() == 1;
285}
286
289 if (auto BEP = P.getAs<BlockEntrance>())
290 return BEP->getBlock();
291
292 // Find the node's current statement in the CFG.
293 // FIXME: getStmtForDiagnostics() does nasty things in order to provide
294 // a valid statement for body farms, do we need this behavior here?
295 if (const Stmt *S = getStmtForDiagnostics())
296 return getLocationContext()
298 ->getCFGStmtMap()
299 ->getBlock(S);
300
301 return nullptr;
302}
303
304static const LocationContext *
307 const LocationContext *ParentLC = LC->getParent();
308 assert(ParentLC && "We don't start analysis from autosynthesized code");
309 while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
310 LC = ParentLC;
311 ParentLC = LC->getParent();
312 assert(ParentLC && "We don't start analysis from autosynthesized code");
313 }
314 return LC;
315}
316
318 // We cannot place diagnostics on autosynthesized code.
319 // Put them onto the call site through which we jumped into autosynthesized
320 // code for the first time.
323 // It must be a stack frame because we only autosynthesize functions.
324 return cast<StackFrameContext>(findTopAutosynthesizedParentContext(LC))
325 ->getCallSite();
326 }
327 // Otherwise, see if the node's program point directly points to a statement.
328 // FIXME: Refactor into a ProgramPoint method?
330 if (auto SP = P.getAs<StmtPoint>())
331 return SP->getStmt();
332 if (auto BE = P.getAs<BlockEdge>())
333 return BE->getSrc()->getTerminatorStmt();
334 if (auto CE = P.getAs<CallEnter>())
335 return CE->getCallExpr();
336 if (auto CEE = P.getAs<CallExitEnd>())
337 return CEE->getCalleeContext()->getCallSite();
338 if (auto PIPP = P.getAs<PostInitializer>())
339 return PIPP->getInitializer()->getInit();
340 if (auto CEB = P.getAs<CallExitBegin>())
341 return CEB->getReturnStmt();
342 if (auto FEP = P.getAs<FunctionExitPoint>())
343 return FEP->getStmt();
344
345 return nullptr;
346}
347
349 for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
350 if (N->getLocation().isPurgeKind())
351 continue;
352 if (const Stmt *S = N->getStmtForDiagnostics()) {
353 // Check if the statement is '?' or '&&'/'||'. These are "merges",
354 // not actual statement points.
355 switch (S->getStmtClass()) {
356 case Stmt::ChooseExprClass:
357 case Stmt::BinaryConditionalOperatorClass:
358 case Stmt::ConditionalOperatorClass:
359 continue;
360 case Stmt::BinaryOperatorClass: {
361 BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode();
362 if (Op == BO_LAnd || Op == BO_LOr)
363 continue;
364 break;
365 }
366 default:
367 break;
368 }
369 // We found the statement, so return it.
370 return S;
371 }
372 }
373
374 return nullptr;
375}
376
378 for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
379 if (const Stmt *S = N->getStmtForDiagnostics(); S && !isa<CompoundStmt>(S))
380 return S;
381
382 return nullptr;
383}
384
386 if (const Stmt *S = getStmtForDiagnostics())
387 return S;
388
390}
391
393 ProgramStateRef State,
394 bool IsSink,
395 bool* IsNew) {
396 // Profile 'State' to determine if we already have an existing node.
397 llvm::FoldingSetNodeID profile;
398 void *InsertPos = nullptr;
399
400 NodeTy::Profile(profile, L, State, IsSink);
401 NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos);
402
403 if (!V) {
404 if (!FreeNodes.empty()) {
405 V = FreeNodes.back();
406 FreeNodes.pop_back();
407 }
408 else {
409 // Allocate a new node.
410 V = getAllocator().Allocate<NodeTy>();
411 }
412
413 ++NumNodes;
414 new (V) NodeTy(L, State, NumNodes, IsSink);
415
417 ChangedNodes.push_back(V);
418
419 // Insert the node into the node set and return it.
420 Nodes.InsertNode(V, InsertPos);
421
422 if (IsNew) *IsNew = true;
423 }
424 else
425 if (IsNew) *IsNew = false;
426
427 return V;
428}
429
431 ProgramStateRef State,
432 int64_t Id,
433 bool IsSink) {
434 NodeTy *V = getAllocator().Allocate<NodeTy>();
435 new (V) NodeTy(L, State, Id, IsSink);
436 return V;
437}
438
439std::unique_ptr<ExplodedGraph>
441 InterExplodedGraphMap *ForwardMap,
442 InterExplodedGraphMap *InverseMap) const {
443 // FIXME: The two-pass algorithm of this function (which was introduced in
444 // 2008) is terribly overcomplicated and should be replaced by a single
445 // (backward) pass.
446
447 if (Nodes.empty())
448 return nullptr;
449
450 using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
451 Pass1Ty Pass1;
452
453 using Pass2Ty = InterExplodedGraphMap;
454 InterExplodedGraphMap Pass2Scratch;
455 Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
456
458
459 // ===- Pass 1 (reverse DFS) -===
460 for (const auto Sink : Sinks)
461 if (Sink)
462 WL1.push_back(Sink);
463
464 // Process the first worklist until it is empty.
465 while (!WL1.empty()) {
466 const ExplodedNode *N = WL1.pop_back_val();
467
468 // Have we already visited this node? If so, continue to the next one.
469 if (!Pass1.insert(N).second)
470 continue;
471
472 // If this is the root enqueue it to the second worklist.
473 if (N->Preds.empty()) {
474 assert(N == getRoot() && "Found non-root node with no predecessors!");
475 WL2.push_back(N);
476 continue;
477 }
478
479 // Visit our predecessors and enqueue them.
480 WL1.append(N->Preds.begin(), N->Preds.end());
481 }
482
483 // We didn't hit the root? Return with a null pointer for the new graph.
484 if (WL2.empty())
485 return nullptr;
486
487 assert(WL2.size() == 1 && "There must be only one root!");
488
489 // Create an empty graph.
490 std::unique_ptr<ExplodedGraph> G = std::make_unique<ExplodedGraph>();
491
492 // ===- Pass 2 (forward DFS to construct the new graph) -===
493 while (!WL2.empty()) {
494 const ExplodedNode *N = WL2.pop_back_val();
495
496 auto [Place, Inserted] = Pass2.try_emplace(N);
497
498 // Skip this node if we have already processed it.
499 if (!Inserted)
500 continue;
501
502 // Create the corresponding node in the new graph and record the mapping
503 // from the old node to the new node.
504 ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State,
505 N->getID(), N->isSink());
506 Place->second = NewN;
507
508 // Also record the reverse mapping from the new node to the old node.
509 if (InverseMap) (*InverseMap)[NewN] = N;
510
511 // If this node is the root, designate it as such in the graph.
512 if (N->Preds.empty()) {
513 assert(N == getRoot());
514 G->designateAsRoot(NewN);
515 }
516
517 // In the case that some of the intended predecessors of NewN have already
518 // been created, we should hook them up as predecessors.
519
520 // Walk through the predecessors of 'N' and hook up their corresponding
521 // nodes in the new graph (if any) to the freshly created node.
522 for (const ExplodedNode *Pred : N->Preds) {
523 Pass2Ty::iterator PI = Pass2.find(Pred);
524 if (PI == Pass2.end())
525 continue;
526
527 NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G);
528 }
529
530 // In the case that some of the intended successors of NewN have already
531 // been created, we should hook them up as successors. Otherwise, enqueue
532 // the new nodes from the original graph that should have nodes created
533 // in the new graph.
534 for (const ExplodedNode *Succ : N->Succs) {
535 Pass2Ty::iterator PI = Pass2.find(Succ);
536 if (PI != Pass2.end()) {
537 const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G);
538 continue;
539 }
540
541 // Enqueue nodes to the worklist that were marked during pass 1.
542 if (Pass1.count(Succ))
543 WL2.push_back(Succ);
544 }
545 }
546
547 return G;
548}
#define V(N, I)
Definition: ASTContext.h:3597
StringRef P
llvm::PointerUnion< ExplodedNode *, ExplodedNodeVector * > GroupStorage
BumpVector< ExplodedNode * > ExplodedNodeVector
static const LocationContext * findTopAutosynthesizedParentContext(const LocationContext *LC)
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
uint32_t Id
Definition: SemaARM.cpp:1179
Represents a single basic block in a source-level CFG.
Definition: CFG.h:605
CFGBlock * getBlock(Stmt *S)
Returns the CFGBlock the specified Stmt* appears in.
Definition: CFGStmtMap.cpp:27
Represents a point when we begin processing an inlined call.
Definition: ProgramPoint.h:638
Represents a point when we start the call exit sequence (for inlined call).
Definition: ProgramPoint.h:676
Represents a point when we finish the call exit sequence (for inlined call).
Definition: ProgramPoint.h:696
This represents one expression.
Definition: Expr.h:112
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Definition: Expr.h:284
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
const ParentMap & getParentMap() const
LLVM_ATTRIBUTE_RETURNS_NONNULL AnalysisDeclContext * getAnalysisDeclContext() const
const LocationContext * getParent() const
It might return null.
bool isConsumedExpr(Expr *E) const
Definition: ParentMap.cpp:181
Represents a program point after a store evaluation.
Definition: ProgramPoint.h:436
Represents a program point just before an implicit call event.
Definition: ProgramPoint.h:589
Represents a point after we ran remove dead bindings BEFORE processing the given statement.
Definition: ProgramPoint.h:478
const ProgramPointTag * getTag() const
Definition: ProgramPoint.h:179
bool isPurgeKind()
Is this a program point corresponding to purge/removal of dead symbols and bindings.
Definition: ProgramPoint.h:173
T castAs() const
Convert to the specified ProgramPoint type, asserting that this ProgramPoint is of the desired type.
Definition: ProgramPoint.h:143
std::optional< T > getAs() const
Convert to the specified ProgramPoint type, returning std::nullopt if this ProgramPoint is not of the...
Definition: ProgramPoint.h:153
const LocationContext * getLocationContext() const
Definition: ProgramPoint.h:181
const Stmt * getStmt() const
Definition: ProgramPoint.h:284
Stmt - This represents one statement.
Definition: Stmt.h:85
static bool isCallStmt(const Stmt *S)
Returns true if this is a statement is a function or method call of some kind.
Definition: CallEvent.cpp:346
std::unique_ptr< ExplodedGraph > trim(ArrayRef< const NodeTy * > Nodes, InterExplodedGraphMap *ForwardMap=nullptr, InterExplodedGraphMap *InverseMap=nullptr) const
Creates a trimmed version of the graph that only contains paths leading to the given nodes.
BumpVectorContext & getNodeAllocator()
unsigned ReclaimCounter
Counter to determine when to reclaim nodes.
NodeVector ChangedNodes
A list of recently allocated nodes that can potentially be recycled.
int64_t NumNodes
NumNodes - The number of nodes in the graph.
unsigned ReclaimNodeInterval
Determines how often nodes are reclaimed.
void reclaimRecentlyAllocatedNodes()
Reclaim "uninteresting" nodes created since the last time this method was called.
static bool isInterestingLValueExpr(const Expr *Ex)
Returns true if nodes for the given expression kind are always kept around.
llvm::BumpPtrAllocator & getAllocator()
ExplodedNode * getNode(const ProgramPoint &L, ProgramStateRef State, bool IsSink=false, bool *IsNew=nullptr)
Retrieve the node associated with a (Location, State) pair, where the 'Location' is a ProgramPoint in...
ExplodedNode * getRoot() const
Get the root node of the graph.
ExplodedNode * createUncachedNode(const ProgramPoint &L, ProgramStateRef State, int64_t Id, bool IsSink=false)
Create a node for a (Location, State) pair, but don't store it for deduplication later.
llvm::FoldingSet< ExplodedNode > Nodes
Nodes - The nodes in the graph.
NodeVector FreeNodes
A list of nodes that can be reused.
const CFGBlock * getCFGBlock() const
const ProgramStateRef & getState() const
const Stmt * getStmtForDiagnostics() const
If the node's program point corresponds to a statement, retrieve that statement.
bool isTrivial() const
The node is trivial if it has only one successor, only one predecessor, it's predecessor has only one...
const Stmt * getPreviousStmtForDiagnostics() const
Find the statement that was executed immediately before this node.
ProgramPoint getLocation() const
getLocation - Returns the edge associated with the given node.
void addPredecessor(ExplodedNode *V, ExplodedGraph &G)
addPredeccessor - Adds a predecessor to the current node, and in tandem add this node as a successor ...
ExplodedNode * getFirstSucc()
unsigned pred_size() const
static void Profile(llvm::FoldingSetNodeID &ID, const ProgramPoint &Loc, const ProgramStateRef &state, bool IsSink)
const Stmt * getNextStmtForDiagnostics() const
Find the next statement that was executed on this node's execution path.
const LocationContext * getLocationContext() const
const Stmt * getCurrentOrPreviousStmtForDiagnostics() const
Find the statement that was executed at or immediately before this node.
ExplodedNode * getFirstPred()
unsigned succ_size() const
llvm::DenseMap< const ExplodedNode *, const ExplodedNode * > InterExplodedGraphMap
The JSON file list parser is used to communicate input to InstallAPI.
BinaryOperatorKind