17#include "llvm/Support/MD5.h"
18#include "llvm/Support/Path.h"
23 unsigned StartIndex,
unsigned EndIndex)
24 : S(
Stmt),
D(
D), StartIndex(StartIndex), EndIndex(EndIndex) {
25 assert(
Stmt &&
"Stmt must not be a nullptr");
26 assert(StartIndex < EndIndex &&
"Given array should not be empty");
27 assert(EndIndex <= Stmt->
size() &&
"Given array too big for this Stmt");
31 : S(
Stmt),
D(
D), StartIndex(0), EndIndex(0) {}
34 : S(nullptr),
D(nullptr), StartIndex(0), EndIndex(0) {}
46 bool StartIsInBounds =
62 auto CS = cast<CompoundStmt>(S);
63 return CS->body_begin() + StartIndex;
70 auto CS = cast<CompoundStmt>(S);
71 return CS->body_begin() + EndIndex;
101 if (
Seq.contains(GroupSeq))
115 if (Group.size() < OtherGroup.size())
126 std::vector<CloneDetector::CloneGroup> &
Result) {
127 std::vector<unsigned> IndexesToRemove;
133 for (
unsigned i = 0; i <
Result.size(); ++i) {
134 for (
unsigned j = 0; j <
Result.size(); ++j) {
140 IndexesToRemove.push_back(i);
149 for (
unsigned I : llvm::reverse(IndexesToRemove))
161 StringRef
Filename = llvm::sys::path::filename(
162 SM.getFilename(S.getContainingDecl()->getLocation()));
179class CloneTypeIIStmtDataCollector
185 template <
class Ty>
void addData(
const Ty &
Data) {
192 : Context(Context), DataConsumer(DataConsumer) {
199#define DEF_ADD_DATA(CLASS, CODE) \
200 template <class = void> void Visit##CLASS(const CLASS *S) { \
202 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
205#include "clang/AST/StmtDataCollectors.inc"
209 void Visit##CLASS(const CLASS *S) { \
210 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
227 llvm::MD5::MD5Result HashResult;
228 Hash.final(HashResult);
232 std::memcpy(&HashCode, &HashResult,
233 std::min(
sizeof(HashCode),
sizeof(HashResult)));
249 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
253 CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
255 auto CS = dyn_cast<CompoundStmt>(S);
258 for (
const Stmt *Child : S->children()) {
259 if (Child ==
nullptr) {
260 ChildHashes.push_back(0);
263 size_t ChildHash =
saveHash(Child,
D, StmtsByHash);
265 StringRef(
reinterpret_cast<char *
>(&ChildHash),
sizeof(ChildHash)));
266 ChildHashes.push_back(ChildHash);
273 for (
unsigned Pos = 0; Pos < CS->size(); ++Pos) {
278 for (
unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
281 size_t ChildHash = ChildHashes[Pos + Length - 1];
283 StringRef(
reinterpret_cast<char *
>(&ChildHash),
sizeof(ChildHash)));
287 llvm::MD5 SubHash = Hash;
288 StmtsByHash.push_back(std::make_pair(
296 StmtsByHash.push_back(std::make_pair(HashCode,
StmtSequence(S,
D)));
303class FoldingSetNodeIDWrapper {
305 llvm::FoldingSetNodeID &FS;
308 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
310 void update(StringRef Str) { FS.AddString(Str); }
317 FoldingSetNodeIDWrapper &OutputData) {
318 for (
const Stmt *S : Sequence) {
319 CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
322 for (
const Stmt *Child : S->children()) {
339 llvm::FoldingSetNodeID DataLHS, DataRHS;
340 FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
341 FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
346 return DataLHS == DataRHS;
350 std::vector<CloneDetector::CloneGroup> &Sequences) {
352 std::vector<CloneDetector::CloneGroup>
Result;
360 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
364 saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
368 llvm::stable_sort(StmtsByHash, llvm::less_first());
374 for (
unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
375 const auto Current = StmtsByHash[i];
382 size_t PrototypeHash = Current.first;
384 for (; i < StmtsByHash.size(); ++i) {
386 if (PrototypeHash != StmtsByHash[i].first) {
397 NewGroup.push_back(StmtsByHash[i].second);
402 Result.push_back(NewGroup);
410 std::vector<CloneDetector::CloneGroup> &Sequences) {
419 const std::string &ParentMacroStack) {
423 size_t Complexity = 1;
428 std::string MacroStack =
439 if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {
445 if (
Seq.holdsSequence()) {
449 if (Complexity >= Limit)
453 for (
const Stmt *S :
Seq.front()->children()) {
456 if (Complexity >= Limit)
464 std::vector<CloneDetector::CloneGroup> &CloneGroups) {
474 std::vector<CloneDetector::CloneGroup> &CloneGroups,
477 std::vector<CloneDetector::CloneGroup>
Result;
478 for (
auto &HashGroup : CloneGroups) {
481 std::vector<char> Indexes;
482 Indexes.resize(HashGroup.size());
484 for (
unsigned i = 0; i < HashGroup.size(); ++i) {
498 for (
unsigned j = i + 1; j < HashGroup.size(); ++j) {
509 PotentialGroup.push_back(Candidate);
516 Result.push_back(PotentialGroup);
519 assert(llvm::all_of(Indexes, [](
char c) {
return c == 1; }));
525 const Stmt *Mention) {
527 for (
size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
528 if (Variables[KindIndex] ==
VarDecl) {
531 Occurences.emplace_back(KindIndex, Mention);
537 Occurences.emplace_back(Variables.size(), Mention);
541void VariablePattern::addVariables(
const Stmt *S) {
549 if (
auto D = dyn_cast<DeclRefExpr>(S)) {
551 addVariableOccurence(VD,
D);
555 for (
const Stmt *Child : S->children()) {
563 unsigned NumberOfDifferences = 0;
565 assert(
Other.Occurences.size() == Occurences.size());
566 for (
unsigned i = 0; i < Occurences.size(); ++i) {
567 auto ThisOccurence = Occurences[i];
568 auto OtherOccurence =
Other.Occurences[i];
569 if (ThisOccurence.KindID == OtherOccurence.KindID)
572 ++NumberOfDifferences;
576 if (FirstMismatch ==
nullptr)
581 if (NumberOfDifferences != 1)
584 const VarDecl *FirstSuggestion =
nullptr;
588 if (OtherOccurence.KindID < Variables.size())
589 FirstSuggestion = Variables[OtherOccurence.KindID];
594 Variables[ThisOccurence.KindID], ThisOccurence.Mention,
600 const VarDecl *SecondSuggestion =
nullptr;
601 if (ThisOccurence.KindID <
Other.Variables.size())
602 SecondSuggestion =
Other.Variables[ThisOccurence.KindID];
607 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
622 return NumberOfDifferences;
static bool containsAnyInGroup(StmtSequence &Seq, CloneDetector::CloneGroup &Group)
Returns true if and only if Seq contains at least one other sequence in the Group.
static size_t createHash(llvm::MD5 &Hash)
static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData)
Writes the relevant data from all statements and child statements in the given StmtSequence into the ...
static size_t saveHash(const Stmt *S, const Decl *D, std::vector< std::pair< size_t, StmtSequence > > &StmtsByHash)
Generates and saves a hash code for the given Stmt.
static bool containsGroup(CloneDetector::CloneGroup &Group, CloneDetector::CloneGroup &OtherGroup)
Returns true if and only if all sequences in OtherGroup are contained by a sequence in Group.
static bool areSequencesClones(const StmtSequence &LHS, const StmtSequence &RHS)
Returns true if both sequences are clones of each other.
This file defines classes for searching and analyzing source code clones.
This file declares helper methods for collecting data from AST nodes.
Defines the SourceManager interface.
__device__ __2f16 float c
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceManager & getSourceManager()
A boolean literal, per ([C++ lex.bool] Boolean literals).
static void splitCloneGroups(std::vector< CloneDetector::CloneGroup > &CloneGroups, llvm::function_ref< bool(const StmtSequence &, const StmtSequence &)> Compare)
Splits the given CloneGroups until the given Compare function returns true for all clones in a single...
void analyzeCodeBody(const Decl *D)
Generates and stores search data for all statements in the body of the given Decl.
CompoundStmt - This represents a group of statements like { stmt stmt }.
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
A reference to a declared variable, function, enum, etc.
Decl - This represents one declaration (or definition), e.g.
ASTContext & getASTContext() const LLVM_READONLY
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit, const std::string &ParentMacroStack="")
Calculates the complexity of the given StmtSequence.
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
ASTContext & getASTContext() const
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Identifies a list of statements.
bool contains(const StmtSequence &Other) const
Returns true if and only if this sequence covers a source range that contains the source range of the...
iterator begin() const
Returns an iterator pointing to the first statement in this sequence.
const Stmt *const * iterator
const Decl * getContainingDecl() const
Returns the declaration that contains the stored Stmts.
StmtSequence()
Constructs an empty StmtSequence.
ASTContext & getASTContext() const
Returns the related ASTContext for the stored Stmts.
unsigned size() const
Returns the number of statements this object holds.
iterator end() const
Returns an iterator pointing behind the last statement in this sequence.
SourceLocation getEndLoc() const
Returns the end source location of the last statement in this sequence.
const Stmt * front() const
Returns the first statement in this sequence.
bool holdsSequence() const
Returns true if this object holds a list of statements.
SourceLocation getBeginLoc() const
Returns the start source location of the first statement in this sequence.
SourceRange getSourceRange() const
Returns the source range of the whole sequence - from the beginning of the first statement to the end...
const Stmt * back() const
Returns the last statement in this sequence.
Stmt - This represents one statement.
SourceLocation getEndLoc() const LLVM_READONLY
SourceLocation getBeginLoc() const LLVM_READONLY
StringLiteral - This represents a string literal expression, e.g.
Represents a variable declaration or definition.
Analyzes the pattern of the referenced variables in a statement.
unsigned countPatternDifferences(const VariablePattern &Other, VariablePattern::SuspiciousClonePair *FirstMismatch=nullptr)
Counts the differences between this pattern and the given one.
void addDataToConsumer(T &DataConsumer, llvm::StringRef Str)
Utility functions for implementing addData() for a consumer that has a method update(StringRef)
std::string getMacroStack(SourceLocation Loc, ASTContext &Context)
Returns a string that represents all macro expansions that expanded into the given SourceLocation.
The JSON file list parser is used to communicate input to InstallAPI.
@ Seq
'seq' clause, allowed on 'loop' and 'routine' directives.
@ Result
The result type of a method or function.
const FunctionProtoType * T
@ Other
Other implicit parameter.
std::shared_ptr< llvm::Regex > IgnoredFilesRegex
bool isAutoGenerated(const CloneDetector::CloneGroup &Group)
StringRef IgnoredFilesPattern
void constrain(std::vector< CloneDetector::CloneGroup > &CloneGroups)
void constrain(std::vector< CloneDetector::CloneGroup > &Result)
Utility class holding the relevant information about a single clone in this pair.
const VarDecl * Suggestion
The variable that should have been referenced to follow the pattern.
Describes two clones that reference their variables in a different pattern which could indicate a pro...
SuspiciousCloneInfo SecondCloneInfo
The other clone in the pair, which can have a suggested variable.
SuspiciousCloneInfo FirstCloneInfo
The first clone in the pair which always has a suggested variable.