25#include "llvm/ADT/APSInt.h"
26#include "llvm/Support/FormatVariadic.h"
27#include "llvm/Support/raw_ostream.h"
43 const auto *ASE = dyn_cast<ArraySubscriptExpr>(
E);
47 const MemRegion *SubscriptBaseReg =
C.getSVal(ASE->getBase()).getAsRegion();
48 if (!SubscriptBaseReg)
54 if (isa<ElementRegion>(SubscriptBaseReg->
StripCasts()))
63static std::optional<QualType> determineElementType(
const Expr *
E,
65 const auto *ASE = getAsCleanArraySubscriptExpr(
E,
C);
69 return ASE->getType();
/// Computes the size of the element type \p T.
///
/// The visible return statement dereferences \p T and yields
/// ASTContext::getTypeSizeInChars(*T).getQuantity(), i.e. the raw integer
/// value of the type's size in characters.
///
/// NOTE(review): the statements between the signature and this return are not
/// shown in this listing. Since *T is dereferenced here, they presumably
/// return std::nullopt when \p T holds no type -- confirm.
72static std::optional<int64_t>
73determineElementSize(
const std::optional<QualType>
T,
const CheckerContext &
C) {
76 return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
79class StateUpdateReporter {
82 const NonLoc ByteOffsetVal;
83 const std::optional<QualType> ElementType;
84 const std::optional<int64_t> ElementSize;
85 bool AssumedNonNegative =
false;
86 std::optional<NonLoc> AssumedUpperBound = std::nullopt;
91 : Space(R->getMemorySpace(
C.getState())), Reg(R),
92 ByteOffsetVal(ByteOffsVal), ElementType(determineElementType(
E,
C)),
93 ElementSize(determineElementSize(ElementType,
C)) {}
/// Records that a non-negativity assumption was made: switches the
/// AssumedNonNegative flag (which is initialized to false) to true.
95 void recordNonNegativeAssumption() { AssumedNonNegative =
true; }
96 void recordUpperBoundAssumption(
NonLoc UpperBoundVal) {
97 AssumedUpperBound = UpperBoundVal;
/// Returns the current value of the AssumedNonNegative flag.
/// NOTE(review): the body only reads the member, so this accessor could be
/// const-qualified.
100 bool assumedNonNegative() {
return AssumedNonNegative; }
121 static bool providesInformationAboutInteresting(
SymbolRef Sym,
123 static bool providesInformationAboutInteresting(
SVal SV,
125 return providesInformationAboutInteresting(SV.
getAsSymbol(), BR);
140class ArrayBoundChecker :
public Checker<check::PostStmt<ArraySubscriptExpr>,
141 check::PostStmt<UnaryOperator>,
142 check::PostStmt<MemberExpr>> {
143 BugType BT{
this,
"Out-of-bound access"};
149 NonLoc Offset, std::optional<NonLoc> Extent,
150 bool IsTaintBug =
false)
const;
170 if (
E->getOpcode() == UO_Deref)
175 performCheck(
E->getBase(),
C);
186static std::optional<std::pair<const SubRegion *, NonLoc>>
198 dyn_cast_or_null<ElementRegion>(Location.
getAsRegion());
216 auto Delta = EvalBinOp(BO_Mul, *Index, Size);
221 Offset = EvalBinOp(BO_Add, *Offset, *Delta);
228 CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
232 return std::make_pair(OwnerRegion, *Offset);
257static std::pair<NonLoc, nonloc::ConcreteInt>
260 const llvm::APSInt &extentVal = extent.
getValue();
262 if (SymVal && SymVal->isExpression()) {
263 if (
const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
265 switch (SIE->getOpcode()) {
269 if ((extentVal % constant) != 0)
270 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
274 svalBuilder.
makeIntVal(extentVal / constant), svalBuilder);
278 svalBuilder.
makeIntVal(extentVal - constant), svalBuilder);
285 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
290 return MaxV && MaxV->isNegative();
305static std::pair<ProgramStateRef, ProgramStateRef>
309 std::tie(
Value, Threshold) =
323 return {
nullptr, State};
326 return {State,
nullptr};
331 return {
nullptr, State};
345 auto BelowThreshold =
350 return State->assume(*BelowThreshold);
352 return {
nullptr,
nullptr};
363 if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
364 return formatv(
"the field '{0}'", Name);
365 return "the unnamed field";
368 if (isa<AllocaRegion>(Region))
369 return "the memory returned by 'alloca'";
371 if (isa<SymbolicRegion>(Region) && isa<HeapSpaceRegion>(Space))
372 return "the heap area";
374 if (isa<StringRegion>(Region))
375 return "the string literal";
382 return ConcreteVal->getValue()->tryExtValue();
393 std::string RegName =
getRegionName(Space, Region), OffsetStr =
"";
396 OffsetStr = formatv(
" {0}", ConcreteOffset);
399 formatv(
"Out of bound access to memory preceding {0}", RegName),
400 formatv(
"Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
408 std::optional<int64_t> &Val2, int64_t Divisor) {
411 const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
412 const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
413 if (Val1HasRemainder || Val2HasRemainder)
425 bool AlsoMentionUnderflow) {
428 assert(EReg &&
"this checker only handles element access");
429 QualType ElemType = EReg->getElementType();
436 bool UseByteOffsets = !
tryDividePair(OffsetN, ExtentN, ElemSize);
437 const char *OffsetOrIndex = UseByteOffsets ?
"byte offset" :
"index";
440 llvm::raw_svector_ostream Out(Buf);
442 if (!ExtentN && !UseByteOffsets)
443 Out <<
"'" << ElemType.
getAsString() <<
"' element in ";
444 Out << RegName <<
" at ";
445 if (AlsoMentionUnderflow) {
446 Out <<
"a negative or overflowing " << OffsetOrIndex;
447 }
else if (OffsetN) {
448 Out << OffsetOrIndex <<
" " << *OffsetN;
450 Out <<
"an overflowing " << OffsetOrIndex;
453 Out <<
", while it holds only ";
461 Out <<
" '" << ElemType.
getAsString() <<
"' element";
467 return {formatv(
"Out of bound access to memory {0} {1}",
468 AlsoMentionUnderflow ?
"around" :
"after the end of",
474 const SubRegion *Region,
const char *OffsetName,
475 bool AlsoMentionUnderflow) {
477 return {formatv(
"Potential out of bound access to {0} with tainted {1}",
478 RegName, OffsetName),
479 formatv(
"Access of {0} with a tainted {1} that may be {2}too large",
481 AlsoMentionUnderflow ?
"negative or " :
"")};
486 if (!AssumedNonNegative && !AssumedUpperBound)
490 return getMessage(BR);
495 bool ShouldReportNonNegative = AssumedNonNegative;
496 if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
497 if (AssumedUpperBound &&
498 providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
502 ShouldReportNonNegative =
false;
512 const bool UseIndex =
513 ElementSize &&
tryDividePair(OffsetN, ExtentN, *ElementSize);
516 llvm::raw_svector_ostream Out(Buf);
521 Out <<
"'" << OffsetN <<
"' ";
522 }
else if (AssumedUpperBound) {
523 Out <<
"byte offset ";
525 Out <<
"'" << OffsetN <<
"' ";
531 if (ShouldReportNonNegative) {
532 Out <<
" non-negative";
534 if (AssumedUpperBound) {
535 if (ShouldReportNonNegative)
537 Out <<
" less than ";
539 Out << *ExtentN <<
", ";
540 if (UseIndex && ElementType)
541 Out <<
"the number of '" << ElementType->getAsString()
544 Out <<
"the extent of ";
547 return std::string(Out.str());
550bool StateUpdateReporter::providesInformationAboutInteresting(
562 if (isa<SymSymExpr>(PartSym))
569 const SVal Location =
C.getSVal(
E);
576 if (isFromCtypeMacro(
E,
C.getASTContext()))
582 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
588 auto [Reg, ByteOffset] = *RawOffset;
592 StateUpdateReporter SUR(Reg, ByteOffset,
E,
C);
596 if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
607 if (PrecedesLowerBound) {
610 if (isOffsetObviouslyNonnegative(
E,
C)) {
627 if (!WithinLowerBound) {
636 if (!WithinLowerBound) {
639 reportOOB(
C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
644 SUR.recordNonNegativeAssumption();
651 if (WithinLowerBound)
652 State = WithinLowerBound;
663 bool AlsoMentionUnderflow = SUR.assumedNonNegative();
665 auto [WithinUpperBound, ExceedsUpperBound] =
668 if (ExceedsUpperBound) {
670 if (!WithinUpperBound) {
674 if (isIdiomaticPastTheEndPtr(
E, ExceedsUpperBound, ByteOffset,
676 C.addTransition(ExceedsUpperBound, SUR.createNoteTag(
C));
682 *KnownSize, Location, AlsoMentionUnderflow);
683 reportOOB(
C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
693 const char *OffsetName =
"offset";
694 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(
E))
695 if (
isTainted(State, ASE->getIdx(),
C.getLocationContext()))
696 OffsetName =
"index";
699 getTaintMsgs(Space, Reg, OffsetName, AlsoMentionUnderflow);
700 reportOOB(
C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
706 SUR.recordUpperBoundAssumption(*KnownSize);
712 if (WithinUpperBound)
713 State = WithinUpperBound;
717 C.addTransition(State, SUR.createNoteTag(
C));
722 NonLoc Val,
bool MarkTaint) {
744 Messages Msgs,
NonLoc Offset,
745 std::optional<NonLoc> Extent,
746 bool IsTaintBug )
const {
752 auto BR = std::make_unique<PathSensitiveBugReport>(
753 IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
768 markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);
770 markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);
772 C.emitReport(std::move(BR));
/// Decides whether the expression \p E originates from one of the
/// character-classification macros of <ctype.h>.
///
/// \param E    Expression being examined.
/// \param ACtx AST context. NOTE(review): presumably used to look up the name
///             of the macro that produced \p E; that lookup is not visible in
///             this listing -- confirm.
/// \returns true when MacroName is exactly one of isalnum, isalpha, isblank,
///          isdigit, isgraph, islower, iscntrl, isprint, ispunct, isspace,
///          isupper or isxdigit.
775bool ArrayBoundChecker::isFromCtypeMacro(
const Expr *
E,
ASTContext &ACtx) {
// Guard: the location must come from a macro expansion.
// NOTE(review): the statement executed when this guard fires is not shown
// here; presumably `return false` -- confirm.
777 if (!
Loc.isMacroID())
// Cheap pre-filter: every accepted name below has at least 7 characters and
// starts with "is".
783 if (MacroName.size() < 7 || MacroName[0] !=
'i' || MacroName[1] !=
's')
// Exact match against the <ctype.h> classification names. The
// control-character test is spelled "iscntrl" in the C library; the
// transposed spelling "isnctrl" names no macro and could never match.
786 return ((MacroName ==
"isalnum") || (MacroName ==
"isalpha") ||
787 (MacroName ==
"isblank") || (MacroName ==
"isdigit") ||
788 (MacroName ==
"isgraph") || (MacroName ==
"islower") ||
789 (MacroName ==
"iscntrl") || (MacroName ==
"isprint") ||
790 (MacroName ==
"ispunct") || (MacroName ==
"isspace") ||
791 (MacroName ==
"isupper") || (MacroName ==
"isxdigit"));
794bool ArrayBoundChecker::isOffsetObviouslyNonnegative(
const Expr *
E,
/// Reports whether the statement \p S is the operand of a unary address-of
/// operator, looking through parentheses and implicit casts.
///
/// The visible loop tail replaces S with Parents[0].get<Stmt>() and repeats
/// while the node reached is a non-null ParenExpr or ImplicitCastExpr. The
/// result is true only if the node the loop stops on is a UnaryOperator whose
/// opcode is UO_AddrOf; a null node or any other statement kind yields false.
///
/// NOTE(review): how Parents is obtained from \p ACtx is not shown in this
/// listing -- presumably via the AST context's parent map; confirm.
802bool ArrayBoundChecker::isInAddressOf(
const Stmt *S,
ASTContext &ACtx) {
808 S = Parents[0].get<
Stmt>();
809 }
// Keep climbing while the current node is a parenthesis or an implicit cast.
while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
// dyn_cast_or_null tolerates S having become null during the climb.
810 const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);
811 return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
814bool ArrayBoundChecker::isIdiomaticPastTheEndPtr(
const Expr *
E,
818 if (isa<ArraySubscriptExpr>(
E) && isInAddressOf(
E,
C.getASTContext())) {
820 State, Offset, Limit,
C.getSValBuilder(),
true);
821 return EqualsToThreshold && !NotEqualToThreshold;
830bool ento::shouldRegisterArrayBoundChecker(
const CheckerManager &mgr) {
static std::pair< ProgramStateRef, ProgramStateRef > compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, SValBuilder &SVB, bool CheckEquality=false)
static std::string getRegionName(const MemSpaceRegion *Space, const SubRegion *Region)
static std::optional< std::pair< const SubRegion *, NonLoc > > computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location)
For a given Location that can be represented as a symbolic expression Arr[Idx] (or perhaps Arr[Idx1][...
static Messages getPrecedesMsgs(const MemSpaceRegion *Space, const SubRegion *Region, NonLoc Offset)
static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value)
static std::optional< int64_t > getConcreteValue(NonLoc SV)
static Messages getTaintMsgs(const MemSpaceRegion *Space, const SubRegion *Region, const char *OffsetName, bool AlsoMentionUnderflow)
static bool isUnsigned(SValBuilder &SVB, NonLoc Value)
static std::pair< NonLoc, nonloc::ConcreteInt > getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder)
static bool tryDividePair(std::optional< int64_t > &Val1, std::optional< int64_t > &Val2, int64_t Divisor)
Try to divide Val1 and Val2 (in place) by Divisor and return true if it can be performed (Divisor is ...
static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space, const SubRegion *Region, NonLoc Offset, NonLoc Extent, SVal Location, bool AlsoMentionUnderflow)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceManager & getSourceManager()
ParentMapContext & getParentMapContext()
Returns the dynamic AST node parent map context.
const LangOptions & getLangOpts() const
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Container for either a single DynTypedNode or for an ArrayRef to DynTypedNode.
This represents one expression.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
DynTypedNodeList getParents(const NodeT &Node)
Returns the parents of the given node (within the traversal scope).
A (possibly-)qualified type.
static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)
Encodes a location in the source.
Stmt - This represents one statement.
SourceLocation getBeginLoc() const LLVM_READONLY
bool isUnsignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is unsigned or an enumeration type whose underlying ...
bool isIncompleteType(NamedDecl **Def=nullptr) const
Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
A record of the "type" of an APSInt, used for conversions.
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Template implementation for all binary symbolic expressions.
CHECKER * registerChecker(AT &&...Args)
Register a single-part checker (derived from Checker): construct its singleton instance,...
Simple checker classes that implement one frontend (i.e.
ElementRegion is used to represent both array elements and casts.
QualType getElementType() const
MemRegion - The root abstract class for all memory regions.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace(ProgramStateRef State) const
Returns the most specific memory space for this memory region in the given ProgramStateRef.
std::string getDescriptiveName(bool UseQuotes=true) const
Get descriptive name for memory region.
const RegionTy * getAs() const
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
NonLoc makeArrayIndex(uint64_t idx)
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
QualType getConditionType() const
virtual const llvm::APSInt * getMaxValue(ProgramStateRef state, SVal val)=0
Tries to get the maximal possible (integer) value of a given SVal.
NonLoc makeZeroArrayIndex()
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
const MemRegion * getAsRegion() const
SubRegion - A region that subsets another larger region.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
llvm::iterator_range< symbol_iterator > symbols() const
Value representing integer constant.
APSIntPtr getValue() const
Represents symbolic expression that isn't a location.
const char *const TaintedData
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T