Skip to content

C++: Use the new dataflow library in cpp/missing-check-scanf #12818

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 91 additions & 118 deletions cpp/ql/src/Critical/MissingCheckScanf.ql
Original file line number Diff line number Diff line change
Expand Up @@ -16,168 +16,141 @@
import cpp
import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.ir.dataflow.DataFlow
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering

/**
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
*
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
* of `call`, and `vn` is the value number of `instr.`
*/
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
output = call.getOutputArgument(index).getFullyConverted() and
instr.getConvertedResultExpression() = output and
vn.getAnInstruction() = instr
}

/**
* Holds if `instr` is control-flow reachable in 0 or more steps from
* a call to a `scanf`-like function.
*/
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */

pragma[nomagic]
predicate fwdFlow0(Instruction instr) {
isSource(_, _, instr, _, _)
predicate revFlow0(Node n) {
isSink(_, _, n, _)
or
exists(Instruction prev |
fwdFlow0(prev) and
prev.getASuccessor() = instr
)
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
}

/**
* Holds if `instr` is part of the IR translation of `access` that
* is not an expression being deallocated, and `instr` has value
* number `vn`.
* Holds if `n` represents an uninitialized stack-allocated variable, or a
* newly (and presumed uninitialized) heap allocation.
*/
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
instr.getAst() = access and
not any(DeallocationExpr dealloc).getFreedExpr() = access and
vn.getAnInstruction() = instr
predicate isUninitialized(Node n) {
exists(n.asUninitialized()) or
n.asIndirectExpr(1) instanceof AllocationExpr
}

/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* to a use of the written variable.
*/
pragma[nomagic]
predicate revFlow0(Instruction instr) {
fwdFlow0(instr) and
predicate fwdFlow0(Node n) {
revFlow0(n) and
(
isSink(instr, _, _)
isUninitialized(n)
or
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
exists(Node prev |
fwdFlow0(prev) and
localFlowStep(prev, n)
)
)
}

predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
input = call.getOutputArgument(index) and
n.asIndirectExpr() = input
}

/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
* argument that has not been previously initialized.
*/
pragma[nomagic]
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
revFlow0(instr) and
(
isSource(_, _, instr, vn, _)
or
exists(Instruction prev |
fwdFlow(prev, vn) and
prev.getASuccessor() = instr and
not isBarrier(instr, vn)
)
)
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
}

/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
*
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
* control-flow reachable from an argument to a `scanf`-like function call.
* Holds if `call` is a `scanf`-like function that may write to `output` at
* index `index` and `n` is the dataflow node that represents the data after
* it has been written to by `call`.
*/
pragma[nomagic]
predicate revFlow(Instruction instr, ValueNumber vn) {
fwdFlow(instr, pragma[only_bind_out](vn)) and
(
isSink(instr, _, vn)
or
exists(Instruction succ | revFlow(succ, vn) |
instr.getASuccessor() = succ and
not isBarrier(succ, vn)
)
)
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
isRelevantScanfCall(call, index, output) and
output = call.getOutputArgument(index) and
n.asDefiningArgument() = output
}

/**
* A type that bundles together a reachable instruction with the appropriate
* value number (i.e., the value number that's transferred from the source
* to the sink).
* Holds if `n` is reachable from an output argument of a relevant call to
* a `scanf`-like function.
*/
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }

class Node extends MkNode {
ValueNumber vn;
Instruction instr;

Node() { this = MkNode(instr, vn) }

final string toString() { result = instr.toString() }
pragma[nomagic]
predicate fwdFlow(Node n) {
isSource(_, _, n, _)
or
exists(Node prev |
fwdFlow(prev) and
localFlowStep(prev, n) and
not isSanitizerOut(prev)
)
}

final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
/** Holds if `n` should not have outgoing flow. */
predicate isSanitizerOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}

final Location getLocation() { result = instr.getLocation() }
/**
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
* argument of a deallocation expression.
*/
predicate isSink(Node n, Expr e) {
n.asExpr() = e and
not any(DeallocationExpr dealloc).getFreedExpr() = e
}

/**
* Holds if `instr` is an instruction with value number `vn` that is
* used in a store operation, or is overwritten by another call to
* a `scanf`-like function.
* Holds if `n` is part of a path from a call to a `scanf`-like function
* to a use of the written variable.
*/
private predicate isBarrier(Instruction instr, ValueNumber vn) {
// We only need to compute barriers for instructions that we
// managed to hit during the initial flow stage.
revFlow0(pragma[only_bind_into](instr)) and
valueNumber(instr) = vn and
exists(Expr e | instr.getAst() = e |
instr = any(StoreInstruction s).getDestinationAddress()
pragma[nomagic]
predicate revFlow(Node n) {
fwdFlow(n) and
(
isSink(n, _)
or
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
exists(Node succ |
revFlow(succ) and
localFlowStep(n, succ) and
not isSanitizerOut(n)
)
)
}

/** Holds if `n1` steps to `n2` in a single step. */
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }

predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
/** A local flow step, restricted to relevant dataflow nodes. */
private predicate step(Node n1, Node n2) {
revFlow(n1) and
revFlow(n2) and
localFlowStep(n1, n2)
}

Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)

/**
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
* an instruction that is part of the translation of `access` which is a transitive
* control-flow successor of `call`.
*
* Furthermore, `source` and `sink` have identical global value numbers.
* a dataflow node that represents the expression `e`.
*/
predicate hasFlow(
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
) {
exists(ValueNumber vn |
isSource(call, index, source, vn, _) and
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
isSink(sink, access, vn)
)
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
isSource(call, index, source, _) and
hasFlow(source, sink) and
isSink(sink, e)
}

/**
* Gets the smallest possible `scanf` return value of `call` that would indicate
* success in writing the output argument at index `index`.
*/
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
isSource(call, index, _, _, _) and
isSource(call, index, _, _) and
result =
index + 1 -
count(ScanfFormatLiteral f, int n |
Expand All @@ -191,7 +164,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
* at least `minGuard`.
*/
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
exists(int index |
hasFlow(_, call, index, _, e) and
minGuard = getMinimumGuardConstant(call, index)
Expand All @@ -211,7 +184,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and
value = left.getValue().toInt() and
DataFlow::localExprFlow(call, right)
localExprFlow(call, right)
|
g.ensuresEq(left, right, 0, result, true) and op = "=="
or
Expand All @@ -221,9 +194,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
)
}

from ScanfFunctionCall call, Access access, int minGuard
where hasNonGuardedAccess(call, access, minGuard)
select access,
from ScanfFunctionCall call, Expr e, int minGuard
where hasNonGuardedAccess(call, e, minGuard)
select e,
"This variable is read, but may not have been written. " +
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
call.toString()
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
Expand All @@ -12,13 +11,9 @@
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |
14 changes: 7 additions & 7 deletions cpp/ql/test/query-tests/Critical/MissingCheckScanf/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ int main()
int i = 0;

scanf("%d", &i);
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
use(i); // GOOD. Design choice: already initialized variables are fine.
}

{
Expand Down Expand Up @@ -261,23 +261,23 @@ int main()
i = 0;

scanf("%d", &i);
use(i); // BAD
use(i); // GOOD
}

{
int i;

set_by_ref(i);
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD [FALSE POSITIVE]
}

{
int i;

set_by_ptr(&i);
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD [FALSE POSITIVE]
}

{
Expand All @@ -299,15 +299,15 @@ int main()
int *ptr_i = &i;

scanf("%d", &i);
use(*ptr_i); // BAD: may not have written `i`
use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
}

{
int i;
int *ptr_i = &i;

scanf("%d", ptr_i);
use(i); // BAD: may not have written `*ptr_i`
use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
}

{
Expand Down Expand Up @@ -427,5 +427,5 @@ void scan_and_write() {
void scan_and_static_variable() {
static int i;
scanf("%d", &i);
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized
use(i); // GOOD: static variables are always 0-initialized
}