diff --git a/.gitignore b/.gitignore index 7b8532b00d2c..cfea60b51a16 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,9 @@ .vs/* !.vs/VSWorkspaceSettings.json +# VSCode temporaries +.vscode/*.log + # Byte-compiled python files *.pyc diff --git a/cpp/ql/src/Critical/MissingCheckScanf.ql b/cpp/ql/src/Critical/MissingCheckScanf.ql new file mode 100644 index 000000000000..8dcde98633f3 --- /dev/null +++ b/cpp/ql/src/Critical/MissingCheckScanf.ql @@ -0,0 +1,122 @@ +/** + * @name Missing return-value check for a scanf-like function + * @description TODO + * @kind problem + * @problem.severity warning + * @security-severity TODO + * @precision TODO + * @id cpp/missing-check-scanf + * @tags security + */ + +import cpp +import semmle.code.cpp.commons.Scanf +import semmle.code.cpp.controlflow.Guards +import semmle.code.cpp.dataflow.DataFlow +import semmle.code.cpp.ir.IR +import semmle.code.cpp.ir.ValueNumbering + +/** An expression appearing as an output argument to a `scanf`-like call */ +class ScanfOutput extends Expr { + ScanfFunctionCall call; + int varargIndex; + Instruction instr; + ValueNumber valNum; + + ScanfOutput() { + this = call.getArgument(call.getTarget().getNumberOfParameters() + varargIndex) and + varargIndex >= 0 and + instr.getUnconvertedResultExpression() = this and + valueNumber(instr) = valNum and + // The following line is a kludge to prohibit more than one associated `instr` field, + // as would occur, for example, when `this` is an access to an array variable. + not instr instanceof ConvertInstruction + } + + ScanfFunctionCall getCall() { result = call } + + /** + * Any subsequent use of this argument should be surrounded by a + * check ensuring that the `scanf`-like function has returned a value + * equal to at least `getMinimumGuardConstant()`. + */ + int getMinimumGuardConstant() { + result = + varargIndex + 1 - + count(ScanfFormatLiteral f, int n | + n <= varargIndex and f.getUse() = call and f.parseConvSpec(n, _, _, _, "n") + ) + } + + predicate hasGuardedAccess(Access e, boolean isGuarded) { + e = this.getAnAccess() and + if + exists(int value, int minGuard | minGuard = this.getMinimumGuardConstant() | + e.getBasicBlock() = blockGuardedBy(value, "==", call) and minGuard <= value + or + e.getBasicBlock() = blockGuardedBy(value, "<", call) and minGuard - 1 <= value + or + e.getBasicBlock() = blockGuardedBy(value, "<=", call) and minGuard <= value + ) + then isGuarded = true + else isGuarded = false + } + + /** + * Get a subsequent access of the same underlying storage, + * but before it gets reset or reused in another `scanf` call. + */ + Access getAnAccess() { + exists(Instruction dst | + this.bigStep(instr) = dst and + dst.getAst() = result and + valueNumber(dst) = valNum + ) + } + + private Instruction bigStep(Instruction i) { + result = this.smallStep(i) + or + exists(Instruction j | j = this.bigStep(i) | result = this.smallStep(j)) + } + + private Instruction smallStep(Instruction i) { + instr.getASuccessor*() = i and + i.getASuccessor() = result and + not this.isBarrier(result) + } + + private predicate isBarrier(Instruction i) { + valueNumber(i) = valNum and + exists(Expr e | i.getAst() = e | + i = any(StoreInstruction s).getDestinationAddress() + or + [e, e.getParent().(AddressOfExpr)] instanceof ScanfOutput + ) + } +} + +/** Returns a block guarded by the assertion of $value $op $call */ +BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) { + exists(GuardCondition g, Expr left, Expr right | + right = g.getAChild() and + value = left.getValue().toInt() and + DataFlow::localExprFlow(call, right) + | + g.ensuresEq(left, right, 0, result, true) and op = "==" + or + g.ensuresLt(left, right, 0, result, true) and op = "<" + or + g.ensuresLt(left, right, 1, result, true) and op = "<=" + ) +} + +from ScanfOutput output, ScanfFunctionCall call, ScanfFunction fun, Access access +where + call.getTarget() = fun and + output.getCall() = call and + output.hasGuardedAccess(access, false) +select access, + "$@ is read here, but may not have been written. " + + "It should be guarded by a check that $@() returns at least " + output.getMinimumGuardConstant() + + ".", access, access.toString(), call, fun.getName() diff --git a/cpp/ql/test/query-tests/Critical/MissingCheckScanf/.clang-format b/cpp/ql/test/query-tests/Critical/MissingCheckScanf/.clang-format new file mode 100644 index 000000000000..e3845288a2ae --- /dev/null +++ b/cpp/ql/test/query-tests/Critical/MissingCheckScanf/.clang-format @@ -0,0 +1 @@ +DisableFormat: true diff --git a/cpp/ql/test/query-tests/Critical/MissingCheckScanf/MissingCheckScanf.expected b/cpp/ql/test/query-tests/Critical/MissingCheckScanf/MissingCheckScanf.expected new file mode 100644 index 000000000000..cb36c25b0e94 --- /dev/null +++ b/cpp/ql/test/query-tests/Critical/MissingCheckScanf/MissingCheckScanf.expected @@ -0,0 +1,19 @@ +| test.cpp:30:7:30:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:30:7:30:7 | i | i | test.cpp:29:3:29:7 | call to scanf | scanf | +| test.cpp:46:7:46:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:46:7:46:7 | i | i | test.cpp:45:3:45:7 | call to scanf | scanf | +| test.cpp:63:7:63:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:63:7:63:7 | i | i | test.cpp:62:3:62:7 | call to scanf | scanf | +| test.cpp:75:7:75:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:75:7:75:7 | i | i | test.cpp:74:3:74:7 | call to scanf | scanf | +| test.cpp:87:7:87:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:87:7:87:7 | i | i | test.cpp:86:3:86:8 | call to fscanf | fscanf | +| test.cpp:94:7:94:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:94:7:94:7 | i | i | test.cpp:93:3:93:8 | call to sscanf | sscanf | +| test.cpp:143:8:143:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:143:8:143:8 | i | i | test.cpp:141:7:141:11 | call to scanf | scanf | +| test.cpp:152:8:152:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:152:8:152:8 | i | i | test.cpp:150:7:150:11 | call to scanf | scanf | +| test.cpp:184:8:184:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:184:8:184:8 | i | i | test.cpp:183:7:183:11 | call to scanf | scanf | +| test.cpp:203:8:203:8 | j | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 2. | test.cpp:203:8:203:8 | j | j | test.cpp:200:7:200:11 | call to scanf | scanf | +| test.cpp:227:9:227:9 | d | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 2. | test.cpp:227:9:227:9 | d | d | test.cpp:225:25:225:29 | call to scanf | scanf | +| test.cpp:231:9:231:9 | d | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 2. | test.cpp:231:9:231:9 | d | d | test.cpp:229:14:229:18 | call to scanf | scanf | +| test.cpp:243:7:243:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:243:7:243:7 | i | i | test.cpp:242:3:242:7 | call to scanf | scanf | +| test.cpp:251:7:251:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:251:7:251:7 | i | i | test.cpp:250:3:250:7 | call to scanf | scanf | +| test.cpp:259:7:259:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:259:7:259:7 | i | i | test.cpp:258:3:258:7 | call to scanf | scanf | +| test.cpp:271:7:271:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:271:7:271:7 | i | i | test.cpp:270:3:270:7 | call to scanf | scanf | +| test.cpp:281:8:281:12 | ptr_i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:281:8:281:12 | ptr_i | ptr_i | test.cpp:280:3:280:7 | call to scanf | scanf | +| test.cpp:289:7:289:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:289:7:289:7 | i | i | test.cpp:288:3:288:7 | call to scanf | scanf | +| test.cpp:383:25:383:25 | u | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:383:25:383:25 | u | u | test.cpp:382:6:382:11 | call to sscanf | sscanf | diff --git a/cpp/ql/test/query-tests/Critical/MissingCheckScanf/MissingCheckScanf.qlref b/cpp/ql/test/query-tests/Critical/MissingCheckScanf/MissingCheckScanf.qlref new file mode 100644 index 000000000000..97e85b5abbea --- /dev/null +++ b/cpp/ql/test/query-tests/Critical/MissingCheckScanf/MissingCheckScanf.qlref @@ -0,0 +1 @@ +Critical/MissingCheckScanf.ql \ No newline at end of file diff --git a/cpp/ql/test/query-tests/Critical/MissingCheckScanf/test.cpp b/cpp/ql/test/query-tests/Critical/MissingCheckScanf/test.cpp new file mode 100644 index 000000000000..087999a41b8e --- /dev/null +++ b/cpp/ql/test/query-tests/Critical/MissingCheckScanf/test.cpp @@ -0,0 +1,387 @@ +typedef struct +{ +} FILE; + +typedef void *locale_t; + +int scanf(const char *format, ...); +int fscanf(FILE *stream, const char *format, ...); +int sscanf(const char *s, const char *format, ...); +int _scanf_l(const char *format, locale_t locale, ...); + +void use(int i); + +void set_by_ref(int &i); +void set_by_ptr(int *i); +bool maybe(); + +FILE *get_a_stream(); +const char *get_a_string(); +extern locale_t get_a_locale(); + +int main() +{ + // --- simple cases --- + + { + int i; + + scanf("%d", &i); + use(i); // BAD: may not have written `i` + } + + { + int i; + + if (scanf("%d", &i) == 1) + { + use(i); // GOOD: checks return value + } + } + + { + int i = 0; + + scanf("%d", &i); + use(i); // BAD. Design choice: already initialized variables shouldn't make a difference. + } + + { + int i; + use(i); // GOOD: only care about uses after scanf call + + if (scanf("%d", &i) == 1) + { + use(i); // GOOD + } + } + + { + int i; // Reused variable + + scanf("%d", &i); + use(i); // BAD + + if (scanf("%d", &i) == 1) + { + use(i); // GOOD + } + } + + { + int i; // Reset variable + + scanf("%d", &i); + use(i); // BAD + + i = 1; + use(i); // GOOD + } + + // --- different scanf functions --- + + { + int i; + + fscanf(get_a_stream(), "%d", &i); + use(i); // BAD: may not have written `i` + } + + { + int i; + + sscanf(get_a_string(), "%d", &i); + use(i); // BAD: may not have written `i` + } + + { + int i; + + if (_scanf_l("%d", get_a_locale(), &i) == 1) + { + use(i); // GOOD + } + } + + // --- different ways of checking --- + + { + int i; + + if (scanf("%d", &i) >= 1) + { + use(i); // GOOD + } + } + + { + int i; + + if (scanf("%d", &i) == 1) + { + use(i); // GOOD + } + } + + { + int i; + + if (0 < scanf("%d", &i)) + { + if (true) + { + use(i); // GOOD + } + } + } + + { + int i; + + if (scanf("%d", &i) != 0) + { + use(i); // BAD: scanf can return EOF + } + } + + { + int i; + + if (scanf("%d", &i) == 0) + { + use(i); // BAD: checks return value incorrectly + } + } + + { + int r; + int i; + + r = scanf("%d", &i); + + if (r >= 1) + { + use(i); // GOOD + } + } + + { + bool b; + int i; + + b = scanf("%d", &i); + + if (b >= 1) + { + use(i); // BAD [NOT DETECTED]: scanf can return EOF (boolifies true) + } + } + + { + int i; + + if (scanf("%d", &i)) + use(i); // BAD + } + + { + int i, j; + + if (scanf("%d %d", &i) >= 2) + { + use(i); // GOOD + use(j); // GOOD: `j` is not a scanf arg, so out of scope of MissingCheckScanf + } + } + + { + int i, j; + + if (scanf("%d %d", &i, &j) >= 1) + { + use(i); // GOOD + use(j); // BAD: checks return value incorrectly + } + } + + { + int i, j; + + if (scanf("%d %d", &i, &j) >= 2) + { + use(i); // GOOD + use(j); // GOOD + } + } + + { + char c[5]; + int d; + + while(maybe()) { + if (maybe()) { + break; + } + else if (maybe() && (scanf("%5c %d", c, &d) == 1)) { // GOOD [FALSE POSITIVE] + use(*(int *)c); // GOOD + use(d); // BAD + } + else if ((scanf("%5c %d", c, &d) == 1) && maybe()) { // GOOD [FALSE POSITIVE] + use(*(int *)c); // GOOD + use(d); // BAD + } + } + } + + // --- different initialization --- + + { + int i; + i = 0; + + scanf("%d", &i); + use(i); // BAD + } + + { + int i; + + set_by_ref(i); + scanf("%d", &i); + use(i); // BAD + } + + { + int i; + + set_by_ptr(&i); + scanf("%d", &i); + use(i); // BAD + } + + { + int i; + + if (maybe()) + { + i = 0; + } + + scanf("%d", &i); + use(i); // BAD: `i` may not have been initialized + } + + // --- different use --- + + { + int i; + int *ptr_i = &i; + + scanf("%d", &i); + use(*ptr_i); // BAD: may not have written `i` + } + + { + int i; + int *ptr_i = &i; + + scanf("%d", ptr_i); + use(i); // BAD: may not have written `*ptr_i` + } + + { + int i; + scanf("%d", &i); + i = 42; + use(i); // GOOD + } + + // --- weird formatting strings --- + + { + int i, j; + + if (sscanf("123", "%n %*d %n", &i, &j) >= 0) + { + use(i); // GOOD (`%n` does not consume input, but writes 0 to i) + use(j); // GOOD (`%n` does not consume input, but writes 3 to j) + } + } + + { + int i; + + if (scanf("%% %d", &i) >= 1) + { + use(i); // GOOD (`%%` does not consume input) + } + } + + { + int i; + + if (scanf("%*d %d", &i) >= 1) + { + use(i); // GOOD (`%*d` does not consume input) + } + } + + { + int d, n; + + if (scanf("%*d %d %n", &d, &n) == 1) { + use(d); // GOOD + use(n); // GOOD + } + } + + { + char substr[32]; + int n; + while (sscanf(get_a_string(), "%31[^:]: %d", substr, &n) == 2) { // GOOD: cycle from write to unguarded access + use(*(int *)substr); // GOOD + use(n); // GOOD + } + } +} + +// --- Non-local cases --- + +bool my_scan_int(int &i) +{ + return scanf("%d", &i) == 1; // GOOD +} + +void my_scan_int_test() +{ + int i; + + use(i); // GOOD: used before scanf + + my_scan_int(i); + use(i); // BAD [NOT DETECTED] + + if (my_scan_int(i)) + { + use(i); // GOOD + } +} + +// --- Can be OK'd given a sufficiently smart analysis --- + +char *my_string_copy() { + static const char SRC_STRING[] = "48656C6C6F"; + static char DST_STRING[] = "....."; + + int len = sizeof(SRC_STRING) - 1; + const char *src = SRC_STRING; + char *ptr = DST_STRING; + + for (int i = 0; i < len; i += 2) { + unsigned int u; + sscanf(src + i, "%2x", &u); + *ptr++ = (char) u; // GOOD [FALSE POSITIVE]? src+i+{0,1} are always valid %x digits, so this should be OK. + } + *ptr++ = 0; + return DST_STRING; +}