github · d10c · Sep 1, 2022 · Aug 11, 2022 · Jul 28, 2022 · Jul 28, 2022
@@ -143,6 +143,28 @@ class ScanfFunctionCall extends FunctionCall {
    * (rather than a `char*`).
    */
   predicate isWideCharDefault() { this.getScanfFunction().isWideCharDefault() }
+
+  /**
+   * Gets the `n`th output argument of this call, where `n` counts from zero
+   * within the vararg list (that is, after the declared parameters of the
+   * call target).
+   *
+   * `n` ranges from `0` to `this.getNumberOfOutputArguments() - 1`.
+   */
+  Expr getOutputArgument(int n) {
+    n >= 0 and
+    exists(int index |
+      result = this.getArgument(index) and
+      n = index - this.getTarget().getNumberOfParameters()
+    )
+  }
+
+  /**
+   * Gets any output argument passed to this call in vararg position.
+   */
+  Expr getAnOutputArgument() { exists(int n | result = this.getOutputArgument(n)) }
+
+  /**
+   * Gets the number of output arguments present in this call, computed as the
+   * number of arguments of the call minus the number of declared parameters
+   * of its target.
+   */
+  int getNumberOfOutputArguments() {
+    exists(int totalArgs, int fixedParams |
+      totalArgs = this.getNumberOfArguments() and
+      fixedParams = this.getTarget().getNumberOfParameters()
+    |
+      result = totalArgs - fixedParams
+    )
+  }
 }
 
 /**

@@ -0,0 +1,17 @@
+{
+  int i, j, r;
+
+  // scanf returns the number of items read, or EOF (a negative value) on failure
+  r = scanf("%d %d", &i, &j);
+
+  use(i); // BAD: i is not guarded by any check of r
+
+  if (r >= 1) {
+    use(i); // GOOD: i is guarded correctly (at least one item was read)
+    use(j); // BAD: j is guarded incorrectly (r >= 1 does not ensure two items were read)
+  }
+
+  if (r != 2)
+    return;
+
+  use(j); // GOOD: j is guarded correctly (r == 2 here)
+}
@@ -0,0 +1,51 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+
+<overview>
+<p>
+This query finds calls to <tt>scanf</tt>-like functions with missing or
+improper return-value checking.
+</p>
+<p>
+Specifically, the query flags uses of variables that may have been modified by
+<tt>scanf</tt> and are subsequently used without being guarded by a correct
+return-value check. A proper check is one that ensures that the corresponding
+<tt>scanf</tt> call has returned at least a certain minimum constant.
+</p>
+<p>
+Functions in the <tt>scanf</tt> family return either EOF (a negative value)
+in case of I/O failure, or the number of items successfully read from the
+input. Consequently, a simple check that the return value is truthy (nonzero)
+is not enough, because EOF is itself nonzero.
+</p>
+<warning>
+This query has medium precision because, in the current implementation, it
+takes a strict stance on unguarded uses of output variables, and flags them
+as problematic even if they have already been initialized.
+</warning>
+</overview>
+
+<recommendation>
+<p>
+Ensure that all subsequent uses of <tt>scanf</tt> output arguments occur in a
+branch of an <tt>if</tt> statement (or similar), in which it is known that the
+corresponding <tt>scanf</tt> call has in fact read all possible items from its
+input. This can be done by comparing the return value to a numerical constant.
+</p>
+</recommendation>
+
+<example>
+<p>This example shows different ways of guarding a <tt>scanf</tt> output:
+</p>
+<sample src="MissingCheckScanf.cpp" />
+</example>
+
+<references>
+<li>SEI CERT C++ Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/cplusplus/ERR62-CPP.+Detect+errors+when+converting+a+string+to+a+number">ERR62-CPP. Detect errors when converting a string to a number</a>.</li>
+<li>SEI CERT C Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/c/ERR33-C.+Detect+and+handle+standard+library+errors">ERR33-C. Detect and handle standard library errors</a>.</li>
+<li>cppreference.com: <a href="https://en.cppreference.com/w/c/io/fscanf">scanf, fscanf, sscanf, scanf_s, fscanf_s, sscanf_s</a>.</li>
+</references>
+</qhelp>
@@ -0,0 +1,122 @@
+/**
+ * @name Missing return-value check for a 'scanf'-like function
+ * @description Failing to check that a call to 'scanf' actually writes to an
+ *              output variable can lead to unexpected behavior at reading time.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @id cpp/missing-check-scanf
+ * @tags security
+ *       correctness
+ *       external/cwe/cwe-252
+ *       external/cwe/cwe-253
+ */
+
+import cpp
+import semmle.code.cpp.commons.Scanf
+import semmle.code.cpp.controlflow.Guards
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.ir.IR
+import semmle.code.cpp.ir.ValueNumbering
+
+/**
+ * An expression appearing as an output argument to a `scanf`-like call.
+ *
+ * Each instance carries the call it belongs to, its zero-based position among
+ * the call's output arguments, an IR instruction whose converted result
+ * expression is this expression, and the value number of that instruction.
+ */
+class ScanfOutput extends Expr {
+  // The `scanf`-like call that this expression is an output argument of.
+  ScanfFunctionCall call;
+  // The index of this expression in the vararg list of `call`.
+  int varargIndex;
+  // An IR instruction whose converted result expression is this expression.
+  Instruction instr;
+  // The value number of `instr`.
+  ValueNumber valNum;
+
+  ScanfOutput() {
+    this = call.getOutputArgument(varargIndex).getFullyConverted() and
+    instr.getConvertedResultExpression() = this and
+    valueNumber(instr) = valNum
+  }
+
+  /** Gets the `scanf`-like call that this expression is an output argument of. */
+  ScanfFunctionCall getCall() { result = call }
+
+  /**
+   * Gets the smallest possible `scanf` return value that would indicate
+   * success in writing this output argument.
+   *
+   * This is `varargIndex + 1`, minus the number of `%n` conversions of the
+   * call's format literal at a conversion index no greater than `varargIndex`.
+   */
+  int getMinimumGuardConstant() {
+    result =
+      varargIndex + 1 -
+        count(ScanfFormatLiteral f, int n |
+          // Special case: %n writes to an argument without reading any input.
+          // It does not increase the count returned by `scanf`.
+          n <= varargIndex and f.getUse() = call and f.getConversionChar(n) = "n"
+        )
+  }
+
+  /**
+   * Holds if `e` is an access obtained from `getAnAccess()`, and `isGuarded`
+   * indicates whether the basic block of `e` is guarded by a comparison of
+   * the return value of the call against a constant that implies the return
+   * value is at least `getMinimumGuardConstant()`.
+   */
+  predicate hasGuardedAccess(Access e, boolean isGuarded) {
+    e = this.getAnAccess() and
+    if
+      exists(int value, int minGuard | minGuard = this.getMinimumGuardConstant() |
+        // `value == call`: the return value is exactly `value`.
+        e.getBasicBlock() = blockGuardedBy(value, "==", call) and minGuard <= value
+        or
+        // `value < call`: the return value is at least `value + 1`.
+        e.getBasicBlock() = blockGuardedBy(value, "<", call) and minGuard - 1 <= value
+        or
+        // `value <= call`: the return value is at least `value`.
+        e.getBasicBlock() = blockGuardedBy(value, "<=", call) and minGuard <= value
+      )
+    then isGuarded = true
+    else isGuarded = false
+  }
+
+  /**
+   * Gets a subsequent access of the same underlying storage,
+   * but before it gets reset or reused in another `scanf` call.
+   *
+   * The access is the AST element of an instruction that is reached by
+   * `bigStep()` and has the same value number as this output argument.
+   */
+  Access getAnAccess() {
+    exists(Instruction dst |
+      this.bigStep() = dst and
+      dst.getAst() = result and
+      valueNumber(dst) = valNum
+    )
+  }
+
+  /**
+   * Gets an instruction reached from `instr` by one or more applications of
+   * `smallStep`.
+   */
+  private Instruction bigStep() {
+    result = this.smallStep(instr)
+    or
+    exists(Instruction i | i = this.bigStep() | result = this.smallStep(i))
+  }
+
+  /**
+   * Gets a successor of `i` that is not a barrier, where `i` is `instr` or is
+   * reachable from `instr` along successor edges.
+   */
+  private Instruction smallStep(Instruction i) {
+    instr.getASuccessor*() = i and
+    i.getASuccessor() = result and
+    not this.isBarrier(result)
+  }
+
+  /**
+   * Holds if `i` has the same value number as this output argument and either
+   * is the destination address of a store instruction, or corresponds to an
+   * expression that is itself (or whose address-of parent is) a `ScanfOutput`.
+   */
+  private predicate isBarrier(Instruction i) {
+    valueNumber(i) = valNum and
+    exists(Expr e | i.getAst() = e |
+      i = any(StoreInstruction s).getDestinationAddress()
+      or
+      [e, e.getParent().(AddressOfExpr)] instanceof ScanfOutput
+    )
+  }
+}
+
+/**
+ * Gets a basic block guarded by the assertion `value op call`, where `op` is
+ * one of `"=="`, `"<"` or `"<="`, `value` is the integer value of one operand
+ * of the guard, and the other operand is a child of the guard that receives
+ * local flow from `call`.
+ */
+BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
+  exists(GuardCondition guard, Expr constant, Expr returned |
+    returned = guard.getAChild() and
+    DataFlow::localExprFlow(call, returned) and
+    value = constant.getValue().toInt()
+  |
+    op = "==" and guard.ensuresEq(constant, returned, 0, result, true)
+    or
+    op = "<" and guard.ensuresLt(constant, returned, 0, result, true)
+    or
+    // An offset of 1 on the strict comparison expresses `constant <= returned`.
+    op = "<=" and guard.ensuresLt(constant, returned, 1, result, true)
+  )
+}
+
+// Report every access of a `scanf` output argument for which
+// `hasGuardedAccess` yields `false`, together with the originating call and
+// the minimum return value that a guard would need to establish.
+from ScanfOutput output, ScanfFunctionCall call, Access access
+where
+  output.getCall() = call and
+  output.hasGuardedAccess(access, false)
+select access,
+  "$@ is read here, but may not have been written. " +
+    "It should be guarded by a check that the $@ returns at least " +
+    output.getMinimumGuardConstant() + ".", access, access.toString(), call, call.toString()
@@ -0,0 +1,4 @@
+---
+category: newQuery
+---
+* Added a new medium-precision query, `cpp/missing-check-scanf`, which detects `scanf` output variables that are used without a proper return-value check to see that they were actually written. A variation of this query was originally contributed as an [experimental query by @ihsinme](https://github.com/github/codeql/pull/8246).
@@ -0,0 +1,19 @@
+| test.cpp:30:7:30:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:30:7:30:7 | i | i | test.cpp:29:3:29:7 | call to scanf | call to scanf |
+| test.cpp:46:7:46:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:46:7:46:7 | i | i | test.cpp:45:3:45:7 | call to scanf | call to scanf |
+| test.cpp:63:7:63:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:63:7:63:7 | i | i | test.cpp:62:3:62:7 | call to scanf | call to scanf |
+| test.cpp:75:7:75:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:75:7:75:7 | i | i | test.cpp:74:3:74:7 | call to scanf | call to scanf |
+| test.cpp:87:7:87:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:87:7:87:7 | i | i | test.cpp:86:3:86:8 | call to fscanf | call to fscanf |
+| test.cpp:94:7:94:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:94:7:94:7 | i | i | test.cpp:93:3:93:8 | call to sscanf | call to sscanf |
+| test.cpp:143:8:143:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:143:8:143:8 | i | i | test.cpp:141:7:141:11 | call to scanf | call to scanf |
+| test.cpp:152:8:152:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:152:8:152:8 | i | i | test.cpp:150:7:150:11 | call to scanf | call to scanf |
+| test.cpp:184:8:184:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:184:8:184:8 | i | i | test.cpp:183:7:183:11 | call to scanf | call to scanf |
+| test.cpp:203:8:203:8 | j | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:203:8:203:8 | j | j | test.cpp:200:7:200:11 | call to scanf | call to scanf |
+| test.cpp:227:9:227:9 | d | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:227:9:227:9 | d | d | test.cpp:225:25:225:29 | call to scanf | call to scanf |
+| test.cpp:231:9:231:9 | d | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:231:9:231:9 | d | d | test.cpp:229:14:229:18 | call to scanf | call to scanf |
+| test.cpp:243:7:243:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:243:7:243:7 | i | i | test.cpp:242:3:242:7 | call to scanf | call to scanf |
+| test.cpp:251:7:251:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:251:7:251:7 | i | i | test.cpp:250:3:250:7 | call to scanf | call to scanf |
+| test.cpp:259:7:259:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:259:7:259:7 | i | i | test.cpp:258:3:258:7 | call to scanf | call to scanf |
+| test.cpp:271:7:271:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:7:271:7 | i | i | test.cpp:270:3:270:7 | call to scanf | call to scanf |
+| test.cpp:281:8:281:12 | ptr_i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:281:8:281:12 | ptr_i | ptr_i | test.cpp:280:3:280:7 | call to scanf | call to scanf |
+| test.cpp:289:7:289:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:289:7:289:7 | i | i | test.cpp:288:3:288:7 | call to scanf | call to scanf |
+| test.cpp:383:25:383:25 | u | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:383:25:383:25 | u | u | test.cpp:382:6:382:11 | call to sscanf | call to sscanf |
@@ -0,0 +1 @@
+Critical/MissingCheckScanf.ql