Implement Rust log injection query and test infrastructure

Copilot · geoffw0 · Copilot · commit 39ea50746f66 · 2025-08-13T18:09:03.000Z
Co-authored-by: geoffw0 <40627776+geoffw0@users.noreply.github.com>
diff --git a/rust/ql/lib/codeql/rust/security/LogInjectionExtensions.qll b/rust/ql/lib/codeql/rust/security/LogInjectionExtensions.qll
@@ -0,0 +1,45 @@
+/**
+ * Provides classes and predicates for reasoning about log injection
+ * vulnerabilities.
+ */
+
+import rust
+private import codeql.rust.dataflow.DataFlow
+private import codeql.rust.dataflow.FlowSink
+private import codeql.rust.Concepts
+private import codeql.util.Unit
+
+/**
+ * Provides default sources, sinks and barriers for detecting log injection
+ * vulnerabilities, as well as extension points for adding your own.
+ */
+module LogInjection {
+  /**
+   * A data flow source for log injection vulnerabilities. Extend this class to add sources.
+   */
+  abstract class Source extends DataFlow::Node { }
+
+  /**
+   * A data flow sink for log injection vulnerabilities; its sink type is `"LogInjection"`.
+   */
+  abstract class Sink extends QuerySink::Range {
+    override string getSinkType() { result = "LogInjection" }
+  }
+
+  /**
+   * A barrier for log injection vulnerabilities. This module defines no default barriers.
+   */
+  abstract class Barrier extends DataFlow::Node { }
+
+  /**
+   * Any `ActiveThreatModelSource`, treated as a log injection flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }
+
+  /**
+   * A sink for which `sinkNode` holds with kind `"log-injection"` (sinks from model data).
+   */
+  private class ModelsAsDataSink extends Sink {
+    ModelsAsDataSink() { sinkNode(this, "log-injection") }
+  }
+}
diff --git a/rust/ql/src/queries/security/CWE-117/LogInjection.qhelp b/rust/ql/src/queries/security/CWE-117/LogInjection.qhelp
@@ -0,0 +1,48 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+
+<p>If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.</p>
+
+<p>Forgery can occur if a user provides input containing characters that are interpreted
+when the log output is displayed. If the log is displayed as a plain text file, a malicious
+user can insert newline characters to start a forged entry. If the log is displayed as HTML,
+arbitrary HTML may be included to spoof log entries.</p>
+</overview>
+
+<recommendation>
+<p>
+User input should be suitably sanitized before it is logged.
+</p>
+<p>
+If the log entries are in plain text then line breaks should be removed from user input, using
+<code>String::replace</code> or similar. Care should also be taken that user input is clearly marked
+in log entries.
+</p>
+<p>
+For log entries that will be displayed in HTML, user input should be HTML-encoded before being logged, to prevent forgery and
+other forms of HTML injection.
+</p>
+
+</recommendation>
+
+<example>
+<p>In the first example, a username, provided by the user via command-line arguments, is logged using the <code>log</code> crate.
+If a malicious user provides <code>Guest\n[INFO] User: Admin\n</code> as a username parameter,
+the log entry will be split into multiple lines, where the second line will appear as <code>[INFO] User: Admin</code>,
+potentially forging a legitimate admin login entry.
+</p>
+<sample src="LogInjectionBad.rs" />
+
+<p>In the second example, <code>String::replace</code> is used to ensure no line endings are present in the user input before logging.</p>
+<sample src="LogInjectionGood.rs" />
+</example>
+
+<references>
+<li>OWASP: <a href="https://owasp.org/www-community/attacks/Log_Injection">Log Injection</a>.</li>
+<li>CWE-117: <a href="https://cwe.mitre.org/data/definitions/117.html">Improper Output Neutralization for Logs</a>.</li>
+</references>
+</qhelp>
diff --git a/rust/ql/src/queries/security/CWE-117/LogInjection.ql b/rust/ql/src/queries/security/CWE-117/LogInjection.ql
@@ -0,0 +1,41 @@
+/**
+ * @name Log injection
+ * @description Building log entries from user-controlled sources is vulnerable to
+ *              insertion of forged log entries by a malicious user.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 7.8
+ * @precision medium
+ * @id rust/log-injection
+ * @tags security
+ *       external/cwe/cwe-117
+ */
+
+import rust
+import codeql.rust.dataflow.DataFlow
+import codeql.rust.dataflow.TaintTracking
+import codeql.rust.security.LogInjectionExtensions
+
+/**
+ * A taint configuration: `LogInjection::Source` to `LogInjection::Sink`, blocked by `Barrier`.
+ */
+module LogInjectionConfig implements DataFlow::ConfigSig {
+  import LogInjection // brings Source, Sink and Barrier into scope for the predicates below
+
+  predicate isSource(DataFlow::Node node) { node instanceof Source }
+
+  predicate isSink(DataFlow::Node node) { node instanceof Sink }
+
+  predicate isBarrier(DataFlow::Node barrier) { barrier instanceof Barrier }
+
+  predicate observeDiffInformedIncrementalMode() { any() } // holds unconditionally
+}
+
+module LogInjectionFlow = TaintTracking::Global<LogInjectionConfig>;
+
+import LogInjectionFlow::PathGraph
+
+from LogInjectionFlow::PathNode source, LogInjectionFlow::PathNode sink
+where LogInjectionFlow::flowPath(source, sink)
+select sink.getNode(), source, sink, "Log entry depends on a $@.", source.getNode(),
+  "user-provided value"
diff --git a/rust/ql/src/queries/security/CWE-117/LogInjectionBad.rs b/rust/ql/src/queries/security/CWE-117/LogInjectionBad.rs
@@ -0,0 +1,22 @@
+use std::env;
+use log::{info, error};
+
+/// Logs the first command-line argument verbatim; every log call below is injectable.
+fn main() {
+    env_logger::init();
+
+    // Get username from command line arguments, falling back to "Guest" (owned, no dangling borrow)
+    let username = env::args().nth(1).unwrap_or_else(|| String::from("Guest"));
+
+    // BAD: log message constructed with unsanitized user input
+    info!("User login attempt: {}", username);
+
+    // BAD: another example with error logging
+    if username.is_empty() {
+        error!("Login failed for user: {}", username);
+    }
+
+    // BAD: formatted string with user input
+    let message = format!("Processing request for user: {}", username);
+    info!("{}", message);
+}
diff --git a/rust/ql/src/queries/security/CWE-117/LogInjectionGood.rs b/rust/ql/src/queries/security/CWE-117/LogInjectionGood.rs
@@ -0,0 +1,28 @@
+use std::env;
+use log::{info, error};
+
+/// Returns `input` with every `\n` and `\r` removed, so it cannot start a new log line.
+fn sanitize_for_logging(input: &str) -> String {
+    input.replace(['\n', '\r'], "")
+}
+
+/// Logs the first command-line argument only after line breaks have been stripped from it.
+fn main() {
+    env_logger::init();
+
+    // Get username from command line arguments, falling back to "Guest" (owned, no dangling borrow)
+    let username = env::args().nth(1).unwrap_or_else(|| String::from("Guest"));
+
+    // GOOD: log message constructed with sanitized user input
+    let sanitized_username = sanitize_for_logging(&username);
+    info!("User login attempt: {}", sanitized_username);
+
+    // GOOD: another example with error logging
+    if username.is_empty() {
+        error!("Login failed for user: {}", sanitized_username);
+    }
+
+    // GOOD: formatted string with sanitized user input
+    let message = format!("Processing request for user: {}", sanitized_username);
+    info!("{}", message);
+}
diff --git a/rust/ql/test/query-tests/security/CWE-117/Cargo.lock b/rust/ql/test/query-tests/security/CWE-117/Cargo.lock
diff --git a/rust/ql/test/query-tests/security/CWE-117/LogInjection.expected b/rust/ql/test/query-tests/security/CWE-117/LogInjection.expected
@@ -0,0 +1,16 @@
+# This file will be generated by running `codeql test run . --learn`
+# in the test directory. For now, this is a placeholder.
+
+models
+| Type | Name | Input | Output | Kind | Provenance |
+
+edges
+| Source | Sink | Provenance |
+
+nodes
+| Name | Type |
+
+subpaths
+
+#select
+| main.rs:0:0:0:0 | placeholder | main.rs:0:0:0:0 | placeholder | placeholder | placeholder | placeholder |
diff --git a/rust/ql/test/query-tests/security/CWE-117/LogInjection.qlref b/rust/ql/test/query-tests/security/CWE-117/LogInjection.qlref
@@ -0,0 +1,4 @@
+query: queries/security/CWE-117/LogInjection.ql
+postprocess:
+ - utils/test/PrettyPrintModels.ql
+ - utils/test/InlineExpectationsTestQuery.ql
diff --git a/rust/ql/test/query-tests/security/CWE-117/main.rs b/rust/ql/test/query-tests/security/CWE-117/main.rs
@@ -0,0 +1,125 @@
+use std::env;
+use log::{info, warn, error, debug, trace};
+
+fn main() {
+    env_logger::init();
+
+    // Sources of user input; each alert below names the tag of the source it originates from
+    let args: Vec<String> = env::args().collect(); // $ Source=commandargs
+    let username = args.get(1).unwrap_or(&String::from("Guest")).clone();
+    let user_input = std::env::var("USER_INPUT").unwrap_or("default".to_string()); // $ Source=environment
+    let remote_data = reqwest::blocking::get("http://example.com/user") // $ Source=remote
+        .unwrap().text().unwrap_or("remote_user".to_string());
+
+    // BAD: Direct logging of user input
+    info!("User login: {}", username); // $ Alert[rust/log-injection]=commandargs
+    warn!("Warning for user: {}", user_input); // $ Alert[rust/log-injection]=environment
+    error!("Error processing: {}", remote_data); // $ Alert[rust/log-injection]=remote
+    debug!("Debug info: {}", username); // $ Alert[rust/log-injection]=commandargs
+    trace!("Trace data: {}", user_input); // $ Alert[rust/log-injection]=environment
+
+    // BAD: Formatted strings with user input
+    let formatted_msg = format!("Processing user: {}", username);
+    info!("{}", formatted_msg); // $ Alert[rust/log-injection]=commandargs
+
+    // BAD: String concatenation with user input
+    let concat_msg = "User activity: ".to_string() + &username;
+    info!("{}", concat_msg); // $ Alert[rust/log-injection]=commandargs
+
+    // BAD: Complex formatting (two tainted arguments, so one alert per source)
+    info!("User {} accessed resource at {}", username, remote_data); // $ Alert[rust/log-injection]=commandargs Alert[rust/log-injection]=remote
+
+    // GOOD: Sanitized input
+    let sanitized_username = username.replace('\n', "").replace('\r', "");
+    info!("Sanitized user login: {}", sanitized_username);
+
+    // GOOD: Constant strings
+    info!("System startup complete");
+
+    // GOOD: Non-user-controlled data
+    let system_time = std::time::SystemTime::now();
+    info!("Current time: {:?}", system_time);
+
+    // GOOD: Numeric data derived from user input (not directly logged)
+    let user_id = username.len();
+    info!("User ID length: {}", user_id);
+
+    // More complex test cases
+    test_complex_scenarios(&username, &user_input);
+    test_indirect_flows(&remote_data);
+}
+
+/// Exercises flows that reach a log call through a local, a callee and a struct field.
+fn test_complex_scenarios(username: &str, user_input: &str) {
+    // BAD: Indirect logging through variables
+    let log_message = format!("Activity for {}", username);
+    info!("{}", log_message); // $ Alert[rust/log-injection]=commandargs
+
+    log_user_activity(username); // BAD: through function parameters; the alert is in the callee
+
+    // BAD: Through struct fields
+    let user_info = UserInfo { name: username.to_string() };
+    info!("User info: {}", user_info.name); // $ Alert[rust/log-injection]=commandargs
+
+    // GOOD: After sanitization
+    let clean_input = sanitize_input(user_input);
+    info!("Clean input: {}", clean_input);
+}
+
+fn log_user_activity(user: &str) { // BAD: logs the caller-supplied value unchanged
+    info!("User activity: {}", user); // $ Alert[rust/log-injection]=commandargs
+}
+
+fn sanitize_input(input: &str) -> String { // removes '\n' and '\r', turns each '\t' into a space
+    input.replace('\n', "").replace('\r', "").replace('\t', " ")
+}
+
+struct UserInfo { // wrapper used to test flow through a struct field
+    name: String, // set from `username` in test_complex_scenarios
+}
+
+fn test_indirect_flows(data: &str) {
+    // BAD: Flow through intermediate variables (`data` is main's `remote`-tagged value)
+    let temp_var = data;
+    let another_var = temp_var;
+    info!("Indirect flow: {}", another_var); // $ Alert[rust/log-injection]=remote
+
+    // BAD: Flow through collections
+    let data_vec = vec![data];
+    if let Some(item) = data_vec.first() {
+        info!("Vector item: {}", item); // $ Alert[rust/log-injection]=remote
+    }
+
+    // BAD: Flow through Option/Result
+    let optional_data = Some(data);
+    if let Some(unwrapped) = optional_data {
+        info!("Unwrapped data: {}", unwrapped); // $ Alert[rust/log-injection]=remote
+    }
+}
+
+// Additional test patterns for different logging scenarios (sources here use their own tags)
+mod additional_tests {
+    use log::*;
+
+    pub fn test_macro_variations() {
+        let user_data = std::env::args().nth(1).unwrap_or_default(); // $ Source=commandargs2
+
+        // BAD: Different log macro variations
+        info!("Info: {}", user_data); // $ Alert[rust/log-injection]=commandargs2
+        warn!("Warning: {}", user_data); // $ Alert[rust/log-injection]=commandargs2
+        error!("Error: {}", user_data); // $ Alert[rust/log-injection]=commandargs2
+        debug!("Debug: {}", user_data); // $ Alert[rust/log-injection]=commandargs2
+        trace!("Trace: {}", user_data); // $ Alert[rust/log-injection]=commandargs2
+
+        // BAD: Complex format strings
+        info!("User {} did action {} at time {}", user_data, "login", "now"); // $ Alert[rust/log-injection]=commandargs2
+    }
+
+    pub fn test_println_patterns() {
+        let user_data = std::env::var("USER").unwrap_or_default(); // $ Source=environment2
+        // NOTE(review): no Alert is linked to the source above. Confirm via `--learn` whether
+        // println!/eprintln! are modelled as sinks, then add the Alerts or drop the Source tag.
+        println!("User: {}", user_data);
+        eprintln!("Error for user: {}", user_data);
+    }
+}
diff --git a/rust/ql/test/query-tests/security/CWE-117/options.yml b/rust/ql/test/query-tests/security/CWE-117/options.yml
@@ -0,0 +1,5 @@
+qltest_cargo_check: true
+qltest_dependencies:
+    - log = "0.4"
+    - env_logger = "0.10"
+    - reqwest = { version = "0.12.9", features = ["blocking"] }