Skip to content

Python: promote log injection #7735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Feb 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions python/ql/lib/semmle/python/frameworks/Django.qll
Original file line number Diff line number Diff line change
Expand Up @@ -2295,4 +2295,22 @@ module PrivateDjango {

override string getMimetypeDefault() { none() }
}

// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------
/**
* A standard Python logger instance from Django.
* see https://github.com/django/django/blob/stable/4.0.x/django/utils/log.py#L11
*/
private class DjangoLogger extends Stdlib::Logger::InstanceSource {
DjangoLogger() {
this =
API::moduleImport("django")
.getMember("utils")
.getMember("log")
.getMember("request_logger")
.getAnImmediateUse()
}
}
}
15 changes: 15 additions & 0 deletions python/ql/lib/semmle/python/frameworks/Flask.qll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.frameworks.Werkzeug
private import semmle.python.frameworks.Stdlib
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.PathInjectionCustomizations
Expand Down Expand Up @@ -569,4 +570,18 @@ module Flask {
result in [this.getArg(0), this.getArgByName("filename_or_fp")]
}
}

// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------
/**
* A Flask application provides a standard Python logger via the `logger` attribute.
*
* See
* - https://flask.palletsprojects.com/en/2.0.x/api/#flask.Flask.logger
* - https://flask.palletsprojects.com/en/2.0.x/logging/
*/
private class FlaskLogger extends Stdlib::Logger::InstanceSource {
FlaskLogger() { this = FlaskApp::instance().getMember("logger").getAnImmediateUse() }
}
}
73 changes: 50 additions & 23 deletions python/ql/lib/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,54 @@ module Stdlib {
}
}
}

// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
/**
* Provides models for the `logging.Logger` class and subclasses.
*
* See https://docs.python.org/3.9/library/logging.html#logging.Logger.
*/
module Logger {
/** Gets a reference to the `logging.Logger` class or any subclass. */
private API::Node subclassRef() {
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
}

/**
* A source of instances of `logging.Logger`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Logger::instance()` to get references to instances of `logging.Logger`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }

/** A direct instantiation of `logging.Logger`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
ClassInstantiation() {
this = subclassRef().getACall()
or
this = API::moduleImport("logging").getMember("root").getAnImmediateUse()
or
this = API::moduleImport("logging").getMember("getLogger").getACall()
}
}

/** Gets a reference to an instance of `logging.Logger`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}

/** Gets a reference to an instance of `logging.Logger`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
}

/**
Expand Down Expand Up @@ -2642,27 +2690,6 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
/**
* Provides models for the `logging.Logger` class and subclasses.
*
* See https://docs.python.org/3.9/library/logging.html#logging.Logger.
*/
module Logger {
/** Gets a reference to the `logging.Logger` class or any subclass. */
API::Node subclassRef() {
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
}

/** Gets a reference to an instance of `logging.Logger` or any subclass. */
API::Node instance() {
result = subclassRef().getReturn()
or
result = API::moduleImport("logging").getMember("root")
or
result = API::moduleImport("logging").getMember("getLogger").getReturn()
}
}

/**
* A call to one of the logging methods from `logging` or on a `logging.Logger`
* subclass.
Expand All @@ -2683,14 +2710,14 @@ private module StdlibPrivate {
method = "log" and
msgIndex = 1
|
this = Logger::instance().getMember(method).getACall()
this.(DataFlow::MethodCallNode).calls(Stdlib::Logger::instance(), method)
or
this = API::moduleImport("logging").getMember(method).getACall()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't agree about this removal. I see that API::moduleImport("logging") is now modeled as a LoggerInstance... but that is not true, these are helper methods defined on the module leve.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there value in modelling instances specifically? Or could we just rename instance to something like loggingMethodProvider?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted to previous modelling approach as discussed offline.

)
}

override DataFlow::Node getAnInput() {
result = this.getArgByName("msg")
result = this.getArgByName(["msg", "extra"])
or
result = this.getArg(any(int i | i >= msgIndex))
}
Expand Down
35 changes: 35 additions & 0 deletions python/ql/lib/semmle/python/security/dataflow/LogInjection.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/**
* Provides a taint-tracking configuration for tracking untrusted user input used in log entries.
*
* Note, for performance reasons: only import this file if
* `LogInjection::Configuration` is needed, otherwise
* `LogInjectionCustomizations` should be imported instead.
*/

import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking

/**
* Provides a taint-tracking configuration for tracking untrusted user input used in log entries.
*/
module LogInjection {
import LogInjectionCustomizations::LogInjection

/**
* A taint-tracking configuration for tracking untrusted user input used in log entries.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "LogInjection" }

override predicate isSource(DataFlow::Node source) { source instanceof Source }

override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }

override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "log injection"
* vulnerabilities, as well as extension points for adding your own.
*/

private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards

/**
* Provides default sources, sinks and sanitizers for detecting
* "log injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module LogInjection {
/**
* A data flow source for "log injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }

/**
* A data flow sink for "log injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }

/**
* A sanitizer for "log injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }

/**
* A sanitizer guard for "log injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

/**
* A logging operation, considered as a flow sink.
*/
class LoggingAsSink extends Sink {
LoggingAsSink() { this = any(Logging write).getAnInput() }
}

/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }

/**
* A call to replace line breaks, considered as a sanitizer.
*/
class ReplaceLineBreaksSanitizer extends Sanitizer, DataFlow::CallCfgNode {
// Note: This sanitizer is not 100% accurate, since:
// - we do not check that all kinds of line breaks are replaced
// - we do not check that one kind of line breaks is not replaced by another
//
// However, we lack a simple way to do better, and the query would likely
// be too noisy without this.
//
// TODO: Consider rewriting using flow states.
ReplaceLineBreaksSanitizer() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "replace" and
this.getArg(0).asExpr().(StrConst).getText() in ["\r\n", "\n"]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@

<p>If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.</p>

<p>Forgery can occur if a user provides some input creating the appearance of multiple
log entries. This can include unescaped new-line characters, or HTML or other markup.</p>
<p>Forgery can occur if a user provides some input with characters that are interpreted
when the log output is displayed. If the log is displayed as a plain text file, then new
line characters can be used by a malicious user to create the appearance of multiple log
entries. If the log is displayed as HTML, then arbitrary HTML may be included to spoof
log entries.</p>
</overview>

<recommendation>
Expand All @@ -29,14 +32,14 @@ other forms of HTML injection.

<example>
<p>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
as a parameter, the log entry will be divided into two lines, the first line is <code>User name: Guest</code> code>, the second line is <code>User name: Admin</code>.
</p>
<sample src="LogInjectionBad.py" />

<p>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
with empty characters. To a certain extent, the occurrence of log injection vulnerabilities is reduced.
</p>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
* insertion of forged log entries by a malicious user.
* @kind path-problem
* @problem.severity error
* @precision high
* @security-severity 7.8
* @precision medium
* @id py/log-injection
* @tags security
* external/cwe/cwe-117
*/

import python
import experimental.semmle.python.security.injection.LogInjection
import semmle.python.security.dataflow.LogInjection
import DataFlow::PathGraph

from LogInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
from LogInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(),
"User-provided value"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: newQuery
---
* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).
30 changes: 0 additions & 30 deletions python/ql/src/experimental/semmle/python/Concepts.qll
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks

/** Provides classes for modeling log related APIs. */
module LogOutput {
/**
* A data flow node for log output.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LogOutput` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Get the parameter value of the log output function.
*/
abstract DataFlow::Node getAnInput();
}
}

/**
* A data flow node for log output.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LogOutput::Range` instead.
*/
class LogOutput extends DataFlow::Node {
LogOutput::Range range;

LogOutput() { this = range }

DataFlow::Node getAnInput() { result = range.getAnInput() }
}

/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
/**
Expand Down
1 change: 0 additions & 1 deletion python/ql/src/experimental/semmle/python/Frameworks.qll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.Log
private import experimental.semmle.python.frameworks.JWT
private import experimental.semmle.python.libraries.PyJWT
private import experimental.semmle.python.libraries.Authlib
Expand Down
Loading