diff --git a/python/ql/lib/semmle/python/frameworks/Django.qll b/python/ql/lib/semmle/python/frameworks/Django.qll index 9e66c728f6ee..8f34043f0937 100644 --- a/python/ql/lib/semmle/python/frameworks/Django.qll +++ b/python/ql/lib/semmle/python/frameworks/Django.qll @@ -2295,4 +2295,22 @@ module PrivateDjango { override string getMimetypeDefault() { none() } } + + // --------------------------------------------------------------------------- + // Logging + // --------------------------------------------------------------------------- + /** + * A standard Python logger instance from Django. + * see https://github.com/django/django/blob/stable/4.0.x/django/utils/log.py#L11 + */ + private class DjangoLogger extends Stdlib::Logger::InstanceSource { + DjangoLogger() { + this = + API::moduleImport("django") + .getMember("utils") + .getMember("log") + .getMember("request_logger") + .getAnImmediateUse() + } + } } diff --git a/python/ql/lib/semmle/python/frameworks/Flask.qll b/python/ql/lib/semmle/python/frameworks/Flask.qll index 88544ca85378..7f54761d911d 100644 --- a/python/ql/lib/semmle/python/frameworks/Flask.qll +++ b/python/ql/lib/semmle/python/frameworks/Flask.qll @@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts private import semmle.python.frameworks.Werkzeug +private import semmle.python.frameworks.Stdlib private import semmle.python.ApiGraphs private import semmle.python.frameworks.internal.InstanceTaintStepsHelper private import semmle.python.security.dataflow.PathInjectionCustomizations @@ -569,4 +570,18 @@ module Flask { result in [this.getArg(0), this.getArgByName("filename_or_fp")] } } + + // --------------------------------------------------------------------------- + // Logging + // --------------------------------------------------------------------------- + /** + * A Flask application provides a standard Python logger via the `logger` attribute. + * + * See + * - https://flask.palletsprojects.com/en/2.0.x/api/#flask.Flask.logger + * - https://flask.palletsprojects.com/en/2.0.x/logging/ + */ + private class FlaskLogger extends Stdlib::Logger::InstanceSource { + FlaskLogger() { this = FlaskApp::instance().getMember("logger").getAnImmediateUse() } + } } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index a4486f3504f7..6d60cd59c568 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -237,6 +237,54 @@ module Stdlib { } } } + + // --------------------------------------------------------------------------- + // logging + // --------------------------------------------------------------------------- + /** + * Provides models for the `logging.Logger` class and subclasses. + * + * See https://docs.python.org/3.9/library/logging.html#logging.Logger. + */ + module Logger { + /** Gets a reference to the `logging.Logger` class or any subclass. */ + private API::Node subclassRef() { + result = API::moduleImport("logging").getMember("Logger").getASubclass*() + } + + /** + * A source of instances of `logging.Logger`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `Logger::instance()` to get references to instances of `logging.Logger`. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `logging.Logger`. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode { + ClassInstantiation() { + this = subclassRef().getACall() + or + this = API::moduleImport("logging").getMember("root").getAnImmediateUse() + or + this = API::moduleImport("logging").getMember("getLogger").getACall() + } + } + + /** Gets a reference to an instance of `logging.Logger`. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `logging.Logger`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + } } /** @@ -2642,27 +2690,6 @@ private module StdlibPrivate { // --------------------------------------------------------------------------- // logging // --------------------------------------------------------------------------- - /** - * Provides models for the `logging.Logger` class and subclasses. - * - * See https://docs.python.org/3.9/library/logging.html#logging.Logger. - */ - module Logger { - /** Gets a reference to the `logging.Logger` class or any subclass. */ - API::Node subclassRef() { - result = API::moduleImport("logging").getMember("Logger").getASubclass*() - } - - /** Gets a reference to an instance of `logging.Logger` or any subclass. */ - API::Node instance() { - result = subclassRef().getReturn() - or - result = API::moduleImport("logging").getMember("root") - or - result = API::moduleImport("logging").getMember("getLogger").getReturn() - } - } - /** * A call to one of the logging methods from `logging` or on a `logging.Logger` * subclass. @@ -2683,14 +2710,14 @@ private module StdlibPrivate { method = "log" and msgIndex = 1 | - this = Logger::instance().getMember(method).getACall() + this.(DataFlow::MethodCallNode).calls(Stdlib::Logger::instance(), method) or this = API::moduleImport("logging").getMember(method).getACall() ) } override DataFlow::Node getAnInput() { - result = this.getArgByName("msg") + result = this.getArgByName(["msg", "extra"]) or result = this.getArg(any(int i | i >= msgIndex)) } diff --git a/python/ql/lib/semmle/python/security/dataflow/LogInjection.qll b/python/ql/lib/semmle/python/security/dataflow/LogInjection.qll new file mode 100644 index 000000000000..1e9d0b7a99f9 --- /dev/null +++ b/python/ql/lib/semmle/python/security/dataflow/LogInjection.qll @@ -0,0 +1,35 @@ +/** + * Provides a taint-tracking configuration for tracking untrusted user input used in log entries. + * + * Note, for performance reasons: only import this file if + * `LogInjection::Configuration` is needed, otherwise + * `LogInjectionCustomizations` should be imported instead. + */ + +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking + +/** + * Provides a taint-tracking configuration for tracking untrusted user input used in log entries. + */ +module LogInjection { + import LogInjectionCustomizations::LogInjection + + /** + * A taint-tracking configuration for tracking untrusted user input used in log entries. + */ + class Configuration extends TaintTracking::Configuration { + Configuration() { this = "LogInjection" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard + } + } +} diff --git a/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll new file mode 100644 index 000000000000..c564951b8d87 --- /dev/null +++ b/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll @@ -0,0 +1,73 @@ +/** + * Provides default sources, sinks and sanitizers for detecting + * "log injection" + * vulnerabilities, as well as extension points for adding your own. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts +private import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.dataflow.new.BarrierGuards + +/** + * Provides default sources, sinks and sanitizers for detecting + * "log injection" + * vulnerabilities, as well as extension points for adding your own. + */ +module LogInjection { + /** + * A data flow source for "log injection" vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for "log injection" vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for "log injection" vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** + * A sanitizer guard for "log injection" vulnerabilities. + */ + abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + + /** + * A source of remote user input, considered as a flow source. + */ + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + + /** + * A logging operation, considered as a flow sink. + */ + class LoggingAsSink extends Sink { + LoggingAsSink() { this = any(Logging write).getAnInput() } + } + + /** + * A comparison with a constant string, considered as a sanitizer-guard. + */ + class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } + + /** + * A call to replace line breaks, considered as a sanitizer. + */ + class ReplaceLineBreaksSanitizer extends Sanitizer, DataFlow::CallCfgNode { + // Note: This sanitizer is not 100% accurate, since: + // - we do not check that all kinds of line breaks are replaced + // - we do not check that one kind of line breaks is not replaced by another + // + // However, we lack a simple way to do better, and the query would likely + // be too noisy without this. + // + // TODO: Consider rewriting using flow states. + ReplaceLineBreaksSanitizer() { + this.getFunction().(DataFlow::AttrRead).getAttributeName() = "replace" and + this.getArg(0).asExpr().(StrConst).getText() in ["\r\n", "\n"] + } + } +} diff --git a/python/ql/src/experimental/Security/CWE-117/LogInjection.qhelp b/python/ql/src/Security/CWE-117/LogInjection.qhelp similarity index 78% rename from python/ql/src/experimental/Security/CWE-117/LogInjection.qhelp rename to python/ql/src/Security/CWE-117/LogInjection.qhelp index e5305220997f..5d112806f86e 100644 --- a/python/ql/src/experimental/Security/CWE-117/LogInjection.qhelp +++ b/python/ql/src/Security/CWE-117/LogInjection.qhelp @@ -8,8 +8,11 @@

If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.

-

Forgery can occur if a user provides some input creating the appearance of multiple - log entries. This can include unescaped new-line characters, or HTML or other markup.

+

Forgery can occur if a user provides some input with characters that are interpreted +when the log output is displayed. If the log is displayed as a plain text file, then new +line characters can be used by a malicious user to create the appearance of multiple log +entries. If the log is displayed as HTML, then arbitrary HTML may be included to spoof +log entries.

@@ -29,14 +32,14 @@ other forms of HTML injection.

-In the example, the name provided by the user is recorded using the log output function (logging.info or app.logger.info, etc.). -In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides Guest%0D%0AUser name: Admin +In the example, the name provided by the user is recorded using the log output function (logging.info or app.logger.info, etc.). +In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides Guest%0D%0AUser name: Admin as a parameter, the log entry will be divided into two lines, the first line is User name: Guest code>, the second line is User name: Admin.

-In a good example, the program uses the replace function to provide parameter processing to the user, and replace \r\n and \n +In a good example, the program uses the replace function to provide parameter processing to the user, and replace \r\n and \n with empty characters. To a certain extent, the occurrence of log injection vulnerabilities is reduced.

diff --git a/python/ql/src/experimental/Security/CWE-117/LogInjection.ql b/python/ql/src/Security/CWE-117/LogInjection.ql similarity index 71% rename from python/ql/src/experimental/Security/CWE-117/LogInjection.ql rename to python/ql/src/Security/CWE-117/LogInjection.ql index 452ef9448251..d29dee55fbdd 100644 --- a/python/ql/src/experimental/Security/CWE-117/LogInjection.ql +++ b/python/ql/src/Security/CWE-117/LogInjection.ql @@ -4,17 +4,18 @@ * insertion of forged log entries by a malicious user. * @kind path-problem * @problem.severity error - * @precision high + * @security-severity 7.8 + * @precision medium * @id py/log-injection * @tags security * external/cwe/cwe-117 */ import python -import experimental.semmle.python.security.injection.LogInjection +import semmle.python.security.dataflow.LogInjection import DataFlow::PathGraph -from LogInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink +from LogInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink where config.hasFlowPath(source, sink) select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(), "User-provided value" diff --git a/python/ql/src/experimental/Security/CWE-117/LogInjectionBad.py b/python/ql/src/Security/CWE-117/LogInjectionBad.py similarity index 100% rename from python/ql/src/experimental/Security/CWE-117/LogInjectionBad.py rename to python/ql/src/Security/CWE-117/LogInjectionBad.py diff --git a/python/ql/src/experimental/Security/CWE-117/LogInjectionGood.py b/python/ql/src/Security/CWE-117/LogInjectionGood.py similarity index 100% rename from python/ql/src/experimental/Security/CWE-117/LogInjectionGood.py rename to python/ql/src/Security/CWE-117/LogInjectionGood.py diff --git a/python/ql/src/change-notes/2022-02-25-promote-log-injection.md b/python/ql/src/change-notes/2022-02-25-promote-log-injection.md new file mode 100644 index 000000000000..79d3aa23ab72 --- /dev/null +++ b/python/ql/src/change-notes/2022-02-25-promote-log-injection.md @@ -0,0 +1,4 @@ +--- +category: newQuery +--- +* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182). diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 3b1f6072f0c2..bb83fbda5838 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -14,36 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks -/** Provides classes for modeling log related APIs. */ -module LogOutput { - /** - * A data flow node for log output. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `LogOutput` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Get the parameter value of the log output function. - */ - abstract DataFlow::Node getAnInput(); - } -} - -/** - * A data flow node for log output. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `LogOutput::Range` instead. - */ -class LogOutput extends DataFlow::Node { - LogOutput::Range range; - - LogOutput() { this = range } - - DataFlow::Node getAnInput() { result = range.getAnInput() } -} - /** Provides classes for modeling LDAP query execution-related APIs. */ module LDAPQuery { /** diff --git a/python/ql/src/experimental/semmle/python/Frameworks.qll b/python/ql/src/experimental/semmle/python/Frameworks.qll index 90fe21cc9331..f438852e3dc5 100644 --- a/python/ql/src/experimental/semmle/python/Frameworks.qll +++ b/python/ql/src/experimental/semmle/python/Frameworks.qll @@ -8,7 +8,6 @@ private import experimental.semmle.python.frameworks.Django private import experimental.semmle.python.frameworks.Werkzeug private import experimental.semmle.python.frameworks.LDAP private import experimental.semmle.python.frameworks.NoSQL -private import experimental.semmle.python.frameworks.Log private import experimental.semmle.python.frameworks.JWT private import experimental.semmle.python.libraries.PyJWT private import experimental.semmle.python.libraries.Authlib diff --git a/python/ql/src/experimental/semmle/python/frameworks/Log.qll b/python/ql/src/experimental/semmle/python/frameworks/Log.qll deleted file mode 100644 index 675b9be1c2d3..000000000000 --- a/python/ql/src/experimental/semmle/python/frameworks/Log.qll +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Provides classes modeling security-relevant aspects of the log libraries. - */ - -private import python -private import semmle.python.dataflow.new.DataFlow -private import semmle.python.dataflow.new.TaintTracking -private import semmle.python.dataflow.new.RemoteFlowSources -private import experimental.semmle.python.Concepts -private import semmle.python.frameworks.Flask -private import semmle.python.ApiGraphs - -/** - * Provides models for Python's log-related libraries. - */ -private module log { - /** - * Log output method list. - * - * See https://docs.python.org/3/library/logging.html#logger-objects - */ - private class LogOutputMethods extends string { - LogOutputMethods() { - this in ["info", "error", "warn", "warning", "debug", "critical", "exception", "log"] - } - } - - /** - * The class used to find the log output method of the `logging` module. - * - * See `LogOutputMethods` - */ - private class LoggingCall extends DataFlow::CallCfgNode, LogOutput::Range { - LoggingCall() { - this = API::moduleImport("logging").getMember(any(LogOutputMethods m)).getACall() - } - - override DataFlow::Node getAnInput() { - this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and - result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg" - or - this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and - result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))] - } - } - - /** - * The class used to find log output methods related to the `logging.getLogger` instance. - * - * See `LogOutputMethods` - */ - private class LoggerCall extends DataFlow::CallCfgNode, LogOutput::Range { - LoggerCall() { - this = - API::moduleImport("logging") - .getMember("getLogger") - .getReturn() - .getMember(any(LogOutputMethods m)) - .getACall() - } - - override DataFlow::Node getAnInput() { - this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and - result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg" - or - this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and - result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))] - } - } - - /** - * The class used to find the relevant log output method of the `flask.Flask.logger` instance (flask application). - * - * See `LogOutputMethods` - */ - private class FlaskLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range { - FlaskLoggingCall() { - this = - Flask::FlaskApp::instance() - .getMember("logger") - .getMember(any(LogOutputMethods m)) - .getACall() - } - - override DataFlow::Node getAnInput() { - this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and - result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg" - or - this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and - result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))] - } - } - - /** - * The class used to find the relevant log output method of the `django.utils.log.request_logger` instance (django application). - * - * See `LogOutputMethods` - */ - private class DjangoLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range { - DjangoLoggingCall() { - this = - API::moduleImport("django") - .getMember("utils") - .getMember("log") - .getMember("request_logger") - .getMember(any(LogOutputMethods m)) - .getACall() - } - - override DataFlow::Node getAnInput() { - this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and - result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg" - or - this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and - result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))] - } - } -} diff --git a/python/ql/src/experimental/semmle/python/security/injection/LogInjection.qll b/python/ql/src/experimental/semmle/python/security/injection/LogInjection.qll deleted file mode 100644 index 4ee9c69a4a91..000000000000 --- a/python/ql/src/experimental/semmle/python/security/injection/LogInjection.qll +++ /dev/null @@ -1,24 +0,0 @@ -import python -import semmle.python.Concepts -import experimental.semmle.python.Concepts -import semmle.python.dataflow.new.DataFlow -import semmle.python.dataflow.new.TaintTracking -import semmle.python.dataflow.new.RemoteFlowSources - -/** - * A taint-tracking configuration for tracking untrusted user input used in log entries. - */ -class LogInjectionFlowConfig extends TaintTracking::Configuration { - LogInjectionFlowConfig() { this = "LogInjectionFlowConfig" } - - override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - - override predicate isSink(DataFlow::Node sink) { sink = any(LogOutput logoutput).getAnInput() } - - override predicate isSanitizer(DataFlow::Node node) { - exists(CallNode call | - node.asCfgNode() = call.getFunction().(AttrNode).getObject("replace") and - call.getArg(0).getNode().(StrConst).getText() in ["\r\n", "\n"] - ) - } -} diff --git a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.qlref deleted file mode 100644 index 021cc357ac28..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/Security/CWE-117/LogInjection.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.expected b/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjection.expected similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.expected rename to python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjection.expected diff --git a/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjection.qlref b/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjection.qlref new file mode 100644 index 000000000000..1837c628c33e --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjection.qlref @@ -0,0 +1 @@ +Security/CWE-117/LogInjection.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionBad.py b/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjectionBad.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionBad.py rename to python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjectionBad.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionGood.py b/python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjectionGood.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionGood.py rename to python/ql/test/query-tests/Security/CWE-117-LogInjection/LogInjectionGood.py