diff --git a/python/ql/src/experimental/Security/CWE-117/LogInjection.qhelp b/python/ql/src/experimental/Security/CWE-117/LogInjection.qhelp new file mode 100644 index 000000000000..e5305220997f --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-117/LogInjection.qhelp @@ -0,0 +1,49 @@ + + + + + + +

If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.

+ +

Forgery can occur if a user provides some input creating the appearance of multiple + log entries. This can include unescaped new-line characters, or HTML or other markup.

+
+ + +

+User input should be suitably sanitized before it is logged. +

+

+If the log entries are plain text then line breaks should be removed from user input, using for example +replace(old, new) or similar. Care should also be taken that user input is clearly marked +in log entries, and that a malicious user cannot cause confusion in other ways. +

+

+For log entries that will be displayed in HTML, user input should be HTML encoded before being logged, to prevent forgery and +other forms of HTML injection. +

+
+ + +

+In the example, the name provided by the user is recorded using the log output function (logging.info or app.logger.info, etc.). +In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides Guest%0D%0AUser name: Admin +as a parameter, the log entry will be divided into two lines, the first line is User name: Guest code>, the second line is User name: Admin. +

+ + +

+In a good example, the program uses the replace function to provide parameter processing to the user, and replace \r\n and \n +with empty characters. To a certain extent, the occurrence of log injection vulnerabilities is reduced. +

+ + +
+ + +
  • OWASP: Log Injection.
  • +
    +
    diff --git a/python/ql/src/experimental/Security/CWE-117/LogInjection.ql b/python/ql/src/experimental/Security/CWE-117/LogInjection.ql new file mode 100644 index 000000000000..452ef9448251 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-117/LogInjection.ql @@ -0,0 +1,20 @@ +/** + * @name Log Injection + * @description Building log entries from user-controlled data is vulnerable to + * insertion of forged log entries by a malicious user. + * @kind path-problem + * @problem.severity error + * @precision high + * @id py/log-injection + * @tags security + * external/cwe/cwe-117 + */ + +import python +import experimental.semmle.python.security.injection.LogInjection +import DataFlow::PathGraph + +from LogInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(), + "User-provided value" diff --git a/python/ql/src/experimental/Security/CWE-117/LogInjectionBad.py b/python/ql/src/experimental/Security/CWE-117/LogInjectionBad.py new file mode 100644 index 000000000000..0c0cbe1a8fee --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-117/LogInjectionBad.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Desc :Log Injection +""" +from flask import Flask +from flask import request +from django.utils.log import request_logger +import logging + +logging.basicConfig(level=logging.DEBUG) + +app = Flask(__name__) + +@app.route('/bad1') +def bad1(): + name = request.args.get('name') + app.logger.info('User name: ' + name) # Bad + return 'bad1' + +@app.route('/bad2') +def bad2(): + name = request.args.get('name') + logging.info('User name: ' + name) # Bad + return 'bad2' + +@app.route('/bad3') +def bad3(): + name = request.args.get('name') + request_logger.warn('User name: ' + name) # Bad + return 'bad3' + +@app.route('/bad4') +def bad4(): + name = request.args.get('name') + logtest = logging.getLogger('test') + logtest.debug('User name: ' + name) # Bad + return 'bad4' + +if __name__ == '__main__': + app.debug = True + handler = logging.FileHandler('log') + app.logger.addHandler(handler) + app.run() diff --git a/python/ql/src/experimental/Security/CWE-117/LogInjectionGood.py b/python/ql/src/experimental/Security/CWE-117/LogInjectionGood.py new file mode 100644 index 000000000000..d9279f2e4822 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-117/LogInjectionGood.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Desc :Log Injection +""" +from flask import Flask +from flask import request +import logging + +logging.basicConfig(level=logging.DEBUG) + +app = Flask(__name__) + +@app.route('/good1') +def good1(): + name = request.args.get('name') + name = name.replace('\r\n','').replace('\n','') + logging.info('User name: ' + name) # Good + return 'good1' + +if __name__ == '__main__': + app.debug = True + handler = logging.FileHandler('log') + app.logger.addHandler(handler) + app.run() diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index f87caa884971..60f12f8c02bb 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -14,6 +14,36 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks +/** Provides classes for modeling log related APIs. */ +module LogOutput { + /** + * A data flow node for log output. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `LogOutput` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Get the parameter value of the log output function. + */ + abstract DataFlow::Node getAnInput(); + } +} + +/** + * A data flow node for log output. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `LogOutput::Range` instead. + */ +class LogOutput extends DataFlow::Node { + LogOutput::Range range; + + LogOutput() { this = range } + + DataFlow::Node getAnInput() { result = range.getAnInput() } +} + /** Provides classes for modeling Regular Expression-related APIs. */ module RegexExecution { /** diff --git a/python/ql/src/experimental/semmle/python/Frameworks.qll b/python/ql/src/experimental/semmle/python/Frameworks.qll index e8467383824f..2ad560cfc1e0 100644 --- a/python/ql/src/experimental/semmle/python/Frameworks.qll +++ b/python/ql/src/experimental/semmle/python/Frameworks.qll @@ -5,3 +5,4 @@ private import experimental.semmle.python.frameworks.Stdlib private import experimental.semmle.python.frameworks.LDAP private import experimental.semmle.python.frameworks.NoSQL +private import experimental.semmle.python.frameworks.Log diff --git a/python/ql/src/experimental/semmle/python/frameworks/Log.qll b/python/ql/src/experimental/semmle/python/frameworks/Log.qll new file mode 100644 index 000000000000..675b9be1c2d3 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/frameworks/Log.qll @@ -0,0 +1,118 @@ +/** + * Provides classes modeling security-relevant aspects of the log libraries. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.TaintTracking +private import semmle.python.dataflow.new.RemoteFlowSources +private import experimental.semmle.python.Concepts +private import semmle.python.frameworks.Flask +private import semmle.python.ApiGraphs + +/** + * Provides models for Python's log-related libraries. + */ +private module log { + /** + * Log output method list. + * + * See https://docs.python.org/3/library/logging.html#logger-objects + */ + private class LogOutputMethods extends string { + LogOutputMethods() { + this in ["info", "error", "warn", "warning", "debug", "critical", "exception", "log"] + } + } + + /** + * The class used to find the log output method of the `logging` module. + * + * See `LogOutputMethods` + */ + private class LoggingCall extends DataFlow::CallCfgNode, LogOutput::Range { + LoggingCall() { + this = API::moduleImport("logging").getMember(any(LogOutputMethods m)).getACall() + } + + override DataFlow::Node getAnInput() { + this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and + result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg" + or + this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and + result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))] + } + } + + /** + * The class used to find log output methods related to the `logging.getLogger` instance. + * + * See `LogOutputMethods` + */ + private class LoggerCall extends DataFlow::CallCfgNode, LogOutput::Range { + LoggerCall() { + this = + API::moduleImport("logging") + .getMember("getLogger") + .getReturn() + .getMember(any(LogOutputMethods m)) + .getACall() + } + + override DataFlow::Node getAnInput() { + this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and + result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg" + or + this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and + result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))] + } + } + + /** + * The class used to find the relevant log output method of the `flask.Flask.logger` instance (flask application). + * + * See `LogOutputMethods` + */ + private class FlaskLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range { + FlaskLoggingCall() { + this = + Flask::FlaskApp::instance() + .getMember("logger") + .getMember(any(LogOutputMethods m)) + .getACall() + } + + override DataFlow::Node getAnInput() { + this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and + result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg" + or + this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and + result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))] + } + } + + /** + * The class used to find the relevant log output method of the `django.utils.log.request_logger` instance (django application). + * + * See `LogOutputMethods` + */ + private class DjangoLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range { + DjangoLoggingCall() { + this = + API::moduleImport("django") + .getMember("utils") + .getMember("log") + .getMember("request_logger") + .getMember(any(LogOutputMethods m)) + .getACall() + } + + override DataFlow::Node getAnInput() { + this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and + result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg" + or + this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and + result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))] + } + } +} diff --git a/python/ql/src/experimental/semmle/python/security/injection/LogInjection.qll b/python/ql/src/experimental/semmle/python/security/injection/LogInjection.qll new file mode 100644 index 000000000000..4ee9c69a4a91 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/injection/LogInjection.qll @@ -0,0 +1,24 @@ +import python +import semmle.python.Concepts +import experimental.semmle.python.Concepts +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking +import semmle.python.dataflow.new.RemoteFlowSources + +/** + * A taint-tracking configuration for tracking untrusted user input used in log entries. + */ +class LogInjectionFlowConfig extends TaintTracking::Configuration { + LogInjectionFlowConfig() { this = "LogInjectionFlowConfig" } + + override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } + + override predicate isSink(DataFlow::Node sink) { sink = any(LogOutput logoutput).getAnInput() } + + override predicate isSanitizer(DataFlow::Node node) { + exists(CallNode call | + node.asCfgNode() = call.getFunction().(AttrNode).getObject("replace") and + call.getArg(0).getNode().(StrConst).getText() in ["\r\n", "\n"] + ) + } +} diff --git a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.expected new file mode 100644 index 000000000000..e27b369e6405 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.expected @@ -0,0 +1,28 @@ +edges +| LogInjectionBad.py:17:12:17:18 | ControlFlowNode for request | LogInjectionBad.py:17:12:17:23 | ControlFlowNode for Attribute | +| LogInjectionBad.py:17:12:17:23 | ControlFlowNode for Attribute | LogInjectionBad.py:18:21:18:40 | ControlFlowNode for BinaryExpr | +| LogInjectionBad.py:23:12:23:18 | ControlFlowNode for request | LogInjectionBad.py:23:12:23:23 | ControlFlowNode for Attribute | +| LogInjectionBad.py:23:12:23:23 | ControlFlowNode for Attribute | LogInjectionBad.py:24:18:24:37 | ControlFlowNode for BinaryExpr | +| LogInjectionBad.py:29:12:29:18 | ControlFlowNode for request | LogInjectionBad.py:29:12:29:23 | ControlFlowNode for Attribute | +| LogInjectionBad.py:29:12:29:23 | ControlFlowNode for Attribute | LogInjectionBad.py:30:25:30:44 | ControlFlowNode for BinaryExpr | +| LogInjectionBad.py:35:12:35:18 | ControlFlowNode for request | LogInjectionBad.py:35:12:35:23 | ControlFlowNode for Attribute | +| LogInjectionBad.py:35:12:35:23 | ControlFlowNode for Attribute | LogInjectionBad.py:37:19:37:38 | ControlFlowNode for BinaryExpr | +nodes +| LogInjectionBad.py:17:12:17:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| LogInjectionBad.py:17:12:17:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| LogInjectionBad.py:18:21:18:40 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | +| LogInjectionBad.py:23:12:23:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| LogInjectionBad.py:23:12:23:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| LogInjectionBad.py:24:18:24:37 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | +| LogInjectionBad.py:29:12:29:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| LogInjectionBad.py:29:12:29:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| LogInjectionBad.py:30:25:30:44 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | +| LogInjectionBad.py:35:12:35:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| LogInjectionBad.py:35:12:35:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| LogInjectionBad.py:37:19:37:38 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | +subpaths +#select +| LogInjectionBad.py:18:21:18:40 | ControlFlowNode for BinaryExpr | LogInjectionBad.py:17:12:17:18 | ControlFlowNode for request | LogInjectionBad.py:18:21:18:40 | ControlFlowNode for BinaryExpr | $@ flows to log entry. | LogInjectionBad.py:17:12:17:18 | ControlFlowNode for request | User-provided value | +| LogInjectionBad.py:24:18:24:37 | ControlFlowNode for BinaryExpr | LogInjectionBad.py:23:12:23:18 | ControlFlowNode for request | LogInjectionBad.py:24:18:24:37 | ControlFlowNode for BinaryExpr | $@ flows to log entry. | LogInjectionBad.py:23:12:23:18 | ControlFlowNode for request | User-provided value | +| LogInjectionBad.py:30:25:30:44 | ControlFlowNode for BinaryExpr | LogInjectionBad.py:29:12:29:18 | ControlFlowNode for request | LogInjectionBad.py:30:25:30:44 | ControlFlowNode for BinaryExpr | $@ flows to log entry. | LogInjectionBad.py:29:12:29:18 | ControlFlowNode for request | User-provided value | +| LogInjectionBad.py:37:19:37:38 | ControlFlowNode for BinaryExpr | LogInjectionBad.py:35:12:35:18 | ControlFlowNode for request | LogInjectionBad.py:37:19:37:38 | ControlFlowNode for BinaryExpr | $@ flows to log entry. | LogInjectionBad.py:35:12:35:18 | ControlFlowNode for request | User-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.qlref new file mode 100644 index 000000000000..021cc357ac28 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjection.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-117/LogInjection.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionBad.py b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionBad.py new file mode 100644 index 000000000000..0c0cbe1a8fee --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionBad.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Desc :Log Injection +""" +from flask import Flask +from flask import request +from django.utils.log import request_logger +import logging + +logging.basicConfig(level=logging.DEBUG) + +app = Flask(__name__) + +@app.route('/bad1') +def bad1(): + name = request.args.get('name') + app.logger.info('User name: ' + name) # Bad + return 'bad1' + +@app.route('/bad2') +def bad2(): + name = request.args.get('name') + logging.info('User name: ' + name) # Bad + return 'bad2' + +@app.route('/bad3') +def bad3(): + name = request.args.get('name') + request_logger.warn('User name: ' + name) # Bad + return 'bad3' + +@app.route('/bad4') +def bad4(): + name = request.args.get('name') + logtest = logging.getLogger('test') + logtest.debug('User name: ' + name) # Bad + return 'bad4' + +if __name__ == '__main__': + app.debug = True + handler = logging.FileHandler('log') + app.logger.addHandler(handler) + app.run() diff --git a/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionGood.py b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionGood.py new file mode 100644 index 000000000000..d9279f2e4822 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-117/LogInjectionGood.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Desc :Log Injection +""" +from flask import Flask +from flask import request +import logging + +logging.basicConfig(level=logging.DEBUG) + +app = Flask(__name__) + +@app.route('/good1') +def good1(): + name = request.args.get('name') + name = name.replace('\r\n','').replace('\n','') + logging.info('User name: ' + name) # Good + return 'good1' + +if __name__ == '__main__': + app.debug = True + handler = logging.FileHandler('log') + app.logger.addHandler(handler) + app.run()