Skip to content

Python: Add SqlAlchemy model #5680

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jul 22, 2021
33 changes: 33 additions & 0 deletions python/ql/src/experimental/semmle/python/Concepts.qll
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,36 @@ class LDAPEscape extends DataFlow::Node {
*/
DataFlow::Node getAnInput() { result = range.getAnInput() }
}

/** Provides classes for modeling SQL sanitization libraries. */
module SQLEscape {
/**
* A data-flow node that collects functions that escape SQL statements.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SQLEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the raw SQL statement.
*/
abstract DataFlow::Node getAnInput();
}
}

/**
* A data-flow node that collects functions escaping SQL statements.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SQLEscape::Range` instead.
*/
class SQLEscape extends DataFlow::Node {
SQLEscape::Range range;

SQLEscape() { this = range }

/**
* Gets the argument containing the raw SQL statement.
*/
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
148 changes: 148 additions & 0 deletions python/ql/src/experimental/semmle/python/frameworks/SqlAlchemy.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/**
* Provides classes modeling security-relevant aspects of the 'SqlAlchemy' package.
* See https://pypi.org/project/SQLAlchemy/.
*/

private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
private import experimental.semmle.python.Concepts

private module SqlAlchemy {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any desire on your side Rasmus to version this model? SqlAlchemy came out with 2.0 and I don't know how compatible this model library is for 2.0.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2.0 has still not been released it seems (https://pypi.org/project/SQLAlchemy/#history), so I think it's OK. Thanks for letting me know about this though 👍

/**
* Returns an instantization of a SqlAlchemy Session object.
* See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session and
* https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.sessionmaker
*/
private API::Node getSqlAlchemySessionInstance() {
result = API::moduleImport("sqlalchemy.orm").getMember("Session").getReturn() or
result = API::moduleImport("sqlalchemy.orm").getMember("sessionmaker").getReturn().getReturn()
}

/**
* Returns an instantization of a SqlAlchemy Engine object.
* See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine
*/
private API::Node getSqlAlchemyEngineInstance() {
result = API::moduleImport("sqlalchemy").getMember("create_engine").getReturn()
}

/**
* Returns an instantization of a SqlAlchemy Query object.
* See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
*/
private API::Node getSqlAlchemyQueryInstance() {
result = getSqlAlchemySessionInstance().getMember("query").getReturn()
}

/**
* A call to `execute` meant to execute an SQL expression
* See the following links:
* - https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=execute#sqlalchemy.engine.Connection.execute
* - https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=execute#sqlalchemy.engine.Engine.execute
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html?highlight=execute#sqlalchemy.orm.Session.execute
*/
private class SqlAlchemyExecuteCall extends DataFlow::CallCfgNode, SqlExecution::Range {
SqlAlchemyExecuteCall() {
// new way
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wrote this and I have no idea what I was referencing. This model library points to SqlAlchemy 1.X so I don't think I'm referencing 2.0.

this = getSqlAlchemySessionInstance().getMember("execute").getACall() or
this =
getSqlAlchemyEngineInstance()
.getMember(["connect", "begin"])
.getReturn()
.getMember("execute")
.getACall()
}

override DataFlow::Node getSql() { result = this.getArg(0) }
}

/**
* A call to `scalar` meant to execute an SQL expression
* See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.scalar and
* https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=scalar#sqlalchemy.engine.Engine.scalar
*/
private class SqlAlchemyScalarCall extends DataFlow::CallCfgNode, SqlExecution::Range {
SqlAlchemyScalarCall() {
this =
[getSqlAlchemySessionInstance(), getSqlAlchemyEngineInstance()]
.getMember("scalar")
.getACall()
}

override DataFlow::Node getSql() { result = this.getArg(0) }
}

/**
* A call on a Query object
* See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
*/
private class SqlAlchemyQueryCall extends DataFlow::CallCfgNode, SqlExecution::Range {
SqlAlchemyQueryCall() {
this =
getSqlAlchemyQueryInstance()
.getMember(any(SqlAlchemyVulnerableMethodNames methodName))
.getACall()
}

override DataFlow::Node getSql() { result = this.getArg(0) }
}

/**
* This class represents a list of methods vulnerable to sql injection.
*
* See https://github.com/jty-team/codeql/pull/2#issue-611592361
*/
private class SqlAlchemyVulnerableMethodNames extends string {
SqlAlchemyVulnerableMethodNames() { this in ["filter", "filter_by", "group_by", "order_by"] }
}

/**
* Additional taint-steps for `sqlalchemy.text()`
*
* See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
* See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause
*/
class SqlAlchemyTextAdditionalTaintSteps extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::CallCfgNode call |
(
call = API::moduleImport("sqlalchemy").getMember("text").getACall()
or
call = API::moduleImport("sqlalchemy").getMember("sql").getMember("text").getACall()
or
call =
API::moduleImport("sqlalchemy")
.getMember("sql")
.getMember("expression")
.getMember("text")
.getACall()
or
call =
API::moduleImport("sqlalchemy")
.getMember("sql")
.getMember("expression")
.getMember("TextClause")
.getACall()
) and
nodeFrom in [call.getArg(0), call.getArgByName("text")] and
nodeTo = call
)
}
}

/**
* Gets a reference to `sqlescapy.sqlescape`.
*
* See https://pypi.org/project/sqlescapy/
*/
class SQLEscapySanitizerCall extends DataFlow::CallCfgNode, SQLEscape::Range {
SQLEscapySanitizerCall() {
this = API::moduleImport("sqlescapy").getMember("sqlescape").getACall()
}

override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import python
import experimental.meta.ConceptsTest
import experimental.semmle.python.frameworks.SqlAlchemy
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import experimental.meta.InlineTaintTest
import experimental.semmle.python.frameworks.SqlAlchemy
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import sqlalchemy
from sqlalchemy import Column, Integer, String, ForeignKey, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.pool import StaticPool
from sqlalchemy.orm import relationship, backref, sessionmaker, joinedload
from sqlalchemy.sql import text

engine = create_engine(
'sqlite:///:memory:',
echo=True,
connect_args={"check_same_thread": False},
poolclass=StaticPool
)

Base = declarative_base()

class User(Base):
__tablename__ = 'users'

id = Column(Integer, primary_key=True)
name = Column(String)

Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
session = Session()

ed_user = User(name='ed')
ed_user2 = User(name='george')

session.add(ed_user)
session.add(ed_user2)

session.commit()

# Injection without requiring the text() taint-step
session.query(User).filter_by(name="some sql") # $ MISSING: getSql="some sql"
session.scalar("some sql") # $ getSql="some sql"
engine.scalar("some sql") # $ getSql="some sql"
session.execute("some sql") # $ getSql="some sql"

with engine.connect() as connection:
connection.execute("some sql") # $ getSql="some sql"

with engine.begin() as connection:
connection.execute("some sql") # $ getSql="some sql"

# Injection requiring the text() taint-step
t = text("some sql")
session.query(User).filter(t) # $ getSql=t
session.query(User).group_by(User.id).having(t) # $ getSql=User.id MISSING: getSql=t
session.query(User).group_by(t).first() # $ getSql=t
session.query(User).order_by(t).first() # $ getSql=t

query = select(User).where(User.name == t) # $ MISSING: getSql=t
with engine.connect() as conn:
conn.execute(query) # $ getSql=query
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import sqlalchemy

def test_taint():
ts = TAINTED_STRING

ensure_tainted(
ts, # $ tainted
sqlalchemy.text(ts), # $ tainted
sqlalchemy.sql.text(ts),# $ tainted
sqlalchemy.sql.expression.text(ts),# $ tainted
sqlalchemy.sql.expression.TextClause(ts),# $ tainted
)