Skip to content

Commit d5a8ac2

Browse files
sowminiv authored and davem330 committed
RDS-TCP: Make RDS-TCP work correctly when it is set up in a netns other than init_net
Open the sockets calling sock_create_kern() with the correct struct net pointer, and use that struct net pointer when verifying the address passed to rds_bind().

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 1ebd08a commit d5a8ac2

File tree

12 files changed

+59
-27
lines changed

12 files changed

+59
-27
lines changed

net/rds/bind.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
185185
ret = 0;
186186
goto out;
187187
}
188-
trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
188+
trans = rds_trans_get_preferred(sock_net(sock->sk),
189+
sin->sin_addr.s_addr);
189190
if (!trans) {
190191
ret = -EADDRNOTAVAIL;
191192
rds_remove_bound(rs);

net/rds/connection.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ static void rds_conn_reset(struct rds_connection *conn)
117117
* For now they are not garbage collected once they're created. They
118118
* are torn down as the module is removed, if ever.
119119
*/
120-
static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
120+
static struct rds_connection *__rds_conn_create(struct net *net,
121+
__be32 laddr, __be32 faddr,
121122
struct rds_transport *trans, gfp_t gfp,
122123
int is_outgoing)
123124
{
@@ -157,6 +158,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
157158
conn->c_faddr = faddr;
158159
spin_lock_init(&conn->c_lock);
159160
conn->c_next_tx_seq = 1;
161+
rds_conn_net_set(conn, net);
160162

161163
init_waitqueue_head(&conn->c_waitq);
162164
INIT_LIST_HEAD(&conn->c_send_queue);
@@ -174,7 +176,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
174176
* can bind to the destination address then we'd rather the messages
175177
* flow through loopback rather than either transport.
176178
*/
177-
loop_trans = rds_trans_get_preferred(faddr);
179+
loop_trans = rds_trans_get_preferred(net, faddr);
178180
if (loop_trans) {
179181
rds_trans_put(loop_trans);
180182
conn->c_loopback = 1;
@@ -260,17 +262,19 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
260262
return conn;
261263
}
262264

263-
struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
265+
struct rds_connection *rds_conn_create(struct net *net,
266+
__be32 laddr, __be32 faddr,
264267
struct rds_transport *trans, gfp_t gfp)
265268
{
266-
return __rds_conn_create(laddr, faddr, trans, gfp, 0);
269+
return __rds_conn_create(net, laddr, faddr, trans, gfp, 0);
267270
}
268271
EXPORT_SYMBOL_GPL(rds_conn_create);
269272

270-
struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
273+
struct rds_connection *rds_conn_create_outgoing(struct net *net,
274+
__be32 laddr, __be32 faddr,
271275
struct rds_transport *trans, gfp_t gfp)
272276
{
273-
return __rds_conn_create(laddr, faddr, trans, gfp, 1);
277+
return __rds_conn_create(net, laddr, faddr, trans, gfp, 1);
274278
}
275279
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
276280

net/rds/ib.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
317317
* allowed to influence which paths have priority. We could call userspace
318318
* asserting this policy "routing".
319319
*/
320-
static int rds_ib_laddr_check(__be32 addr)
320+
static int rds_ib_laddr_check(struct net *net, __be32 addr)
321321
{
322322
int ret;
323323
struct rdma_cm_id *cm_id;

net/rds/ib_cm.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -448,8 +448,9 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
448448
(unsigned long long)be64_to_cpu(lguid),
449449
(unsigned long long)be64_to_cpu(fguid));
450450

451-
conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
452-
GFP_KERNEL);
451+
/* RDS/IB is not currently netns aware, thus init_net */
452+
conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
453+
&rds_ib_transport, GFP_KERNEL);
453454
if (IS_ERR(conn)) {
454455
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
455456
conn = NULL;

net/rds/iw.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ static void rds_iw_ic_info(struct socket *sock, unsigned int len,
218218
* allowed to influence which paths have priority. We could call userspace
219219
* asserting this policy "routing".
220220
*/
221-
static int rds_iw_laddr_check(__be32 addr)
221+
static int rds_iw_laddr_check(struct net *net, __be32 addr)
222222
{
223223
int ret;
224224
struct rdma_cm_id *cm_id;

net/rds/iw_cm.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,9 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
398398
&dp->dp_saddr, &dp->dp_daddr,
399399
RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
400400

401-
conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_iw_transport,
402-
GFP_KERNEL);
401+
/* RDS/IW is not currently netns aware, thus init_net */
402+
conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
403+
&rds_iw_transport, GFP_KERNEL);
403404
if (IS_ERR(conn)) {
404405
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
405406
conn = NULL;

net/rds/rds.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,21 @@ struct rds_connection {
128128

129129
/* Protocol version */
130130
unsigned int c_version;
131+
possible_net_t c_net;
131132
};
132133

134+
static inline
135+
struct net *rds_conn_net(struct rds_connection *conn)
136+
{
137+
return read_pnet(&conn->c_net);
138+
}
139+
140+
static inline
141+
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
142+
{
143+
write_pnet(&conn->c_net, net);
144+
}
145+
133146
#define RDS_FLAG_CONG_BITMAP 0x01
134147
#define RDS_FLAG_ACK_REQUIRED 0x02
135148
#define RDS_FLAG_RETRANSMITTED 0x04
@@ -417,7 +430,7 @@ struct rds_transport {
417430
unsigned int t_prefer_loopback:1;
418431
unsigned int t_type;
419432

420-
int (*laddr_check)(__be32 addr);
433+
int (*laddr_check)(struct net *net, __be32 addr);
421434
int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
422435
void (*conn_free)(void *data);
423436
int (*conn_connect)(struct rds_connection *conn);
@@ -608,9 +621,11 @@ struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
608621
/* conn.c */
609622
int rds_conn_init(void);
610623
void rds_conn_exit(void);
611-
struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
624+
struct rds_connection *rds_conn_create(struct net *net,
625+
__be32 laddr, __be32 faddr,
612626
struct rds_transport *trans, gfp_t gfp);
613-
struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
627+
struct rds_connection *rds_conn_create_outgoing(struct net *net,
628+
__be32 laddr, __be32 faddr,
614629
struct rds_transport *trans, gfp_t gfp);
615630
void rds_conn_shutdown(struct rds_connection *conn);
616631
void rds_conn_destroy(struct rds_connection *conn);
@@ -795,7 +810,7 @@ void rds_connect_complete(struct rds_connection *conn);
795810
/* transport.c */
796811
int rds_trans_register(struct rds_transport *trans);
797812
void rds_trans_unregister(struct rds_transport *trans);
798-
struct rds_transport *rds_trans_get_preferred(__be32 addr);
813+
struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
799814
void rds_trans_put(struct rds_transport *trans);
800815
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
801816
unsigned int avail);

net/rds/send.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
10231023
if (rs->rs_conn && rs->rs_conn->c_faddr == daddr)
10241024
conn = rs->rs_conn;
10251025
else {
1026-
conn = rds_conn_create_outgoing(rs->rs_bound_addr, daddr,
1026+
conn = rds_conn_create_outgoing(sock_net(sock->sk),
1027+
rs->rs_bound_addr, daddr,
10271028
rs->rs_transport,
10281029
sock->sk->sk_allocation);
10291030
if (IS_ERR(conn)) {

net/rds/tcp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,9 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
189189
spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
190190
}
191191

192-
static int rds_tcp_laddr_check(__be32 addr)
192+
static int rds_tcp_laddr_check(struct net *net, __be32 addr)
193193
{
194-
if (inet_addr_type(&init_net, addr) == RTN_LOCAL)
194+
if (inet_addr_type(net, addr) == RTN_LOCAL)
195195
return 0;
196196
return -EADDRNOTAVAIL;
197197
}

net/rds/tcp_connect.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
7979
struct sockaddr_in src, dest;
8080
int ret;
8181

82-
ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
82+
ret = sock_create_kern(rds_conn_net(conn), PF_INET,
83+
SOCK_STREAM, IPPROTO_TCP, &sock);
8384
if (ret < 0)
8485
goto out;
8586

net/rds/tcp_listen.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,9 @@ static int rds_tcp_accept_one(struct socket *sock)
8585
struct inet_sock *inet;
8686
struct rds_tcp_connection *rs_tcp;
8787

88-
ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
89-
sock->sk->sk_protocol, &new_sock);
88+
ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
89+
sock->sk->sk_type, sock->sk->sk_protocol,
90+
&new_sock);
9091
if (ret)
9192
goto out;
9293

@@ -108,7 +109,8 @@ static int rds_tcp_accept_one(struct socket *sock)
108109
&inet->inet_saddr, ntohs(inet->inet_sport),
109110
&inet->inet_daddr, ntohs(inet->inet_dport));
110111

111-
conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr,
112+
conn = rds_conn_create(sock_net(sock->sk),
113+
inet->inet_saddr, inet->inet_daddr,
112114
&rds_tcp_transport, GFP_KERNEL);
113115
if (IS_ERR(conn)) {
114116
ret = PTR_ERR(conn);
@@ -187,7 +189,13 @@ int rds_tcp_listen_init(void)
187189
struct socket *sock = NULL;
188190
int ret;
189191

190-
ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
192+
/* MUST call sock_create_kern directly so that we avoid get_net()
193+
* in sk_alloc(). Doing a get_net() will result in cleanup_net()
194+
* never getting invoked, which will leave sock and other things
195+
* in limbo.
196+
*/
197+
ret = sock_create_kern(current->nsproxy->net_ns, PF_INET,
198+
SOCK_STREAM, IPPROTO_TCP, &sock);
191199
if (ret < 0)
192200
goto out;
193201

net/rds/transport.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ void rds_trans_put(struct rds_transport *trans)
7777
module_put(trans->t_owner);
7878
}
7979

80-
struct rds_transport *rds_trans_get_preferred(__be32 addr)
80+
struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr)
8181
{
8282
struct rds_transport *ret = NULL;
8383
struct rds_transport *trans;
@@ -90,7 +90,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
9090
for (i = 0; i < RDS_TRANS_COUNT; i++) {
9191
trans = transports[i];
9292

93-
if (trans && (trans->laddr_check(addr) == 0) &&
93+
if (trans && (trans->laddr_check(net, addr) == 0) &&
9494
(!trans->t_owner || try_module_get(trans->t_owner))) {
9595
ret = trans;
9696
break;

0 commit comments

Comments
 (0)