Skip to content

Commit 58890c0

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph fixes from Sage Weil: "Two of Alex's patches deal with a race when resetting server connections for open RBD images, one demotes some non-fatal BUGs to WARNs, and my patch fixes a protocol feature bit failure path." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: libceph: fix protocol feature mismatch failure path; libceph: WARN, don't BUG on unexpected connection states; libceph: always reset osds when kicking; libceph: move linger requests sooner in kick_requests()
2 parents 42288fe + 0fa6ebc commit 58890c0

File tree

2 files changed

+29
-28
lines changed

2 files changed

+29
-28
lines changed

net/ceph/messenger.c

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ static void reset_connection(struct ceph_connection *con)
506506
{
507507
/* reset connection, out_queue, msg_ and connect_seq */
508508
/* discard existing out_queue and msg_seq */
509+
dout("reset_connection %p\n", con);
509510
ceph_msg_remove_list(&con->out_queue);
510511
ceph_msg_remove_list(&con->out_sent);
511512

@@ -561,7 +562,7 @@ void ceph_con_open(struct ceph_connection *con,
561562
mutex_lock(&con->mutex);
562563
dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
563564

564-
BUG_ON(con->state != CON_STATE_CLOSED);
565+
WARN_ON(con->state != CON_STATE_CLOSED);
565566
con->state = CON_STATE_PREOPEN;
566567

567568
con->peer_name.type = (__u8) entity_type;
@@ -1506,13 +1507,6 @@ static int process_banner(struct ceph_connection *con)
15061507
return 0;
15071508
}
15081509

1509-
static void fail_protocol(struct ceph_connection *con)
1510-
{
1511-
reset_connection(con);
1512-
BUG_ON(con->state != CON_STATE_NEGOTIATING);
1513-
con->state = CON_STATE_CLOSED;
1514-
}
1515-
15161510
static int process_connect(struct ceph_connection *con)
15171511
{
15181512
u64 sup_feat = con->msgr->supported_features;
@@ -1530,7 +1524,7 @@ static int process_connect(struct ceph_connection *con)
15301524
ceph_pr_addr(&con->peer_addr.in_addr),
15311525
sup_feat, server_feat, server_feat & ~sup_feat);
15321526
con->error_msg = "missing required protocol features";
1533-
fail_protocol(con);
1527+
reset_connection(con);
15341528
return -1;
15351529

15361530
case CEPH_MSGR_TAG_BADPROTOVER:
@@ -1541,7 +1535,7 @@ static int process_connect(struct ceph_connection *con)
15411535
le32_to_cpu(con->out_connect.protocol_version),
15421536
le32_to_cpu(con->in_reply.protocol_version));
15431537
con->error_msg = "protocol version mismatch";
1544-
fail_protocol(con);
1538+
reset_connection(con);
15451539
return -1;
15461540

15471541
case CEPH_MSGR_TAG_BADAUTHORIZER:
@@ -1631,11 +1625,11 @@ static int process_connect(struct ceph_connection *con)
16311625
ceph_pr_addr(&con->peer_addr.in_addr),
16321626
req_feat, server_feat, req_feat & ~server_feat);
16331627
con->error_msg = "missing required protocol features";
1634-
fail_protocol(con);
1628+
reset_connection(con);
16351629
return -1;
16361630
}
16371631

1638-
BUG_ON(con->state != CON_STATE_NEGOTIATING);
1632+
WARN_ON(con->state != CON_STATE_NEGOTIATING);
16391633
con->state = CON_STATE_OPEN;
16401634

16411635
con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
@@ -2132,7 +2126,6 @@ static int try_read(struct ceph_connection *con)
21322126
if (ret < 0)
21332127
goto out;
21342128

2135-
BUG_ON(con->state != CON_STATE_CONNECTING);
21362129
con->state = CON_STATE_NEGOTIATING;
21372130

21382131
/*
@@ -2160,7 +2153,7 @@ static int try_read(struct ceph_connection *con)
21602153
goto more;
21612154
}
21622155

2163-
BUG_ON(con->state != CON_STATE_OPEN);
2156+
WARN_ON(con->state != CON_STATE_OPEN);
21642157

21652158
if (con->in_base_pos < 0) {
21662159
/*
@@ -2382,7 +2375,7 @@ static void ceph_fault(struct ceph_connection *con)
23822375
dout("fault %p state %lu to peer %s\n",
23832376
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
23842377

2385-
BUG_ON(con->state != CON_STATE_CONNECTING &&
2378+
WARN_ON(con->state != CON_STATE_CONNECTING &&
23862379
con->state != CON_STATE_NEGOTIATING &&
23872380
con->state != CON_STATE_OPEN);
23882381

net/ceph/osd_client.c

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,7 +1270,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
12701270
* Requeue requests whose mapping to an OSD has changed. If requests map to
12711271
* no osd, request a new map.
12721272
*
1273-
* Caller should hold map_sem for read and request_mutex.
1273+
* Caller should hold map_sem for read.
12741274
*/
12751275
static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
12761276
{
@@ -1284,6 +1284,24 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
12841284
for (p = rb_first(&osdc->requests); p; ) {
12851285
req = rb_entry(p, struct ceph_osd_request, r_node);
12861286
p = rb_next(p);
1287+
1288+
/*
1289+
* For linger requests that have not yet been
1290+
* registered, move them to the linger list; they'll
1291+
* be sent to the osd in the loop below. Unregister
1292+
* the request before re-registering it as a linger
1293+
* request to ensure the __map_request() below
1294+
* will decide it needs to be sent.
1295+
*/
1296+
if (req->r_linger && list_empty(&req->r_linger_item)) {
1297+
dout("%p tid %llu restart on osd%d\n",
1298+
req, req->r_tid,
1299+
req->r_osd ? req->r_osd->o_osd : -1);
1300+
__unregister_request(osdc, req);
1301+
__register_linger_request(osdc, req);
1302+
continue;
1303+
}
1304+
12871305
err = __map_request(osdc, req, force_resend);
12881306
if (err < 0)
12891307
continue; /* error */
@@ -1298,24 +1316,14 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
12981316
req->r_flags |= CEPH_OSD_FLAG_RETRY;
12991317
}
13001318
}
1301-
if (req->r_linger && list_empty(&req->r_linger_item)) {
1302-
/*
1303-
* register as a linger so that we will
1304-
* re-submit below and get a new tid
1305-
*/
1306-
dout("%p tid %llu restart on osd%d\n",
1307-
req, req->r_tid,
1308-
req->r_osd ? req->r_osd->o_osd : -1);
1309-
__register_linger_request(osdc, req);
1310-
__unregister_request(osdc, req);
1311-
}
13121319
}
13131320

13141321
list_for_each_entry_safe(req, nreq, &osdc->req_linger,
13151322
r_linger_item) {
13161323
dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
13171324

13181325
err = __map_request(osdc, req, force_resend);
1326+
dout("__map_request returned %d\n", err);
13191327
if (err == 0)
13201328
continue; /* no change and no osd was specified */
13211329
if (err < 0)
@@ -1337,6 +1345,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
13371345
dout("%d requests for down osds, need new map\n", needmap);
13381346
ceph_monc_request_next_osdmap(&osdc->client->monc);
13391347
}
1348+
reset_changed_osds(osdc);
13401349
}
13411350

13421351

@@ -1393,7 +1402,6 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
13931402
osdc->osdmap = newmap;
13941403
}
13951404
kick_requests(osdc, 0);
1396-
reset_changed_osds(osdc);
13971405
} else {
13981406
dout("ignoring incremental map %u len %d\n",
13991407
epoch, maplen);

0 commit comments

Comments
 (0)