Commit 6d87c22

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
 "This has a mix of bug fixes and cleanups.

  Alex's patch fixes a rare race in RBD. Ilya's patches fix an ENOENT
  check when a second rbd image is mapped and a couple memory leaks.
  Zheng fixes several issues with fragmented directories and multiple
  MDSs. Josh fixes a spin/sleep issue, and Josh and Guangliang's
  patches fix setting and unsetting RBD images read-only.

  Naturally there are several other cleanups mixed in for good measure"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (23 commits)
  rbd: only set disk to read-only once
  rbd: move calls that may sleep out of spin lock range
  rbd: add ioctl for rbd
  ceph: use truncate_pagecache() instead of truncate_inode_pages()
  ceph: include time stamp in every MDS request
  rbd: fix ida/idr memory leak
  rbd: use reference counts for image requests
  rbd: fix osd_request memory leak in __rbd_dev_header_watch_sync()
  rbd: make sure we have latest osdmap on 'rbd map'
  libceph: add ceph_monc_wait_osdmap()
  libceph: mon_get_version request infrastructure
  libceph: recognize poolop requests in debugfs
  ceph: refactor readpage_nounlock() to make the logic clearer
  mds: check cap ID when handling cap export message
  ceph: remember subtree root dirfrag's auth MDS
  ceph: introduce ceph_fill_fragtree()
  ceph: handle cap import atomically
  ceph: pre-allocate ceph_cap struct for ceph_add_cap()
  ceph: update inode fields according to issued caps
  rbd: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  ...
2 parents 338c09a + 22001f6 commit 6d87c22

File tree

14 files changed: +670 −286 lines


drivers/block/rbd.c

Lines changed: 197 additions & 45 deletions
@@ -541,7 +541,6 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
                 return -ENOENT;
 
         (void) get_device(&rbd_dev->dev);
-        set_device_ro(bdev, rbd_dev->mapping.read_only);
 
         return 0;
 }
@@ -559,10 +558,76 @@ static void rbd_release(struct gendisk *disk, fmode_t mode)
         put_device(&rbd_dev->dev);
 }
 
+static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
+{
+        int ret = 0;
+        int val;
+        bool ro;
+        bool ro_changed = false;
+
+        /* get_user() may sleep, so call it before taking rbd_dev->lock */
+        if (get_user(val, (int __user *)(arg)))
+                return -EFAULT;
+
+        ro = val ? true : false;
+        /* Snapshot doesn't allow to write*/
+        if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro)
+                return -EROFS;
+
+        spin_lock_irq(&rbd_dev->lock);
+        /* prevent others open this device */
+        if (rbd_dev->open_count > 1) {
+                ret = -EBUSY;
+                goto out;
+        }
+
+        if (rbd_dev->mapping.read_only != ro) {
+                rbd_dev->mapping.read_only = ro;
+                ro_changed = true;
+        }
+
+out:
+        spin_unlock_irq(&rbd_dev->lock);
+        /* set_disk_ro() may sleep, so call it after releasing rbd_dev->lock */
+        if (ret == 0 && ro_changed)
+                set_disk_ro(rbd_dev->disk, ro ? 1 : 0);
+
+        return ret;
+}
+
+static int rbd_ioctl(struct block_device *bdev, fmode_t mode,
+                     unsigned int cmd, unsigned long arg)
+{
+        struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
+        int ret = 0;
+
+        switch (cmd) {
+        case BLKROSET:
+                ret = rbd_ioctl_set_ro(rbd_dev, arg);
+                break;
+        default:
+                ret = -ENOTTY;
+        }
+
+        return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode,
+                            unsigned int cmd, unsigned long arg)
+{
+        return rbd_ioctl(bdev, mode, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
 static const struct block_device_operations rbd_bd_ops = {
         .owner = THIS_MODULE,
         .open = rbd_open,
         .release = rbd_release,
+        .ioctl = rbd_ioctl,
+#ifdef CONFIG_COMPAT
+        .compat_ioctl = rbd_compat_ioctl,
+#endif
 };
 
 /*
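
For context, the new rbd_ioctl()/rbd_ioctl_set_ro() path above is reached through the standard block-layer BLKROSET ioctl, the same request issued by blockdev --setro and --setrw. Below is a minimal userspace sketch (not part of this commit) of toggling a mapped image read-only; the device path /dev/rbd0 is an assumption, and the program would typically be run as root.

/* Hedged sketch: flip an rbd block device to read-only via BLKROSET.
 * Assumes the image is mapped at /dev/rbd0; adjust the path as needed. */
#include <fcntl.h>
#include <linux/fs.h>      /* BLKROSET */
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        int ro = 1;                        /* 1 = read-only, 0 = read-write */
        int fd = open("/dev/rbd0", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* With this commit, the request lands in rbd_ioctl() and then
         * rbd_ioctl_set_ro(); it fails with EBUSY if the device has other
         * openers and with EROFS when trying to make a snapshot writable. */
        if (ioctl(fd, BLKROSET, &ro) < 0)
                perror("ioctl(BLKROSET)");

        close(fd);
        return 0;
}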
@@ -1382,6 +1447,13 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
         kref_put(&obj_request->kref, rbd_obj_request_destroy);
 }
 
+static void rbd_img_request_get(struct rbd_img_request *img_request)
+{
+        dout("%s: img %p (was %d)\n", __func__, img_request,
+             atomic_read(&img_request->kref.refcount));
+        kref_get(&img_request->kref);
+}
+
 static bool img_request_child_test(struct rbd_img_request *img_request);
 static void rbd_parent_request_destroy(struct kref *kref);
 static void rbd_img_request_destroy(struct kref *kref);
@@ -2142,6 +2214,7 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
         img_request->next_completion = which;
 out:
         spin_unlock_irq(&img_request->completion_lock);
+        rbd_img_request_put(img_request);
 
         if (!more)
                 rbd_img_request_complete(img_request);
@@ -2242,6 +2315,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
                         goto out_unwind;
                 obj_request->osd_req = osd_req;
                 obj_request->callback = rbd_img_obj_callback;
+                rbd_img_request_get(img_request);
 
                 if (write_request) {
                         osd_req_op_alloc_hint_init(osd_req, which,
@@ -2872,56 +2946,55 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
 }
 
 /*
- * Request sync osd watch/unwatch. The value of "start" determines
- * whether a watch request is being initiated or torn down.
+ * Initiate a watch request, synchronously.
  */
-static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
 {
         struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
         struct rbd_obj_request *obj_request;
         int ret;
 
-        rbd_assert(start ^ !!rbd_dev->watch_event);
-        rbd_assert(start ^ !!rbd_dev->watch_request);
+        rbd_assert(!rbd_dev->watch_event);
+        rbd_assert(!rbd_dev->watch_request);
 
-        if (start) {
-                ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
-                                             &rbd_dev->watch_event);
-                if (ret < 0)
-                        return ret;
-                rbd_assert(rbd_dev->watch_event != NULL);
-        }
+        ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
+                                     &rbd_dev->watch_event);
+        if (ret < 0)
+                return ret;
+
+        rbd_assert(rbd_dev->watch_event);
 
-        ret = -ENOMEM;
         obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
-                                                        OBJ_REQUEST_NODATA);
-        if (!obj_request)
+                                             OBJ_REQUEST_NODATA);
+        if (!obj_request) {
+                ret = -ENOMEM;
                 goto out_cancel;
+        }
 
         obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
                                                   obj_request);
-        if (!obj_request->osd_req)
-                goto out_cancel;
+        if (!obj_request->osd_req) {
+                ret = -ENOMEM;
+                goto out_put;
+        }
 
-        if (start)
-                ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
-        else
-                ceph_osdc_unregister_linger_request(osdc,
-                                        rbd_dev->watch_request->osd_req);
+        ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
 
         osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
-                              rbd_dev->watch_event->cookie, 0, start ? 1 : 0);
+                              rbd_dev->watch_event->cookie, 0, 1);
         rbd_osd_req_format_write(obj_request);
 
         ret = rbd_obj_request_submit(osdc, obj_request);
         if (ret)
-                goto out_cancel;
+                goto out_linger;
+
         ret = rbd_obj_request_wait(obj_request);
         if (ret)
-                goto out_cancel;
+                goto out_linger;
+
         ret = obj_request->result;
         if (ret)
-                goto out_cancel;
+                goto out_linger;
 
         /*
          * A watch request is set to linger, so the underlying osd
@@ -2931,36 +3004,84 @@ static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
          * it. We'll drop that reference (below) after we've
          * unregistered it.
          */
-        if (start) {
-                rbd_dev->watch_request = obj_request;
+        rbd_dev->watch_request = obj_request;
 
-                return 0;
+        return 0;
+
+out_linger:
+        ceph_osdc_unregister_linger_request(osdc, obj_request->osd_req);
+out_put:
+        rbd_obj_request_put(obj_request);
+out_cancel:
+        ceph_osdc_cancel_event(rbd_dev->watch_event);
+        rbd_dev->watch_event = NULL;
+
+        return ret;
+}
+
+/*
+ * Tear down a watch request, synchronously.
+ */
+static int __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
+{
+        struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+        struct rbd_obj_request *obj_request;
+        int ret;
+
+        rbd_assert(rbd_dev->watch_event);
+        rbd_assert(rbd_dev->watch_request);
+
+        obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+                                             OBJ_REQUEST_NODATA);
+        if (!obj_request) {
+                ret = -ENOMEM;
+                goto out_cancel;
+        }
+
+        obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
+                                                  obj_request);
+        if (!obj_request->osd_req) {
+                ret = -ENOMEM;
+                goto out_put;
         }
 
+        osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
+                              rbd_dev->watch_event->cookie, 0, 0);
+        rbd_osd_req_format_write(obj_request);
+
+        ret = rbd_obj_request_submit(osdc, obj_request);
+        if (ret)
+                goto out_put;
+
+        ret = rbd_obj_request_wait(obj_request);
+        if (ret)
+                goto out_put;
+
+        ret = obj_request->result;
+        if (ret)
+                goto out_put;
+
         /* We have successfully torn down the watch request */
 
+        ceph_osdc_unregister_linger_request(osdc,
+                                            rbd_dev->watch_request->osd_req);
         rbd_obj_request_put(rbd_dev->watch_request);
         rbd_dev->watch_request = NULL;
+
+out_put:
+        rbd_obj_request_put(obj_request);
 out_cancel:
-        /* Cancel the event if we're tearing down, or on error */
         ceph_osdc_cancel_event(rbd_dev->watch_event);
         rbd_dev->watch_event = NULL;
-        if (obj_request)
-                rbd_obj_request_put(obj_request);
 
         return ret;
 }
 
-static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
-{
-        return __rbd_dev_header_watch_sync(rbd_dev, true);
-}
-
 static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
 {
         int ret;
 
-        ret = __rbd_dev_header_watch_sync(rbd_dev, false);
+        ret = __rbd_dev_header_unwatch_sync(rbd_dev);
         if (ret) {
                 rbd_warn(rbd_dev, "unable to tear down watch request: %d\n",
                          ret);
@@ -3058,7 +3179,6 @@ static void rbd_request_fn(struct request_queue *q)
                 __releases(q->queue_lock) __acquires(q->queue_lock)
 {
         struct rbd_device *rbd_dev = q->queuedata;
-        bool read_only = rbd_dev->mapping.read_only;
         struct request *rq;
         int result;
 
@@ -3094,7 +3214,7 @@ static void rbd_request_fn(struct request_queue *q)
 
                 if (write_request) {
                         result = -EROFS;
-                        if (read_only)
+                        if (rbd_dev->mapping.read_only)
                                 goto end_request;
                         rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
                 }
@@ -4682,6 +4802,38 @@ static int rbd_add_parse_args(const char *buf,
         return ret;
 }
 
+/*
+ * Return pool id (>= 0) or a negative error code.
+ */
+static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
+{
+        u64 newest_epoch;
+        unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
+        int tries = 0;
+        int ret;
+
+again:
+        ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name);
+        if (ret == -ENOENT && tries++ < 1) {
+                ret = ceph_monc_do_get_version(&rbdc->client->monc, "osdmap",
+                                               &newest_epoch);
+                if (ret < 0)
+                        return ret;
+
+                if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
+                        ceph_monc_request_next_osdmap(&rbdc->client->monc);
+                        (void) ceph_monc_wait_osdmap(&rbdc->client->monc,
+                                                     newest_epoch, timeout);
+                        goto again;
+                } else {
+                        /* the osdmap we have is new enough */
+                        return -ENOENT;
+                }
+        }
+
+        return ret;
+}
+
 /*
  * An rbd format 2 image has a unique identifier, distinct from the
  * name given to it by the user. Internally, that identifier is
@@ -4752,7 +4904,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 
                 image_id = ceph_extract_encoded_string(&p, p + ret,
                                                        NULL, GFP_NOIO);
-                ret = IS_ERR(image_id) ? PTR_ERR(image_id) : 0;
+                ret = PTR_ERR_OR_ZERO(image_id);
                 if (!ret)
                         rbd_dev->image_format = 2;
         } else {
@@ -4907,6 +5059,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
         if (ret)
                 goto err_out_disk;
         set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
+        set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
 
         ret = rbd_bus_add_dev(rbd_dev);
         if (ret)
@@ -5053,7 +5206,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
         struct rbd_options *rbd_opts = NULL;
         struct rbd_spec *spec = NULL;
         struct rbd_client *rbdc;
-        struct ceph_osd_client *osdc;
         bool read_only;
         int rc = -ENOMEM;
 
@@ -5075,8 +5227,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
         }
 
         /* pick the pool */
-        osdc = &rbdc->client->osdc;
-        rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
+        rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
         if (rc < 0)
                 goto err_out_client;
         spec->pool_id = (u64)rc;
@@ -5387,6 +5538,7 @@ static int __init rbd_init(void)
 
 static void __exit rbd_exit(void)
 {
+        ida_destroy(&rbd_dev_id_ida);
         rbd_sysfs_cleanup();
         if (single_major)
                 unregister_blkdev(rbd_major, RBD_DRV_NAME);
