Skip to content

Commit ca4ba96

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
"There are several patches from Ilya fixing RBD allocation lifecycle issues, a series adding a nocephx_sign_messages option (and associated bug fixes/cleanups), several patches from Zheng improving the (directory) fsync behavior, a big improvement in IO for direct-io requests when striping is enabled from Caifeng, and several other small fixes and cleanups"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  libceph: clear msg->con in ceph_msg_release() only
  libceph: add nocephx_sign_messages option
  libceph: stop duplicating client fields in messenger
  libceph: drop authorizer check from cephx msg signing routines
  libceph: msg signing callouts don't need con argument
  libceph: evaluate osd_req_op_data() arguments only once
  ceph: make fsync() wait unsafe requests that created/modified inode
  ceph: add request to i_unsafe_dirops when getting unsafe reply
  libceph: introduce ceph_x_authorizer_cleanup()
  ceph: don't invalidate page cache when inode is no longer used
  rbd: remove duplicate calls to rbd_dev_mapping_clear()
  rbd: set device_type::release instead of device::release
  rbd: don't free rbd_dev outside of the release callback
  rbd: return -ENOMEM instead of pool id if rbd_dev_create() fails
  libceph: use local variable cursor instead of &msg->cursor
  libceph: remove con argument in handle_reply()
  ceph: combine as many iovec as possile into one OSD request
  ceph: fix message length computation
  ceph: fix a comment typo
  rbd: drop null test before destroy functions
2 parents 4aeabc6 + 583d0fe commit ca4ba96

File tree

15 files changed

+314
-222
lines changed

15 files changed

+314
-222
lines changed

drivers/block/rbd.c

Lines changed: 54 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -418,8 +418,6 @@ MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (d
418418

419419
static int rbd_img_request_submit(struct rbd_img_request *img_request);
420420

421-
static void rbd_dev_device_release(struct device *dev);
422-
423421
static ssize_t rbd_add(struct bus_type *bus, const char *buf,
424422
size_t count);
425423
static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
@@ -3991,14 +3989,12 @@ static const struct attribute_group *rbd_attr_groups[] = {
39913989
NULL
39923990
};
39933991

3994-
static void rbd_sysfs_dev_release(struct device *dev)
3995-
{
3996-
}
3992+
static void rbd_dev_release(struct device *dev);
39973993

39983994
static struct device_type rbd_device_type = {
39993995
.name = "rbd",
40003996
.groups = rbd_attr_groups,
4001-
.release = rbd_sysfs_dev_release,
3997+
.release = rbd_dev_release,
40023998
};
40033999

40044000
static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
@@ -4041,6 +4037,25 @@ static void rbd_spec_free(struct kref *kref)
40414037
kfree(spec);
40424038
}
40434039

4040+
static void rbd_dev_release(struct device *dev)
4041+
{
4042+
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
4043+
bool need_put = !!rbd_dev->opts;
4044+
4045+
rbd_put_client(rbd_dev->rbd_client);
4046+
rbd_spec_put(rbd_dev->spec);
4047+
kfree(rbd_dev->opts);
4048+
kfree(rbd_dev);
4049+
4050+
/*
4051+
* This is racy, but way better than putting module outside of
4052+
* the release callback. The race window is pretty small, so
4053+
* doing something similar to dm (dm-builtin.c) is overkill.
4054+
*/
4055+
if (need_put)
4056+
module_put(THIS_MODULE);
4057+
}
4058+
40444059
static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
40454060
struct rbd_spec *spec,
40464061
struct rbd_options *opts)
@@ -4057,6 +4072,11 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
40574072
INIT_LIST_HEAD(&rbd_dev->node);
40584073
init_rwsem(&rbd_dev->header_rwsem);
40594074

4075+
rbd_dev->dev.bus = &rbd_bus_type;
4076+
rbd_dev->dev.type = &rbd_device_type;
4077+
rbd_dev->dev.parent = &rbd_root_dev;
4078+
device_initialize(&rbd_dev->dev);
4079+
40604080
rbd_dev->rbd_client = rbdc;
40614081
rbd_dev->spec = spec;
40624082
rbd_dev->opts = opts;
@@ -4068,15 +4088,21 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
40684088
rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
40694089
rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id);
40704090

4091+
/*
4092+
* If this is a mapping rbd_dev (as opposed to a parent one),
4093+
* pin our module. We have a ref from do_rbd_add(), so use
4094+
* __module_get().
4095+
*/
4096+
if (rbd_dev->opts)
4097+
__module_get(THIS_MODULE);
4098+
40714099
return rbd_dev;
40724100
}
40734101

40744102
static void rbd_dev_destroy(struct rbd_device *rbd_dev)
40754103
{
4076-
rbd_put_client(rbd_dev->rbd_client);
4077-
rbd_spec_put(rbd_dev->spec);
4078-
kfree(rbd_dev->opts);
4079-
kfree(rbd_dev);
4104+
if (rbd_dev)
4105+
put_device(&rbd_dev->dev);
40804106
}
40814107

40824108
/*
@@ -4702,27 +4728,6 @@ static int rbd_dev_header_info(struct rbd_device *rbd_dev)
47024728
return rbd_dev_v2_header_info(rbd_dev);
47034729
}
47044730

4705-
static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
4706-
{
4707-
struct device *dev;
4708-
int ret;
4709-
4710-
dev = &rbd_dev->dev;
4711-
dev->bus = &rbd_bus_type;
4712-
dev->type = &rbd_device_type;
4713-
dev->parent = &rbd_root_dev;
4714-
dev->release = rbd_dev_device_release;
4715-
dev_set_name(dev, "%d", rbd_dev->dev_id);
4716-
ret = device_register(dev);
4717-
4718-
return ret;
4719-
}
4720-
4721-
static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
4722-
{
4723-
device_unregister(&rbd_dev->dev);
4724-
}
4725-
47264731
/*
47274732
* Get a unique rbd identifier for the given new rbd_dev, and add
47284733
* the rbd_dev to the global list.
@@ -5225,7 +5230,8 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
52255230
set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
52265231
set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
52275232

5228-
ret = rbd_bus_add_dev(rbd_dev);
5233+
dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
5234+
ret = device_add(&rbd_dev->dev);
52295235
if (ret)
52305236
goto err_out_mapping;
52315237

@@ -5248,8 +5254,6 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
52485254
unregister_blkdev(rbd_dev->major, rbd_dev->name);
52495255
err_out_id:
52505256
rbd_dev_id_put(rbd_dev);
5251-
rbd_dev_mapping_clear(rbd_dev);
5252-
52535257
return ret;
52545258
}
52555259

@@ -5397,15 +5401,15 @@ static ssize_t do_rbd_add(struct bus_type *bus,
53975401
struct rbd_spec *spec = NULL;
53985402
struct rbd_client *rbdc;
53995403
bool read_only;
5400-
int rc = -ENOMEM;
5404+
int rc;
54015405

54025406
if (!try_module_get(THIS_MODULE))
54035407
return -ENODEV;
54045408

54055409
/* parse add command */
54065410
rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
54075411
if (rc < 0)
5408-
goto err_out_module;
5412+
goto out;
54095413

54105414
rbdc = rbd_get_client(ceph_opts);
54115415
if (IS_ERR(rbdc)) {
@@ -5432,8 +5436,10 @@ static ssize_t do_rbd_add(struct bus_type *bus,
54325436
}
54335437

54345438
rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
5435-
if (!rbd_dev)
5439+
if (!rbd_dev) {
5440+
rc = -ENOMEM;
54365441
goto err_out_client;
5442+
}
54375443
rbdc = NULL; /* rbd_dev now owns this */
54385444
spec = NULL; /* rbd_dev now owns this */
54395445
rbd_opts = NULL; /* rbd_dev now owns this */
@@ -5458,10 +5464,13 @@ static ssize_t do_rbd_add(struct bus_type *bus,
54585464
*/
54595465
rbd_dev_header_unwatch_sync(rbd_dev);
54605466
rbd_dev_image_release(rbd_dev);
5461-
goto err_out_module;
5467+
goto out;
54625468
}
54635469

5464-
return count;
5470+
rc = count;
5471+
out:
5472+
module_put(THIS_MODULE);
5473+
return rc;
54655474

54665475
err_out_rbd_dev:
54675476
rbd_dev_destroy(rbd_dev);
@@ -5470,12 +5479,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
54705479
err_out_args:
54715480
rbd_spec_put(spec);
54725481
kfree(rbd_opts);
5473-
err_out_module:
5474-
module_put(THIS_MODULE);
5475-
5476-
dout("Error adding device %s\n", buf);
5477-
5478-
return (ssize_t)rc;
5482+
goto out;
54795483
}
54805484

54815485
static ssize_t rbd_add(struct bus_type *bus,
@@ -5495,17 +5499,15 @@ static ssize_t rbd_add_single_major(struct bus_type *bus,
54955499
return do_rbd_add(bus, buf, count);
54965500
}
54975501

5498-
static void rbd_dev_device_release(struct device *dev)
5502+
static void rbd_dev_device_release(struct rbd_device *rbd_dev)
54995503
{
5500-
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
5501-
55025504
rbd_free_disk(rbd_dev);
55035505
clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
5506+
device_del(&rbd_dev->dev);
55045507
rbd_dev_mapping_clear(rbd_dev);
55055508
if (!single_major)
55065509
unregister_blkdev(rbd_dev->major, rbd_dev->name);
55075510
rbd_dev_id_put(rbd_dev);
5508-
rbd_dev_mapping_clear(rbd_dev);
55095511
}
55105512

55115513
static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
@@ -5590,9 +5592,8 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
55905592
* rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
55915593
* in a potential use after free of rbd_dev->disk or rbd_dev.
55925594
*/
5593-
rbd_bus_del_dev(rbd_dev);
5595+
rbd_dev_device_release(rbd_dev);
55945596
rbd_dev_image_release(rbd_dev);
5595-
module_put(THIS_MODULE);
55965597

55975598
return count;
55985599
}
@@ -5663,10 +5664,8 @@ static int rbd_slab_init(void)
56635664
if (rbd_segment_name_cache)
56645665
return 0;
56655666
out_err:
5666-
if (rbd_obj_request_cache) {
5667-
kmem_cache_destroy(rbd_obj_request_cache);
5668-
rbd_obj_request_cache = NULL;
5669-
}
5667+
kmem_cache_destroy(rbd_obj_request_cache);
5668+
rbd_obj_request_cache = NULL;
56705669

56715670
kmem_cache_destroy(rbd_img_request_cache);
56725671
rbd_img_request_cache = NULL;

fs/ceph/cache.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
8888
const struct ceph_inode_info* ci = cookie_netfs_data;
8989
uint16_t klen;
9090

91-
/* use ceph virtual inode (id + snaphot) */
91+
/* use ceph virtual inode (id + snapshot) */
9292
klen = sizeof(ci->i_vino);
9393
if (klen > maxbuf)
9494
return 0;

fs/ceph/caps.c

Lines changed: 36 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1655,9 +1655,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
16551655
!S_ISDIR(inode->i_mode) && /* ignore readdir cache */
16561656
ci->i_wrbuffer_ref == 0 && /* no dirty pages... */
16571657
inode->i_data.nrpages && /* have cached pages */
1658-
(file_wanted == 0 || /* no open files */
1659-
(revoking & (CEPH_CAP_FILE_CACHE|
1660-
CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */
1658+
(revoking & (CEPH_CAP_FILE_CACHE|
1659+
CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */
16611660
!tried_invalidate) {
16621661
dout("check_caps trying to invalidate on %p\n", inode);
16631662
if (try_nonblocking_invalidate(inode) < 0) {
@@ -1971,49 +1970,46 @@ static void sync_write_wait(struct inode *inode)
19711970
}
19721971

19731972
/*
1974-
* wait for any uncommitted directory operations to commit.
1973+
* wait for any unsafe requests to complete.
19751974
*/
1976-
static int unsafe_dirop_wait(struct inode *inode)
1975+
static int unsafe_request_wait(struct inode *inode)
19771976
{
19781977
struct ceph_inode_info *ci = ceph_inode(inode);
1979-
struct list_head *head = &ci->i_unsafe_dirops;
1980-
struct ceph_mds_request *req;
1981-
u64 last_tid;
1982-
int ret = 0;
1983-
1984-
if (!S_ISDIR(inode->i_mode))
1985-
return 0;
1978+
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
1979+
int ret, err = 0;
19861980

19871981
spin_lock(&ci->i_unsafe_lock);
1988-
if (list_empty(head))
1989-
goto out;
1990-
1991-
req = list_last_entry(head, struct ceph_mds_request,
1992-
r_unsafe_dir_item);
1993-
last_tid = req->r_tid;
1994-
1995-
do {
1996-
ceph_mdsc_get_request(req);
1997-
spin_unlock(&ci->i_unsafe_lock);
1982+
if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
1983+
req1 = list_last_entry(&ci->i_unsafe_dirops,
1984+
struct ceph_mds_request,
1985+
r_unsafe_dir_item);
1986+
ceph_mdsc_get_request(req1);
1987+
}
1988+
if (!list_empty(&ci->i_unsafe_iops)) {
1989+
req2 = list_last_entry(&ci->i_unsafe_iops,
1990+
struct ceph_mds_request,
1991+
r_unsafe_target_item);
1992+
ceph_mdsc_get_request(req2);
1993+
}
1994+
spin_unlock(&ci->i_unsafe_lock);
19981995

1999-
dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n",
2000-
inode, req->r_tid, last_tid);
2001-
ret = !wait_for_completion_timeout(&req->r_safe_completion,
2002-
ceph_timeout_jiffies(req->r_timeout));
1996+
dout("unsafe_requeset_wait %p wait on tid %llu %llu\n",
1997+
inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
1998+
if (req1) {
1999+
ret = !wait_for_completion_timeout(&req1->r_safe_completion,
2000+
ceph_timeout_jiffies(req1->r_timeout));
20032001
if (ret)
2004-
ret = -EIO; /* timed out */
2005-
2006-
ceph_mdsc_put_request(req);
2007-
2008-
spin_lock(&ci->i_unsafe_lock);
2009-
if (ret || list_empty(head))
2010-
break;
2011-
req = list_first_entry(head, struct ceph_mds_request,
2012-
r_unsafe_dir_item);
2013-
} while (req->r_tid < last_tid);
2014-
out:
2015-
spin_unlock(&ci->i_unsafe_lock);
2016-
return ret;
2002+
err = -EIO;
2003+
ceph_mdsc_put_request(req1);
2004+
}
2005+
if (req2) {
2006+
ret = !wait_for_completion_timeout(&req2->r_safe_completion,
2007+
ceph_timeout_jiffies(req2->r_timeout));
2008+
if (ret)
2009+
err = -EIO;
2010+
ceph_mdsc_put_request(req2);
2011+
}
2012+
return err;
20172013
}
20182014

20192015
int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
@@ -2039,7 +2035,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
20392035
dirty = try_flush_caps(inode, &flush_tid);
20402036
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
20412037

2042-
ret = unsafe_dirop_wait(inode);
2038+
ret = unsafe_request_wait(inode);
20432039

20442040
/*
20452041
* only wait on non-file metadata writeback (the mds

0 commit comments

Comments
 (0)