Skip to content

Commit 89a59c1

Browse files
committed
rbd: copyup with an empty snapshot context (aka deep-copyup)
This is the core of the deep-flatten feature: sending a copyup request (i.e. a guarded write of the data read from the parent) with an empty snapshot context (snaps = [], seq = 0) causes the OSD to reflect the write in all existing snapshots. This allows "rbd flatten" to fully disconnect the clone image and its snapshots from the parent and make the parent snapshot removable. The actual modification request is sent only after the deep-copyup request has completed. Waiting for the deep-copyup reply is unnecessary; this will be improved in the future. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
1 parent 3a48250 commit 89a59c1

File tree

1 file changed

+79
-10
lines changed

1 file changed

+79
-10
lines changed

drivers/block/rbd.c

Lines changed: 79 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -221,22 +221,32 @@ enum obj_operation_type {
221221
* Writes go through the following state machine to deal with
222222
* layering:
223223
*
224-
*                       need copyup
225-
* RBD_OBJ_WRITE_GUARD ---------------> RBD_OBJ_WRITE_COPYUP
226-
*        |     ^                              |
227-
*        v     \------------------------------/
228-
*      done
229-
*        ^
230-
*        |
231-
* RBD_OBJ_WRITE_FLAT
224+
*            . . . . . RBD_OBJ_WRITE_GUARD. . . . . . . . . . . . . .
225+
*            .                 |                                    .
226+
*            .                 v                                    .
227+
*            .    RBD_OBJ_WRITE_READ_FROM_PARENT. . .               .
228+
*            .                 |                    .               .
229+
*            .                 v                    v (deep-copyup  .
230+
*    (image  .   RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC   .  not needed)  .
231+
* flattened) v                 |                    .               .
232+
*            .                 v                    .               .
233+
*            . . . .RBD_OBJ_WRITE_COPYUP_OPS. . . . .      (copyup  .
234+
*                              |                       not needed)  v
235+
*                              v                                    .
236+
*                            done . . . . . . . . . . . . . . . . . .
237+
*                              ^
238+
*                              |
239+
*                     RBD_OBJ_WRITE_FLAT
232240
*
233241
* Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether
234-
* there is a parent or not.
242+
* assert_exists guard is needed or not (in some cases it's not needed
243+
* even if there is a parent).
235244
*/
236245
enum rbd_obj_write_state {
237246
RBD_OBJ_WRITE_FLAT = 1,
238247
RBD_OBJ_WRITE_GUARD,
239248
RBD_OBJ_WRITE_READ_FROM_PARENT,
249+
RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC,
240250
RBD_OBJ_WRITE_COPYUP_OPS,
241251
};
242252

@@ -422,6 +432,10 @@ static DEFINE_IDA(rbd_dev_id_ida);
422432

423433
static struct workqueue_struct *rbd_wq;
424434

435+
static struct ceph_snap_context rbd_empty_snapc = {
436+
.nref = REFCOUNT_INIT(1),
437+
};
438+
425439
/*
426440
* single-major requires >= 0.75 version of userspace rbd utility.
427441
*/
@@ -2461,6 +2475,38 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
24612475

24622476
#define MODS_ONLY U32_MAX
24632477

2478+
static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req,
2479+
u32 bytes)
2480+
{
2481+
int ret;
2482+
2483+
dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
2484+
rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT);
2485+
rbd_assert(bytes > 0 && bytes != MODS_ONLY);
2486+
rbd_osd_req_destroy(obj_req->osd_req);
2487+
2488+
obj_req->osd_req = __rbd_osd_req_create(obj_req, &rbd_empty_snapc, 1);
2489+
if (!obj_req->osd_req)
2490+
return -ENOMEM;
2491+
2492+
ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup");
2493+
if (ret)
2494+
return ret;
2495+
2496+
osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
2497+
obj_req->copyup_bvecs,
2498+
obj_req->copyup_bvec_count,
2499+
bytes);
2500+
rbd_osd_req_format_write(obj_req);
2501+
2502+
ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
2503+
if (ret)
2504+
return ret;
2505+
2506+
rbd_obj_request_submit(obj_req);
2507+
return 0;
2508+
}
2509+
24642510
static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
24652511
{
24662512
struct rbd_img_request *img_req = obj_req->img_request;
@@ -2469,7 +2515,8 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
24692515
int ret;
24702516

24712517
dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
2472-
rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT);
2518+
rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT ||
2519+
obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_CALL);
24732520
rbd_osd_req_destroy(obj_req->osd_req);
24742521

24752522
switch (img_req->op_type) {
@@ -2531,6 +2578,17 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
25312578
bytes = 0;
25322579
}
25332580

2581+
if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
2582+
/*
2583+
* Send a copyup request with an empty snapshot context to
2584+
* deep-copyup the object through all existing snapshots.
2585+
* A second request with the current snapshot context will be
2586+
* sent for the actual modification.
2587+
*/
2588+
obj_req->write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC;
2589+
return rbd_obj_issue_copyup_empty_snapc(obj_req, bytes);
2590+
}
2591+
25342592
obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
25352593
return rbd_obj_issue_copyup_ops(obj_req, bytes);
25362594
}
@@ -2632,6 +2690,17 @@ static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req)
26322690
return true;
26332691
}
26342692
return false;
2693+
case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC:
2694+
if (obj_req->result)
2695+
return true;
2696+
2697+
obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
2698+
ret = rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY);
2699+
if (ret) {
2700+
obj_req->result = ret;
2701+
return true;
2702+
}
2703+
return false;
26352704
default:
26362705
BUG();
26372706
}

0 commit comments

Comments
 (0)