@@ -221,22 +221,32 @@ enum obj_operation_type {
221
221
* Writes go through the following state machine to deal with
222
222
* layering:
223
223
*
224
- *                       need copyup
225
- * RBD_OBJ_WRITE_GUARD ---------------> RBD_OBJ_WRITE_COPYUP
226
- *        |     ^                              |
227
- *        v     \------------------------------/
228
- *      done
229
- *        ^
230
- *        |
231
- * RBD_OBJ_WRITE_FLAT
224
+ *            . . . . . RBD_OBJ_WRITE_GUARD. . . . . . . . . . . . . .
225
+ *            .                 |                                    .
226
+ *            .                 v                                    .
227
+ *            .    RBD_OBJ_WRITE_READ_FROM_PARENT. . .               .
228
+ *            .                 |                    .               .
229
+ *            .                 v                    v (deep-copyup  .
230
+ *    (image  .   RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC   .  not needed)  .
231
+ * flattened) v                 |                    .               .
232
+ *            .                 v                    .               .
233
+ *            . . . .RBD_OBJ_WRITE_COPYUP_OPS. . . . .      (copyup  .
234
+ *                              |                       not needed)  v
235
+ *                              v                                    .
236
+ *                            done . . . . . . . . . . . . . . . . . .
237
+ *                              ^
238
+ *                              |
239
+ *                     RBD_OBJ_WRITE_FLAT
232
240
*
233
241
* Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether
234
- * there is a parent or not.
242
+ * an assert_exists guard is needed or not (in some cases it's not needed
243
+ * even if there is a parent).
235
244
*/
236
245
enum rbd_obj_write_state {
	/* Start state when no assert_exists guard is needed. */
	RBD_OBJ_WRITE_FLAT			= 1,
	/* Start state when an assert_exists guard is needed. */
	RBD_OBJ_WRITE_GUARD			= 2,
	/* Follows _GUARD when a copyup turns out to be needed. */
	RBD_OBJ_WRITE_READ_FROM_PARENT		= 3,
	/* Copyup request with an empty snapshot context has been issued. */
	RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC	= 4,
	/* Request built by rbd_obj_issue_copyup_ops() has been issued. */
	RBD_OBJ_WRITE_COPYUP_OPS		= 5,
};
242
252
@@ -422,6 +432,10 @@ static DEFINE_IDA(rbd_dev_id_ida);
422
432
423
433
static struct workqueue_struct * rbd_wq ;
424
434
435
+ static struct ceph_snap_context rbd_empty_snapc = {
436
+ .nref = REFCOUNT_INIT (1 ),
437
+ };
438
+
425
439
/*
426
440
* single-major requires >= 0.75 version of userspace rbd utility.
427
441
*/
@@ -2461,6 +2475,38 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
2461
2475
2462
2476
#define MODS_ONLY U32_MAX
2463
2477
2478
+ static int rbd_obj_issue_copyup_empty_snapc (struct rbd_obj_request * obj_req ,
2479
+ u32 bytes )
2480
+ {
2481
+ int ret ;
2482
+
2483
+ dout ("%s obj_req %p bytes %u\n" , __func__ , obj_req , bytes );
2484
+ rbd_assert (obj_req -> osd_req -> r_ops [0 ].op == CEPH_OSD_OP_STAT );
2485
+ rbd_assert (bytes > 0 && bytes != MODS_ONLY );
2486
+ rbd_osd_req_destroy (obj_req -> osd_req );
2487
+
2488
+ obj_req -> osd_req = __rbd_osd_req_create (obj_req , & rbd_empty_snapc , 1 );
2489
+ if (!obj_req -> osd_req )
2490
+ return - ENOMEM ;
2491
+
2492
+ ret = osd_req_op_cls_init (obj_req -> osd_req , 0 , "rbd" , "copyup" );
2493
+ if (ret )
2494
+ return ret ;
2495
+
2496
+ osd_req_op_cls_request_data_bvecs (obj_req -> osd_req , 0 ,
2497
+ obj_req -> copyup_bvecs ,
2498
+ obj_req -> copyup_bvec_count ,
2499
+ bytes );
2500
+ rbd_osd_req_format_write (obj_req );
2501
+
2502
+ ret = ceph_osdc_alloc_messages (obj_req -> osd_req , GFP_NOIO );
2503
+ if (ret )
2504
+ return ret ;
2505
+
2506
+ rbd_obj_request_submit (obj_req );
2507
+ return 0 ;
2508
+ }
2509
+
2464
2510
static int rbd_obj_issue_copyup_ops (struct rbd_obj_request * obj_req , u32 bytes )
2465
2511
{
2466
2512
struct rbd_img_request * img_req = obj_req -> img_request ;
@@ -2469,7 +2515,8 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
2469
2515
int ret ;
2470
2516
2471
2517
dout ("%s obj_req %p bytes %u\n" , __func__ , obj_req , bytes );
2472
- rbd_assert (obj_req -> osd_req -> r_ops [0 ].op == CEPH_OSD_OP_STAT );
2518
+ rbd_assert (obj_req -> osd_req -> r_ops [0 ].op == CEPH_OSD_OP_STAT ||
2519
+ obj_req -> osd_req -> r_ops [0 ].op == CEPH_OSD_OP_CALL );
2473
2520
rbd_osd_req_destroy (obj_req -> osd_req );
2474
2521
2475
2522
switch (img_req -> op_type ) {
@@ -2531,6 +2578,17 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
2531
2578
bytes = 0 ;
2532
2579
}
2533
2580
2581
+ if (obj_req -> img_request -> snapc -> num_snaps && bytes > 0 ) {
2582
+ /*
2583
+ * Send a copyup request with an empty snapshot context to
2584
+ * deep-copyup the object through all existing snapshots.
2585
+ * A second request with the current snapshot context will be
2586
+ * sent for the actual modification.
2587
+ */
2588
+ obj_req -> write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC ;
2589
+ return rbd_obj_issue_copyup_empty_snapc (obj_req , bytes );
2590
+ }
2591
+
2534
2592
obj_req -> write_state = RBD_OBJ_WRITE_COPYUP_OPS ;
2535
2593
return rbd_obj_issue_copyup_ops (obj_req , bytes );
2536
2594
}
@@ -2632,6 +2690,17 @@ static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req)
2632
2690
return true;
2633
2691
}
2634
2692
return false;
2693
+ case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC :
2694
+ if (obj_req -> result )
2695
+ return true;
2696
+
2697
+ obj_req -> write_state = RBD_OBJ_WRITE_COPYUP_OPS ;
2698
+ ret = rbd_obj_issue_copyup_ops (obj_req , MODS_ONLY );
2699
+ if (ret ) {
2700
+ obj_req -> result = ret ;
2701
+ return true;
2702
+ }
2703
+ return false;
2635
2704
default :
2636
2705
BUG ();
2637
2706
}
0 commit comments