|
40 | 40 | */
|
41 | 41 | #define R5L_POOL_SIZE 4
|
42 | 42 |
|
/*
 * Journal mode of the array's r5c cache.
 *
 * Two modes exist: write-through and write-back. Write-through behaves
 * identically to the existing log-only implementation.
 */
enum r5c_journal_mode {
	/* behaves like the existing log-only implementation */
	R5C_JOURNAL_MODE_WRITE_THROUGH = 0,
	/* write-back journal */
	R5C_JOURNAL_MODE_WRITE_BACK = 1,
};
| 52 | + |
| 53 | +/* |
| 54 | + * raid5 cache state machine |
| 55 | + * |
| 56 | + * With the RAID cache, each stripe works in two phases: |
| 57 | + * - caching phase |
| 58 | + * - writing-out phase |
| 59 | + * |
| 60 | + * These two phases are controlled by bit STRIPE_R5C_CACHING: |
| 61 | + * if STRIPE_R5C_CACHING == 0, the stripe is in writing-out phase |
| 62 | + * if STRIPE_R5C_CACHING == 1, the stripe is in caching phase |
| 63 | + * |
| 64 | + * When there is no journal, or the journal is in write-through mode, |
| 65 | + * the stripe is always in writing-out phase. |
| 66 | + * |
| 67 | + * For write-back journal, the stripe is sent to caching phase on write |
| 68 | + * (r5c_try_caching_write). r5c_make_stripe_write_out() kicks off |
| 69 | + * the write-out phase by clearing STRIPE_R5C_CACHING. |
| 70 | + * |
| 71 | + * Stripes in caching phase do not write the raid disks. Instead, all |
| 72 | + * writes are committed from the log device. Therefore, a stripe in |
| 73 | + * caching phase handles writes as: |
| 74 | + * - write to log device |
| 75 | + * - return IO |
| 76 | + * |
| 77 | + * Stripes in writing-out phase handle writes as: |
| 78 | + * - calculate parity |
| 79 | + * - write pending data and parity to journal |
| 80 | + * - write data and parity to raid disks |
| 81 | + * - return IO for pending writes |
| 82 | + */ |
| 83 | + |
43 | 84 | struct r5l_log {
|
44 | 85 | struct md_rdev *rdev;
|
45 | 86 |
|
@@ -96,6 +137,9 @@ struct r5l_log {
|
96 | 137 | spinlock_t no_space_stripes_lock;
|
97 | 138 |
|
98 | 139 | bool need_cache_flush;
|
| 140 | + |
| 141 | + /* for r5c_cache */ |
| 142 | + enum r5c_journal_mode r5c_journal_mode; |
99 | 143 | };
|
100 | 144 |
|
101 | 145 | /*
|
@@ -133,6 +177,12 @@ enum r5l_io_unit_state {
|
133 | 177 | IO_UNIT_STRIPE_END = 3, /* stripes data finished writing to raid */
|
134 | 178 | };
|
135 | 179 |
|
| 180 | +bool r5c_is_writeback(struct r5l_log *log) |
| 181 | +{ |
| 182 | + return (log != NULL && |
| 183 | + log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK); |
| 184 | +} |
| 185 | + |
136 | 186 | static sector_t r5l_ring_add(struct r5l_log *log, sector_t start, sector_t inc)
|
137 | 187 | {
|
138 | 188 | start += inc;
|
@@ -168,12 +218,51 @@ static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
|
168 | 218 | io->state = state;
|
169 | 219 | }
|
170 | 220 |
|
| 221 | +/* |
| 222 | + * Put the stripe into writing-out phase by clearing STRIPE_R5C_CACHING. |
| 223 | + * This function should only be called in write-back mode. |
| 224 | + */ |
| 225 | +static void r5c_make_stripe_write_out(struct stripe_head *sh) |
| 226 | +{ |
| 227 | + struct r5conf *conf = sh->raid_conf; |
| 228 | + struct r5l_log *log = conf->log; |
| 229 | + |
| 230 | + BUG_ON(!r5c_is_writeback(log)); |
| 231 | + |
| 232 | + WARN_ON(!test_bit(STRIPE_R5C_CACHING, &sh->state)); |
| 233 | + clear_bit(STRIPE_R5C_CACHING, &sh->state); |
| 234 | +} |
| 235 | + |
| 236 | +/* |
| 237 | + * Setting proper flags after writing (or flushing) data and/or parity to the |
| 238 | + * log device. This is called from r5l_log_endio() or r5l_log_flush_endio(). |
| 239 | + */ |
| 240 | +static void r5c_finish_cache_stripe(struct stripe_head *sh) |
| 241 | +{ |
| 242 | + struct r5l_log *log = sh->raid_conf->log; |
| 243 | + |
| 244 | + if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) { |
| 245 | + BUG_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); |
| 246 | + /* |
| 247 | + * Set R5_InJournal for parity dev[pd_idx]. This means |
| 248 | + * all data AND parity in the journal. For RAID 6, it is |
| 249 | + * NOT necessary to set the flag for dev[qd_idx], as the |
| 250 | + * two parities are written out together. |
| 251 | + */ |
| 252 | + set_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags); |
| 253 | + } else |
| 254 | + BUG(); /* write-back logic in next patch */ |
| 255 | +} |
| 256 | + |
171 | 257 | static void r5l_io_run_stripes(struct r5l_io_unit *io)
|
172 | 258 | {
|
173 | 259 | struct stripe_head *sh, *next;
|
174 | 260 |
|
175 | 261 | list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
|
176 | 262 | list_del_init(&sh->log_list);
|
| 263 | + |
| 264 | + r5c_finish_cache_stripe(sh); |
| 265 | + |
177 | 266 | set_bit(STRIPE_HANDLE, &sh->state);
|
178 | 267 | raid5_release_stripe(sh);
|
179 | 268 | }
|
@@ -412,18 +501,19 @@ static int r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh,
|
412 | 501 | r5l_append_payload_page(log, sh->dev[i].page);
|
413 | 502 | }
|
414 | 503 |
|
415 |
| - if (sh->qd_idx >= 0) { |
| 504 | + if (parity_pages == 2) { |
416 | 505 | r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY,
|
417 | 506 | sh->sector, sh->dev[sh->pd_idx].log_checksum,
|
418 | 507 | sh->dev[sh->qd_idx].log_checksum, true);
|
419 | 508 | r5l_append_payload_page(log, sh->dev[sh->pd_idx].page);
|
420 | 509 | r5l_append_payload_page(log, sh->dev[sh->qd_idx].page);
|
421 |
| - } else { |
| 510 | + } else if (parity_pages == 1) { |
422 | 511 | r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY,
|
423 | 512 | sh->sector, sh->dev[sh->pd_idx].log_checksum,
|
424 | 513 | 0, false);
|
425 | 514 | r5l_append_payload_page(log, sh->dev[sh->pd_idx].page);
|
426 |
| - } |
| 515 | + } else /* Just writing data, not parity, in caching phase */ |
| 516 | + BUG_ON(parity_pages != 0); |
427 | 517 |
|
428 | 518 | list_add_tail(&sh->log_list, &io->stripe_list);
|
429 | 519 | atomic_inc(&io->pending_stripe);
|
@@ -455,6 +545,8 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
|
455 | 545 | return -EAGAIN;
|
456 | 546 | }
|
457 | 547 |
|
| 548 | + WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); |
| 549 | + |
458 | 550 | for (i = 0; i < sh->disks; i++) {
|
459 | 551 | void *addr;
|
460 | 552 |
|
@@ -1112,6 +1204,49 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp)
|
1112 | 1204 | set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
1113 | 1205 | }
|
1114 | 1206 |
|
| 1207 | +/* |
| 1208 | + * Try handle write operation in caching phase. This function should only |
| 1209 | + * be called in write-back mode. |
| 1210 | + * |
| 1211 | + * If all outstanding writes can be handled in caching phase, returns 0 |
| 1212 | + * If writes requires write-out phase, call r5c_make_stripe_write_out() |
| 1213 | + * and returns -EAGAIN |
| 1214 | + */ |
| 1215 | +int r5c_try_caching_write(struct r5conf *conf, |
| 1216 | + struct stripe_head *sh, |
| 1217 | + struct stripe_head_state *s, |
| 1218 | + int disks) |
| 1219 | +{ |
| 1220 | + struct r5l_log *log = conf->log; |
| 1221 | + |
| 1222 | + BUG_ON(!r5c_is_writeback(log)); |
| 1223 | + |
| 1224 | + /* more write-back logic in next patches */ |
| 1225 | + r5c_make_stripe_write_out(sh); |
| 1226 | + return -EAGAIN; |
| 1227 | +} |
| 1228 | + |
| 1229 | +/* |
| 1230 | + * clean up the stripe (clear R5_InJournal for dev[pd_idx] etc.) after the |
| 1231 | + * stripe is committed to RAID disks. |
| 1232 | + */ |
| 1233 | +void r5c_finish_stripe_write_out(struct r5conf *conf, |
| 1234 | + struct stripe_head *sh, |
| 1235 | + struct stripe_head_state *s) |
| 1236 | +{ |
| 1237 | + if (!conf->log || |
| 1238 | + !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)) |
| 1239 | + return; |
| 1240 | + |
| 1241 | + WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); |
| 1242 | + clear_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags); |
| 1243 | + |
| 1244 | + if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) |
| 1245 | + return; |
| 1246 | + BUG(); /* write-back logic in following patches */ |
| 1247 | +} |
| 1248 | + |
| 1249 | + |
1115 | 1250 | static int r5l_load_log(struct r5l_log *log)
|
1116 | 1251 | {
|
1117 | 1252 | struct md_rdev *rdev = log->rdev;
|
@@ -1249,6 +1384,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
|
1249 | 1384 | INIT_LIST_HEAD(&log->no_space_stripes);
|
1250 | 1385 | spin_lock_init(&log->no_space_stripes_lock);
|
1251 | 1386 |
|
| 1387 | + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; |
| 1388 | + |
1252 | 1389 | if (r5l_load_log(log))
|
1253 | 1390 | goto error;
|
1254 | 1391 |
|
|
0 commit comments