Skip to content

Commit 724376a

Browse files
Mikulas Patocka authored and
snitm (Mike Snitzer) committed
dm integrity: implement fair range locks
dm-integrity locks a range of sectors to prevent concurrent I/O or journal writeback. These locks were not fair - so that many small overlapping I/Os could starve a large I/O indefinitely. Fix this by making the range locks fair. The ranges that are waiting are added to the list "wait_list". If a new I/O overlaps some of the waiting I/Os, it is not dispatched, but it is also added to that wait list. Entries on the wait list are processed in first-in-first-out order, so that an I/O can't starve indefinitely. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
1 parent 518748b commit 724376a

File tree

1 file changed

+59
-9
lines changed

1 file changed

+59
-9
lines changed

drivers/md/dm-integrity.c

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ struct dm_integrity_c {
186186

187187
/* these variables are locked with endio_wait.lock */
188188
struct rb_root in_progress;
189+
struct list_head wait_list;
189190
wait_queue_head_t endio_wait;
190191
struct workqueue_struct *wait_wq;
191192

@@ -233,7 +234,14 @@ struct dm_integrity_c {
233234
/*
 * A locked range of sectors.  A range is in exactly one of two states:
 * held (inserted in ic->in_progress rb-tree) or waiting (queued on
 * ic->wait_list).  The union below overlays the per-state data, so the
 * rb-tree linkage and the wait-list linkage share storage -- inserting
 * the range into the tree clobbers task/wait_entry and vice versa.
 */
struct dm_integrity_range {
	sector_t logical_sector;	/* first sector of the range */
	unsigned n_sectors;		/* length of the range in sectors */
	bool waiting;			/* true while queued on ic->wait_list */
	union {
		/* valid while held: node in ic->in_progress */
		struct rb_node node;
		/* valid while waiting: FIFO entry plus the task to wake */
		struct {
			struct task_struct *task;
			struct list_head wait_entry;
		};
	};
};
238246

239247
struct dm_integrity_io {
@@ -867,13 +875,27 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig
867875
}
868876
}
869877

870-
static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
878+
static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
879+
{
880+
return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
881+
range2->logical_sector + range2->n_sectors > range2->logical_sector;
882+
}
883+
884+
static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
871885
{
872886
struct rb_node **n = &ic->in_progress.rb_node;
873887
struct rb_node *parent;
874888

875889
BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1));
876890

891+
if (likely(check_waiting)) {
892+
struct dm_integrity_range *range;
893+
list_for_each_entry(range, &ic->wait_list, wait_entry) {
894+
if (unlikely(ranges_overlap(range, new_range)))
895+
return false;
896+
}
897+
}
898+
877899
parent = NULL;
878900

879901
while (*n) {
@@ -898,7 +920,22 @@ static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *
898920
/*
 * Release @range and hand the freed sectors to waiters, in FIFO order.
 * Caller must hold ic->endio_wait.lock (the lock that guards both
 * ic->in_progress and ic->wait_list).
 *
 * Waiters are taken strictly from the head of ic->wait_list so that a
 * large range cannot be starved indefinitely by a stream of small
 * overlapping I/Os: as soon as the head waiter cannot be admitted, we
 * stop rather than wake anyone queued behind it.
 */
static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
{
	rb_erase(&range->node, &ic->in_progress);
	while (unlikely(!list_empty(&ic->wait_list))) {
		struct dm_integrity_range *last_range =
			list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
		struct task_struct *last_range_task;
		/* head waiter doesn't use the sectors we just freed -- done */
		if (!ranges_overlap(range, last_range))
			break;
		/*
		 * Save the task pointer now: task/wait_entry share a union
		 * with the rb_node, so a successful add_new_range() below
		 * clobbers them when it links the range into the tree.
		 */
		last_range_task = last_range->task;
		list_del(&last_range->wait_entry);
		if (!add_new_range(ic, last_range, false)) {
			/* still conflicts with a held range: restore the
			 * union fields and put it back at the head so FIFO
			 * order is preserved, then stop */
			last_range->task = last_range_task;
			list_add(&last_range->wait_entry, &ic->wait_list);
			break;
		}
		/* range is now held on the waiter's behalf; let it run */
		last_range->waiting = false;
		wake_up_process(last_range_task);
	}
}
903940

904941
static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
@@ -910,6 +947,19 @@ static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *r
910947
spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
911948
}
912949

950+
/*
 * Queue @new_range on the FIFO wait list and sleep until
 * remove_range_unlocked() has inserted it into the in-progress tree on
 * our behalf (signalled by ->waiting being cleared).
 *
 * Called with ic->endio_wait.lock held (irqs disabled); returns with
 * the lock re-acquired and the range held.  Must not be called from a
 * context that cannot sleep.
 */
static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
{
	new_range->waiting = true;
	list_add_tail(&new_range->wait_entry, &ic->wait_list);
	new_range->task = current;
	do {
		/*
		 * Set the task state before dropping the lock: a waker that
		 * runs between spin_unlock_irq() and io_schedule() clears
		 * ->waiting and wakes us, and the state change guarantees
		 * that wakeup is not lost.
		 */
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&ic->endio_wait.lock);
		io_schedule();
		spin_lock_irq(&ic->endio_wait.lock);
	} while (unlikely(new_range->waiting));	/* guard against spurious wakeups */
}
962+
913963
static void init_journal_node(struct journal_node *node)
914964
{
915965
RB_CLEAR_NODE(&node->node);
@@ -1658,7 +1708,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
16581708
}
16591709
}
16601710
}
1661-
if (unlikely(!add_new_range(ic, &dio->range))) {
1711+
if (unlikely(!add_new_range(ic, &dio->range, true))) {
16621712
/*
16631713
* We must not sleep in the request routine because it could
16641714
* stall bios on current->bio_list.
@@ -1670,10 +1720,8 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
16701720
INIT_WORK(&dio->work, integrity_bio_wait);
16711721
queue_work(ic->wait_wq, &dio->work);
16721722
return;
1673-
} else {
1674-
sleep_on_endio_wait(ic);
1675-
goto retry;
16761723
}
1724+
wait_and_add_new_range(ic, &dio->range);
16771725
}
16781726
spin_unlock_irq(&ic->endio_wait.lock);
16791727

@@ -1896,8 +1944,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
18961944
io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
18971945

18981946
spin_lock_irq(&ic->endio_wait.lock);
1899-
while (unlikely(!add_new_range(ic, &io->range)))
1900-
sleep_on_endio_wait(ic);
1947+
if (unlikely(!add_new_range(ic, &io->range, true)))
1948+
wait_and_add_new_range(ic, &io->range);
19011949

19021950
if (likely(!from_replay)) {
19031951
struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];
@@ -2852,6 +2900,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
28522900
ti->per_io_data_size = sizeof(struct dm_integrity_io);
28532901

28542902
ic->in_progress = RB_ROOT;
2903+
INIT_LIST_HEAD(&ic->wait_list);
28552904
init_waitqueue_head(&ic->endio_wait);
28562905
bio_list_init(&ic->flush_bio_list);
28572906
init_waitqueue_head(&ic->copy_to_journal_wait);
@@ -3196,6 +3245,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
31963245
struct dm_integrity_c *ic = ti->private;
31973246

31983247
BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
3248+
BUG_ON(!list_empty(&ic->wait_list));
31993249

32003250
if (ic->metadata_wq)
32013251
destroy_workqueue(ic->metadata_wq);

0 commit comments

Comments
 (0)