Skip to content

Commit d1f854a

Browse files
committed
Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm fixes from Dan Williams:

 "These fixes are all tagged for -stable and have received a build
  success notification from the kbuild robot.

   - NVDIMM namespaces, configured to enforce 1GB alignment, fail to
     initialize on platforms that mis-align the start or end of the
     physical address range.

   - The Linux implementation of the BTT (Block Translation Table) is
     incompatible with the UEFI 2.7 definition of the BTT format. The
     BTT layers a software atomic sector semantic on top of an NVDIMM
     namespace. Linux needs to be compatible with the UEFI definition to
     enable boot support or any pre-OS access of data on a BTT enabled
     namespace.

   - A fix for ACPI SMART notification events, this allows a userspace
     monitor to register for health events rather than poll. This has
     been broken since it was initially merged as the unit test
     inadvertently worked around the problem. The urgency for fixing
     this during the -rc series is driven by how expensive it is to poll
     for this data (System Management Mode entry)"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  libnvdimm, btt: Fix an incompatibility in the log layout
  libnvdimm, btt: add a couple of missing kernel-doc lines
  libnvdimm, dax: fix 1GB-aligned namespaces vs physical misalignment
  libnvdimm, pfn: fix start_pad handling for aligned namespaces
  acpi, nfit: fix health event notification
2 parents caf9a82 + 24e3a7f commit d1f854a

File tree

4 files changed

+236
-41
lines changed

4 files changed

+236
-41
lines changed

drivers/acpi/nfit/core.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
16701670
dev_name(&adev_dimm->dev));
16711671
return -ENXIO;
16721672
}
1673+
/*
1674+
* Record nfit_mem for the notification path to track back to
1675+
* the nfit sysfs attributes for this dimm device object.
1676+
*/
1677+
dev_set_drvdata(&adev_dimm->dev, nfit_mem);
16731678

16741679
/*
16751680
* Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
17521757
sysfs_put(nfit_mem->flags_attr);
17531758
nfit_mem->flags_attr = NULL;
17541759
}
1755-
if (adev_dimm)
1760+
if (adev_dimm) {
17561761
acpi_remove_notify_handler(adev_dimm->handle,
17571762
ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
1763+
dev_set_drvdata(&adev_dimm->dev, NULL);
1764+
}
17581765
}
17591766
mutex_unlock(&acpi_desc->init_mutex);
17601767
}

drivers/nvdimm/btt.c

Lines changed: 167 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
211211
return ret;
212212
}
213213

214-
static int btt_log_read_pair(struct arena_info *arena, u32 lane,
215-
struct log_entry *ent)
214+
static int btt_log_group_read(struct arena_info *arena, u32 lane,
215+
struct log_group *log)
216216
{
217217
return arena_read_bytes(arena,
218-
arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
219-
2 * LOG_ENT_SIZE, 0);
218+
arena->logoff + (lane * LOG_GRP_SIZE), log,
219+
LOG_GRP_SIZE, 0);
220220
}
221221

222222
static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
256256
debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
257257
debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
258258
debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
259+
debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
260+
debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
259261
}
260262

261263
static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
274276
}
275277
}
276278

279+
static u32 log_seq(struct log_group *log, int log_idx)
280+
{
281+
return le32_to_cpu(log->ent[log_idx].seq);
282+
}
283+
277284
/*
278285
* This function accepts two log entries, and uses the
279286
* sequence number to find the 'older' entry.
@@ -283,32 +290,34 @@ static void btt_debugfs_init(struct btt *btt)
283290
*
284291
* TODO The logic feels a bit kludge-y. make it better..
285292
*/
286-
static int btt_log_get_old(struct log_entry *ent)
293+
static int btt_log_get_old(struct arena_info *a, struct log_group *log)
287294
{
295+
int idx0 = a->log_index[0];
296+
int idx1 = a->log_index[1];
288297
int old;
289298

290299
/*
291300
* the first ever time this is seen, the entry goes into [0]
292301
* the next time, the following logic works out to put this
293302
* (next) entry into [1]
294303
*/
295-
if (ent[0].seq == 0) {
296-
ent[0].seq = cpu_to_le32(1);
304+
if (log_seq(log, idx0) == 0) {
305+
log->ent[idx0].seq = cpu_to_le32(1);
297306
return 0;
298307
}
299308

300-
if (ent[0].seq == ent[1].seq)
309+
if (log_seq(log, idx0) == log_seq(log, idx1))
301310
return -EINVAL;
302-
if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
311+
if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
303312
return -EINVAL;
304313

305-
if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
306-
if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
314+
if (log_seq(log, idx0) < log_seq(log, idx1)) {
315+
if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
307316
old = 0;
308317
else
309318
old = 1;
310319
} else {
311-
if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
320+
if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
312321
old = 1;
313322
else
314323
old = 0;
@@ -328,25 +337,26 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
328337
{
329338
int ret;
330339
int old_ent, ret_ent;
331-
struct log_entry log[2];
340+
struct log_group log;
332341

333-
ret = btt_log_read_pair(arena, lane, log);
342+
ret = btt_log_group_read(arena, lane, &log);
334343
if (ret)
335344
return -EIO;
336345

337-
old_ent = btt_log_get_old(log);
346+
old_ent = btt_log_get_old(arena, &log);
338347
if (old_ent < 0 || old_ent > 1) {
339348
dev_err(to_dev(arena),
340349
"log corruption (%d): lane %d seq [%d, %d]\n",
341-
old_ent, lane, log[0].seq, log[1].seq);
350+
old_ent, lane, log.ent[arena->log_index[0]].seq,
351+
log.ent[arena->log_index[1]].seq);
342352
/* TODO set error state? */
343353
return -EIO;
344354
}
345355

346356
ret_ent = (old_flag ? old_ent : (1 - old_ent));
347357

348358
if (ent != NULL)
349-
memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
359+
memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
350360

351361
return ret_ent;
352362
}
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
360370
u32 sub, struct log_entry *ent, unsigned long flags)
361371
{
362372
int ret;
363-
/*
364-
* Ignore the padding in log_entry for calculating log_half.
365-
* The entry is 'committed' when we write the sequence number,
366-
* and we want to ensure that that is the last thing written.
367-
* We don't bother writing the padding as that would be extra
368-
* media wear and write amplification
369-
*/
370-
unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
371-
u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
373+
u32 group_slot = arena->log_index[sub];
374+
unsigned int log_half = LOG_ENT_SIZE / 2;
372375
void *src = ent;
376+
u64 ns_off;
373377

378+
ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
379+
(group_slot * LOG_ENT_SIZE);
374380
/* split the 16B write into atomic, durable halves */
375381
ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
376382
if (ret)
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
453459
{
454460
size_t logsize = arena->info2off - arena->logoff;
455461
size_t chunk_size = SZ_4K, offset = 0;
456-
struct log_entry log;
462+
struct log_entry ent;
457463
void *zerobuf;
458464
int ret;
459465
u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
485491
}
486492

487493
for (i = 0; i < arena->nfree; i++) {
488-
log.lba = cpu_to_le32(i);
489-
log.old_map = cpu_to_le32(arena->external_nlba + i);
490-
log.new_map = cpu_to_le32(arena->external_nlba + i);
491-
log.seq = cpu_to_le32(LOG_SEQ_INIT);
492-
ret = __btt_log_write(arena, i, 0, &log, 0);
494+
ent.lba = cpu_to_le32(i);
495+
ent.old_map = cpu_to_le32(arena->external_nlba + i);
496+
ent.new_map = cpu_to_le32(arena->external_nlba + i);
497+
ent.seq = cpu_to_le32(LOG_SEQ_INIT);
498+
ret = __btt_log_write(arena, i, 0, &ent, 0);
493499
if (ret)
494500
goto free;
495501
}
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
594600
return 0;
595601
}
596602

603+
static bool ent_is_padding(struct log_entry *ent)
604+
{
605+
return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
606+
&& (ent->seq == 0);
607+
}
608+
609+
/*
610+
* Detecting valid log indices: We read a log group (see the comments in btt.h
611+
* for a description of a 'log_group' and its 'slots'), and iterate over its
612+
* four slots. We expect that a padding slot will be all-zeroes, and use this
613+
* to detect a padding slot vs. an actual entry.
614+
*
615+
* If a log_group is in the initial state, i.e. hasn't been used since the
616+
* creation of this BTT layout, it will have three of the four slots with
617+
* zeroes. We skip over these log_groups for the detection of log_index. If
618+
* all log_groups are in the initial state (i.e. the BTT has never been
619+
* written to), it is safe to assume the 'new format' of log entries in slots
620+
* (0, 1).
621+
*/
622+
static int log_set_indices(struct arena_info *arena)
623+
{
624+
bool idx_set = false, initial_state = true;
625+
int ret, log_index[2] = {-1, -1};
626+
u32 i, j, next_idx = 0;
627+
struct log_group log;
628+
u32 pad_count = 0;
629+
630+
for (i = 0; i < arena->nfree; i++) {
631+
ret = btt_log_group_read(arena, i, &log);
632+
if (ret < 0)
633+
return ret;
634+
635+
for (j = 0; j < 4; j++) {
636+
if (!idx_set) {
637+
if (ent_is_padding(&log.ent[j])) {
638+
pad_count++;
639+
continue;
640+
} else {
641+
/* Skip if index has been recorded */
642+
if ((next_idx == 1) &&
643+
(j == log_index[0]))
644+
continue;
645+
/* valid entry, record index */
646+
log_index[next_idx] = j;
647+
next_idx++;
648+
}
649+
if (next_idx == 2) {
650+
/* two valid entries found */
651+
idx_set = true;
652+
} else if (next_idx > 2) {
653+
/* too many valid indices */
654+
return -ENXIO;
655+
}
656+
} else {
657+
/*
658+
* once the indices have been set, just verify
659+
* that all subsequent log groups are either in
660+
* their initial state or follow the same
661+
* indices.
662+
*/
663+
if (j == log_index[0]) {
664+
/* entry must be 'valid' */
665+
if (ent_is_padding(&log.ent[j]))
666+
return -ENXIO;
667+
} else if (j == log_index[1]) {
668+
;
669+
/*
670+
* log_index[1] can be padding if the
671+
* lane never got used and it is still
672+
* in the initial state (three 'padding'
673+
* entries)
674+
*/
675+
} else {
676+
/* entry must be invalid (padding) */
677+
if (!ent_is_padding(&log.ent[j]))
678+
return -ENXIO;
679+
}
680+
}
681+
}
682+
/*
683+
* If any of the log_groups have more than one valid,
684+
* non-padding entry, then we are no longer in the
685+
* initial_state
686+
*/
687+
if (pad_count < 3)
688+
initial_state = false;
689+
pad_count = 0;
690+
}
691+
692+
if (!initial_state && !idx_set)
693+
return -ENXIO;
694+
695+
/*
696+
* If all the entries in the log were in the initial state,
697+
* assume new padding scheme
698+
*/
699+
if (initial_state)
700+
log_index[1] = 1;
701+
702+
/*
703+
* Only allow the known permutations of log/padding indices,
704+
* i.e. (0, 1), and (0, 2)
705+
*/
706+
if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
707+
; /* known index possibilities */
708+
else {
709+
dev_err(to_dev(arena), "Found an unknown padding scheme\n");
710+
return -ENXIO;
711+
}
712+
713+
arena->log_index[0] = log_index[0];
714+
arena->log_index[1] = log_index[1];
715+
dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
716+
dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
717+
return 0;
718+
}
719+
597720
static int btt_rtt_init(struct arena_info *arena)
598721
{
599722
arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
650773
available -= 2 * BTT_PG_SIZE;
651774

652775
/* The log takes a fixed amount of space based on nfree */
653-
logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
654-
BTT_PG_SIZE);
776+
logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
655777
available -= logsize;
656778

657779
/* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
668790
arena->mapoff = arena->dataoff + datasize;
669791
arena->logoff = arena->mapoff + mapsize;
670792
arena->info2off = arena->logoff + logsize;
793+
794+
/* Default log indices are (0,1) */
795+
arena->log_index[0] = 0;
796+
arena->log_index[1] = 1;
671797
return arena;
672798
}
673799

@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
758884
arena->external_lba_start = cur_nlba;
759885
parse_arena_meta(arena, super, cur_off);
760886

887+
ret = log_set_indices(arena);
888+
if (ret) {
889+
dev_err(to_dev(arena),
890+
"Unable to deduce log/padding indices\n");
891+
goto out;
892+
}
893+
761894
mutex_init(&arena->err_lock);
762895
ret = btt_freelist_init(arena);
763896
if (ret)

0 commit comments

Comments
 (0)