@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
 	return ret;
 }
 
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
-			struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+			struct log_group *log)
 {
 	return arena_read_bytes(arena,
-			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-			2 * LOG_ENT_SIZE, 0);
+			arena->logoff + (lane * LOG_GRP_SIZE), log,
+			LOG_GRP_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
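(For context: the hunk above assumes the log layout this change introduces in btt.h. The sketch below is a self-contained, user-space illustration of that layout; the struct contents and the 16/64-byte sizes are assumptions inferred from this diff, not the authoritative kernel header.)

/* Illustrative stand-in for the log layout assumed by btt_log_group_read().
 * The real definitions live in drivers/nvdimm/btt.h; these are sketches. */
#include <stdint.h>
#include <stdio.h>

struct log_entry {			/* one 16-byte log slot (assumed) */
	uint32_t lba;
	uint32_t old_map;
	uint32_t new_map;
	uint32_t seq;
};

struct log_group {			/* one free-list lane: four slots (assumed) */
	struct log_entry ent[4];
};

#define LOG_ENT_SIZE sizeof(struct log_entry)	/* assumed 16 bytes */
#define LOG_GRP_SIZE sizeof(struct log_group)	/* assumed 64 bytes */

int main(void)
{
	uint64_t logoff = 4096;		/* hypothetical start of the log area */
	uint32_t lane;

	/* Each lane maps to one group; the per-lane byte span is the same 64
	 * bytes the old code addressed as a pair of padded 32-byte entries. */
	for (lane = 0; lane < 3; lane++)
		printf("lane %u -> group offset %llu (size %zu)\n", lane,
		       (unsigned long long)(logoff + lane * LOG_GRP_SIZE),
		       LOG_GRP_SIZE);
	return 0;
}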
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
 	debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
 	debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
 	debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+	debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+	debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
 }
 
 static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
 	}
 }
 
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+	return le32_to_cpu(log->ent[log_idx].seq);
+}
+
 /*
  * This function accepts two log entries, and uses the
  * sequence number to find the 'older' entry.
@@ -283,32 +290,34 @@ static void btt_debugfs_init(struct btt *btt)
  *
  * TODO The logic feels a bit kludge-y. make it better..
  */
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
 {
+	int idx0 = a->log_index[0];
+	int idx1 = a->log_index[1];
 	int old;
 
 	/*
 	 * the first ever time this is seen, the entry goes into [0]
 	 * the next time, the following logic works out to put this
 	 * (next) entry into [1]
 	 */
-	if (ent[0].seq == 0) {
-		ent[0].seq = cpu_to_le32(1);
+	if (log_seq(log, idx0) == 0) {
+		log->ent[idx0].seq = cpu_to_le32(1);
 		return 0;
 	}
 
-	if (ent[0].seq == ent[1].seq)
+	if (log_seq(log, idx0) == log_seq(log, idx1))
 		return -EINVAL;
-	if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+	if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
 		return -EINVAL;
 
-	if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
-		if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+	if (log_seq(log, idx0) < log_seq(log, idx1)) {
+		if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
 			old = 0;
 		else
 			old = 1;
 	} else {
-		if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+		if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
 			old = 1;
 		else
 			old = 0;
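(The sequence-number rule used by btt_log_get_old() can be seen in isolation with a small worked example. The helper below is illustrative only, assuming seq cycles 1 -> 2 -> 3 with 0 meaning "never written"; it mirrors the comparison above but is not the kernel function.)

/* Stand-alone sketch of the 'which entry is older' rule, assuming seq
 * cycles 1 -> 2 -> 3 (0 means never written). Not the kernel function. */
#include <stdint.h>
#include <stdio.h>

/* Return which slot (0 or 1) holds the older entry, or -1 if the pair is
 * invalid (equal sequence numbers, or a sum no valid pair can reach). */
static int older_slot(uint32_t seq0, uint32_t seq1)
{
	if (seq0 == seq1 || seq0 + seq1 > 5)
		return -1;
	if (seq0 < seq1)
		return (seq1 - seq0 == 1) ? 0 : 1;	/* (1,2) -> 0, (1,3) -> 1 */
	return (seq0 - seq1 == 1) ? 1 : 0;		/* (2,1) -> 1, (3,1) -> 0 */
}

int main(void)
{
	printf("%d %d %d %d\n",
	       older_slot(1, 2),	/* 0: slot 1 is one ahead, slot 0 is older */
	       older_slot(3, 1),	/* 0: seq wrapped 3 -> 1, slot 0 is older */
	       older_slot(2, 2),	/* -1: equal sequence numbers */
	       older_slot(1, 3));	/* 1: slot 0 follows slot 1 after the wrap */
	return 0;
}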
@@ -328,25 +337,26 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
 {
 	int ret;
 	int old_ent, ret_ent;
-	struct log_entry log[2];
+	struct log_group log;
 
-	ret = btt_log_read_pair(arena, lane, log);
+	ret = btt_log_group_read(arena, lane, &log);
 	if (ret)
 		return -EIO;
 
-	old_ent = btt_log_get_old(log);
+	old_ent = btt_log_get_old(arena, &log);
 	if (old_ent < 0 || old_ent > 1) {
 		dev_err(to_dev(arena),
 			"log corruption (%d): lane %d seq [%d, %d]\n",
-			old_ent, lane, log[0].seq, log[1].seq);
+			old_ent, lane, log.ent[arena->log_index[0]].seq,
+			log.ent[arena->log_index[1]].seq);
 		/* TODO set error state? */
 		return -EIO;
 	}
 
 	ret_ent = (old_flag ? old_ent : (1 - old_ent));
 
 	if (ent != NULL)
-		memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+		memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
 
 	return ret_ent;
 }
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
 		u32 sub, struct log_entry *ent, unsigned long flags)
 {
 	int ret;
-	/*
-	 * Ignore the padding in log_entry for calculating log_half.
-	 * The entry is 'committed' when we write the sequence number,
-	 * and we want to ensure that that is the last thing written.
-	 * We don't bother writing the padding as that would be extra
-	 * media wear and write amplification
-	 */
-	unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
-	u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+	u32 group_slot = arena->log_index[sub];
+	unsigned int log_half = LOG_ENT_SIZE / 2;
 	void *src = ent;
+	u64 ns_off;
 
+	ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+		(group_slot * LOG_ENT_SIZE);
 	/* split the 16B write into atomic, durable halves */
 	ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
 	if (ret)
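(A worked example of the ns_off arithmetic above, under the assumed sizes LOG_ENT_SIZE = 16 and LOG_GRP_SIZE = 64 and a hypothetical logoff. It also shows where the two live slots of a lane land for the legacy (0, 2) and current (0, 1) index schemes; all values here are illustrative.)

/* Worked example of the ns_off calculation in __btt_log_write(), with
 * assumed sizes (LOG_ENT_SIZE = 16, LOG_GRP_SIZE = 64) and a made-up logoff. */
#include <stdint.h>
#include <stdio.h>

#define LOG_ENT_SIZE 16u	/* assumption: one unpadded log slot */
#define LOG_GRP_SIZE 64u	/* assumption: four slots per lane */

static uint64_t ns_off(uint64_t logoff, uint32_t lane, uint32_t group_slot)
{
	return logoff + (uint64_t)lane * LOG_GRP_SIZE +
	       (uint64_t)group_slot * LOG_ENT_SIZE;
}

int main(void)
{
	uint64_t logoff = 8192;			/* hypothetical log area start */
	uint32_t legacy_idx[2] = { 0, 2 };	/* legacy padded layout */
	uint32_t new_idx[2] = { 0, 1 };		/* current layout */
	int sub;

	/* sub selects log_index[sub]; the two layouts differ only in which
	 * slot of the 64-byte group the second entry of a lane lands in. */
	for (sub = 0; sub < 2; sub++)
		printf("lane 5, sub %d: legacy slot %u -> %llu, new slot %u -> %llu\n",
		       sub, legacy_idx[sub],
		       (unsigned long long)ns_off(logoff, 5, legacy_idx[sub]),
		       new_idx[sub],
		       (unsigned long long)ns_off(logoff, 5, new_idx[sub]));
	return 0;
}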
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
 {
 	size_t logsize = arena->info2off - arena->logoff;
 	size_t chunk_size = SZ_4K, offset = 0;
-	struct log_entry log;
+	struct log_entry ent;
 	void *zerobuf;
 	int ret;
 	u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
 	}
 
 	for (i = 0; i < arena->nfree; i++) {
-		log.lba = cpu_to_le32(i);
-		log.old_map = cpu_to_le32(arena->external_nlba + i);
-		log.new_map = cpu_to_le32(arena->external_nlba + i);
-		log.seq = cpu_to_le32(LOG_SEQ_INIT);
-		ret = __btt_log_write(arena, i, 0, &log, 0);
+		ent.lba = cpu_to_le32(i);
+		ent.old_map = cpu_to_le32(arena->external_nlba + i);
+		ent.new_map = cpu_to_le32(arena->external_nlba + i);
+		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+		ret = __btt_log_write(arena, i, 0, &ent, 0);
 		if (ret)
 			goto free;
 	}
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
 	return 0;
 }
 
+static bool ent_is_padding(struct log_entry *ent)
+{
+	return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+		&& (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+	bool idx_set = false, initial_state = true;
+	int ret, log_index[2] = {-1, -1};
+	u32 i, j, next_idx = 0;
+	struct log_group log;
+	u32 pad_count = 0;
+
+	for (i = 0; i < arena->nfree; i++) {
+		ret = btt_log_group_read(arena, i, &log);
+		if (ret < 0)
+			return ret;
+
+		for (j = 0; j < 4; j++) {
+			if (!idx_set) {
+				if (ent_is_padding(&log.ent[j])) {
+					pad_count++;
+					continue;
+				} else {
+					/* Skip if index has been recorded */
+					if ((next_idx == 1) &&
+							(j == log_index[0]))
+						continue;
+					/* valid entry, record index */
+					log_index[next_idx] = j;
+					next_idx++;
+				}
+				if (next_idx == 2) {
+					/* two valid entries found */
+					idx_set = true;
+				} else if (next_idx > 2) {
+					/* too many valid indices */
+					return -ENXIO;
+				}
+			} else {
+				/*
+				 * once the indices have been set, just verify
+				 * that all subsequent log groups are either in
+				 * their initial state or follow the same
+				 * indices.
+				 */
+				if (j == log_index[0]) {
+					/* entry must be 'valid' */
+					if (ent_is_padding(&log.ent[j]))
+						return -ENXIO;
+				} else if (j == log_index[1]) {
+					;
+					/*
+					 * log_index[1] can be padding if the
+					 * lane never got used and it is still
+					 * in the initial state (three 'padding'
+					 * entries)
+					 */
+				} else {
+					/* entry must be invalid (padding) */
+					if (!ent_is_padding(&log.ent[j]))
+						return -ENXIO;
+				}
+			}
+		}
+		/*
+		 * If any of the log_groups have more than one valid,
+		 * non-padding entry, then the we are no longer in the
+		 * initial_state
+		 */
+		if (pad_count < 3)
+			initial_state = false;
+		pad_count = 0;
+	}
+
+	if (!initial_state && !idx_set)
+		return -ENXIO;
+
+	/*
+	 * If all the entries in the log were in the initial state,
+	 * assume new padding scheme
+	 */
+	if (initial_state)
+		log_index[1] = 1;
+
+	/*
+	 * Only allow the known permutations of log/padding indices,
+	 * i.e. (0, 1), and (0, 2)
+	 */
+	if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+		; /* known index possibilities */
+	else {
+		dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+		return -ENXIO;
+	}
+
+	arena->log_index[0] = log_index[0];
+	arena->log_index[1] = log_index[1];
+	dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+	dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+	return 0;
+}
+
 static int btt_rtt_init(struct arena_info *arena)
 {
 	arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
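(The detection rule added above can be illustrated on a single group: an all-zero slot is padding, an untouched group has at most one live slot, and a used group should have exactly two live slots, at indices (0, 1) for the new layout or (0, 2) for the legacy padded layout. The toy classifier below is a sketch of that rule only, not the kernel code; types and values are illustrative.)

/* Toy illustration of the per-group check behind log_set_indices(). */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

struct log_entry { uint32_t lba, old_map, new_map, seq; };
struct log_group { struct log_entry ent[4]; };

static bool is_padding(const struct log_entry *e)
{
	return !e->lba && !e->old_map && !e->new_map && !e->seq;
}

/* Return 1 for the (0, 1) layout, 2 for (0, 2), 0 for an untouched group,
 * and -1 for anything else (an unknown padding scheme). */
static int classify(const struct log_group *g)
{
	bool used[4];
	int i, count = 0;

	for (i = 0; i < 4; i++) {
		used[i] = !is_padding(&g->ent[i]);
		count += used[i];
	}
	if (count == 0 || (count == 1 && used[0]))
		return 0;			/* initial state */
	if (count == 2 && used[0] && used[1])
		return 1;			/* new layout: slots (0, 1) */
	if (count == 2 && used[0] && used[2])
		return 2;			/* legacy layout: slots (0, 2) */
	return -1;
}

int main(void)
{
	struct log_group legacy = { .ent = { { .lba = 1, .seq = 1 }, { 0 },
					     { .lba = 1, .seq = 2 }, { 0 } } };
	struct log_group fresh  = { .ent = { { .lba = 7, .seq = 1 },
					     { 0 }, { 0 }, { 0 } } };

	printf("legacy group -> %d, fresh group -> %d\n",
	       classify(&legacy), classify(&fresh));	/* expect 2 and 0 */
	return 0;
}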
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
 	available -= 2 * BTT_PG_SIZE;
 
 	/* The log takes a fixed amount of space based on nfree */
-	logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
-			BTT_PG_SIZE);
+	logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
 	available -= logsize;
 
 	/* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
 	arena->mapoff = arena->dataoff + datasize;
 	arena->logoff = arena->mapoff + mapsize;
 	arena->info2off = arena->logoff + logsize;
+
+	/* Default log indices are (0,1) */
+	arena->log_index[0] = 0;
+	arena->log_index[1] = 1;
 	return arena;
 }
 
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
 	arena->external_lba_start = cur_nlba;
 	parse_arena_meta(arena, super, cur_off);
 
+	ret = log_set_indices(arena);
+	if (ret) {
+		dev_err(to_dev(arena),
+			"Unable to deduce log/padding indices\n");
+		goto out;
+	}
+
 	mutex_init(&arena->err_lock);
 	ret = btt_freelist_init(arena);
 	if (ret)