Skip to content

Commit d4013bc

Browse files
committed
Merge tag 'bitmap-6.1-rc1' of https://github.com/norov/linux
Pull bitmap updates from Yury Norov: - Fix unsigned comparison to -1 in CPUMAP_FILE_MAX_BYTES (Phil Auld) - clean up nr_cpu_ids vs nr_cpumask_bits mess (me) This series cleans up that mess and adds a new config option, FORCE_NR_CPUS, that allows optimizing the cpumask subsystem if the number of CPUs is known at compile time. - optimize find_bit() functions (me) Reworks find_bit() functions based on new FIND_{FIRST,NEXT}_BIT() macros. - add find_nth_bit() (me) Adds find_nth_bit(), which is ~70 times faster than bit counting with a for_each() loop: for_each_set_bit(bit, mask, size) if (n-- == 0) return bit; Also adds bitmap_weight_and() to let people replace this pattern: tmp = bitmap_alloc(nbits); bitmap_and(tmp, map1, map2, nbits); weight = bitmap_weight(tmp, nbits); bitmap_free(tmp); with a single bitmap_weight_and() call. - repair cpumask_check() (me) After switching cpumask to use nr_cpu_ids, cpumask_check() started generating many false-positive warnings. This series fixes it. - Add find_next_andnot_bit() and for_each_cpu_andnot() (Valentin Schneider) Extends the API with the new helpers and applies for_each_cpu_andnot() in sched/core. 
* tag 'bitmap-6.1-rc1' of https://github.com/norov/linux: (28 commits) sched/core: Merge cpumask_andnot()+for_each_cpu() into for_each_cpu_andnot() lib/test_cpumask: Add for_each_cpu_and(not) tests cpumask: Introduce for_each_cpu_andnot() lib/find_bit: Introduce find_next_andnot_bit() cpumask: fix checking valid cpu range lib/bitmap: add tests for for_each() loops lib/find: optimize for_each() macros lib/bitmap: introduce for_each_set_bit_wrap() macro lib/find_bit: add find_next{,_and}_bit_wrap cpumask: switch for_each_cpu{,_not} to use for_each_bit() net: fix cpu_max_bits_warn() usage in netif_attrmask_next{,_and} cpumask: add cpumask_nth_{,and,andnot} lib/bitmap: remove bitmap_ord_to_pos lib/bitmap: add tests for find_nth_bit() lib: add find_nth{,_and,_andnot}_bit() lib/bitmap: add bitmap_weight_and() lib/bitmap: don't call __bitmap_weight() in kernel code tools: sync find_bit() implementation lib/find_bit: optimize find_next_bit() functions lib/find_bit: create find_first_zero_bit_le() ...
2 parents cdf072a + 585463f commit d4013bc

File tree

23 files changed

+1035
-369
lines changed

23 files changed

+1035
-369
lines changed

arch/loongarch/kernel/setup.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ static void __init prefill_possible_map(void)
336336
for (; i < NR_CPUS; i++)
337337
set_cpu_possible(i, false);
338338

339-
nr_cpu_ids = possible;
339+
set_nr_cpu_ids(possible);
340340
}
341341
#endif
342342

arch/mips/kernel/setup.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,7 @@ static void __init prefill_possible_map(void)
751751
for (; i < NR_CPUS; i++)
752752
set_cpu_possible(i, false);
753753

754-
nr_cpu_ids = possible;
754+
set_nr_cpu_ids(possible);
755755
}
756756
#else
757757
static inline void prefill_possible_map(void) {}

arch/powerpc/kernel/head_64.S

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,12 @@ generic_secondary_common_init:
393393
#else
394394
LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointer */
395395
ld r8,0(r8) /* Get base vaddr of array */
396+
#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
397+
LOAD_REG_IMMEDIATE(r7, NR_CPUS)
398+
#else
396399
LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
397400
lwz r7,0(r7) /* also the max paca allocated */
401+
#endif
398402
li r5,0 /* logical cpu id */
399403
1:
400404
sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */

arch/x86/kernel/smpboot.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,7 +1316,7 @@ static void __init smp_sanity_check(void)
13161316
nr++;
13171317
}
13181318

1319-
nr_cpu_ids = 8;
1319+
set_nr_cpu_ids(8);
13201320
}
13211321
#endif
13221322

@@ -1569,7 +1569,7 @@ __init void prefill_possible_map(void)
15691569
possible = i;
15701570
}
15711571

1572-
nr_cpu_ids = possible;
1572+
set_nr_cpu_ids(possible);
15731573

15741574
pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
15751575
possible, max_t(int, possible - num_processors, 0));

arch/x86/xen/smp_pv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ static void __init _get_smp_config(unsigned int early)
179179
* hypercall to expand the max number of VCPUs an already
180180
* running guest has. So cap it up to X. */
181181
if (subtract)
182-
nr_cpu_ids = nr_cpu_ids - subtract;
182+
set_nr_cpu_ids(nr_cpu_ids - subtract);
183183
#endif
184184

185185
}

fs/ntfs3/bitmap.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ static int wnd_rescan(struct wnd_bitmap *wnd)
560560

561561
buf = (ulong *)bh->b_data;
562562

563-
used = __bitmap_weight(buf, wbits);
563+
used = bitmap_weight(buf, wbits);
564564
if (used < wbits) {
565565
frb = wbits - used;
566566
wnd->free_bits[iw] = frb;
@@ -1364,7 +1364,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
13641364
buf = (ulong *)bh->b_data;
13651365

13661366
__bitmap_clear(buf, b0, blocksize * 8 - b0);
1367-
frb = wbits - __bitmap_weight(buf, wbits);
1367+
frb = wbits - bitmap_weight(buf, wbits);
13681368
wnd->total_zeroes += frb - wnd->free_bits[iw];
13691369
wnd->free_bits[iw] = frb;
13701370

include/linux/bitmap.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ struct device;
5151
* bitmap_empty(src, nbits) Are all bits zero in *src?
5252
* bitmap_full(src, nbits) Are all bits set in *src?
5353
* bitmap_weight(src, nbits) Hamming Weight: number set bits
54+
* bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap
5455
* bitmap_set(dst, pos, nbits) Set specified bit area
5556
* bitmap_clear(dst, pos, nbits) Clear specified bit area
5657
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
@@ -164,6 +165,8 @@ bool __bitmap_intersects(const unsigned long *bitmap1,
164165
bool __bitmap_subset(const unsigned long *bitmap1,
165166
const unsigned long *bitmap2, unsigned int nbits);
166167
unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
168+
unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
169+
const unsigned long *bitmap2, unsigned int nbits);
167170
void __bitmap_set(unsigned long *map, unsigned int start, int len);
168171
void __bitmap_clear(unsigned long *map, unsigned int start, int len);
169172

@@ -222,7 +225,6 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n
222225
#else
223226
#define bitmap_copy_le bitmap_copy
224227
#endif
225-
unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits);
226228
int bitmap_print_to_pagebuf(bool list, char *buf,
227229
const unsigned long *maskp, int nmaskbits);
228230

@@ -439,6 +441,15 @@ unsigned int bitmap_weight(const unsigned long *src, unsigned int nbits)
439441
return __bitmap_weight(src, nbits);
440442
}
441443

444+
static __always_inline
445+
unsigned long bitmap_weight_and(const unsigned long *src1,
446+
const unsigned long *src2, unsigned int nbits)
447+
{
448+
if (small_const_nbits(nbits))
449+
return hweight_long(*src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits));
450+
return __bitmap_weight_and(src1, src2, nbits);
451+
}
452+
442453
static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
443454
unsigned int nbits)
444455
{

include/linux/bitops.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,25 @@ static inline unsigned long __ffs64(u64 word)
247247
return __ffs((unsigned long)word);
248248
}
249249

250+
/**
251+
* fns - find N'th set bit in a word
252+
* @word: The word to search
253+
* @n: Ordinal of the set bit to find, counting from 0
254+
*/
255+
static inline unsigned long fns(unsigned long word, unsigned int n)
256+
{
257+
unsigned int bit;
258+
259+
while (word) {
260+
bit = __ffs(word);
261+
if (n-- == 0)
262+
return bit;
263+
__clear_bit(bit, &word);
264+
}
265+
266+
return BITS_PER_LONG;
267+
}
268+
250269
/**
251270
* assign_bit - Assign value to a bit in memory
252271
* @nr: the bit to set

include/linux/cpumask.h

Lines changed: 97 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,23 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
3535
*/
3636
#define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp)
3737

38-
#if NR_CPUS == 1
39-
#define nr_cpu_ids 1U
38+
#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
39+
#define nr_cpu_ids ((unsigned int)NR_CPUS)
4040
#else
4141
extern unsigned int nr_cpu_ids;
4242
#endif
4343

44-
#ifdef CONFIG_CPUMASK_OFFSTACK
45-
/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also,
46-
* not all bits may be allocated. */
47-
#define nr_cpumask_bits nr_cpu_ids
44+
static inline void set_nr_cpu_ids(unsigned int nr)
45+
{
46+
#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
47+
WARN_ON(nr != nr_cpu_ids);
4848
#else
49-
#define nr_cpumask_bits ((unsigned int)NR_CPUS)
49+
nr_cpu_ids = nr;
5050
#endif
51+
}
52+
53+
/* Deprecated. Always use nr_cpu_ids. */
54+
#define nr_cpumask_bits nr_cpu_ids
5155

5256
/*
5357
* The following particular system cpumasks and operations manage
@@ -67,10 +71,6 @@ extern unsigned int nr_cpu_ids;
6771
* cpu_online_mask is the dynamic subset of cpu_present_mask,
6872
* indicating those CPUs available for scheduling.
6973
*
70-
* If HOTPLUG is enabled, then cpu_possible_mask is forced to have
71-
* all NR_CPUS bits set, otherwise it is just the set of CPUs that
72-
* ACPI reports present at boot.
73-
*
7474
* If HOTPLUG is enabled, then cpu_present_mask varies dynamically,
7575
* depending on what ACPI reports as currently plugged in, otherwise
7676
* cpu_present_mask is just a copy of cpu_possible_mask.
@@ -174,9 +174,8 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
174174
static inline
175175
unsigned int cpumask_next(int n, const struct cpumask *srcp)
176176
{
177-
/* -1 is a legal arg here. */
178-
if (n != -1)
179-
cpumask_check(n);
177+
/* n is a prior cpu */
178+
cpumask_check(n + 1);
180179
return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1);
181180
}
182181

@@ -189,9 +188,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
189188
*/
190189
static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
191190
{
192-
/* -1 is a legal arg here. */
193-
if (n != -1)
194-
cpumask_check(n);
191+
/* n is a prior cpu */
192+
cpumask_check(n + 1);
195193
return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
196194
}
197195

@@ -231,9 +229,8 @@ static inline
231229
unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
232230
const struct cpumask *src2p)
233231
{
234-
/* -1 is a legal arg here. */
235-
if (n != -1)
236-
cpumask_check(n);
232+
/* n is a prior cpu */
233+
cpumask_check(n + 1);
237234
return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p),
238235
nr_cpumask_bits, n + 1);
239236
}
@@ -246,9 +243,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
246243
* After the loop, cpu is >= nr_cpu_ids.
247244
*/
248245
#define for_each_cpu(cpu, mask) \
249-
for ((cpu) = -1; \
250-
(cpu) = cpumask_next((cpu), (mask)), \
251-
(cpu) < nr_cpu_ids;)
246+
for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
252247

253248
/**
254249
* for_each_cpu_not - iterate over every cpu in a complemented mask
@@ -258,17 +253,15 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
258253
* After the loop, cpu is >= nr_cpu_ids.
259254
*/
260255
#define for_each_cpu_not(cpu, mask) \
261-
for ((cpu) = -1; \
262-
(cpu) = cpumask_next_zero((cpu), (mask)), \
263-
(cpu) < nr_cpu_ids;)
256+
for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
264257

265258
#if NR_CPUS == 1
266259
static inline
267260
unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
268261
{
269262
cpumask_check(start);
270-
if (n != -1)
271-
cpumask_check(n);
263+
/* n is a prior cpu */
264+
cpumask_check(n + 1);
272265

273266
/*
274267
* Return the first available CPU when wrapping, or when starting before cpu0,
@@ -293,10 +286,8 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
293286
*
294287
* After the loop, cpu is >= nr_cpu_ids.
295288
*/
296-
#define for_each_cpu_wrap(cpu, mask, start) \
297-
for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \
298-
(cpu) < nr_cpumask_bits; \
299-
(cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
289+
#define for_each_cpu_wrap(cpu, mask, start) \
290+
for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start)
300291

301292
/**
302293
* for_each_cpu_and - iterate over every cpu in both masks
@@ -313,9 +304,25 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
313304
* After the loop, cpu is >= nr_cpu_ids.
314305
*/
315306
#define for_each_cpu_and(cpu, mask1, mask2) \
316-
for ((cpu) = -1; \
317-
(cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \
318-
(cpu) < nr_cpu_ids;)
307+
for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)
308+
309+
/**
310+
* for_each_cpu_andnot - iterate over every cpu present in one mask, excluding
311+
* those present in another.
312+
* @cpu: the (optionally unsigned) integer iterator
313+
* @mask1: the first cpumask pointer
314+
* @mask2: the second cpumask pointer
315+
*
316+
* This saves a temporary CPU mask in many places. It is equivalent to:
317+
* struct cpumask tmp;
318+
* cpumask_andnot(&tmp, &mask1, &mask2);
319+
* for_each_cpu(cpu, &tmp)
320+
* ...
321+
*
322+
* After the loop, cpu is >= nr_cpu_ids.
323+
*/
324+
#define for_each_cpu_andnot(cpu, mask1, mask2) \
325+
for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)
319326

320327
/**
321328
* cpumask_any_but - return a "random" in a cpumask, but not this one.
@@ -337,6 +344,50 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
337344
return i;
338345
}
339346

347+
/**
348+
* cpumask_nth - get the N'th cpu in a cpumask
349+
* @srcp: the cpumask pointer
350+
* @cpu: the N'th cpu to find, starting from 0
351+
*
352+
* Returns >= nr_cpu_ids if such cpu doesn't exist.
353+
*/
354+
static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
355+
{
356+
return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu));
357+
}
358+
359+
/**
360+
* cpumask_nth_and - get the N'th cpu set in both cpumasks
361+
* @srcp1: the cpumask pointer
362+
* @srcp2: the cpumask pointer
363+
* @cpu: the N'th cpu to find, starting from 0
364+
*
365+
* Returns >= nr_cpu_ids if such cpu doesn't exist.
366+
*/
367+
static inline
368+
unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
369+
const struct cpumask *srcp2)
370+
{
371+
return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
372+
nr_cpumask_bits, cpumask_check(cpu));
373+
}
374+
375+
/**
376+
* cpumask_nth_andnot - get the N'th cpu set in 1st cpumask, and clear in 2nd.
377+
* @srcp1: the cpumask pointer
378+
* @srcp2: the cpumask pointer
379+
* @cpu: the N'th cpu to find, starting from 0
380+
*
381+
* Returns >= nr_cpu_ids if such cpu doesn't exist.
382+
*/
383+
static inline
384+
unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
385+
const struct cpumask *srcp2)
386+
{
387+
return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
388+
nr_cpumask_bits, cpumask_check(cpu));
389+
}
390+
340391
#define CPU_BITS_NONE \
341392
{ \
342393
[0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
@@ -586,6 +637,17 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
586637
return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits);
587638
}
588639

640+
/**
641+
* cpumask_weight_and - Count of bits in (*srcp1 & *srcp2)
642+
* @srcp1: the cpumask to count bits (< nr_cpu_ids) in.
643+
* @srcp2: the cpumask to count bits (< nr_cpu_ids) in.
644+
*/
645+
static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
646+
const struct cpumask *srcp2)
647+
{
648+
return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
649+
}
650+
589651
/**
590652
* cpumask_shift_right - *dstp = *srcp >> n
591653
* @dstp: the cpumask result

0 commit comments

Comments
 (0)