Commit 7f6dcff

Merge tag 'sched-rt-2022-10-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull preempt RT updates from Thomas Gleixner:
 "Introduce preempt_[dis|en]able_nested() and use it to clean up various
  places which have open coded PREEMPT_RT conditionals.

  On PREEMPT_RT enabled kernels, spinlocks and rwlocks are neither
  disabling preemption nor interrupts. Though there are a few places
  which depend on the implicit preemption/interrupt disable of those
  locks, e.g. seqcount write sections, per CPU statistics updates etc.

  PREEMPT_RT added open coded CONFIG_PREEMPT_RT conditionals to
  disable/enable preemption in the related code parts all over the
  place. That's hard to read and does not really explain why this is
  necessary.

  Linus suggested to use helper functions (preempt_disable_nested() and
  preempt_enable_nested()) and use those in the affected places. On !RT
  enabled kernels these functions are NOPs, but contain a lockdep assert
  to validate that preemption is actually disabled to catch call sites
  which do not have preemption disabled.

  Clean up the affected code paths in mm, dentry and lib"

* tag 'sched-rt-2022-10-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  u64_stats: Streamline the implementation
  flex_proportions: Disable preemption entering the write section.
  mm/compaction: Get rid of RT ifdeffery
  mm/memcontrol: Replace the PREEMPT_RT conditionals
  mm/debug: Provide VM_WARN_ON_IRQS_ENABLED()
  mm/vmstat: Use preempt_[dis|en]able_nested()
  dentry: Use preempt_[dis|en]able_nested()
  preempt: Provide preempt_[dis|en]able_nested()
2 parents 65f109e + 44b0c29 commit 7f6dcff

10 files changed: +144 additions, -134 deletions

fs/dcache.c

Lines changed: 2 additions & 11 deletions
@@ -2597,15 +2597,7 @@ EXPORT_SYMBOL(d_rehash);
 
 static inline unsigned start_dir_add(struct inode *dir)
 {
-	/*
-	 * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT
-	 * kernels spin_lock() implicitly disables preemption, but not on
-	 * PREEMPT_RT. So for RT it has to be done explicitly to protect
-	 * the sequence count write side critical section against a reader
-	 * or another writer preempting, which would result in a live lock.
-	 */
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		preempt_disable();
+	preempt_disable_nested();
 	for (;;) {
 		unsigned n = dir->i_dir_seq;
 		if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2618,8 +2610,7 @@ static inline void end_dir_add(struct inode *dir, unsigned int n,
 			       wait_queue_head_t *d_wait)
 {
 	smp_store_release(&dir->i_dir_seq, n + 2);
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		preempt_enable();
+	preempt_enable_nested();
 	wake_up_all(d_wait);
 }

include/linux/mmdebug.h

Lines changed: 6 additions & 0 deletions
@@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm);
 #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #endif
 
+#ifdef CONFIG_DEBUG_VM_IRQSOFF
+#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled())
+#else
+#define VM_WARN_ON_IRQS_ENABLED() do { } while (0)
+#endif
+
 #ifdef CONFIG_DEBUG_VIRTUAL
 #define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
 #else
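
Note: VM_WARN_ON_IRQS_ENABLED() is aimed at mm code paths (e.g. the mm/memcontrol change in this series) that rely on interrupts being disabled on !PREEMPT_RT but are serialized differently on RT. A minimal sketch of a call site follows; the function and counter names are hypothetical, not taken from the patches:

/*
 * Illustrative only: an RMW update that must run with IRQs disabled on
 * !PREEMPT_RT. With CONFIG_DEBUG_VM_IRQSOFF disabled (PREEMPT_RT, or
 * DEBUG_VM off) the check compiles to nothing.
 */
static void example_counter_add(unsigned long *counter, unsigned long delta)
{
	VM_WARN_ON_IRQS_ENABLED();
	*counter += delta;
}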

include/linux/preempt.h

Lines changed: 42 additions & 0 deletions
@@ -421,4 +421,46 @@ static inline void migrate_enable(void) { }
 
 #endif /* CONFIG_SMP */
 
+/**
+ * preempt_disable_nested - Disable preemption inside a normally preempt disabled section
+ *
+ * Use for code which requires preemption protection inside a critical
+ * section which has preemption disabled implicitly on non-PREEMPT_RT
+ * enabled kernels, by e.g.:
+ *  - holding a spinlock/rwlock
+ *  - soft interrupt context
+ *  - regular interrupt handlers
+ *
+ * On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft
+ * interrupt context and regular interrupt handlers are preemptible and
+ * only prevent migration. preempt_disable_nested() ensures that preemption
+ * is disabled for cases which require CPU local serialization even on
+ * PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP.
+ *
+ * The use cases are code sequences which are not serialized by a
+ * particular lock instance, e.g.:
+ *  - seqcount write side critical sections where the seqcount is not
+ *    associated to a particular lock and therefore the automatic
+ *    protection mechanism does not work. This prevents a live lock
+ *    against a preempting high priority reader.
+ *  - RMW per CPU variable updates like vmstat.
+ */
+/* Macro to avoid header recursion hell vs. lockdep */
+#define preempt_disable_nested()				\
+do {								\
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))			\
+		preempt_disable();				\
+	else							\
+		lockdep_assert_preemption_disabled();		\
+} while (0)
+
+/**
+ * preempt_enable_nested - Undo the effect of preempt_disable_nested()
+ */
+static __always_inline void preempt_enable_nested(void)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
+}
+
 #endif /* __LINUX_PREEMPT_H */
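
Note: the pattern these helpers replace throughout the series (dcache, vmstat, flex_proportions) is a seqcount write section that is implicitly preemption-safe under a spinlock on !PREEMPT_RT but not on RT. A condensed, hypothetical sketch of a writer using the new helpers; the seqcount and value names are illustrative only:

/*
 * Sketch of a writer whose caller already holds a spinlock. On
 * !PREEMPT_RT the lock disables preemption and the helper merely
 * asserts that via lockdep; on PREEMPT_RT it disables preemption
 * explicitly so a preempting reader cannot live lock on the seqcount.
 */
static void example_update(seqcount_t *seq, u64 *val, u64 delta)
{
	preempt_disable_nested();
	write_seqcount_begin(seq);
	*val += delta;
	write_seqcount_end(seq);
	preempt_enable_nested();
}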

include/linux/u64_stats_sync.h

Lines changed: 64 additions & 81 deletions
@@ -8,7 +8,7 @@
  *
  * Key points :
  *
- * - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP.
+ * - Use a seqcount on 32-bit
  * - The whole thing is a no-op on 64-bit architectures.
  *
  * Usage constraints:
@@ -20,7 +20,8 @@
  *    writer and also spin forever.
  *
  * 3) Write side must use the _irqsave() variant if other writers, or a reader,
- *    can be invoked from an IRQ context.
+ *    can be invoked from an IRQ context. On 64bit systems this variant does not
+ *    disable interrupts.
  *
  * 4) If reader fetches several counters, there is no guarantee the whole values
  *    are consistent w.r.t. each other (remember point #2: seqcounts are not
@@ -29,11 +30,6 @@
  * 5) Readers are allowed to sleep or be preempted/interrupted: they perform
  *    pure reads.
  *
- * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats
- *    might be updated from a hardirq or softirq context (remember point #1:
- *    seqcounts are not used for UP kernels). 32-bit UP stat readers could read
- *    corrupted 64-bit values otherwise.
- *
 * Usage :
 *
 * Stats producer (writer) should use following template granted it already got
@@ -66,7 +62,7 @@
 #include <linux/seqlock.h>
 
 struct u64_stats_sync {
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+#if BITS_PER_LONG == 32
 	seqcount_t	seq;
 #endif
 };
@@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p)
 	local64_inc(&p->v);
 }
 
-#else
+static inline void u64_stats_init(struct u64_stats_sync *syncp) { }
+static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { }
+static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { }
+static inline unsigned long __u64_stats_irqsave(void) { return 0; }
+static inline void __u64_stats_irqrestore(unsigned long flags) { }
+static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return 0;
+}
+static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					   unsigned int start)
+{
+	return false;
+}
+
+#else /* 64 bit */
 
 typedef struct {
 	u64		v;
@@ -123,123 +134,95 @@ static inline void u64_stats_inc(u64_stats_t *p)
 {
 	p->v++;
 }
-#endif
 
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
-#define u64_stats_init(syncp)	seqcount_init(&(syncp)->seq)
-#else
 static inline void u64_stats_init(struct u64_stats_sync *syncp)
 {
+	seqcount_init(&syncp->seq);
 }
-#endif
 
-static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		preempt_disable();
+	preempt_disable_nested();
 	write_seqcount_begin(&syncp->seq);
-#endif
 }
 
-static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+static inline void __u64_stats_update_end(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	write_seqcount_end(&syncp->seq);
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		preempt_enable();
-#endif
+	preempt_enable_nested();
 }
 
-static inline unsigned long
-u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+static inline unsigned long __u64_stats_irqsave(void)
 {
-	unsigned long flags = 0;
+	unsigned long flags;
 
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		preempt_disable();
-	else
-		local_irq_save(flags);
-	write_seqcount_begin(&syncp->seq);
-#endif
+	local_irq_save(flags);
 	return flags;
 }
 
-static inline void
-u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
-				unsigned long flags)
+static inline void __u64_stats_irqrestore(unsigned long flags)
 {
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
-	write_seqcount_end(&syncp->seq);
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		preempt_enable();
-	else
-		local_irq_restore(flags);
-#endif
+	local_irq_restore(flags);
 }
 
 static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	return read_seqcount_begin(&syncp->seq);
-#else
-	return 0;
-#endif
 }
 
-static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					   unsigned int start)
 {
-#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
-	preempt_disable();
-#endif
-	return __u64_stats_fetch_begin(syncp);
+	return read_seqcount_retry(&syncp->seq, start);
 }
+#endif /* !64 bit */
 
-static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
-					   unsigned int start)
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
-	return read_seqcount_retry(&syncp->seq, start);
-#else
-	return false;
-#endif
+	__u64_stats_update_begin(syncp);
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+	__u64_stats_update_end(syncp);
+}
+
+static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+{
+	unsigned long flags = __u64_stats_irqsave();
+
+	__u64_stats_update_begin(syncp);
+	return flags;
+}
+
+static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
+						   unsigned long flags)
+{
+	__u64_stats_update_end(syncp);
+	__u64_stats_irqrestore(flags);
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return __u64_stats_fetch_begin(syncp);
 }
 
 static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
 					 unsigned int start)
 {
-#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
-	preempt_enable();
-#endif
 	return __u64_stats_fetch_retry(syncp, start);
 }
 
-/*
- * In case irq handlers can update u64 counters, readers can use following helpers
- * - SMP 32bit arches use seqcount protection, irq safe.
- * - UP 32bit must disable irqs.
- * - 64bit have no problem atomically reading u64 values, irq safe.
- */
+/* Obsolete interfaces */
 static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
-	preempt_disable();
-#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
-	local_irq_disable();
-#endif
-	return __u64_stats_fetch_begin(syncp);
+	return u64_stats_fetch_begin(syncp);
 }
 
 static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
 					     unsigned int start)
 {
-#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
-	preempt_enable();
-#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
-	local_irq_enable();
-#endif
-	return __u64_stats_fetch_retry(syncp, start);
+	return u64_stats_fetch_retry(syncp, start);
 }
 
 #endif /* _LINUX_U64_STATS_SYNC_H */
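
Note: the streamlined header keeps the producer/consumer template described in its comment block. A hedged sketch of typical usage; the struct and field names below are illustrative, not part of the patch:

/*
 * Illustrative stats pair: on 64-bit the sync object is empty and the
 * helpers compile to plain local64 operations; on 32-bit the seqcount
 * protects readers from torn 64-bit values.
 */
struct example_stats {
	u64_stats_t		bytes;
	u64_stats_t		packets;
	struct u64_stats_sync	syncp;	/* set up with u64_stats_init() */
};

static void example_stats_add(struct example_stats *s, u64 len)
{
	u64_stats_update_begin(&s->syncp);
	u64_stats_add(&s->bytes, len);
	u64_stats_inc(&s->packets);
	u64_stats_update_end(&s->syncp);
}

static void example_stats_read(struct example_stats *s, u64 *bytes, u64 *packets)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&s->syncp);
		*bytes = u64_stats_read(&s->bytes);
		*packets = u64_stats_read(&s->packets);
	} while (u64_stats_fetch_retry(&s->syncp, start));
}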

lib/Kconfig.debug

Lines changed: 3 additions & 0 deletions
@@ -805,6 +805,9 @@ config ARCH_HAS_DEBUG_VM_PGTABLE
 	  An architecture should select this when it can successfully
 	  build and run DEBUG_VM_PGTABLE.
 
+config DEBUG_VM_IRQSOFF
+	def_bool DEBUG_VM && !PREEMPT_RT
+
 config DEBUG_VM
 	bool "Debug VM"
 	depends on DEBUG_KERNEL

lib/flex_proportions.c

Lines changed: 2 additions & 0 deletions
@@ -70,13 +70,15 @@ bool fprop_new_period(struct fprop_global *p, int periods)
 	 */
 	if (events <= 1)
 		return false;
+	preempt_disable_nested();
 	write_seqcount_begin(&p->sequence);
 	if (periods < 64)
 		events -= events >> periods;
 	/* Use addition to avoid losing events happening between sum and set */
 	percpu_counter_add(&p->events, -events);
 	p->period += periods;
 	write_seqcount_end(&p->sequence);
+	preempt_enable_nested();
 
 	return true;
 }

mm/Kconfig

Lines changed: 6 additions & 0 deletions
@@ -579,6 +579,12 @@ config COMPACTION
 	  it and then we would be really interested to hear about that at
 	  linux-mm@kvack.org.
 
+config COMPACT_UNEVICTABLE_DEFAULT
+	int
+	depends on COMPACTION
+	default 0 if PREEMPT_RT
+	default 1
+
 #
 # support for free page reporting
 config PAGE_REPORTING

mm/compaction.c

Lines changed: 1 addition & 5 deletions
@@ -1727,11 +1727,7 @@ typedef enum {
  * Allow userspace to control policy on scanning the unevictable LRU for
  * compactable pages.
  */
-#ifdef CONFIG_PREEMPT_RT
-int sysctl_compact_unevictable_allowed __read_mostly = 0;
-#else
-int sysctl_compact_unevictable_allowed __read_mostly = 1;
-#endif
+int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT;
 
 static inline void
 update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)

0 commit comments
