diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 46cc07b5cae6..1f36a4eccc72 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -143,8 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_SYSCALL_TRACE	4	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
 #define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
-#define TIF_SECCOMP		7	/* seccomp syscall filtering active */
-#define TIF_NEED_RESCHED_LAZY	8
+#define TIF_SECCOMP		8	/* seccomp syscall filtering active */
+#define TIF_NEED_RESCHED_LAZY	7
 #define TIF_NOHZ		12	/* in adaptive nohz mode */
 
 #define TIF_USING_IWMMXT	17
@@ -170,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
  * Change these and you break ASM code in entry-common.S
  */
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME | _TIF_UPROBE)
+				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+				 _TIF_NEED_RESCHED_LAZY)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 30a7228eaceb..c3bd6cbfce4b 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -36,7 +36,9 @@ UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+	tst	r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
+	bne	fast_work_pending
+	tst	r1, #_TIF_SECCOMP
 	bne	fast_work_pending
 
 	/* perform architecture specific actions before user return */
@@ -62,8 +64,11 @@ ENDPROC(ret_fast_syscall)
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+	tst	r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
+	bne	do_slower_path
+	tst	r1, #_TIF_SECCOMP
 	beq	no_work_pending
+do_slower_path:
 UNWIND(.fnend		)
 ENDPROC(ret_fast_syscall)
diff --git a/arch/arm/mach-omap2/gpio.c b/arch/arm/mach-omap2/gpio.c
index 689a1af47c80..7a577145b68b 100644
--- a/arch/arm/mach-omap2/gpio.c
+++ b/arch/arm/mach-omap2/gpio.c
@@ -130,6 +130,7 @@ static int __init omap2_gpio_dev_init(struct omap_hwmod *oh, void *unused)
 	}
 
 	pwrdm = omap_hwmod_get_pwrdm(oh);
+	pdata->loses_context = pwrdm_can_ever_lose_context(pwrdm);
 
 	pdev = omap_device_build(name, id - 1, oh, pdata, sizeof(*pdata));
 	kfree(pdata);
diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c
index ef4227ffa3b6..78af6d8cf2e2 100644
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -1166,3 +1166,43 @@ int pwrdm_get_context_loss_count(struct powerdomain *pwrdm)
 
 	return count;
 }
+/**
+ * pwrdm_can_ever_lose_context - can this powerdomain ever lose context?
+ * @pwrdm: struct powerdomain *
+ *
+ * Given a struct powerdomain * @pwrdm, returns 1 if the powerdomain
+ * can lose either memory or logic context or if @pwrdm is invalid, or
+ * returns 0 otherwise.  This function is not concerned with how the
+ * powerdomain registers are programmed (i.e., to go off or not); it's
+ * concerned with whether it's ever possible for this powerdomain to
+ * go off while some other part of the chip is active.  This function
+ * assumes that every powerdomain can go to either ON or INACTIVE.
+ */
+bool pwrdm_can_ever_lose_context(struct powerdomain *pwrdm)
+{
+	int i;
+
+	if (!pwrdm) {
+		pr_debug("powerdomain: %s: invalid powerdomain pointer\n",
+			 __func__);
+		return 1;
+	}
+
+	if (pwrdm->pwrsts & PWRSTS_OFF)
+		return 1;
+
+	if (pwrdm->pwrsts & PWRSTS_RET) {
+		if (pwrdm->pwrsts_logic_ret & PWRSTS_OFF)
+			return 1;
+
+		for (i = 0; i < pwrdm->banks; i++)
+			if (pwrdm->pwrsts_mem_ret[i] & PWRSTS_OFF)
+				return 1;
+	}
+
+	for (i = 0; i < pwrdm->banks; i++)
+		if (pwrdm->pwrsts_mem_on[i] & PWRSTS_OFF)
+			return 1;
+
+	return 0;
+}
diff --git a/arch/arm/mach-omap2/powerdomain.h b/arch/arm/mach-omap2/powerdomain.h
index 5e0c033a21db..28a796ce07d7 100644
--- a/arch/arm/mach-omap2/powerdomain.h
+++ b/arch/arm/mach-omap2/powerdomain.h
@@ -244,6 +244,7 @@ int pwrdm_state_switch(struct powerdomain *pwrdm);
 int pwrdm_pre_transition(struct powerdomain *pwrdm);
 int pwrdm_post_transition(struct powerdomain *pwrdm);
 int pwrdm_get_context_loss_count(struct powerdomain *pwrdm);
+bool pwrdm_can_ever_lose_context(struct powerdomain *pwrdm);
 
 extern int omap_set_pwrdm_state(struct powerdomain *pwrdm, u8 state);
 
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index deabc36c936c..542692dbd40a 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr)
 	return *ptep;
 }
 
+static unsigned int fixmap_idx(int type)
+{
+	return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+}
+
 void *kmap(struct page *page)
 {
 	might_sleep();
@@ -80,7 +85,7 @@ void *kmap_atomic(struct page *page)
 
 	type = kmap_atomic_idx_push();
 
-	idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+	idx = fixmap_idx(type);
 	vaddr = __fix_to_virt(idx);
 #ifdef CONFIG_DEBUG_HIGHMEM
 	/*
@@ -110,7 +115,7 @@ void __kunmap_atomic(void *kvaddr)
 
 	if (kvaddr >= (void *)FIXADDR_START) {
 		type = kmap_atomic_idx();
-		idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+		idx = fixmap_idx(type);
 
 		if (cache_is_vivt())
 			__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
@@ -146,7 +151,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
 		return page_address(page);
 
 	type = kmap_atomic_idx_push();
-	idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+	idx = fixmap_idx(type);
 	vaddr = __fix_to_virt(idx);
 #ifdef CONFIG_DEBUG_HIGHMEM
 	BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
@@ -167,7 +172,7 @@ void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
 	 * Clear @prev's kmap_atomic mappings
 	 */
 	for (i = 0; i < prev_p->kmap_idx; i++) {
-		int idx = i + KM_TYPE_NR * smp_processor_id();
+		int idx = fixmap_idx(i);
 
 		set_fixmap_pte(idx, __pte(0));
 	}
@@ -175,7 +180,7 @@ void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
 	 * Restore @next_p's kmap_atomic mappings
 	 */
 	for (i = 0; i < next_p->kmap_idx; i++) {
-		int idx = i + KM_TYPE_NR * smp_processor_id();
+		int idx = fixmap_idx(i);
 
 		if (!pte_none(next_p->kmap_pte[i]))
 			set_fixmap_pte(idx, next_p->kmap_pte[i]);
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index b229ef49a643..5f4e89fbc290 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -129,7 +129,8 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
+				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
+				 _TIF_NEED_RESCHED_LAZY)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 7c7c9ba7d4f7..3ec240f3951a 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -220,7 +220,7 @@ long syscall_trace_enter(struct pt_regs *regs)
 
 #define EXIT_TO_USERMODE_LOOP_FLAGS				\
 	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |	\
-	 _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
+	 _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY)
 
 static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
 {
diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
index 80d74c4adcbe..a7abdb6638cd 100644
--- a/drivers/clocksource/timer-atmel-pit.c
+++ b/drivers/clocksource/timer-atmel-pit.c
@@ -96,10 +96,11 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev)
 
 	/* disable irq, leaving the clocksource active */
 	pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN);
-	free_irq(atmel_pit_irq, data);
+	free_irq(data->irq, data);
 	return 0;
 }
 
+static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id);
 /*
  * Clockevent device:  interrupts every 1/HZ (== pit_cycles * MCK/16)
  */
@@ -189,7 +190,6 @@ static void __init at91sam926x_pit_common_init(struct pit_data *data)
 {
 	unsigned long	pit_rate;
 	unsigned	bits;
-	int		ret;
 
 	/*
 	 * Use our actual MCK to figure out how many MCK/16 ticks per
diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c
index ea37afc26e1b..103d0fd70cc4 100644
--- a/drivers/clocksource/timer-atmel-st.c
+++ b/drivers/clocksource/timer-atmel-st.c
@@ -150,7 +150,7 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev)
 
 static int clkevt32k_set_periodic(struct clock_event_device *dev)
 {
-	int irq;
+	int ret;
 
 	clkdev32k_disable_and_flush_irq();
 
@@ -229,8 +229,8 @@ static void __init atmel_st_timer_init(struct device_node *node)
 	regmap_read(regmap_st, AT91_ST_SR, &val);
 
 	/* Get the interrupts property */
-	irq = irq_of_parse_and_map(node, 0);
-	if (!irq)
+	atmel_st_irq = irq_of_parse_and_map(node, 0);
+	if (!atmel_st_irq)
 		panic(pr_fmt("Unable to get IRQ from DT\n"));
 
 	sclk = of_clk_get(node, 0);
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 344058f8501a..d5657d50ac40 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -119,7 +119,6 @@ struct cpuidle_coupled {
 
 #define CPUIDLE_COUPLED_NOT_IDLE	(-1)
 
-static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 004888bf794e..f7fbb46d5d79 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -69,7 +69,7 @@ struct gpio_bank {
 	struct device *dev;
 	bool is_mpuio;
 	bool dbck_flag;
-
+	bool loses_context;
 	bool context_valid;
 	int stride;
 	u32 width;
@@ -1208,9 +1208,15 @@ static int omap_gpio_probe(struct platform_device *pdev)
 #ifdef CONFIG_OF_GPIO
 	bank->chip.of_node = of_node_get(node);
 #endif
-	if (!node) {
-		bank->get_context_loss_count =
-			pdata->get_context_loss_count;
+	if (node) {
+		if (!of_property_read_bool(node, "ti,gpio-always-on"))
+			bank->loses_context = true;
+	} else {
+		bank->loses_context = pdata->loses_context;
+
+		if (bank->loses_context)
+			bank->get_context_loss_count =
+				pdata->get_context_loss_count;
 	}
 
 	if (bank->regs->set_dataout && bank->regs->clr_dataout)
@@ -1367,7 +1373,7 @@ static int omap_gpio_runtime_resume(struct device *dev)
	 * been initialised and so initialise it now. Also initialise
	 * the context loss count.
	 */
-	if (!bank->context_valid) {
+	if (bank->loses_context && !bank->context_valid) {
		omap_gpio_init_context(bank);

		if (bank->get_context_loss_count)
@@ -1388,15 +1394,17 @@ static int omap_gpio_runtime_resume(struct device *dev)
 	writel_relaxed(bank->context.risingdetect,
 		       bank->base + bank->regs->risingdetect);
 
-	if (!bank->get_context_loss_count) {
-		omap_gpio_restore_context(bank);
-	} else {
-		c = bank->get_context_loss_count(bank->dev);
-		if (c != bank->context_loss_count) {
+	if (bank->loses_context) {
+		if (!bank->get_context_loss_count) {
 			omap_gpio_restore_context(bank);
 		} else {
-			spin_unlock_irqrestore(&bank->lock, flags);
-			return 0;
+			c = bank->get_context_loss_count(bank->dev);
+			if (c != bank->context_loss_count) {
+				omap_gpio_restore_context(bank);
+			} else {
+				raw_spin_unlock_irqrestore(&bank->lock, flags);
+				return 0;
+			}
 		}
 	}
 
@@ -1468,7 +1476,7 @@ void omap2_gpio_prepare_for_idle(int pwr_mode)
 	struct gpio_bank *bank;
 
 	list_for_each_entry(bank, &omap_gpio_list, node) {
-		if (!BANK_USED(bank))
+		if (!BANK_USED(bank) || !bank->loses_context)
 			continue;
 
 		bank->power_mode = pwr_mode;
@@ -1482,7 +1490,7 @@ void omap2_gpio_resume_after_idle(void)
 	struct gpio_bank *bank;
 
 	list_for_each_entry(bank, &omap_gpio_list, node) {
-		if (!BANK_USED(bank))
+		if (!BANK_USED(bank) || !bank->loses_context)
 			continue;
 
 		pm_runtime_get_sync(bank->dev);
diff --git a/drivers/media/platform/vsp1/vsp1_video.c b/drivers/media/platform/vsp1/vsp1_video.c
index 5ce88e1f5d71..b4f8cd74ecb8 100644
--- a/drivers/media/platform/vsp1/vsp1_video.c
+++ b/drivers/media/platform/vsp1/vsp1_video.c
@@ -520,7 +520,7 @@ static bool vsp1_pipeline_stopped(struct vsp1_pipeline *pipe)
 	bool stopped;
 
 	spin_lock_irqsave(&pipe->irqlock, flags);
-	stopped = pipe->state == VSP1_PIPELINE_STOPPED,
+	stopped = pipe->state == VSP1_PIPELINE_STOPPED;
 	spin_unlock_irqrestore(&pipe->irqlock, flags);
 
 	return stopped;
diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c
index 2429c4331e68..52f5ad5fd9c0 100644
--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -616,7 +616,7 @@ static ssize_t debug_enable_fwrite(struct file *filp,
 
 	buf[sizeof(buf)-1] = '\0';			/* just in case */
 	err = kstrtoul(buf, 10, &val);
-	if (0 != err)
+	if (err)
 		return -EINVAL;
 
 	if (val) {
@@ -921,7 +921,7 @@ static ssize_t debug_width_fwrite(struct file *filp,
 
 	buf[U64STR_SIZE-1] = '\0';			/* just in case */
 	err = kstrtoull(buf, 10, &val);
-	if (0 != err)
+	if (err)
 		return -EINVAL;
 
 	mutex_lock(&data.lock);
@@ -1005,7 +1005,7 @@ static ssize_t debug_window_fwrite(struct file *filp,
 
 	buf[U64STR_SIZE-1] = '\0';			/* just in case */
 	err = kstrtoull(buf, 10, &val);
-	if (0 != err)
+	if (err)
 		return -EINVAL;
 
 	mutex_lock(&data.lock);
@@ -1198,11 +1198,11 @@ static int detector_init(void)
 	pr_info(BANNER "version %s\n", VERSION);
 
 	ret = init_stats();
-	if (0 != ret)
+	if (ret)
 		goto out;
 
 	ret = init_debugfs();
-	if (0 != ret)
+	if (ret)
 		goto err_stats;
 
 	if (enabled)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a23399e8e3ab..1fc2e13fc2c1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -232,6 +232,7 @@ static struct btrfs_device *__alloc_device(void)
 	spin_lock_init(&dev->reada_lock);
 	atomic_set(&dev->reada_in_flight, 0);
 	atomic_set(&dev->dev_stats_ccnt, 0);
+	btrfs_device_data_ordered_init(dev);
 	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 1f57cb8f9d95..27933e47ed22 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -87,6 +87,9 @@ enum hrtimer_restart {
 * @function:	timer expiry callback function
 * @base:	pointer to the timer base (per cpu and per clock)
 * @state:	state information (See bit values above)
+ * @cb_entry:	list entry to defer timers from hardirq context
+ * @irqsafe:	timer can run in hardirq context
+ * @praecox:	timer expiry time if expired at the time of programming
 * @start_pid:  timer statistics field to store the pid of the task which
 *		started the timer
 * @start_site:	timer statistics field to store the site where the timer
@@ -135,6 +138,7 @@ struct hrtimer_sleeper {
 *			timer to a base on another cpu.
 * @clockid:		clock id for per_cpu support
 * @active:		red black tree root node for the active timers
+ * @expired:		list head for deferred timers.
 * @get_time:		function to retrieve the current time of the clock
 * @offset:		offset of this clock to the monotonic base
 */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 47d38a0e4a81..655cee096aed 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -208,6 +208,7 @@ extern void resume_device_irqs(void);
 * @irq:		Interrupt to which notification applies
 * @kref:		Reference count, for internal use
 * @work:		Work item, for internal use
+ * @list:		List item for deferred callbacks
 * @notify:		Function to be called on change.  This will be
 *			called in process context.
 * @release:		Function to be called on release.  This will be
@@ -464,6 +465,14 @@ extern void thread_do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 extern void __raise_softirq_irqoff(unsigned int nr);
+#ifdef CONFIG_PREEMPT_RT_FULL
+extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
+#else
+static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
+{
+	__raise_softirq_irqoff(nr);
+}
+#endif
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f14e39cb897c..c0e12f7f0f13 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2249,11 +2249,20 @@ void netdev_freemem(struct net_device *dev);
 void synchronize_net(void);
 int init_dummy_netdev(struct net_device *dev);
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline int dev_recursion_level(void)
+{
+	return current->xmit_recursion;
+}
+
+#else
+
 DECLARE_PER_CPU(int, xmit_recursion);
 static inline int dev_recursion_level(void)
 {
 	return this_cpu_read(xmit_recursion);
 }
+#endif
 
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
index ff43e01b8ca9..cb2618147c34 100644
--- a/include/linux/platform_data/gpio-omap.h
+++ b/include/linux/platform_data/gpio-omap.h
@@ -198,6 +198,7 @@ struct omap_gpio_platform_data {
 	int bank_width;		/* GPIO bank width */
 	int bank_stride;	/* Only needed for omap1 MPUIO */
 	bool dbck_flag;		/* dbck required or not - True for OMAP3&4 */
+	bool loses_context;	/* whether the bank would ever lose context */
 	bool is_mpuio;		/* whether the bank is of type MPUIO */
 	u32 non_wakeup_gpios;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04eb2f8bc274..a8d5ae88b30b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1858,6 +1858,9 @@ struct task_struct {
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long	task_state_change;
 #endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+	int xmit_recursion;
+#endif
 	int pagefault_disabled;
 	/* CPU-specific state of this task */
 	struct thread_struct thread;
@@ -3280,14 +3283,19 @@ static inline int __migrate_disabled(struct task_struct *p)
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
 {
-#ifdef CONFIG_PREEMPT_RT_FULL
-	if (p->migrate_disable)
+	if (__migrate_disabled(p))
 		return cpumask_of(task_cpu(p));
-#endif
 	return &p->cpus_allowed;
 }
 
+static inline int tsk_nr_cpus_allowed(struct task_struct *p)
+{
+	if (__migrate_disabled(p))
+		return 1;
+	return p->nr_cpus_allowed;
+}
+
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
diff --git a/init/Kconfig b/init/Kconfig
index 6f4408adf62d..a7c81c0911da 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -498,7 +498,7 @@ config TINY_RCU
 
 config RCU_EXPERT
 	bool "Make expert-level adjustments to RCU configuration"
-	default n
+	default y if PREEMPT_RT_FULL
 	help
 	  This option needs to be enabled if you wish to make
 	  expert-level adjustments to RCU configuration.  By default,
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index d1d158005ad0..2856b433d9d6 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -129,12 +129,14 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 
 	spin_lock_irq(&task->sighand->siglock);
 	if (task_is_traced(task) && !__fatal_signal_pending(task)) {
-		raw_spin_lock_irq(&task->pi_lock);
+		unsigned long flags;
+
+		raw_spin_lock_irqsave(&task->pi_lock, flags);
 		if (task->state & __TASK_TRACED)
 			task->state = __TASK_TRACED;
 		else
 			task->saved_state = __TASK_TRACED;
-		raw_spin_unlock_irq(&task->pi_lock);
+		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 		ret = true;
 	}
 	spin_unlock_irq(&task->sighand->siglock);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8d9f6a657d4e..7a13fbc28454 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1209,15 +1209,6 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma
 	p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
-#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
-#define MIGRATE_DISABLE_SET_AFFIN	(1<<30) /* Can't make a negative */
-#define migrate_disabled_updated(p)	((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
-#define migrate_disable_count(p)	((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
-#else
-static inline void update_migrate_disable(struct task_struct *p) { }
-#define migrate_disabled_updated(p)		0
-#endif
-
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
 	struct rq *rq = task_rq(p);
@@ -1225,7 +1216,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 
 	lockdep_assert_held(&p->pi_lock);
 
-	if (migrate_disabled_updated(p)) {
+	if (__migrate_disabled(p)) {
 		cpumask_copy(&p->cpus_allowed, new_mask);
 		return;
 	}
@@ -1774,7 +1765,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
 	lockdep_assert_held(&p->pi_lock);
 
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
 
 	/*
@@ -3162,38 +3153,6 @@ static inline void schedule_debug(struct task_struct *prev)
 
 #if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
 
-static inline void update_migrate_disable(struct task_struct *p)
-{
-	const struct cpumask *mask;
-
-	if (likely(!p->migrate_disable))
-		return;
-
-	/* Did we already update affinity? */
-	if (unlikely(migrate_disabled_updated(p)))
-		return;
-
-	/*
-	 * Since this is always current we can get away with only locking
-	 * rq->lock, the ->cpus_allowed value can normally only be changed
-	 * while holding both p->pi_lock and rq->lock, but seeing that this
-	 * is current, we cannot actually be waking up, so all code that
-	 * relies on serialization against p->pi_lock is out of scope.
-	 *
-	 * Having rq->lock serializes us against things like
-	 * set_cpus_allowed_ptr() that can still happen concurrently.
-	 */
-	mask = tsk_cpus_allowed(p);
-
-	if (p->sched_class->set_cpus_allowed)
-		p->sched_class->set_cpus_allowed(p, mask);
-	/* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
-	p->nr_cpus_allowed = 1;
-
-	/* Let migrate_enable know to fix things back up */
-	p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
-}
-
 void migrate_disable(void)
 {
 	struct task_struct *p = current;
@@ -3221,6 +3180,7 @@ void migrate_disable(void)
 	preempt_lazy_disable();
 	pin_current_cpu();
 	p->migrate_disable = 1;
+	p->nr_cpus_allowed = 1;
 	preempt_enable();
 }
 EXPORT_SYMBOL(migrate_disable);
@@ -3228,9 +3188,6 @@ EXPORT_SYMBOL(migrate_disable);
 void migrate_enable(void)
 {
 	struct task_struct *p = current;
-	const struct cpumask *mask;
-	unsigned long flags;
-	struct rq *rq;
 
 	if (in_atomic()) {
 #ifdef CONFIG_SCHED_DEBUG
@@ -3247,33 +3204,17 @@ void migrate_enable(void)
 #endif
 
 	WARN_ON_ONCE(p->migrate_disable <= 0);
-	if (migrate_disable_count(p) > 1) {
+	if (p->migrate_disable > 1) {
 		p->migrate_disable--;
 		return;
 	}
 
 	preempt_disable();
-	if (unlikely(migrate_disabled_updated(p))) {
-		/*
-		 * Undo whatever update_migrate_disable() did, also see there
-		 * about locking.
-		 */
-		rq = this_rq();
-		raw_spin_lock_irqsave(&current->pi_lock, flags);
-		raw_spin_lock(&rq->lock);
-
-		/*
-		 * Clearing migrate_disable causes tsk_cpus_allowed to
-		 * show the tasks original cpu affinity.
-		 */
-		p->migrate_disable = 0;
-		mask = tsk_cpus_allowed(p);
-		do_set_cpus_allowed(p, mask);
-
-		raw_spin_unlock(&rq->lock);
-		raw_spin_unlock_irqrestore(&current->pi_lock, flags);
-	} else
-		p->migrate_disable = 0;
+	/*
+	 * Clearing migrate_disable causes tsk_cpus_allowed to
+	 * show the task's original cpu affinity.
+	 */
+	p->migrate_disable = 0;
 
 	unpin_current_cpu();
 	preempt_enable();
@@ -3397,8 +3338,6 @@ static void __sched notrace __schedule(bool preempt)
 	raw_spin_lock_irq(&rq->lock);
 	lockdep_pin_lock(&rq->lock);
 
-	update_migrate_disable(prev);
-
 	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
 
 	switch_count = &prev->nivcsw;
@@ -3525,6 +3464,30 @@ static void __sched notrace preempt_schedule_common(void)
 	} while (need_resched());
 }
 
+#ifdef CONFIG_PREEMPT_LAZY
+/*
+ * If TIF_NEED_RESCHED is set then we allow to be scheduled away since this is
+ * set by a RT task. Otherwise we try to avoid being scheduled out as long as
+ * the preempt_lazy_count counter is > 0.
+ */
+static int preemptible_lazy(void)
+{
+	if (test_thread_flag(TIF_NEED_RESCHED))
+		return 1;
+	if (current_thread_info()->preempt_lazy_count)
+		return 0;
+	return 1;
+}
+
+#else
+
+static int preemptible_lazy(void)
+{
+	return 1;
+}
+
+#endif
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -3539,6 +3502,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 	 */
 	if (likely(!preemptible()))
 		return;
+	if (!preemptible_lazy())
+		return;
 
 	preempt_schedule_common();
 }
@@ -3565,15 +3530,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 
 	if (likely(!preemptible()))
 		return;
-
-#ifdef CONFIG_PREEMPT_LAZY
-	/*
-	 * Check for lazy preemption
-	 */
-	if (current_thread_info()->preempt_lazy_count &&
-	    !test_thread_flag(TIF_NEED_RESCHED))
+	if (!preemptible_lazy())
 		return;
-#endif
+
 	do {
 		preempt_disable_notrace();
 		/*
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5a75b08cfd85..5be58820465c 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -103,10 +103,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	const struct sched_dl_entity *dl_se = &p->dl;
 
 	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+	    cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
-	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
+	} else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
 		   dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
 		best_cpu = cpudl_maximum(cp);
 		if (later_mask)
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 981fcd7dc394..11e9705bf937 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 		if (skip)
 			continue;
 
-		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+		if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
 			continue;
 
 		if (lowest_mask) {
-			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+			cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
 
 			/*
 			 * We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b8223bdd1650..7a72e69fcf65 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	struct task_struct *p = dl_task_of(dl_se);
 
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		dl_rq->dl_nr_migratory++;
 
 	update_dl_migration(dl_rq);
@@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	struct task_struct *p = dl_task_of(dl_se);
 
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		dl_rq->dl_nr_migratory--;
 
 	update_dl_migration(dl_rq);
@@ -990,7 +990,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 
 	enqueue_dl_entity(&p->dl, pi_se, flags);
 
-	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_dl_task(rq, p);
 }
 
@@ -1068,9 +1068,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 	 * try to make it stay here, it might be important.
	 */
	if (unlikely(dl_task(curr)) &&
-	    (curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(curr) < 2 ||
	     !dl_entity_preempt(&p->dl, &curr->dl)) &&
-	    (p->nr_cpus_allowed > 1)) {
+	    (tsk_nr_cpus_allowed(p) > 1)) {
		int target = find_later_rq(p);

		if (target != -1 &&
@@ -1091,7 +1091,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
	 * Current can't be migrated, useless to reschedule,
	 * let's hope p can move out.
	 */
-	if (rq->curr->nr_cpus_allowed == 1 ||
+	if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
		return;

@@ -1099,7 +1099,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
	 * p is migratable, so let's not schedule it and
	 * see if it is pushed or pulled somewhere else.
	 */
-	if (p->nr_cpus_allowed != 1 &&
+	if (tsk_nr_cpus_allowed(p) != 1 &&
	    cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
		return;

@@ -1213,7 +1213,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
 {
 	update_curr_dl(rq);
 
-	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
+	if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_dl_task(rq, p);
 }
 
@@ -1336,7 +1336,7 @@ static int find_later_rq(struct task_struct *task)
 	if (unlikely(!later_mask))
 		return -1;
 
-	if (task->nr_cpus_allowed == 1)
+	if (tsk_nr_cpus_allowed(task) == 1)
 		return -1;
 
 	/*
@@ -1442,7 +1442,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 		if (double_lock_balance(rq, later_rq)) {
 			if (unlikely(task_rq(task) != rq ||
 				     !cpumask_test_cpu(later_rq->cpu,
-						       &task->cpus_allowed) ||
+						       tsk_cpus_allowed(task)) ||
 				     task_running(rq, task) ||
 				     !task_on_rq_queued(task))) {
 				double_unlock_balance(rq, later_rq);
@@ -1481,7 +1481,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
 
 	BUG_ON(rq->cpu != task_cpu(p));
 	BUG_ON(task_current(rq, p));
-	BUG_ON(p->nr_cpus_allowed <= 1);
+	BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
 
 	BUG_ON(!task_on_rq_queued(p));
 	BUG_ON(!dl_task(p));
@@ -1520,7 +1520,7 @@ static int push_dl_task(struct rq *rq)
 	 */
 	if (dl_task(rq->curr) &&
 	    dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
-	    rq->curr->nr_cpus_allowed > 1) {
+	    tsk_nr_cpus_allowed(rq->curr) > 1) {
 		resched_curr(rq);
 		return 0;
 	}
@@ -1667,9 +1667,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
-	    p->nr_cpus_allowed > 1 &&
+	    tsk_nr_cpus_allowed(p) > 1 &&
 	    dl_task(rq->curr) &&
-	    (rq->curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(rq->curr) < 2 ||
 	     !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
 		push_dl_tasks(rq);
 	}
@@ -1770,7 +1770,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
-		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
+		if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
 			queue_push_tasks(rq);
 #else
 		if (dl_task(rq->curr))
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3d11807f6dd5..8cf360d309ec 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -328,7 +328,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 	rt_rq->rt_nr_total++;
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		rt_rq->rt_nr_migratory++;
 
 	update_rt_migration(rt_rq);
@@ -345,7 +345,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 	rt_rq->rt_nr_total--;
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
		rt_rq->rt_nr_migratory--;
 
 	update_rt_migration(rt_rq);
@@ -1264,7 +1264,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
 	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
-	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1353,7 +1353,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 	 * will have to sort it out.
 	 */
 	if (curr && unlikely(rt_task(curr)) &&
-	    (curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(curr) < 2 ||
 	     curr->prio <= p->prio)) {
 		int target = find_lowest_rq(p);
 
@@ -1377,7 +1377,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 	 * Current can't be migrated, useless to reschedule,
 	 * let's hope p can move out.
 	 */
-	if (rq->curr->nr_cpus_allowed == 1 ||
+	if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
 	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
 		return;
 
@@ -1385,7 +1385,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 	 * p is migratable, so let's not schedule it and
 	 * see if it is pushed or pulled somewhere else.
 	 */
-	if (p->nr_cpus_allowed != 1
+	if (tsk_nr_cpus_allowed(p) != 1
 	    && cpupri_find(&rq->rd->cpupri, p, NULL))
 		return;
 
@@ -1519,7 +1519,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
+	if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1569,7 +1569,7 @@ static int find_lowest_rq(struct task_struct *task)
 	if (unlikely(!lowest_mask))
 		return -1;
 
-	if (task->nr_cpus_allowed == 1)
+	if (tsk_nr_cpus_allowed(task) == 1)
 		return -1; /* No other targets possible */
 
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1701,7 +1701,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
 
 	BUG_ON(rq->cpu != task_cpu(p));
 	BUG_ON(task_current(rq, p));
-	BUG_ON(p->nr_cpus_allowed <= 1);
+	BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
 
 	BUG_ON(!task_on_rq_queued(p));
 	BUG_ON(!rt_task(p));
@@ -2061,9 +2061,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
-	    p->nr_cpus_allowed > 1 &&
+	    tsk_nr_cpus_allowed(p) > 1 &&
 	    (dl_task(rq->curr) || rt_task(rq->curr)) &&
-	    (rq->curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(rq->curr) < 2 ||
 	     rq->curr->prio <= p->prio))
 		push_rt_tasks(rq);
 }
@@ -2136,7 +2136,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 	 */
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
-		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
+		if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
 			queue_push_tasks(rq);
 #else
 		if (p->prio < rq->curr->prio)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0fd93311536f..d1e999e74d23 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,6 +58,10 @@ EXPORT_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+#ifdef CONFIG_PREEMPT_RT_FULL
+#define TIMER_SOFTIRQS	((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
+DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
+#endif
 
 const char * const softirq_to_name[NR_SOFTIRQS] = {
 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -171,6 +175,17 @@ static void wakeup_softirqd(void)
 		wake_up_process(tsk);
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void wakeup_timer_softirqd(void)
+{
+	/* Interrupts are disabled: no need to stop preemption */
+	struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
+
+	if (tsk && tsk->state != TASK_RUNNING)
+		wake_up_process(tsk);
+}
+#endif
+
 static void handle_softirq(unsigned int vec_nr)
 {
 	struct softirq_action *h = softirq_vec + vec_nr;
@@ -473,7 +488,6 @@ void __raise_softirq_irqoff(unsigned int nr)
 static inline void local_bh_disable_nort(void) { local_bh_disable(); }
 static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
 static void ksoftirqd_set_sched_params(unsigned int cpu) { }
-static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
 
 #else /* !PREEMPT_RT_FULL */
 
@@ -599,8 +613,8 @@ static void run_ksoftirqd(unsigned int cpu)
 
 	do_current_softirqs();
 	current->softirq_nestcnt--;
-	rcu_note_context_switch();
 	local_irq_enable();
+	cond_resched_rcu_qs();
 }
 
 /*
@@ -618,8 +632,12 @@ void thread_do_softirq(void)
 
 static void do_raise_softirq_irqoff(unsigned int nr)
 {
+	unsigned int mask;
+
+	mask = 1UL << nr;
+
 	trace_softirq_raise(nr);
-	or_softirq_pending(1UL << nr);
+	or_softirq_pending(mask);
 
 	/*
 	 * If we are not in a hard interrupt and inside a bh disabled
@@ -628,16 +646,51 @@ static void do_raise_softirq_irqoff(unsigned int nr)
 	 * delegate it to ksoftirqd.
 	 */
 	if (!in_irq() && current->softirq_nestcnt)
-		current->softirqs_raised |= (1U << nr);
-	else if (__this_cpu_read(ksoftirqd))
-		__this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
+		current->softirqs_raised |= mask;
+	else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
+		return;
+
+	if (mask & TIMER_SOFTIRQS)
+		__this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
+	else
+		__this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
 }
 
+static void wakeup_proper_softirq(unsigned int nr)
+{
+	if ((1UL << nr) & TIMER_SOFTIRQS)
+		wakeup_timer_softirqd();
+	else
+		wakeup_softirqd();
+}
+
+
 void __raise_softirq_irqoff(unsigned int nr)
 {
 	do_raise_softirq_irqoff(nr);
 	if (!in_irq() && !current->softirq_nestcnt)
-		wakeup_softirqd();
+		wakeup_proper_softirq(nr);
+}
+
+/*
+ * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
+ */
+void __raise_softirq_irqoff_ksoft(unsigned int nr)
+{
+	unsigned int mask;
+
+	if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
+			 !__this_cpu_read(ktimer_softirqd)))
+		return;
+	mask = 1UL << nr;
+
+	trace_softirq_raise(nr);
+	or_softirq_pending(mask);
+	if (mask & TIMER_SOFTIRQS)
+		__this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
+	else
+		__this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
+	wakeup_proper_softirq(nr);
 }
 
 /*
@@ -663,7 +716,7 @@ void raise_softirq_irqoff(unsigned int nr)
 	 * raise a WARN() if the condition is met.
	 */
	if (!current->softirq_nestcnt)
-		wakeup_softirqd();
+		wakeup_proper_softirq(nr);
 }
 
 static inline int ksoftirqd_softirq_pending(void)
@@ -676,22 +729,37 @@ static inline void _local_bh_enable_nort(void) { }
 
 static inline void ksoftirqd_set_sched_params(unsigned int cpu)
 {
-	struct sched_param param = { .sched_priority = 1 };
-
-	sched_setscheduler(current, SCHED_FIFO, &param);
-	/* Take over all pending softirqs when starting */
+	/* Take over all but timer pending softirqs when starting */
 	local_irq_disable();
-	current->softirqs_raised = local_softirq_pending();
+	current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
 	local_irq_enable();
 }
 
-static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
+static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
+{
+	struct sched_param param = { .sched_priority = 1 };
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+
+	/* Take over timer pending softirqs when starting */
+	local_irq_disable();
+	current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
+	local_irq_enable();
+}
+
+static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
+						    bool online)
 {
 	struct sched_param param = { .sched_priority = 0 };
 
 	sched_setscheduler(current, SCHED_NORMAL, &param);
 }
 
+static int ktimer_softirqd_should_run(unsigned int cpu)
+{
+	return current->softirqs_raised;
+}
+
 #endif /* PREEMPT_RT_FULL */
 
 /*
  * Enter an interrupt context.
@@ -741,6 +809,9 @@ static inline void invoke_softirq(void)
 	if (__this_cpu_read(ksoftirqd) &&
 	    __this_cpu_read(ksoftirqd)->softirqs_raised)
 		wakeup_softirqd();
+	if (__this_cpu_read(ktimer_softirqd) &&
+	    __this_cpu_read(ktimer_softirqd)->softirqs_raised)
+		wakeup_timer_softirqd();
 	local_irq_restore(flags);
 #endif
 }
@@ -1173,17 +1244,30 @@ static struct notifier_block cpu_nfb = {
 
 static struct smp_hotplug_thread softirq_threads = {
 	.store			= &ksoftirqd,
 	.setup			= ksoftirqd_set_sched_params,
-	.cleanup		= ksoftirqd_clr_sched_params,
 	.thread_should_run	= ksoftirqd_should_run,
 	.thread_fn		= run_ksoftirqd,
 	.thread_comm		= "ksoftirqd/%u",
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static struct smp_hotplug_thread softirq_timer_threads = {
+	.store			= &ktimer_softirqd,
+	.setup			= ktimer_softirqd_set_sched_params,
+	.cleanup		= ktimer_softirqd_clr_sched_params,
+	.thread_should_run	= ktimer_softirqd_should_run,
+	.thread_fn		= run_ksoftirqd,
+	.thread_comm		= "ktimersoftd/%u",
+};
+#endif
+
 static __init int spawn_ksoftirqd(void)
 {
 	register_cpu_notifier(&cpu_nfb);
 
 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
+#ifdef CONFIG_PREEMPT_RT_FULL
+	BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
+#endif
 
 	return 0;
 }
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index fdd2b859d05d..27c198e74967 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1435,6 +1435,7 @@ static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
 
 #endif
 
+static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
 {
@@ -1480,18 +1481,16 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
 			if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
 				break;
 
-				if (!hrtimer_rt_defer(timer))
-					__run_hrtimer(cpu_base, base, timer, &basenow);
-				else
-					raise = 1;
+			if (!hrtimer_rt_defer(timer))
+				__run_hrtimer(cpu_base, base, timer, &basenow);
+			else
+				raise = 1;
 		}
 	}
 
 	if (raise)
 		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 }
-static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
-
 #ifdef CONFIG_HIGH_RES_TIMERS
 
 /*
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index c3314fc41316..fee8682c209e 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1118,7 +1118,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1453,7 +1453,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 	 * the base lock to check when the next timer is pending and so
 	 * we assume the next jiffy.
 	 */
-	return basej;
+	return basem + TICK_NSEC;
 #endif
 	spin_lock(&base->lock);
 	if (base->active_timers) {
diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c
index b6c1d14b71c4..7f6ee70dea41 100644
--- a/kernel/trace/latency_hist.c
+++ b/kernel/trace/latency_hist.c
@@ -117,7 +117,7 @@ static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
 static notrace void probe_wakeup_latency_hist_start(void *v,
 	struct task_struct *p);
 static notrace void probe_wakeup_latency_hist_stop(void *v,
-	struct task_struct *prev, struct task_struct *next);
+	bool preempt, struct task_struct *prev, struct task_struct *next);
 static notrace void probe_sched_migrate_task(void *,
 	struct task_struct *task, int cpu);
 static struct enable_data wakeup_latency_enabled_data = {
@@ -907,7 +907,7 @@ static notrace void probe_wakeup_latency_hist_start(void *v,
 }
 
 static notrace void probe_wakeup_latency_hist_stop(void *v,
-	struct task_struct *prev, struct task_struct *next)
+	bool preempt, struct task_struct *prev, struct task_struct *next)
 {
 	unsigned long flags;
 	int cpu = task_cpu(next);
diff --git a/localversion-rt b/localversion-rt
index c3054d08a112..1445cd65885c 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt2
+-rt3
diff --git a/mm/rmap.c b/mm/rmap.c
index 950d79743e8f..b577fbb98d4b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -89,10 +89,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
 	return anon_vma;
 }
 
-#include <linux/delay.h>
-static void anon_vma_free(struct anon_vma *anon_vma)
+static inline void anon_vma_free(struct anon_vma *anon_vma)
 {
-	int cnt = 0;
 	VM_BUG_ON(atomic_read(&anon_vma->refcount));
 
 	/*
@@ -113,17 +111,9 @@ static void anon_vma_free(struct anon_vma *anon_vma)
 	 * happen _before_ what follows.
	 */
	might_sleep();
-retry:
	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
		anon_vma_lock_write(anon_vma);
		anon_vma_unlock_write(anon_vma);
-
-		if (rwsem_is_locked(&anon_vma->root->rwsem)) {
-			cnt++;
-			if (cnt > 3)
-				msleep(1);
-		}
-		goto retry;
	}

	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index ae4a67e7e654..13a55d0df151 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2946,9 +2946,44 @@ static void skb_update_prio(struct sk_buff *skb)
 #define skb_update_prio(skb)
 #endif
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+static inline int xmit_rec_read(void)
+{
+	return current->xmit_recursion;
+}
+
+static inline void xmit_rec_inc(void)
+{
+	current->xmit_recursion++;
+}
+
+static inline void xmit_rec_dec(void)
+{
+	current->xmit_recursion--;
+}
+
+#else
+
 DEFINE_PER_CPU(int, xmit_recursion);
 EXPORT_SYMBOL(xmit_recursion);
 
+static inline int xmit_rec_read(void)
+{
+	return __this_cpu_read(xmit_recursion);
+}
+
+static inline void xmit_rec_inc(void)
+{
+	__this_cpu_inc(xmit_recursion);
+}
+
+static inline void xmit_rec_dec(void)
+{
+	__this_cpu_dec(xmit_recursion);
+}
+#endif
+
 #define RECURSION_LIMIT 10
 
 /**
@@ -3141,7 +3176,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 
 		if (txq->xmit_lock_owner != cpu) {
 
-			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+			if (xmit_rec_read() > RECURSION_LIMIT)
 				goto recursion_alert;
 
 			skb = validate_xmit_skb(skb, dev);
@@ -3151,9 +3186,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_xmit_stopped(txq)) {
-				__this_cpu_inc(xmit_recursion);
+				xmit_rec_inc();
 				skb = dev_hard_start_xmit(skb, dev, txq, &rc);
-				__this_cpu_dec(xmit_recursion);
+				xmit_rec_dec();
 				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
 					goto out;
@@ -4920,7 +4955,7 @@ static void net_rx_action(struct softirq_action *h)
 	list_splice_tail(&repoll, &list);
 	list_splice(&list, &sd->poll_list);
 	if (!list_empty(&sd->poll_list))
-		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		__raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
 
 	net_rps_action_and_irq_enable(sd);
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d507656e98ce..53f10c2d7718 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -351,6 +351,7 @@ EXPORT_SYMBOL(build_skb);
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
 static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
 static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
+static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
@@ -380,9 +381,13 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct page_frag_cache *nc;
+	void *data;
 
-	return __alloc_page_frag(nc, fragsz, gfp_mask);
+	nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
+	data = __alloc_page_frag(nc, fragsz, gfp_mask);
+	put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
+	return data;
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -429,13 +434,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	local_irq_save(flags);
+	local_lock_irqsave(netdev_alloc_lock, flags);
 
 	nc = this_cpu_ptr(&netdev_alloc_cache);
 	data = __alloc_page_frag(nc, len, gfp_mask);
 	pfmemalloc = nc->pfmemalloc;
 
-	local_irq_restore(flags);
+	local_unlock_irqrestore(netdev_alloc_lock, flags);
 
 	if (unlikely(!data))
		return NULL;
 
@@ -476,9 +481,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 				 gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct page_frag_cache *nc;
 	struct sk_buff *skb;
 	void *data;
+	bool pfmemalloc;
 
 	len += NET_SKB_PAD + NET_IP_ALIGN;
 
@@ -496,7 +502,11 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
+	nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
 	data = __alloc_page_frag(nc, len, gfp_mask);
+	pfmemalloc = nc->pfmemalloc;
+	put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
+
 	if (unlikely(!data))
 		return NULL;
 
@@ -507,7 +517,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	}
 
 	/* use OR instead of assignment to avoid clearing of bits in mask */
-	if (nc->pfmemalloc)
+	if (pfmemalloc)
 		skb->pfmemalloc = 1;
 	skb->head_frag = 1;