From 762af5a2aa0ad18da1316666dae30d369268d44c Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Date: Mon, 25 Aug 2025 15:26:35 +0200
Subject: vdso/vsyscall: Avoid slow division loop in auxiliary clock update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The call to __iter_div_u64_rem() in vdso_time_update_aux() is a wrapper
around subtraction. It cannot be used to divide large numbers, as that
introduces long, computationally expensive delays.  A regular u64 division
is also not possible in the timekeeper update path as it can be too slow.

Instead of splitting the ktime_t offset into into second and subsecond
components during the timekeeper update fast-path, do it together with the
adjustment of tk->offs_aux in the slow-path. Equivalent to the handling of
offs_boot and monotonic_to_boot.

Reuse the storage of monotonic_to_boot for the new field, as it is not used
by auxiliary timekeepers.

Fixes: 380b84e168e5 ("vdso/vsyscall: Update auxiliary clock data in the datapage")
Reported-by: Miroslav Lichvar <mlichvar@redhat.com>
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250825-vdso-auxclock-division-v1-1-a1d32a16a313@linutronix.de
Closes: https://lore.kernel.org/lkml/aKwsNNWsHJg8IKzj@localhost/
---
 kernel/time/timekeeping.c | 10 ++++++++--
 kernel/time/vsyscall.c    |  4 ++--
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 059fa8b79be6..b6974fce800c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -83,6 +83,12 @@ static inline bool tk_is_aux(const struct timekeeper *tk)
 }
 #endif
 
+static inline void tk_update_aux_offs(struct timekeeper *tk, ktime_t offs)
+{
+	tk->offs_aux = offs;
+	tk->monotonic_to_aux = ktime_to_timespec64(offs);
+}
+
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
@@ -1506,7 +1512,7 @@ static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespe
 			timekeeping_restore_shadow(tkd);
 			return -EINVAL;
 		}
-		tks->offs_aux = offs;
+		tk_update_aux_offs(tks, offs);
 	}
 
 	timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL);
@@ -2937,7 +2943,7 @@ static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew)
 	 * xtime ("realtime") is not applicable for auxiliary clocks and
 	 * kept in sync with "monotonic".
 	 */
-	aux_tks->offs_aux = ktime_sub(timespec64_to_ktime(*tnew), tnow);
+	tk_update_aux_offs(aux_tks, ktime_sub(timespec64_to_ktime(*tnew), tnow));
 
 	timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL);
 	return 0;
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 8ba8b0d8a387..aa59919b8f2c 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -159,10 +159,10 @@ void vdso_time_update_aux(struct timekeeper *tk)
 	if (clock_mode != VDSO_CLOCKMODE_NONE) {
 		fill_clock_configuration(vc, &tk->tkr_mono);
 
-		vdso_ts->sec	= tk->xtime_sec;
+		vdso_ts->sec = tk->xtime_sec + tk->monotonic_to_aux.tv_sec;
 
 		nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
-		nsec += tk->offs_aux;
+		nsec += tk->monotonic_to_aux.tv_nsec;
 		vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
 		nsec = nsec << tk->tkr_mono.shift;
 		vdso_ts->nsec = nsec;
-- 
cgit v1.2.3


From e895f8e29119c8c966ea794af9e9100b10becb88 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Tue, 5 Aug 2025 16:10:25 +0800
Subject: hrtimers: Unconditionally update target CPU base after offline timer
 migration

When testing softirq based hrtimers on an ARM32 board, with high resolution
mode and NOHZ inactive, softirq based hrtimers fail to expire after being
moved away from an offline CPU:

CPU0				CPU1
				hrtimer_start(..., HRTIMER_MODE_SOFT);
cpu_down(CPU1)			...
				hrtimers_cpu_dying()
				  // Migrate timers to CPU0
				  smp_call_function_single(CPU0, returgger_next_event);
  retrigger_next_event()
    if (!highres && !nohz)
        return;

As retrigger_next_event() is a NOOP when both high resolution timers and
NOHZ are inactive CPU0's hrtimer_cpu_base::softirq_expires_next is not
updated and the migrated softirq timers never expire unless there is a
softirq based hrtimer queued on CPU0 later.

Fix this by removing the hrtimer_hres_active() and tick_nohz_active() check
in retrigger_next_event(), which enforces a full update of the CPU base.
As this is not a fast path the extra cost does not matter.

[ tglx: Massaged change log ]

Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
Co-developed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250805081025.54235-1-wangxiongfeng2@huawei.com
---
 kernel/time/hrtimer.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 30899a8cc52c..e8c479329282 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -787,10 +787,10 @@ static void retrigger_next_event(void *arg)
 	 * of the next expiring timer is enough. The return from the SMP
 	 * function call will take care of the reprogramming in case the
 	 * CPU was in a NOHZ idle sleep.
+	 *
+	 * In periodic low resolution mode, the next softirq expiration
+	 * must also be updated.
 	 */
-	if (!hrtimer_hres_active(base) && !tick_nohz_active)
-		return;
-
 	raw_spin_lock(&base->lock);
 	hrtimer_update_base(base);
 	if (hrtimer_hres_active(base))
@@ -2295,11 +2295,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
 				     &new_base->clock_base[i]);
 	}
 
-	/*
-	 * The migration might have changed the first expiring softirq
-	 * timer on this CPU. Update it.
-	 */
-	__hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
 	/* Tell the other CPU to retrigger the next event */
 	smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
 
-- 
cgit v1.2.3