From 7cbf3b13f00c8341afff1c48ad83d11995842c40 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 10 May 2024 17:24:25 -0700 Subject: time: Add MODULE_DESCRIPTION() to time test modules Fix the make W=1 warnings: WARNING: modpost: missing MODULE_DESCRIPTION() in kernel/time/clocksource-wdtest.o WARNING: modpost: missing MODULE_DESCRIPTION() in kernel/time/test_udelay.o WARNING: modpost: missing MODULE_DESCRIPTION() in kernel/time/time_test.o Signed-off-by: Jeff Johnson Signed-off-by: Thomas Gleixner Acked-by: Paul E. McKenney Link: https://lore.kernel.org/r/20240510-time-md-v1-1-44a8a36ac4b0@quicinc.com --- kernel/time/clocksource-wdtest.c | 1 + kernel/time/test_udelay.c | 1 + kernel/time/time_test.c | 1 + 3 files changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c index d06185e054ea..62e73444ffe4 100644 --- a/kernel/time/clocksource-wdtest.c +++ b/kernel/time/clocksource-wdtest.c @@ -22,6 +22,7 @@ #include "tick-internal.h" MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Clocksource watchdog unit test"); MODULE_AUTHOR("Paul E. McKenney "); static int holdoff = IS_BUILTIN(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) ? 10 : 0; diff --git a/kernel/time/test_udelay.c b/kernel/time/test_udelay.c index 20d5df631570..783f2297111b 100644 --- a/kernel/time/test_udelay.c +++ b/kernel/time/test_udelay.c @@ -155,5 +155,6 @@ static void __exit udelay_test_exit(void) module_exit(udelay_test_exit); +MODULE_DESCRIPTION("udelay test module"); MODULE_AUTHOR("David Riley "); MODULE_LICENSE("GPL"); diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c index 3e5d422dd15c..2889763165e5 100644 --- a/kernel/time/time_test.c +++ b/kernel/time/time_test.c @@ -96,4 +96,5 @@ static struct kunit_suite time_test_suite = { }; kunit_test_suite(time_test_suite); +MODULE_DESCRIPTION("time unit test suite"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 6b2e29977518ec13ef3022f234ff8f3014c243da Mon Sep 17 00:00:00 2001 From: Lakshmi Sowjanya D Date: Mon, 13 May 2024 16:08:02 +0530 Subject: timekeeping: Provide infrastructure for converting to/from a base clock Hardware time stamps like provided by PTP clock implementations are based on a clock which feeds both the PCIe device and the system clock. For further processing the underlying hardwarre clock timestamp must be converted to the system clock. Right now this requires drivers to invoke an architecture specific conversion function, e.g. to convert the ART (Always Running Timer) timestamp to a TSC timestamp. As the system clock is aware of the underlying base clock, this can be moved to the core code by providing a base clock property for the system clock which contains the conversion factors and assigning a clocksource ID to the base clock. Add the required data structures and the conversion infrastructure in the core code to prepare for converting X86 and the related PTP drivers over. [ tglx: Added a missing READ_ONCE(). Massaged change log ] Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Christopher S. Hall Signed-off-by: Christopher S. Hall Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240513103813.5666-2-lakshmi.sowjanya.d@intel.com --- include/linux/clocksource.h | 27 +++++++++++++++++++++++++++ include/linux/timekeeping.h | 2 ++ kernel/time/timekeeping.c | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 0ad8b550bb4b..d35b677b08fe 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -21,6 +21,7 @@ #include #include +struct clocksource_base; struct clocksource; struct module; @@ -50,6 +51,7 @@ struct module; * multiplication * @name: Pointer to clocksource name * @list: List head for registration (internal) + * @freq_khz: Clocksource frequency in khz. * @rating: Rating value for selection (higher is better) * To avoid rating inflation the following * list should give you a guide as to how @@ -70,6 +72,8 @@ struct module; * validate the clocksource from which the snapshot was * taken. * @flags: Flags describing special properties + * @base: Hardware abstraction for clock on which a clocksource + * is based * @enable: Optional function to enable the clocksource * @disable: Optional function to disable the clocksource * @suspend: Optional suspend function for the clocksource @@ -107,10 +111,12 @@ struct clocksource { u64 max_cycles; const char *name; struct list_head list; + u32 freq_khz; int rating; enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; unsigned long flags; + struct clocksource_base *base; int (*enable)(struct clocksource *cs); void (*disable)(struct clocksource *cs); @@ -306,4 +312,25 @@ static inline unsigned int clocksource_get_max_watchdog_retry(void) void clocksource_verify_percpu(struct clocksource *cs); +/** + * struct clocksource_base - hardware abstraction for clock on which a clocksource + * is based + * @id: Defaults to CSID_GENERIC. The id value is used for conversion + * functions which require that the current clocksource is based + * on a clocksource_base with a particular ID in certain snapshot + * functions to allow callers to validate the clocksource from + * which the snapshot was taken. + * @freq_khz: Nominal frequency of the base clock in kHz + * @offset: Offset between the base clock and the clocksource + * @numerator: Numerator of the clock ratio between base clock and the clocksource + * @denominator: Denominator of the clock ratio between base clock and the clocksource + */ +struct clocksource_base { + enum clocksource_ids id; + u32 freq_khz; + u64 offset; + u32 numerator; + u32 denominator; +}; + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 0ea7823b7f31..b2ee182d891e 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -310,10 +310,12 @@ struct system_device_crosststamp { * timekeeping code to verify comparability of two cycle values. * The default ID, CSID_GENERIC, does not identify a specific * clocksource. + * @use_nsecs: @cycles is in nanoseconds. */ struct system_counterval_t { u64 cycles; enum clocksource_ids cs_id; + bool use_nsecs; }; /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4e18db1819f8..3096e10e0a1d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1195,6 +1195,46 @@ static bool timestamp_in_interval(u64 start, u64 end, u64 ts) return false; } +static bool convert_clock(u64 *val, u32 numerator, u32 denominator) +{ + u64 rem, res; + + if (!numerator || !denominator) + return false; + + res = div64_u64_rem(*val, denominator, &rem) * numerator; + *val = res + div_u64(rem * numerator, denominator); + return true; +} + +static bool convert_base_to_cs(struct system_counterval_t *scv) +{ + struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; + struct clocksource_base *base; + u32 num, den; + + /* The timestamp was taken from the time keeper clock source */ + if (cs->id == scv->cs_id) + return true; + + /* + * Check whether cs_id matches the base clock. Prevent the compiler from + * re-evaluating @base as the clocksource might change concurrently. + */ + base = READ_ONCE(cs->base); + if (!base || base->id != scv->cs_id) + return false; + + num = scv->use_nsecs ? cs->freq_khz : base->numerator; + den = scv->use_nsecs ? USEC_PER_SEC : base->denominator; + + if (!convert_clock(&scv->cycles, num, den)) + return false; + + scv->cycles += base->offset; + return true; +} + /** * get_device_system_crosststamp - Synchronously capture system/device timestamp * @get_time_fn: Callback to get simultaneous device time and @@ -1241,7 +1281,7 @@ int get_device_system_crosststamp(int (*get_time_fn) * installed timekeeper clocksource */ if (system_counterval.cs_id == CSID_GENERIC || - tk->tkr_mono.clock->id != system_counterval.cs_id) + !convert_base_to_cs(&system_counterval)) return -ENODEV; cycles = system_counterval.cycles; -- cgit v1.2.3 From 02ecee07ca30f76f2a0f1381661a688b8e501ab0 Mon Sep 17 00:00:00 2001 From: Lakshmi Sowjanya D Date: Mon, 13 May 2024 16:08:10 +0530 Subject: timekeeping: Add function to convert realtime to base clock PPS (Pulse Per Second) generates a hardware pulse every second based on CLOCK_REALTIME. This works fine when the pulse is generated in software from a hrtimer callback function. For hardware which generates the pulse by programming a timer it is required to convert CLOCK_REALTIME to the underlying hardware clock. The X86 Timed IO device is based on the Always Running Timer (ART), which is the base clock of the TSC, which is usually the system clocksource on X86. The core code already has functionality to convert base clock timestamps to system clocksource timestamps, but there is no support for converting the other way around. Provide the required functionality to support such devices in a generic way to avoid code duplication in drivers: 1) ktime_real_to_base_clock() to convert a CLOCK_REALTIME timestamp to a base clock timestamp 2) timekeeping_clocksource_has_base() to allow drivers to validate that the system clocksource is based on a particular clocksource ID. [ tglx: Simplify timekeeping_clocksource_has_base() and add missing READ_ONCE() ] Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Christopher S. Hall Signed-off-by: Christopher S. Hall Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240513103813.5666-10-lakshmi.sowjanya.d@intel.com --- include/linux/timekeeping.h | 4 +++ kernel/time/timekeeping.c | 86 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) (limited to 'kernel') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b2ee182d891e..fc12a9ba2c88 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -318,6 +318,10 @@ struct system_counterval_t { bool use_nsecs; }; +extern bool ktime_real_to_base_clock(ktime_t treal, + enum clocksource_ids base_id, u64 *cycles); +extern bool timekeeping_clocksource_has_base(enum clocksource_ids id); + /* * Get cross timestamp between system clock and device clock */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3096e10e0a1d..da984a368a95 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1235,6 +1235,68 @@ static bool convert_base_to_cs(struct system_counterval_t *scv) return true; } +static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id) +{ + struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; + struct clocksource_base *base; + + /* + * Check whether base_id matches the base clock. Prevent the compiler from + * re-evaluating @base as the clocksource might change concurrently. + */ + base = READ_ONCE(cs->base); + if (!base || base->id != base_id) + return false; + + *cycles -= base->offset; + if (!convert_clock(cycles, base->denominator, base->numerator)) + return false; + return true; +} + +static bool convert_ns_to_cs(u64 *delta) +{ + struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; + + if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta)) + return false; + + *delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult); + return true; +} + +/** + * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp + * @treal: CLOCK_REALTIME timestamp to convert + * @base_id: base clocksource id + * @cycles: pointer to store the converted base clock timestamp + * + * Converts a supplied, future realtime clock value to the corresponding base clock value. + * + * Return: true if the conversion is successful, false otherwise. + */ +bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles) +{ + struct timekeeper *tk = &tk_core.timekeeper; + unsigned int seq; + u64 delta; + + do { + seq = read_seqcount_begin(&tk_core.seq); + if ((u64)treal < tk->tkr_mono.base_real) + return false; + delta = (u64)treal - tk->tkr_mono.base_real; + if (!convert_ns_to_cs(&delta)) + return false; + *cycles = tk->tkr_mono.cycle_last + delta; + if (!convert_cs_to_base(cycles, base_id)) + return false; + } while (read_seqcount_retry(&tk_core.seq, seq)); + + return true; +} +EXPORT_SYMBOL_GPL(ktime_real_to_base_clock); + /** * get_device_system_crosststamp - Synchronously capture system/device timestamp * @get_time_fn: Callback to get simultaneous device time and @@ -1346,6 +1408,30 @@ int get_device_system_crosststamp(int (*get_time_fn) } EXPORT_SYMBOL_GPL(get_device_system_crosststamp); +/** + * timekeeping_clocksource_has_base - Check whether the current clocksource + * is based on given a base clock + * @id: base clocksource ID + * + * Note: The return value is a snapshot which can become invalid right + * after the function returns. + * + * Return: true if the timekeeper clocksource has a base clock with @id, + * false otherwise + */ +bool timekeeping_clocksource_has_base(enum clocksource_ids id) +{ + /* + * This is a snapshot, so no point in using the sequence + * count. Just prevent the compiler from re-evaluating @base as the + * clocksource might change concurrently. + */ + struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base); + + return base ? base->id == id : false; +} +EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base); + /** * do_settimeofday64 - Sets the time of day. * @ts: pointer to the timespec64 variable containing the new time -- cgit v1.2.3 From 9403408e122628a6dc7154a95716f00dae3747c7 Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Mon, 17 Jun 2024 17:16:15 +0100 Subject: tick: Remove unnused tick_nohz_get_idle_calls() The function returns the idle calls counter for the current cpu and therefore usually isn't what the caller wants. It is unnused since commit 466a2b42d676 ("cpufreq: schedutil: Use idle_calls counter of the remote CPU") Signed-off-by: Christian Loehle Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240617161615.49309-1-christian.loehle@arm.com --- include/linux/tick.h | 1 - kernel/time/tick-sched.c | 14 -------------- 2 files changed, 15 deletions(-) (limited to 'kernel') diff --git a/include/linux/tick.h b/include/linux/tick.h index 4924a33700b7..72744638c5b0 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -139,7 +139,6 @@ extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); -extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 71a792cd8936..43a6370a2ebc 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1385,20 +1385,6 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) return ts->idle_calls; } -/** - * tick_nohz_get_idle_calls - return the current idle calls counter value - * - * Called from the schedutil frequency scaling governor in scheduler context. - * - * Return: the current idle calls counter value for the current CPU - */ -unsigned long tick_nohz_get_idle_calls(void) -{ - struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - - return ts->idle_calls; -} - static void tick_nohz_account_idle_time(struct tick_sched *ts, ktime_t now) { -- cgit v1.2.3 From e1b6a78b58aa859c66a32cfaeb121df87631d4ed Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 7 Jun 2024 17:06:56 +0800 Subject: timekeeping: Add missing kernel-doc function comments Fixup the incomplete kernel-doc style comments for do_adjtimex() and hardpps() by documenting the function parameters. Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240607090656.104883-1-yang.lee@linux.alibaba.com Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=9301 --- kernel/time/timekeeping.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index da984a368a95..2fa87dcfeda9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2547,6 +2547,7 @@ EXPORT_SYMBOL_GPL(random_get_entropy_fallback); /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function + * @txc: Pointer to kernel_timex structure containing NTP parameters */ int do_adjtimex(struct __kernel_timex *txc) { @@ -2615,6 +2616,8 @@ int do_adjtimex(struct __kernel_timex *txc) #ifdef CONFIG_NTP_PPS /** * hardpps() - Accessor function to NTP __hardpps function + * @phase_ts: Pointer to timespec64 structure representing phase timestamp + * @raw_ts: Pointer to timespec64 structure representing raw timestamp */ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) { -- cgit v1.2.3 From 59dbee7d4d59425658b1d86238732c575216b718 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 10 Jun 2024 12:35:52 +0200 Subject: tick/sched: Combine WARN_ON_ONCE and print_once When the WARN_ON_ONCE() triggers, the printk() of the additional information related to the warning will not happen in print level "warn". When reading dmesg with a restriction to level "warn", the information published by the printk_once() will not show up there. Transform WARN_ON_ONCE() and printk_once() into a WARN_ONCE(). Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240610103552.25252-1-anna-maria@linutronix.de --- kernel/time/tick-sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 43a6370a2ebc..753a184c7090 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1026,10 +1026,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) if (expires == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) return; - WARN_ON_ONCE(1); - printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n", - basemono, ts->next_tick, dev->next_event, - hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer)); + WARN_ONCE(1, "basemono: %llu ts->next_tick: %llu dev->next_event: %llu " + "timer->active: %d timer->expires: %llu\n", basemono, ts->next_tick, + dev->next_event, hrtimer_active(&ts->sched_timer), + hrtimer_get_expires(&ts->sched_timer)); } /* -- cgit v1.2.3 From f7d43dd206e7e18c182f200e67a8db8c209907fa Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Thu, 11 Jul 2024 20:48:43 +0800 Subject: tick/broadcast: Make takeover of broadcast hrtimer reliable Running the LTP hotplug stress test on a aarch64 machine results in rcu_sched stall warnings when the broadcast hrtimer was owned by the un-plugged CPU. The issue is the following: CPU1 (owns the broadcast hrtimer) CPU2 tick_broadcast_enter() // shutdown local timer device broadcast_shutdown_local() ... tick_broadcast_exit() clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT) // timer device is not programmed cpumask_set_cpu(cpu, tick_broadcast_force_mask) initiates offlining of CPU1 take_cpu_down() /* * CPU1 shuts down and does not * send broadcast IPI anymore */ takedown_cpu() hotplug_cpu__broadcast_tick_pull() // move broadcast hrtimer to this CPU clockevents_program_event() bc_set_next() hrtimer_start() /* * timer device is not programmed * because only the first expiring * timer will trigger clockevent * device reprogramming */ What happens is that CPU2 exits broadcast mode with force bit set, then the local timer device is not reprogrammed and CPU2 expects to receive the expired event by the broadcast IPI. But this does not happen because CPU1 is offlined by CPU2. CPU switches the clockevent device to ONESHOT state, but does not reprogram the device. The subsequent reprogramming of the hrtimer broadcast device does not program the clockevent device of CPU2 either because the pending expiry time is already in the past and the CPU expects the event to be delivered. As a consequence all CPUs which wait for a broadcast event to be delivered are stuck forever. Fix this issue by reprogramming the local timer device if the broadcast force bit of the CPU is set so that the broadcast hrtimer is delivered. [ tglx: Massage comment and change log. Add Fixes tag ] Fixes: 989dcb645ca7 ("tick: Handle broadcast wakeup of multiple cpus") Signed-off-by: Yu Liao Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240711124843.64167-1-liaoyu15@huawei.com --- kernel/time/tick-broadcast.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 771d1e040303..b4843099a8da 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1141,6 +1141,7 @@ void tick_broadcast_switch_to_oneshot(void) #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *bc; unsigned long flags; @@ -1148,6 +1149,28 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) bc = tick_broadcast_device.evtdev; if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* + * If the broadcast force bit of the current CPU is set, + * then the current CPU has not yet reprogrammed the local + * timer device to avoid a ping-pong race. See + * ___tick_broadcast_oneshot_control(). + * + * If the broadcast device is hrtimer based then + * programming the broadcast event below does not have any + * effect because the local clockevent device is not + * running and not programmed because the broadcast event + * is not earlier than the pending event of the local clock + * event device. As a consequence all CPUs waiting for a + * broadcast event are stuck forever. + * + * Detect this condition and reprogram the cpu local timer + * device to avoid the starvation. + */ + if (tick_check_broadcast_expired()) { + cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); + tick_program_event(td->evtdev->next_event, 1); + } + /* This moves the broadcast assignment to this CPU: */ clockevents_program_event(bc, bc->next_event, 1); } -- cgit v1.2.3