summaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/sched_ext/rt_stall.c
blob: ab772e336f86d6dd16548098da16565a429d5575 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2025 NVIDIA Corporation.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <linux/sched.h>
#include <signal.h>
#include <bpf/bpf.h>
#include <scx/common.h>
#include <unistd.h>
#include "rt_stall.bpf.skel.h"
#include "scx_test.h"
#include "../kselftest.h"

#define CORE_ID		0	/* CPU that both competing tasks are pinned to */
#define RUN_TIME        5	/* Length of each measurement window, in seconds */

/*
 * Tell the parent that this child has finished its setup: push a single
 * byte into the write end of the ready pipe, then close that end.
 * Terminates the process with EXIT_FAILURE if the byte cannot be written.
 */
static void signal_ready(int fd)
{
	const char token = 1;
	ssize_t written = write(fd, &token, sizeof(token));

	if (written == 1) {
		close(fd);
		return;
	}

	perror("write to ready pipe");
	exit(EXIT_FAILURE);
}

/*
 * Block until a child reports readiness: consume one byte from the read
 * end of its ready pipe, then close that end. Terminates the process with
 * EXIT_FAILURE if exactly one byte could not be read.
 */
static void wait_ready(int fd)
{
	char token;
	ssize_t received = read(fd, &token, sizeof(token));

	if (received == 1) {
		close(fd);
		return;
	}

	perror("read from ready pipe");
	exit(EXIT_FAILURE);
}

/*
 * Workload for the test tasks: spin forever, burning CPU. Never returns;
 * the task running it has to be killed from the outside.
 */
static void process_func(void)
{
	for (;;) {
		/* volatile keeps the compiler from discarding the empty spin */
		volatile unsigned long spin = 0;

		while (spin < 10000000UL)
			spin++;
	}
}

/*
 * Restrict the calling task to a single CPU.
 * Terminates the process with EXIT_FAILURE if the affinity cannot be set.
 */
static void set_affinity(int cpu)
{
	cpu_set_t only_cpu;

	CPU_ZERO(&only_cpu);
	CPU_SET(cpu, &only_cpu);

	if (sched_setaffinity(0, sizeof(only_cpu), &only_cpu) == 0)
		return;

	perror("sched_setaffinity");
	exit(EXIT_FAILURE);
}

/*
 * Switch the calling task to the given scheduling policy and priority.
 * Terminates the process with EXIT_FAILURE if the change is rejected.
 */
static void set_sched(int policy, int priority)
{
	struct sched_param param = { .sched_priority = priority };

	if (sched_setscheduler(0, policy, &param) == 0)
		return;

	perror("sched_setscheduler");
	exit(EXIT_FAILURE);
}

/*
 * Get the CPU time consumed so far by a process, from /proc/<pid>/stat.
 *
 * @pid: process to inspect.
 *
 * Returns utime + stime (fields 14 and 15 of the stat line) converted from
 * clock ticks to seconds, or -1 if the file cannot be opened or parsed, or
 * if the clock tick rate cannot be determined.
 */
static float get_process_runtime(int pid)
{
	char path[256];
	char line[1024];
	unsigned long utime, stime;
	long ticks_per_second;
	char *comm_end;
	FILE *file;
	int fields;

	snprintf(path, sizeof(path), "/proc/%d/stat", pid);
	file = fopen(path, "r");
	if (file == NULL) {
		perror("Failed to open stat file");
		return -1;
	}

	if (fgets(line, sizeof(line), file) == NULL) {
		fclose(file);
		fprintf(stderr, "Failed to read stat file\n");
		return -1;
	}
	fclose(file);

	/*
	 * The second field is the task name in parentheses and may itself
	 * contain spaces or parentheses, so resume parsing after the last
	 * ')' instead of treating the name as one whitespace-delimited token.
	 */
	comm_end = strrchr(line, ')');
	if (comm_end == NULL) {
		fprintf(stderr, "Failed to read stat file\n");
		return -1;
	}

	/* Skip fields 3..13 and read the 14th (utime) and 15th (stime) */
	fields = sscanf(comm_end + 1,
			" %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu %lu",
			&utime, &stime);
	if (fields != 2) {
		fprintf(stderr, "Failed to read stat file\n");
		return -1;
	}

	ticks_per_second = sysconf(_SC_CLK_TCK);
	if (ticks_per_second <= 0) {
		fprintf(stderr, "Failed to get clock ticks per second\n");
		return -1;
	}

	/* Convert the total time spent in the process from ticks to seconds */
	return (double)(utime + stime) / ticks_per_second;
}

/*
 * Test setup: open and load the rt_stall BPF skeleton and hand it to the
 * caller through *ctx. The scheduler is not attached here.
 *
 * NOTE(review): SCX_FAIL_IF presumably returns a failure status from this
 * function when its condition holds - confirm against scx_test.h.
 */
static enum scx_test_status setup(void **ctx)
{
	struct rt_stall *skel = rt_stall__open();

	SCX_FAIL_IF(!skel, "Failed to open");
	SCX_ENUM_INIT(skel);
	SCX_FAIL_IF(rt_stall__load(skel), "Failed to load skel");

	*ctx = skel;
	return SCX_TEST_PASS;
}

/* Kill a child task that was successfully forked and reap it */
static void stop_task(pid_t pid)
{
	if (pid <= 0)
		return;

	kill(pid, SIGKILL);
	waitpid(pid, NULL, 0);
}

/*
 * Run one measurement round: fork a default-policy busy loop and a
 * SCHED_FIFO busy loop, both pinned to CORE_ID, let them compete for
 * RUN_TIME seconds and compare the CPU time each one received.
 *
 * @is_ext: only selects the label ("EXT" or "FAIR") used in the report
 *          for the non-RT task.
 *
 * Returns true if the non-RT task got at least the expected share of the
 * runtime, false otherwise. Setup and measurement errors terminate the
 * test process through ksft_exit_fail*(); the children are stopped first
 * so that no busy loop (in particular the RT one) is left behind.
 */
static bool sched_stress_test(bool is_ext)
{
	/*
	 * We're expecting the EXT task to get around 5% of CPU time when
	 * competing with the RT task (small 1% fluctuations are expected).
	 *
	 * However, the EXT task should get at least 4% of the CPU to prove
	 * that the EXT deadline server is working correctly. A percentage
	 * less than 4% indicates a bug where RT tasks can potentially
	 * stall SCHED_EXT tasks, causing the test to fail.
	 */
	const float expected_min_ratio = 0.04; /* 4% */
	const char *class_str = is_ext ? "EXT" : "FAIR";

	float ext_runtime, rt_runtime, total_runtime, actual_ratio;
	int ext_pid, rt_pid;
	int ext_ready[2], rt_ready[2];

	ksft_print_header();
	ksft_set_plan(1);

	if (pipe(ext_ready) || pipe(rt_ready)) {
		perror("pipe");
		ksft_exit_fail();
	}

	/* Create and set up the EXT/FAIR task */
	ext_pid = fork();
	if (ext_pid == 0) {
		/* Best effort: do not outlive a parent that bails out early */
		prctl(PR_SET_PDEATHSIG, SIGKILL);
		close(ext_ready[0]);
		close(rt_ready[0]);
		close(rt_ready[1]);
		set_affinity(CORE_ID);
		signal_ready(ext_ready[1]);
		process_func();
		exit(0);
	} else if (ext_pid < 0) {
		perror("fork task");
		ksft_exit_fail();
	}

	/* Create an RT task */
	rt_pid = fork();
	if (rt_pid == 0) {
		/* Best effort: do not outlive a parent that bails out early */
		prctl(PR_SET_PDEATHSIG, SIGKILL);
		close(ext_ready[0]);
		close(ext_ready[1]);
		close(rt_ready[0]);
		set_affinity(CORE_ID);
		set_sched(SCHED_FIFO, 50);
		signal_ready(rt_ready[1]);
		process_func();
		exit(0);
	} else if (rt_pid < 0) {
		perror("fork for RT task");
		/* The first child is already spinning; do not leave it behind */
		stop_task(ext_pid);
		ksft_exit_fail();
	}

	/*
	 * Wait for both children to complete their setup (affinity and
	 * scheduling policy) before starting the measurement window.
	 * This prevents flaky failures caused by the RT child's setup
	 * time eating into the measurement period.
	 */
	close(ext_ready[1]);
	close(rt_ready[1]);
	wait_ready(ext_ready[0]);
	wait_ready(rt_ready[0]);

	/* Let the processes run for the specified time */
	sleep(RUN_TIME);

	/* Sample both runtimes back to back while the tasks still exist */
	ext_runtime = get_process_runtime(ext_pid);
	rt_runtime = get_process_runtime(rt_pid);

	/*
	 * Stop the children before evaluating the samples, so that every
	 * exit path below leaves no busy loop running.
	 */
	stop_task(ext_pid);
	stop_task(rt_pid);

	if (ext_runtime == -1)
		ksft_exit_fail_msg("Error getting runtime for %s task (PID %d)\n",
				   class_str, ext_pid);
	ksft_print_msg("Runtime of %s task (PID %d) is %f seconds\n",
		       class_str, ext_pid, ext_runtime);

	if (rt_runtime == -1)
		ksft_exit_fail_msg("Error getting runtime for RT task (PID %d)\n", rt_pid);
	ksft_print_msg("Runtime of RT task (PID %d) is %f seconds\n", rt_pid, rt_runtime);

	/*
	 * Verify that the non-RT task got enough runtime. If neither task
	 * accumulated any runtime, report a 0% share rather than dividing
	 * by zero.
	 */
	total_runtime = ext_runtime + rt_runtime;
	actual_ratio = total_runtime > 0 ? ext_runtime / total_runtime : 0;
	ksft_print_msg("%s task got %.2f%% of total runtime\n",
		       class_str, actual_ratio * 100);

	if (actual_ratio >= expected_min_ratio) {
		ksft_test_result_pass("PASS: %s task got more than %.2f%% of runtime\n",
				      class_str, expected_min_ratio * 100);
		return true;
	}
	ksft_test_result_fail("FAIL: %s task got less than %.2f%% of runtime\n",
			      class_str, expected_min_ratio * 100);
	return false;
}

/*
 * Test body: run the stress test four times, alternating between no
 * sched_ext scheduler attached (rounds 0 and 2) and the rt_stall
 * scheduler attached (rounds 1 and 3).
 *
 * Per the original author, this checks that the dl_server works both
 * with and without the sched_ext scheduler and covers the transitions:
 *   - fair_server stop -> ext_server start
 *   - ext_server stop -> fair_server stop
 * NOTE(review): the second transition likely ends in "fair_server start";
 * confirm.
 */
static enum scx_test_status run(void *ctx)
{
	struct rt_stall *skel = ctx;
	struct bpf_link *link = NULL;
	int round;

	for (round = 0; round < 4; round++) {
		bool is_ext = round % 2;
		bool passed;

		if (!is_ext) {
			passed = sched_stress_test(is_ext);
		} else {
			/* Start each attached round with a clean exit record */
			memset(&skel->data->uei, 0, sizeof(skel->data->uei));
			link = bpf_map__attach_struct_ops(skel->maps.rt_stall_ops);
			SCX_FAIL_IF(!link, "Failed to attach scheduler");

			passed = sched_stress_test(is_ext);

			/* Check the exit record before detaching the scheduler */
			SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
			bpf_link__destroy(link);
		}

		if (!passed)
			ksft_exit_fail();
	}

	return SCX_TEST_PASS;
}

/* Test teardown: destroy the rt_stall skeleton that setup() stored in ctx */
static void cleanup(void *ctx)
{
	rt_stall__destroy(ctx);
}

/*
 * Test descriptor: names the test and wires up the setup/run/cleanup
 * callbacks defined in this file.
 * NOTE(review): REGISTER_SCX_TEST presumably adds it to the runner's list
 * of tests - confirm against scx_test.h.
 */
struct scx_test rt_stall = {
	.name = "rt_stall",
	.description = "Verify that RT tasks cannot stall SCHED_EXT tasks",
	.setup = setup,
	.run = run,
	.cleanup = cleanup,
};
REGISTER_SCX_TEST(&rt_stall)