/* SPDX-License-Identifier: GPL-2.0 */
/*
 * A demo sched_ext core-scheduler which always makes every sibling CPU pair
 * execute from the same CPU cgroup.
 *
 * This scheduler is a minimal implementation and would need some form of
 * priority handling both inside each cgroup and across the cgroups to be
 * practically useful.
 *
 * Each CPU in the system is paired with exactly one other CPU, according to a
 * "stride" value that can be specified when the BPF scheduler program is first
 * loaded. Throughout the runtime of the scheduler, these CPU pairs guarantee
 * that they will only ever schedule tasks that belong to the same CPU cgroup.
 *
 * Scheduler Initialization
 * ------------------------
 *
 * The scheduler BPF program is first initialized from user space, before it is
 * enabled. During this initialization process, each CPU on the system is
 * assigned several values that are constant throughout its runtime:
 *
 * 1. *Pair CPU*: The CPU that it synchronizes with when making scheduling
 *		  decisions. Paired CPUs always schedule tasks from the same
 *		  CPU cgroup, and synchronize with each other to guarantee
 *		  that this constraint is not violated.
 * 2. *Pair ID*:  Each CPU pair is assigned a Pair ID, which is used to access
 *		  a struct pair_ctx object that is shared between the pair.
 * 3. *In-pair-index*: An index, 0 or 1, that is assigned to each core in the
 *		       pair. Each struct pair_ctx has an active_mask field,
 *		       which is a bitmap used to indicate whether each core
 *		       in the pair currently has an actively running task.
 *		       This index specifies which entry in the bitmap corresponds
 *		       to each CPU in the pair.
 *
 * During this initialization, the CPUs are paired according to a "stride" that
 * may be specified when invoking the user space program that initializes and
 * loads the scheduler. By default, the stride is 1/2 the total number of CPUs.
 *
 * Tasks and cgroups
 * -----------------
 *
 * Every cgroup in the system is registered with the scheduler using the
 * pair_cgroup_init() callback, and every task in the system is associated with
 * exactly one cgroup. At a high level, the idea with the pair scheduler is to
 * always schedule tasks from the same cgroup within a given CPU pair. When a
 * task is enqueued (i.e. passed to the pair_enqueue() callback function), its
 * cgroup ID is read from its task struct, and then a corresponding queue map
 * is used to FIFO-enqueue the task for that cgroup.
 *
 * If you look through the implementation of the scheduler, you'll notice that
 * there is quite a bit of complexity involved with looking up the per-cgroup
 * FIFO queue that we enqueue tasks in. For example, there is a cgrp_q_idx_hash
 * BPF hash map that is used to map a cgroup ID to a globally unique ID that's
 * allocated in the BPF program. This is done because we use separate maps to
 * store the FIFO queue of tasks, and the length of that map, per cgroup. This
 * complexity is only present because of current deficiencies in BPF that will
 * soon be addressed. The main point to keep in mind is that newly enqueued
 * tasks are added to their cgroup's FIFO queue.
 *
 * Dispatching tasks
 * -----------------
 *
 * This section will describe how enqueued tasks are dispatched and scheduled.
 * Tasks are dispatched in pair_dispatch(), and at a high level the workflow is
 * as follows:
 *
 * 1. Fetch the struct pair_ctx for the current CPU. As mentioned above, this is
 *    the structure that's used to synchronize amongst the two pair CPUs in their
 *    scheduling decisions. After any of the following events have occurred:
 *
 * - The cgroup's slice run has expired, or
 * - The cgroup becomes empty, or
 * - Either CPU in the pair is preempted by a higher priority scheduling class
 *
 * The cgroup transitions to the draining state and stops executing new tasks
 * from the cgroup.
 *
 * 2. If the pair is still executing a task, mark the pair_ctx as draining, and
 *    wait for the pair CPU to be preempted.
 *
 * 3. Otherwise, if the pair CPU is not running a task, we can move onto
 *    scheduling new tasks. Pop the next cgroup id from the top_q queue.
 *
 * 4. Pop a task from that cgroup's FIFO task queue, and begin executing it.
 *
 * Note again that this scheduling behavior is simple, but the implementation
 * is complex mostly because it hits several BPF shortcomings and has to
 * work around them in often awkward ways. Most of the shortcomings are
 * expected to be resolved in the near future which should allow greatly
 * simplifying this scheduler.
 *
 * Dealing with preemption
 * -----------------------
 *
 * SCX is the lowest priority sched_class, and can be preempted by any higher
 * priority sched_class at any time. To address this, the scheduler implements
 * pair_cpu_release() and pair_cpu_acquire() callbacks which are invoked by the
 * core scheduler when the scheduler loses and gains control of the CPU
 * respectively.
 *
 * In pair_cpu_release(), we mark the pair_ctx as having been preempted, and
 * then invoke:
 *
 * scx_bpf_kick_cpu(pair_cpu, SCX_KICK_PREEMPT | SCX_KICK_WAIT);
 *
 * This preempts the pair CPU, and waits until it has re-entered the scheduler
 * before returning. This is necessary to ensure that the higher priority
 * sched_class that preempted our scheduler does not schedule a task
 * concurrently with our pair CPU.
 *
 * When the CPU is re-acquired in pair_cpu_acquire(), we unmark the preemption
 * in the pair_ctx, and send another resched IPI to the pair CPU to re-enable
 * pair scheduling.
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */
#include <scx/common.bpf.h>
#include "scx_pair.h"

char _license[] SEC("license") = "GPL";

/* !0 for veristat, set during init */
const volatile u32 nr_cpu_ids = 1;

/* a pair of CPUs stay on a cgroup for this duration */
const volatile u32 pair_batch_dur_ns;

/* cpu ID -> pair cpu ID */
const volatile s32 RESIZABLE_ARRAY(rodata, pair_cpu);

/* cpu ID -> pair_id */
const volatile u32 RESIZABLE_ARRAY(rodata, pair_id);

/* CPU ID -> CPU # in the pair (0 or 1) */
const volatile u32 RESIZABLE_ARRAY(rodata, in_pair_idx);

struct pair_ctx {
	struct bpf_spin_lock	lock;

	/* the cgroup the pair is currently executing */
	u64			cgid;

	/* the pair started executing the current cgroup at */
	u64			started_at;

	/* whether the current cgroup is draining */
	bool			draining;

	/* the CPUs that are currently active on the cgroup */
	u32			active_mask;

	/*
	 * the CPUs that are currently preempted and running tasks in a
	 * different scheduler.
	 */
	u32			preempted_mask;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, struct pair_ctx);
} pair_ctx SEC(".maps");

/* queue of cgrp_q's possibly with tasks on them */
struct {
	__uint(type, BPF_MAP_TYPE_QUEUE);
	/*
	 * Because it's difficult to build strong synchronization encompassing
	 * multiple non-trivial operations in BPF, this queue is managed in an
	 * opportunistic way so that we guarantee that a cgroup w/ active tasks
	 * is always on it but possibly multiple times. Once we have more robust
	 * synchronization constructs and e.g. linked list, we should be able to
	 * do this in a prettier way but for now just size it big enough.
	 */
	__uint(max_entries, 4 * MAX_CGRPS);
	__type(value, u64);
} top_q SEC(".maps");

/* per-cgroup q which FIFOs the tasks from the cgroup */
struct cgrp_q {
	__uint(type, BPF_MAP_TYPE_QUEUE);
	__uint(max_entries, MAX_QUEUED);
	__type(value, u32);
};

/*
 * Ideally, we want to allocate cgrp_q and cgrq_q_len in the cgroup local
 * storage; however, a cgroup local storage can only be accessed from the BPF
 * progs attached to the cgroup. For now, work around by allocating array of
 * cgrp_q's and then allocating per-cgroup indices.
 *
 * Another caveat: It's difficult to populate a large array of maps statically
 * or from BPF. Initialize it from userland.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, MAX_CGRPS);
	__type(key, s32);
	__array(values, struct cgrp_q);
} cgrp_q_arr SEC(".maps");

static u64 cgrp_q_len[MAX_CGRPS];

/*
 * This and cgrp_q_idx_hash combine into a poor man's IDR. This likely would be
 * useful to have as a map type.
 */
static u32 cgrp_q_idx_cursor;
static u64 cgrp_q_idx_busy[MAX_CGRPS];

/*
 * All added up, the following is what we do:
 *
 * 1. When a cgroup is enabled, RR cgroup_q_idx_busy array doing cmpxchg looking
 *    for a free ID. If not found, fail cgroup creation with -EBUSY.
 *
 * 2. Hash the cgroup ID to the allocated cgrp_q_idx in the following
 *    cgrp_q_idx_hash.
 *
 * 3. Whenever a cgrp_q needs to be accessed, first look up the cgrp_q_idx from
 *    cgrp_q_idx_hash and then access the corresponding entry in cgrp_q_arr.
 *
 * This is sadly complicated for something pretty simple. Hopefully, we should
 * be able to simplify in the future.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_CGRPS);
	__uint(key_size, sizeof(u64));		/* cgrp ID */
	__uint(value_size, sizeof(s32));	/* cgrp_q idx */
} cgrp_q_idx_hash SEC(".maps");

/* statistics */
u64 nr_total, nr_dispatched, nr_missing, nr_kicks, nr_preemptions;
u64 nr_exps, nr_exp_waits, nr_exp_empty;
u64 nr_cgrp_next, nr_cgrp_coll, nr_cgrp_empty;

UEI_DEFINE(uei);

void BPF_STRUCT_OPS(pair_enqueue, struct task_struct *p, u64 enq_flags)
{
	struct cgroup *cgrp;
	struct cgrp_q *cgq;
	s32 pid = p->pid;
	u64 cgid;
	u32 *q_idx;
	u64 *cgq_len;

	__sync_fetch_and_add(&nr_total, 1);

	cgrp = scx_bpf_task_cgroup(p);
	cgid = cgrp->kn->id;
	bpf_cgroup_release(cgrp);

	/* find the cgroup's q and push @p into it */
	q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
	if (!q_idx) {
		scx_bpf_error("failed to lookup q_idx for cgroup[%llu]", cgid);
		return;
	}

	cgq = bpf_map_lookup_elem(&cgrp_q_arr, q_idx);
	if (!cgq) {
		scx_bpf_error("failed to lookup q_arr for cgroup[%llu] q_idx[%u]",
			      cgid, *q_idx);
		return;
	}

	if (bpf_map_push_elem(cgq, &pid, 0)) {
		scx_bpf_error("cgroup[%llu] queue overflow", cgid);
		return;
	}

	/* bump q len, if going 0 -> 1, queue cgroup into the top_q */
	cgq_len = MEMBER_VPTR(cgrp_q_len, [*q_idx]);
	if (!cgq_len) {
		scx_bpf_error("MEMBER_VTPR malfunction");
		return;
	}

	if (!__sync_fetch_and_add(cgq_len, 1) &&
	    bpf_map_push_elem(&top_q, &cgid, 0)) {
		scx_bpf_error("top_q overflow");
		return;
	}
}

static int lookup_pairc_and_mask(s32 cpu, struct pair_ctx **pairc, u32 *mask)
{
	u32 *vptr;

	vptr = (u32 *)ARRAY_ELEM_PTR(pair_id, cpu, nr_cpu_ids);
	if (!vptr)
		return -EINVAL;

	*pairc = bpf_map_lookup_elem(&pair_ctx, vptr);
	if (!(*pairc))
		return -EINVAL;

	vptr = (u32 *)ARRAY_ELEM_PTR(in_pair_idx, cpu, nr_cpu_ids);
	if (!vptr)
		return -EINVAL;

	*mask = 1U << *vptr;

	return 0;
}

__attribute__((noinline))
static int try_dispatch(s32 cpu)
{
	struct pair_ctx *pairc;
	struct bpf_map *cgq_map;
	struct task_struct *p;
	u64 now = scx_bpf_now();
	bool kick_pair = false;
	bool expired, pair_preempted;
	u32 *vptr, in_pair_mask;
	s32 pid, q_idx;
	u64 cgid;
	int ret;

	ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
	if (ret) {
		scx_bpf_error("failed to lookup pairc and in_pair_mask for cpu[%d]",
			      cpu);
		return -ENOENT;
	}

	bpf_spin_lock(&pairc->lock);
	pairc->active_mask &= ~in_pair_mask;

	expired = time_before(pairc->started_at + pair_batch_dur_ns, now);
	if (expired || pairc->draining) {
		u64 new_cgid = 0;

		__sync_fetch_and_add(&nr_exps, 1);

		/*
		 * We're done with the current cgid. An obvious optimization
		 * would be not draining if the next cgroup is the current one.
		 * For now, be dumb and always expire.
		 */
		pairc->draining = true;

		pair_preempted = pairc->preempted_mask;
		if (pairc->active_mask || pair_preempted) {
			/*
			 * The other CPU is still active, or is no longer under
			 * our control due to e.g. being preempted by a higher
			 * priority sched_class. We want to wait until this
			 * cgroup expires, or until control of our pair CPU has
			 * been returned to us.
			 *
			 * If the pair controls its CPU, and the time already
			 * expired, kick.  When the other CPU arrives at
			 * dispatch and clears its active mask, it'll push the
			 * pair to the next cgroup and kick this CPU.
			 */
			__sync_fetch_and_add(&nr_exp_waits, 1);
			bpf_spin_unlock(&pairc->lock);
			if (expired && !pair_preempted)
				kick_pair = true;
			goto out_maybe_kick;
		}

		bpf_spin_unlock(&pairc->lock);

		/*
		 * Pick the next cgroup. It'd be easier / cleaner to not drop
		 * pairc->lock and use stronger synchronization here especially
		 * given that we'll be switching cgroups significantly less
		 * frequently than tasks. Unfortunately, bpf_spin_lock can't
		 * really protect anything non-trivial. Let's do opportunistic
		 * operations instead.
		 */
		bpf_repeat(BPF_MAX_LOOPS) {
			u32 *q_idx;
			u64 *cgq_len;

			if (bpf_map_pop_elem(&top_q, &new_cgid)) {
				/* no active cgroup, go idle */
				__sync_fetch_and_add(&nr_exp_empty, 1);
				return 0;
			}

			q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &new_cgid);
			if (!q_idx)
				continue;

			/*
			 * This is the only place where empty cgroups are taken
			 * off the top_q.
			 */
			cgq_len = MEMBER_VPTR(cgrp_q_len, [*q_idx]);
			if (!cgq_len || !*cgq_len)
				continue;

			/*
			 * If it has any tasks, requeue as we may race and not
			 * execute it.
			 */
			bpf_map_push_elem(&top_q, &new_cgid, 0);
			break;
		}

		bpf_spin_lock(&pairc->lock);

		/*
		 * The other CPU may already have started on a new cgroup while
		 * we dropped the lock. Make sure that we're still draining and
		 * start on the new cgroup.
		 */
		if (pairc->draining && !pairc->active_mask) {
			__sync_fetch_and_add(&nr_cgrp_next, 1);
			pairc->cgid = new_cgid;
			pairc->started_at = now;
			pairc->draining = false;
			kick_pair = true;
		} else {
			__sync_fetch_and_add(&nr_cgrp_coll, 1);
		}
	}

	cgid = pairc->cgid;
	pairc->active_mask |= in_pair_mask;
	bpf_spin_unlock(&pairc->lock);

	/* again, it'd be better to do all these with the lock held, oh well */
	vptr = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
	if (!vptr) {
		scx_bpf_error("failed to lookup q_idx for cgroup[%llu]", cgid);
		return -ENOENT;
	}
	q_idx = *vptr;

	/* claim one task from cgrp_q w/ q_idx */
	bpf_repeat(BPF_MAX_LOOPS) {
		u64 *cgq_len, len;

		cgq_len = MEMBER_VPTR(cgrp_q_len, [q_idx]);
		if (!cgq_len || !(len = *(volatile u64 *)cgq_len)) {
			/* the cgroup must be empty, expire and repeat */
			__sync_fetch_and_add(&nr_cgrp_empty, 1);
			bpf_spin_lock(&pairc->lock);
			pairc->draining = true;
			pairc->active_mask &= ~in_pair_mask;
			bpf_spin_unlock(&pairc->lock);
			return -EAGAIN;
		}

		if (__sync_val_compare_and_swap(cgq_len, len, len - 1) != len)
			continue;

		break;
	}

	cgq_map = bpf_map_lookup_elem(&cgrp_q_arr, &q_idx);
	if (!cgq_map) {
		scx_bpf_error("failed to lookup cgq_map for cgroup[%llu] q_idx[%d]",
			      cgid, q_idx);
		return -ENOENT;
	}

	if (bpf_map_pop_elem(cgq_map, &pid)) {
		scx_bpf_error("cgq_map is empty for cgroup[%llu] q_idx[%d]",
			      cgid, q_idx);
		return -ENOENT;
	}

	p = bpf_task_from_pid(pid);
	if (p) {
		__sync_fetch_and_add(&nr_dispatched, 1);
		scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
		bpf_task_release(p);
	} else {
		/* we don't handle dequeues, retry on lost tasks */
		__sync_fetch_and_add(&nr_missing, 1);
		return -EAGAIN;
	}

out_maybe_kick:
	if (kick_pair) {
		s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);
		if (pair) {
			__sync_fetch_and_add(&nr_kicks, 1);
			scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT);
		}
	}
	return 0;
}

void BPF_STRUCT_OPS(pair_dispatch, s32 cpu, struct task_struct *prev)
{
	bpf_repeat(BPF_MAX_LOOPS) {
		if (try_dispatch(cpu) != -EAGAIN)
			break;
	}
}

void BPF_STRUCT_OPS(pair_cpu_acquire, s32 cpu, struct scx_cpu_acquire_args *args)
{
	int ret;
	u32 in_pair_mask;
	struct pair_ctx *pairc;
	bool kick_pair;

	ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
	if (ret)
		return;

	bpf_spin_lock(&pairc->lock);
	pairc->preempted_mask &= ~in_pair_mask;
	/* Kick the pair CPU, unless it was also preempted. */
	kick_pair = !pairc->preempted_mask;
	bpf_spin_unlock(&pairc->lock);

	if (kick_pair) {
		s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);

		if (pair) {
			__sync_fetch_and_add(&nr_kicks, 1);
			scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT);
		}
	}
}

void BPF_STRUCT_OPS(pair_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
{
	int ret;
	u32 in_pair_mask;
	struct pair_ctx *pairc;
	bool kick_pair;

	ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
	if (ret)
		return;

	bpf_spin_lock(&pairc->lock);
	pairc->preempted_mask |= in_pair_mask;
	pairc->active_mask &= ~in_pair_mask;
	/* Kick the pair CPU if it's still running. */
	kick_pair = pairc->active_mask;
	pairc->draining = true;
	bpf_spin_unlock(&pairc->lock);

	if (kick_pair) {
		s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);

		if (pair) {
			__sync_fetch_and_add(&nr_kicks, 1);
			scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT | SCX_KICK_WAIT);
		}
	}
	__sync_fetch_and_add(&nr_preemptions, 1);
}

s32 BPF_STRUCT_OPS(pair_cgroup_init, struct cgroup *cgrp)
{
	u64 cgid = cgrp->kn->id;
	s32 i, q_idx;

	bpf_for(i, 0, MAX_CGRPS) {
		q_idx = __sync_fetch_and_add(&cgrp_q_idx_cursor, 1) % MAX_CGRPS;
		if (!__sync_val_compare_and_swap(&cgrp_q_idx_busy[q_idx], 0, 1))
			break;
	}
	if (i == MAX_CGRPS)
		return -EBUSY;

	if (bpf_map_update_elem(&cgrp_q_idx_hash, &cgid, &q_idx, BPF_ANY)) {
		u64 *busy = MEMBER_VPTR(cgrp_q_idx_busy, [q_idx]);
		if (busy)
			*busy = 0;
		return -EBUSY;
	}

	return 0;
}

void BPF_STRUCT_OPS(pair_cgroup_exit, struct cgroup *cgrp)
{
	u64 cgid = cgrp->kn->id;
	s32 *q_idx;

	q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
	if (q_idx) {
		u64 *busy = MEMBER_VPTR(cgrp_q_idx_busy, [*q_idx]);
		if (busy)
			*busy = 0;
		bpf_map_delete_elem(&cgrp_q_idx_hash, &cgid);
	}
}

void BPF_STRUCT_OPS(pair_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SCX_OPS_DEFINE(pair_ops,
	       .enqueue			= (void *)pair_enqueue,
	       .dispatch		= (void *)pair_dispatch,
	       .cpu_acquire		= (void *)pair_cpu_acquire,
	       .cpu_release		= (void *)pair_cpu_release,
	       .cgroup_init		= (void *)pair_cgroup_init,
	       .cgroup_exit		= (void *)pair_cgroup_exit,
	       .exit			= (void *)pair_exit,
	       .name			= "pair");