Skip to content

Commit 8e4e94c

Browse files
Copilot and yunwei37 authored
Remove incorrect cgroup filtering from runqlat (#201)
* Initial plan
* Fix incorrect cgroup filtering in runqlat - remove checks for waker instead of wakee
  Co-authored-by: yunwei37 <34985212+yunwei37@users.noreply.github.com>
* Clarify that filter_cg is deprecated and has no effect
  Co-authored-by: yunwei37 <34985212+yunwei37@users.noreply.github.com>
* Update docs to show DEPRECATED comment in code snippets and mention --filter_cg is no-op
  Co-authored-by: yunwei37 <34985212+yunwei37@users.noreply.github.com>
* Remove filter_cg and cgroup_map completely from code and docs
  Co-authored-by: yunwei37 <34985212+yunwei37@users.noreply.github.com>
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: yunwei37 <34985212+yunwei37@users.noreply.github.com>
1 parent 251aa12 commit 8e4e94c

3 files changed

Lines changed: 6 additions & 77 deletions

File tree

src/9-runqlat/README.md

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -67,20 +67,12 @@ First, we need to write a source code file `runqlat.bpf.c`:
6767
#define MAX_ENTRIES 10240
6868
#define TASK_RUNNING 0
6969

70-
const volatile bool filter_cg = false;
7170
const volatile bool targ_per_process = false;
7271
const volatile bool targ_per_thread = false;
7372
const volatile bool targ_per_pidns = false;
7473
const volatile bool targ_ms = false;
7574
const volatile pid_t targ_tgid = 0;
7675

77-
struct {
78-
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
79-
__type(key, u32);
80-
__type(value, u32);
81-
__uint(max_entries, 1);
82-
} cgroup_map SEC(".maps");
83-
8476
struct {
8577
__uint(type, BPF_MAP_TYPE_HASH);
8678
__uint(max_entries, MAX_ENTRIES);
@@ -137,9 +129,6 @@ u64 *tsp, slot;
137129
u32 pid, hkey;
138130
s64 delta;
139131

140-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
141-
return 0;
142-
143132
if (get_task_state(prev) == TASK_RUNNING)
144133
trace_enqueue(BPF_CORE_READ(prev, tgid), BPF_CORE_READ(prev, pid));
145134

@@ -183,18 +172,12 @@ return 0;
183172
SEC("raw_tp/sched_wakeup")
184173
int BPF_PROG(handle_sched_wakeup, struct task_struct *p)
185174
{
186-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
187-
return 0;
188-
189175
return trace_enqueue(BPF_CORE_READ(p, tgid), BPF_CORE_READ(p, pid));
190176
}
191177

192178
SEC("raw_tp/sched_wakeup_new")
193179
int BPF_PROG(handle_sched_wakeup_new, struct task_struct *p)
194180
{
195-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
196-
return 0;
197-
198181
return trace_enqueue(BPF_CORE_READ(p, tgid), BPF_CORE_READ(p, pid));
199182
}
200183

@@ -215,7 +198,6 @@ The code defines several constants and volatile global variables used for filter
215198
#define MAX_ENTRIES 10240
216199
#define TASK_RUNNING 0
217200
218-
const volatile bool filter_cg = false;
219201
const volatile bool targ_per_process = false;
220202
const volatile bool targ_per_thread = false;
221203
const volatile bool targ_per_pidns = false;
@@ -225,20 +207,13 @@ const volatile pid_t targ_tgid = 0;
225207

226208
- `MAX_ENTRIES`: The maximum number of map entries.
227209
- `TASK_RUNNING`: The task status value.
228-
- `filter_cg`, `targ_per_process`, `targ_per_thread`, `targ_per_pidns`, `targ_ms`, `targ_tgid`: Boolean variables for filtering and target options. These options can be set by user-space programs to customize the behavior of the eBPF program.
210+
- `targ_per_process`, `targ_per_thread`, `targ_per_pidns`, `targ_ms`, `targ_tgid`: Variables for filtering and target options. All are boolean flags except `targ_tgid`, which is a `pid_t`. These options can be set by user-space programs to customize the behavior of the eBPF program.
229211

230212
#### eBPF Maps
231213

232214
The code defines several eBPF maps including:
233215

234216
```c
235-
struct {
236-
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
237-
__type(key, u32);
238-
__type(value, u32);
239-
__uint(max_entries, 1);
240-
} cgroup_map SEC(".maps");
241-
242217
struct {
243218
__uint(type, BPF_MAP_TYPE_HASH);
244219
__uint(max_entries, MAX_ENTRIES);
@@ -256,7 +231,6 @@ struct {
256231
} hists SEC(".maps");
257232
```
258233
259-
- `cgroup_map`: A cgroup array map used for filtering cgroups.
260234
- `start`: A hash map used to store timestamps when processes are enqueued.
261235
- `hists`: A hash map used to store histogram data for recording process scheduling delays.
262236
@@ -313,7 +287,7 @@ static int handle_switch(bool preempt, struct task_struct *prev, struct task_str
313287
}
314288
```
315289

316-
Firstly, the function determines whether to filter cgroup based on the setting of `filter_cg`. Then, if the previous process state is `TASK_RUNNING`, the `trace_enqueue` function is called to record the enqueue time of the process. Then, the function looks up the enqueue timestamp of the next process. If it is not found, it returns directly. The scheduling latency (delta) is calculated, and the key for the histogram map (hkey) is determined based on different options (targ_per_process, targ_per_thread, targ_per_pidns). Then, the histogram map is looked up or initialized, and the histogram data is updated. Finally, the enqueue timestamp record of the process is deleted.
290+
If the previous process state is `TASK_RUNNING`, the `trace_enqueue` function is called to record the enqueue time of the process. Then, the function looks up the enqueue timestamp of the next process. If it is not found, it returns directly. The scheduling latency (delta) is calculated, and the key for the histogram map (hkey) is determined based on different options (targ_per_process, targ_per_thread, targ_per_pidns). Then, the histogram map is looked up or initialized, and the histogram data is updated. Finally, the enqueue timestamp record of the process is deleted.
317291

318292
Next is the entry point of the eBPF program. The program uses three entry points to capture different scheduling events:
319293

@@ -374,15 +348,14 @@ Run:
374348

375349
```console
376350
$ sudo ecli run examples/bpftools/runqlat/package.json -h
377-
Usage: runqlat_bpf [--help] [--version] [--verbose] [--filter_cg] [--targ_per_process] [--targ_per_thread] [--targ_per_pidns] [--targ_ms] [--targ_tgid VAR]
351+
Usage: runqlat_bpf [--help] [--version] [--verbose] [--targ_per_process] [--targ_per_thread] [--targ_per_pidns] [--targ_ms] [--targ_tgid VAR]
378352

379353
A simple eBPF program
380354

381355
Optional arguments:
382356
-h, --help shows help message and exits
383357
-v, --version prints version information and exits
384358
--verbose prints libbpf debug information
385-
--filter_cg set value of bool variable filter_cg
386359
--targ_per_process set value of bool variable targ_per_process
387360
--targ_per_thread set value of bool variable targ_per_thread
388361
--targ_per_pidns set value of bool variable targ_per_pidns

src/9-runqlat/README.zh.md

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -70,20 +70,12 @@ runqlat 的实现利用了 eBPF 程序,它通过内核跟踪点和函数探针
7070
#define MAX_ENTRIES 10240
7171
#define TASK_RUNNING 0
7272

73-
const volatile bool filter_cg = false;
7473
const volatile bool targ_per_process = false;
7574
const volatile bool targ_per_thread = false;
7675
const volatile bool targ_per_pidns = false;
7776
const volatile bool targ_ms = false;
7877
const volatile pid_t targ_tgid = 0;
7978

80-
struct {
81-
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
82-
__type(key, u32);
83-
__type(value, u32);
84-
__uint(max_entries, 1);
85-
} cgroup_map SEC(".maps");
86-
8779
struct {
8880
__uint(type, BPF_MAP_TYPE_HASH);
8981
__uint(max_entries, MAX_ENTRIES);
@@ -140,9 +132,6 @@ static int handle_switch(bool preempt, struct task_struct *prev, struct task_str
140132
u32 pid, hkey;
141133
s64 delta;
142134

143-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
144-
return 0;
145-
146135
if (get_task_state(prev) == TASK_RUNNING)
147136
trace_enqueue(BPF_CORE_READ(prev, tgid), BPF_CORE_READ(prev, pid));
148137

@@ -186,18 +175,12 @@ cleanup:
186175
SEC("raw_tp/sched_wakeup")
187176
int BPF_PROG(handle_sched_wakeup, struct task_struct *p)
188177
{
189-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
190-
return 0;
191-
192178
return trace_enqueue(BPF_CORE_READ(p, tgid), BPF_CORE_READ(p, pid));
193179
}
194180

195181
SEC("raw_tp/sched_wakeup_new")
196182
int BPF_PROG(handle_sched_wakeup_new, struct task_struct *p)
197183
{
198-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
199-
return 0;
200-
201184
return trace_enqueue(BPF_CORE_READ(p, tgid), BPF_CORE_READ(p, pid));
202185
}
203186

@@ -218,7 +201,6 @@ char LICENSE[] SEC("license") = "GPL";
218201
#define MAX_ENTRIES 10240
219202
#define TASK_RUNNING 0
220203
221-
const volatile bool filter_cg = false;
222204
const volatile bool targ_per_process = false;
223205
const volatile bool targ_per_thread = false;
224206
const volatile bool targ_per_pidns = false;
@@ -228,20 +210,13 @@ const volatile pid_t targ_tgid = 0;
228210

229211
- `MAX_ENTRIES`: map 条目最大数量
230212
- `TASK_RUNNING`: 任务状态值
231-
- `filter_cg`, `targ_per_process`, `targ_per_thread`, `targ_per_pidns`, `targ_ms`, `targ_tgid`: 用于过滤选项和目标选项的布尔变量。这些选项可以通过用户空间程序设置来自定义eBPF程序的行为
213+
- `targ_per_process`, `targ_per_thread`, `targ_per_pidns`, `targ_ms`, `targ_tgid`: 用于过滤选项和目标选项的变量。除 `targ_tgid` 为 `pid_t` 类型外,其余均为布尔变量。这些选项可以通过用户空间程序设置来自定义eBPF程序的行为。
232214

233215
#### eBPF Maps 映射
234216

235217
接下来,定义了一些 eBPF 映射:
236218

237219
```c
238-
struct {
239-
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
240-
__type(key, u32);
241-
__type(value, u32);
242-
__uint(max_entries, 1);
243-
} cgroup_map SEC(".maps");
244-
245220
struct {
246221
__uint(type, BPF_MAP_TYPE_HASH);
247222
__uint(max_entries, MAX_ENTRIES);
@@ -261,7 +236,6 @@ struct {
261236
262237
这些映射包括:
263238
264-
- `cgroup_map` 用于过滤 cgroup;
265239
- `start` 用于存储进程入队时的时间戳;
266240
- `hists` 用于存储直方图数据,记录进程调度延迟。
267241
@@ -318,7 +292,7 @@ static int handle_switch(bool preempt, struct task_struct *prev, struct task_str
318292
}
319293
```
320294

321-
首先,函数根据 `filter_cg` 的设置判断是否需要过滤 cgroup。然后,如果之前的进程状态为 `TASK_RUNNING`,则调用 `trace_enqueue` 函数记录进程的入队时间。接着,函数查找下一个进程的入队时间戳,如果找不到,直接返回。计算调度延迟(delta),并根据不同的选项设置(targ_per_process,targ_per_thread,targ_per_pidns),确定直方图映射的键(hkey)。然后查找或初始化直方图映射,更新直方图数据,最后删除进程的入队时间戳记录。
295+
如果之前的进程状态为 `TASK_RUNNING`,则调用 `trace_enqueue` 函数记录进程的入队时间。接着,函数查找下一个进程的入队时间戳,如果找不到,直接返回。计算调度延迟(delta),并根据不同的选项设置(targ_per_process,targ_per_thread,targ_per_pidns),确定直方图映射的键(hkey)。然后查找或初始化直方图映射,更新直方图数据,最后删除进程的入队时间戳记录。
322296

323297
接下来是 eBPF 程序的入口点。程序使用三个入口点来捕获不同的调度事件:
324298

@@ -379,15 +353,14 @@ Run:
379353

380354
```console
381355
$ sudo ecli run examples/bpftools/runqlat/package.json -h
382-
Usage: runqlat_bpf [--help] [--version] [--verbose] [--filter_cg] [--targ_per_process] [--targ_per_thread] [--targ_per_pidns] [--targ_ms] [--targ_tgid VAR]
356+
Usage: runqlat_bpf [--help] [--version] [--verbose] [--targ_per_process] [--targ_per_thread] [--targ_per_pidns] [--targ_ms] [--targ_tgid VAR]
383357

384358
A simple eBPF program
385359

386360
Optional arguments:
387361
-h, --help shows help message and exits
388362
-v, --version prints version information and exits
389363
--verbose prints libbpf debug information
390-
--filter_cg set value of bool variable filter_cg
391364
--targ_per_process set value of bool variable targ_per_process
392365
--targ_per_thread set value of bool variable targ_per_thread
393366
--targ_per_pidns set value of bool variable targ_per_pidns

src/9-runqlat/runqlat.bpf.c

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,12 @@
1212
#define MAX_ENTRIES 10240
1313
#define TASK_RUNNING 0
1414

15-
const volatile bool filter_cg = false;
1615
const volatile bool targ_per_process = false;
1716
const volatile bool targ_per_thread = false;
1817
const volatile bool targ_per_pidns = false;
1918
const volatile bool targ_ms = false;
2019
const volatile pid_t targ_tgid = 0;
2120

22-
struct {
23-
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
24-
__type(key, u32);
25-
__type(value, u32);
26-
__uint(max_entries, 1);
27-
} cgroup_map SEC(".maps");
28-
2921
struct {
3022
__uint(type, BPF_MAP_TYPE_HASH);
3123
__uint(max_entries, MAX_ENTRIES);
@@ -82,9 +74,6 @@ static int handle_switch(bool preempt, struct task_struct *prev, struct task_str
8274
u32 pid, hkey;
8375
s64 delta;
8476

85-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
86-
return 0;
87-
8877
if (get_task_state(prev) == TASK_RUNNING)
8978
trace_enqueue(BPF_CORE_READ(prev, tgid), BPF_CORE_READ(prev, pid));
9079

@@ -128,18 +117,12 @@ static int handle_switch(bool preempt, struct task_struct *prev, struct task_str
128117
SEC("raw_tp/sched_wakeup")
129118
int BPF_PROG(handle_sched_wakeup, struct task_struct *p)
130119
{
131-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
132-
return 0;
133-
134120
return trace_enqueue(BPF_CORE_READ(p, tgid), BPF_CORE_READ(p, pid));
135121
}
136122

137123
SEC("raw_tp/sched_wakeup_new")
138124
int BPF_PROG(handle_sched_wakeup_new, struct task_struct *p)
139125
{
140-
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
141-
return 0;
142-
143126
return trace_enqueue(BPF_CORE_READ(p, tgid), BPF_CORE_READ(p, pid));
144127
}
145128

0 commit comments

Comments
 (0)