Skip to content

Commit 6d3ba3e

Browse files
feat: Add cgroup eBPF policy guard with TCP, device, and sysctl controls
- Implemented cgroup-based access control using eBPF with three main functionalities: 1. Block TCP connections to specified ports. 2. Deny access to specified devices. 3. Control sysctl read/write operations. - Added necessary Makefile and documentation for building and running the policy guard.
1 parent 42884e3 commit 6d3ba3e

8 files changed

Lines changed: 1561 additions & 0 deletions

File tree

src/cgroup/.gitignore

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Build artifacts
2+
*.o
3+
*.skel.h
4+
*.skel.json
5+
.output/
6+
cgroup_guard
7+
8+
# Test artifacts
9+
*.tmp
10+
*.err
11+
12+
# IDE
13+
.vscode/
14+
.idea/
15+
*.swp
16+
*.swo
17+
18+
# OS
19+
.DS_Store
20+
Thumbs.db

src/cgroup/Makefile

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2+
# Scratch directory for intermediate build products (removed by `clean`).
OUTPUT := .output
CLANG ?= clang

# Vendored libbpf / bpftool sources and the locations of their build outputs.
LIBBPF_SRC := $(abspath ../third_party/libbpf/src)
BPFTOOL_SRC := $(abspath ../third_party/bpftool/src)
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool

# Translate `uname -m` into the architecture name used below for the
# vmlinux.h directory and for the __TARGET_ARCH_* define. The substitutions
# are applied in the order listed.
ARCH ?= $(shell uname -m | sed -e 's/x86_64/x86/' \
                              -e 's/arm.*/arm/' \
                              -e 's/aarch64/arm64/' \
                              -e 's/ppc64le/powerpc/' \
                              -e 's/mips.*/mips/' \
                              -e 's/riscv64/riscv/' \
                              -e 's/loongarch64/loongarch/')
VMLINUX := ../third_party/vmlinux/$(ARCH)/vmlinux.h
17+
# Header search path: our own libbpf API headers (via $(OUTPUT)) and the Linux
# UAPI headers shipped with libbpf take the place of system-wide headers,
# which may be missing or outdated. The vmlinux.h directory is added as well.
INCLUDES := -I$(OUTPUT)
INCLUDES += -I../third_party/libbpf/include/uapi
INCLUDES += -I$(dir $(VMLINUX))

# Flags for user-space compilation and linking.
CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)

# Applications built by the `all` target.
APPS = cgroup_guard
25+
26+
# Ask Clang for its default system include directories and hand each one back
# as `-idirafter <dir>` when compiling with `-target bpf`. Without this, some
# architecture-specific directories are "missing" on certain
# architectures/distros, and headers such as asm/types.h, asm/byteorder.h,
# asm/socket.h, asm/sockios.h or sys/cdefs.h might not be found.
#
# `-idirafter` is chosen so that the normal include mechanics are left alone
# except where the build would have failed anyway.
CLANG_BPF_SYS_INCLUDES ?= $(shell \
  $(CLANG) -v -E - </dev/null 2>&1 | \
  sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
36+
37+
# Output verbosity.
#   default : recipe commands are hidden (Q = @) and $(call msg,TAG,path[,extra])
#             prints a short tag line, with the absolute $(OUTPUT) prefix
#             stripped from the path.
#   V=1     : recipe commands are echoed and msg expands to nothing.
ifneq ($(V),1)
  Q = @
  msg = @printf ' %-8s %s%s\n' "$(1)" \
        "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
        "$(if $(3), $(3))";
  MAKEFLAGS += --no-print-directory
else
  Q =
  msg =
endif
48+
49+
# allow-override(NAME,VALUE): assign VALUE to NAME unless NAME's origin
# mentions the environment or the command line, in which case the caller's
# value is left untouched.
define allow-override
  $(if $(findstring environment,$(origin $(1)))$(findstring command line,$(origin $(1))),,\
    $(eval $(1) = $(2)))
endef

# Default compiler and linker, honouring an optional CROSS_COMPILE prefix.
$(call allow-override,CC,$(CROSS_COMPILE)cc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
57+
58+
.PHONY: all clean

# Default goal: build every application listed in APPS.
all: $(APPS)

# Remove the build directory and the application binaries.
clean:
	$(call msg,CLEAN)
	$(Q)rm -rf $(OUTPUT) $(APPS)

# Create the build directories on demand. Other rules list them as
# order-only prerequisites.
$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
	$(call msg,MKDIR,$@)
	$(Q)mkdir -p $@
69+
70+
# Build the static libbpf from the vendored sources.
#
# The library is rebuilt when any .c/.h file or the Makefile in $(LIBBPF_SRC)
# changes. Objects go to the libbpf/ subdirectory next to the target and the
# `install` goal uses the target's directory as DESTDIR, with INCLUDEDIR,
# LIBDIR and UAPIDIR set to empty.
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
	$(call msg,LIB,$@)
	$(Q)$(MAKE) -C $(LIBBPF_SRC) \
		BUILD_STATIC_ONLY=1 \
		OBJDIR=$(dir $@)/libbpf \
		DESTDIR=$(dir $@) \
		INCLUDEDIR= LIBDIR= UAPIDIR= \
		install
77+
78+
# Build the bootstrap bpftool from the vendored sources into $(BPFTOOL_OUTPUT).
# ARCH and CROSS_COMPILE are passed as empty values to the sub-make
# (presumably so the tool is built for the build host - confirm against the
# bpftool Makefile).
$(BPFTOOL): | $(BPFTOOL_OUTPUT)
	$(call msg,BPFTOOL,$@)
	$(Q)$(MAKE) -C $(BPFTOOL_SRC) \
		ARCH= CROSS_COMPILE= \
		OUTPUT=$(BPFTOOL_OUTPUT)/ \
		bootstrap
82+
83+
# Build BPF code
#
# Each <name>.bpf.c is compiled with Clang's BPF target into
# $(OUTPUT)/<name>.tmp.bpf.o, which `bpftool gen object` then turns into the
# final $(OUTPUT)/<name>.bpf.o.
#
# Local headers are listed with $(wildcard *.h). Prerequisites are expanded
# when the rule is read, before `%` is replaced by the stem, so
# $(wildcard %.h) would look for a file literally named "%.h" and contribute
# nothing, and header edits would never trigger a rebuild. Only the .c
# prerequisite is passed to the compiler ($(filter %.c,$^)).
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard *.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
	$(call msg,BPF,$@)
	$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
		$(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
		-c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)

# Generate BPF skeletons
#
# `bpftool gen skeleton` prints the skeleton for the BPF object on stdout;
# the output is redirected into the .skel.h target.
$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
	$(call msg,GEN-SKEL,$@)
	$(Q)$(BPFTOOL) gen skeleton $< > $@
95+
96+
# Build user-space code
#
# Every application object additionally depends on its generated skeleton
# header, so the skeleton exists before the loader is compiled.
$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h

# Compile <name>.c into $(OUTPUT)/<name>.o.
#
# Local headers are listed with $(wildcard *.h). $(wildcard %.h) is expanded
# when the rule is read, matches only a file literally named "%.h", and so
# never tracked header changes. Only the .c prerequisite is compiled.
$(OUTPUT)/%.o: %.c $(wildcard *.h) | $(OUTPUT)
	$(call msg,CC,$@)
	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
102+
103+
# Link each application from its object file and the static libbpf, then
# libelf and zlib.
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
	$(call msg,BINARY,$@)
	$(Q)$(CC) $(CFLAGS) -o $@ $^ $(ALL_LDFLAGS) -lelf -lz

# Delete a target if its recipe fails, so a partial file is never mistaken
# for an up-to-date one.
.DELETE_ON_ERROR:

# Keep intermediate targets (.skel.h, .bpf.o, etc.) instead of auto-deleting
# them.
.SECONDARY:

src/cgroup/README.md

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
# eBPF Tutorial: cgroup-based Policy Control
2+
3+
This tutorial demonstrates how to use cgroup eBPF programs to implement per-cgroup policy controls for networking, device access, and sysctl operations.
4+
5+
## What is cgroup eBPF?
6+
7+
**cgroup eBPF** allows you to attach eBPF programs to cgroups (control groups) to enforce policies based on process/container membership. Unlike XDP/tc which work on network interfaces, cgroup eBPF works at the process level:
8+
9+
- Policies only affect processes in the target cgroup
10+
- Perfect for container/multi-tenant/sandbox isolation
11+
- Covers: network access control, socket options, sysctl access, device access
12+
13+
When a cgroup eBPF program denies an operation, userspace typically sees `EPERM` (Operation not permitted).
14+
15+
## cgroup eBPF Hook Points
16+
17+
### 1. `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` - Socket Address Hooks
18+
19+
Triggered on socket address syscalls (bind/connect/sendmsg/recvmsg):
20+
21+
| Hook | Section Name | Description |
22+
|------|--------------|-------------|
23+
| IPv4 bind | `cgroup/bind4` | Filter bind() calls |
24+
| IPv6 bind | `cgroup/bind6` | Filter bind() calls |
25+
| IPv4 connect | `cgroup/connect4` | Filter connect() calls |
26+
| IPv6 connect | `cgroup/connect6` | Filter connect() calls |
27+
| UDP sendmsg | `cgroup/sendmsg4`, `cgroup/sendmsg6` | Filter UDP sends |
28+
| UDP recvmsg | `cgroup/recvmsg4`, `cgroup/recvmsg6` | Filter UDP receives |
29+
| Unix connect | `cgroup/connect_unix` | Filter Unix socket connect |
30+
31+
**Context**: `struct bpf_sock_addr` - contains `user_ip4`, `user_port` (network byte order)
32+
33+
**Return semantics**: `return 1` = allow, `return 0` = deny (EPERM)
34+
35+
### 2. `BPF_PROG_TYPE_CGROUP_DEVICE` - Device Access Control
36+
37+
| Hook | Section Name | Description |
38+
|------|--------------|-------------|
39+
| Device access | `cgroup/dev` | Filter device open/read/write/mknod |
40+
41+
**Context**: `struct bpf_cgroup_dev_ctx` - contains `major`, `minor`, `access_type`
42+
43+
**Return semantics**: `return 0` = deny (EPERM), non-zero = allow
44+
45+
### 3. `BPF_PROG_TYPE_CGROUP_SYSCTL` - Sysctl Access Control
46+
47+
| Hook | Section Name | Description |
48+
|------|--------------|-------------|
49+
| Sysctl access | `cgroup/sysctl` | Filter /proc/sys reads/writes |
50+
51+
**Context**: `struct bpf_sysctl` - use `bpf_sysctl_get_name()` to get sysctl name
52+
53+
**Return semantics**: `return 0` = reject (EPERM), `return 1` = proceed
54+
55+
### 4. Other cgroup Hooks
56+
57+
- `cgroup_skb/ingress`, `cgroup_skb/egress` - Packet-level filtering
58+
- `cgroup/getsockopt`, `cgroup/setsockopt` - Socket option filtering
59+
- `cgroup/sock_create`, `cgroup/sock_release` - Socket lifecycle
60+
- `sockops` - TCP-level optimization (attached via `BPF_CGROUP_SOCK_OPS`)
61+
62+
## This Tutorial: cgroup Policy Guard
63+
64+
We implement a single eBPF object with three programs:
65+
66+
1. **Network (TCP)**: Block `connect()` to a specified destination port
67+
2. **Device**: Block access to a specified `major:minor` device
68+
3. **Sysctl**: Block reads of a specified sysctl (the policy targets reads only, which is safer for testing)
69+
70+
Events are sent to userspace via ringbuf for observability.
71+
72+
## Building
73+
74+
```bash
75+
cd src/cgroup
76+
make
77+
```
78+
79+
## Running
80+
81+
### Terminal A: Start the loader
82+
83+
```bash
84+
# Block: TCP port 9090, /dev/null (1:3), reading kernel/hostname
85+
sudo ./cgroup_guard \
86+
--cgroup /sys/fs/cgroup/ebpf_demo \
87+
--block-port 9090 \
88+
--deny-device 1:3 \
89+
--deny-sysctl kernel/hostname
90+
```
91+
92+
You should see:
93+
```
94+
Attached to cgroup: /sys/fs/cgroup/ebpf_demo
95+
Config: block_port=9090, deny_device=1:3, deny_sysctl_read=kernel/hostname
96+
Press Ctrl-C to stop.
97+
```
98+
99+
### Terminal B: Start test servers (outside cgroup)
100+
101+
```bash
102+
# Start two HTTP servers
103+
python3 -m http.server 8080 --bind 127.0.0.1 &
104+
python3 -m http.server 9090 --bind 127.0.0.1 &
105+
```
106+
107+
### Terminal C: Test from within the cgroup
108+
109+
```bash
110+
sudo bash -c '
111+
echo $$ > /sys/fs/cgroup/ebpf_demo/cgroup.procs
112+
113+
echo "== TCP test =="
114+
curl -s http://127.0.0.1:8080 >/dev/null && echo "8080 OK"
115+
curl -s http://127.0.0.1:9090 >/dev/null && echo "9090 OK (unexpected)" || echo "9090 BLOCKED (expected)"
116+
117+
echo
118+
echo "== Device test =="
119+
cat /dev/null && echo "/dev/null OK (unexpected)" || echo "/dev/null BLOCKED (expected)"
120+
121+
echo
122+
echo "== Sysctl test =="
123+
cat /proc/sys/kernel/hostname && echo "sysctl read OK (unexpected)" || echo "sysctl read BLOCKED (expected)"
124+
'
125+
```
126+
127+
Expected output:
128+
- `8080 OK` - Port 8080 is allowed
129+
- `9090 BLOCKED (expected)` - Port 9090 is blocked
130+
- `/dev/null BLOCKED (expected)` - Device 1:3 is blocked
131+
- `sysctl read BLOCKED (expected)` - Reading kernel/hostname is blocked
132+
133+
### Terminal A output (events)
134+
135+
```
136+
[DENY connect4] pid=12345 comm=curl daddr=127.0.0.1 dport=9090 proto=6
137+
[DENY device] pid=12346 comm=cat major=1 minor=3 access_type=0x...
138+
[DENY sysctl] pid=12347 comm=cat write=0 name=kernel/hostname
139+
```
140+
141+
## Verifying with bpftool
142+
143+
```bash
144+
sudo bpftool cgroup tree /sys/fs/cgroup/ebpf_demo
145+
```
146+
147+
## Key Implementation Details
148+
149+
### 1. Network byte order for sock_addr
150+
151+
```c
152+
// user_port is network byte order, must convert
153+
__u16 dport = bpf_ntohs((__u16)ctx->user_port);
154+
```
155+
156+
### 2. Return value semantics
157+
158+
```c
159+
// For sock_addr (connect4/bind4/etc):
160+
return 1; // allow
161+
return 0; // deny -> EPERM
162+
163+
// For device:
164+
return 0; // deny -> EPERM
165+
return 1; // allow
166+
167+
// For sysctl:
168+
return 0; // reject -> EPERM
169+
return 1; // proceed
170+
```
171+
172+
### 3. Configuration via .rodata
173+
174+
```c
175+
// BPF side - const volatile global is placed in .rodata; volatile keeps the compiler from folding in the default value
176+
const volatile __u16 blocked_tcp_dport = 0;
177+
178+
// Userspace - set before load
179+
skel->rodata->blocked_tcp_dport = (__u16)port;
180+
```
181+
182+
## Files
183+
184+
- `cgroup_guard.h` - Shared data structures
185+
- `cgroup_guard.bpf.c` - eBPF programs (connect4, device, sysctl hooks)
186+
- `cgroup_guard.c` - Userspace loader
187+
- `Makefile` - Build system
188+
189+
## References
190+
191+
- [Kernel docs: libbpf program types](https://docs.kernel.org/bpf/libbpf/program_types.html)
192+
- [eBPF docs: CGROUP_SOCK_ADDR](https://docs.ebpf.io/linux/program-type/BPF_PROG_TYPE_CGROUP_SOCK_ADDR/)
193+
- [eBPF docs: CGROUP_DEVICE](https://docs.ebpf.io/linux/program-type/BPF_PROG_TYPE_CGROUP_DEVICE/)

0 commit comments

Comments
 (0)