在dev_queue_xmit中使用ebpf读取sk_buff会产生有问题的数据



我正试图通过在__dev_queue_xmit()中插入kprobe来捕获本地主机上发送的以太网帧。但是,我从sk_buff结构中提取的字节与随后捕获的数据包不匹配。

到目前为止,我只尝试过线性skb,因为我已经在那里得到了意想不到的结果。例如,我的kprobe在呼叫__dev_queue_xmit():时报告了以下信息

COMM            PID      TGID     LEN        DATALEN
chronyd         1058     1058     90         0
3431c4b06a8b3c7c3f2023bd08006500d0a57f040f7f0000000000000000000000000000000000006018d11a0f7f00000100000000000000000000000000000060a67f040f7f0000000000000000000000000000000000004001

COMM是调用函数的进程的名称,PID是调用线程的id,TGID它的线程组id。LEN(skb->len - skb->data_len)的值,DATA_LEN则为skb->data_len

接下来,程序已经复制了从skb->data开始的LEN(在本例中为90(字节。由于DATALEN为零,因此这是一个线性skb。因此,这些字节应该包含即将发送的帧,不是吗?

Wireshark随后录制了这个帧:

0000   34 31 c4 b0 6a 8b 3c 7c 3f 20 23 bd 08 00 45 00
0010   00 4c 83 93 40 00 40 11 d1 a2 c0 a8 b2 18 c0 a8
0020   b2 01 c8 07 00 7b 00 38 e5 b4 23 00 06 20 00 00
0030   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0040   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0050   00 00 38 bc 17 13 12 4a 4c c0

形成以太网头的前14个字节与预期完全匹配。其他一切都不匹配。现在的问题是:为什么字节不匹配

(是的,我确信Wireshark的帧确实是由对__dev_queue_xmit()的调用引起的。这是因为当时只有使用网络的后台程序在运行,所以传出流量相当小。此外,捕获的帧包含90个字节,正如预期的那样。此外,此帧包含NTP有效负载,这正是chronyd所期望的。(。)

我的内核版本是5.12.6-200.fc33.x86_64。

如果你想自己尝试一下,或者仔细看看我的程序,这里是:

from bcc import BPF
from ctypes import cast, POINTER, c_char
prog = """
#include <linux/sched.h>
#include <linux/skbuff.h>
struct xmit_event {
u64 ts;
u32 pid;
u32 tgid;
u32 len;
u32 datalen;
u32 packet_buf_ptr;
char comm[TASK_COMM_LEN];

u64 head;
u64 data;
u64 tail;
u64 end;
};
BPF_PERF_OUTPUT(xmits);
#define PACKET_BUF_SIZE 32768
# define PACKET_BUFS_PER_CPU 15
struct packet_buf {
char data[PACKET_BUF_SIZE];
};
BPF_PERCPU_ARRAY(packet_buf, struct packet_buf, PACKET_BUFS_PER_CPU);
BPF_PERCPU_ARRAY(packet_buf_head, u32, 1);
int kprobe____dev_queue_xmit(struct pt_regs *ctx, struct sk_buff *skb, void *accel_priv) {
if (skb == NULL || skb->data == NULL)
return 0;
struct xmit_event data = { };
u64 both = bpf_get_current_pid_tgid();
data.pid = both;
if (data.pid == 0)
return 0;
data.tgid = both >> 32;
data.ts = bpf_ktime_get_ns();
bpf_get_current_comm(&data.comm, sizeof(data.comm));
data.len = skb->len;

// Copy packet contents
int slot = 0;
u32 *packet_buf_ptr = packet_buf_head.lookup(&slot);
if (packet_buf_ptr == NULL)
return 0;
u32 buf_head = *packet_buf_ptr;
u32 next_buf_head = (buf_head + 1) % PACKET_BUFS_PER_CPU;
packet_buf_head.update(&slot, &next_buf_head);

struct packet_buf *ringbuf = packet_buf.lookup(&buf_head);
if (ringbuf == NULL)
return 0;

u32 skb_data_len = skb->data_len;
u32 headlen = data.len - skb_data_len;
headlen &= 0xffffff; // Useless, but validator demands it because "this unsigned(!) variable could otherwise be negative"
bpf_probe_read_kernel(ringbuf->data, headlen < PACKET_BUF_SIZE ? headlen : PACKET_BUF_SIZE, skb->data);
data.packet_buf_ptr = buf_head;

data.len = headlen;
data.datalen = skb_data_len;

data.head = (u64) skb->head;
data.data = (u64) skb->data;
data.tail = (u64) skb->tail;
data.end = (u64) skb->end;

xmits.perf_submit(ctx, &data, sizeof(data));
return 0;
}
"""
global b
def xmit_received(cpu, data, size):
global b
global py_packet_buf
ev = b["xmits"].event(data)
print("%-18d %-25s %-8d %-8d %-10d %-10d %-12d %-12d %-12d %-12d" % (ev.ts, ev.comm.decode(), ev.pid, ev.tgid, ev.len, ev.datalen, ev.head, ev.data, ev.tail, ev.end))
bs = cast(py_packet_buf[ev.packet_buf_ptr][cpu].data, POINTER(c_char))[:ev.len]
c = bytes(bs)
print(c.hex())

def observe_kernel():
# load BPF program
global b
b = BPF(text=prog)
print("%-18s %-25s %-8s %-8s %-10s %-10s %-12s %-12s %-12s %-12s" % ("TS", "COMM", "PID", "TGID", "LEN", "DATALEN", "HEAD", "DATA", "TAIL", "END"))
b["xmits"].open_perf_buffer(xmit_received)
global py_packet_buf
py_packet_buf = b["packet_buf"]
try:
while True:
b.perf_buffer_poll()
except KeyboardInterrupt:
print("Kernel observer thread stopped.")
observe_kernel()

发现问题。我需要更换

struct packet_buf {
char data[PACKET_BUF_SIZE];
};

带有

struct packet_buf {
unsigned char data[PACKET_BUF_SIZE];
};

I、 然而,我不明白当我不使用这些数据进行比较或算术运算时,有符号性是如何产生影响的。

相关内容

  • 没有找到相关文章

最新更新