环境
Linux版本:Linux 5.4.0-4-amd64 Debian 5.4.19-1 x86_64 GNU/Linux
网卡(NIC)的 scatter-gather(分散/聚集)设置:
scatter-gather: on
tx-scatter-gather: on
tx-scatter-gather-fraglist: off [fixed]
输出
sock_extended_err 的 ee_code 字段被设置为 SO_EE_CODE_ZEROCOPY_COPIED。根据 Linux 内核文档,当设备不支持分散/聚集(scatter-gather)I/O 时会返回此代码,但如上所示,我的网卡支持并且已经启用了 scatter-gather I/O。
所链接的文档给出了 SO_EE_CODE_ZEROCOPY_COPIED 的官方解释;Linux 从 5.0 版本起支持 UDP 的 MSG_ZEROCOPY。
那么,还有其他原因吗?或者我的代码错了?
代码
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <linux/rds.h>
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5
#endif
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED 1
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif
/* Size in bytes of the test payload. Parenthesized so the macro expands
 * as a single value inside larger expressions (e.g. `x % TESTSIZE`).
 */
#define TESTSIZE (16 * 1024)
/* Buffer handed to sendmsg(); filled with a repeating a..z pattern by do_test(). */
static char payload[TESTSIZE];
/* Counters: messages sent, bytes sent, completion ids received, and
 * zerocopy sends for which a completion notification is expected.
 */
static long packets, bytes, completions, expected_completions;
/* -1 until the first completion arrives; afterwards 1 if the last
 * notification did not carry the COPIED code, 0 if it did.
 */
static int zerocopied = -1;
/* Lowest notification id expected in the next completion (used for gap detection). */
static uint32_t next_completion;
/*
 * Set an integer-valued socket option and terminate the program if the
 * kernel rejects it.
 *
 * fd      - socket to configure
 * level   - option level passed to setsockopt()
 * optname - option name passed to setsockopt()
 * val     - integer option value
 *
 * Logs one line to stdout when SO_ZEROCOPY is being set. Does not return
 * on failure: error() is called with exit status 1.
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
	if (optname == SO_ZEROCOPY)
		printf("set so_zerocopy\n");

	if (setsockopt(fd, level, optname, &val, sizeof(val)))
		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
/*
 * Send one message without blocking, optionally requesting MSG_ZEROCOPY.
 *
 * fd          - socket to send on
 * msg         - fully prepared message header (iovec already filled in)
 * do_zerocopy - when true, MSG_ZEROCOPY is added to the send flags
 *
 * Returns false if the send would block (EAGAIN), true otherwise.
 * Any other sendmsg() failure terminates the program via error().
 *
 * For a non-empty message the global counters are updated: packets and
 * bytes always, expected_completions only for a zerocopy send that
 * transmitted at least one byte.
 */
static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
{
	size_t len = 0;	/* total payload length across all iovec entries */
	size_t i;
	ssize_t ret;
	int flags = MSG_DONTWAIT;

	for (i = 0; i < msg->msg_iovlen; i++)
		len += msg->msg_iov[i].iov_len;

	if (do_zerocopy) {
		printf("set msg_zerocopy\n");
		flags |= MSG_ZEROCOPY;
	}

	ret = sendmsg(fd, msg, flags);
	if (ret == -1 && errno == EAGAIN)
		return false;
	if (ret == -1)
		error(1, errno, "send");

	if (len) {
		packets++;
		bytes += ret;
		if (do_zerocopy && ret)
			expected_completions++;
	}

	return true;
}
/*
 * Create the transmit socket and switch on SO_ZEROCOPY for it.
 *
 * domain, type, protocol - forwarded unchanged to socket()
 *
 * Returns the new socket descriptor. The program is terminated via
 * error() if the socket cannot be created; do_setsockopt() terminates it
 * if the option cannot be set.
 */
static int do_setup_tx(int domain, int type, int protocol)
{
	const int sock = socket(domain, type, protocol);

	if (sock == -1) {
		error(1, errno, "socket t");
	}

	do_setsockopt(sock, SOL_SOCKET, SO_ZEROCOPY, 1);

	return sock;
}
/*
 * Read one zerocopy completion notification from the socket error queue.
 *
 * fd - socket on which MSG_ZEROCOPY sends were issued
 *
 * Returns false if the error queue is currently empty (EAGAIN), true
 * after one notification has been consumed. Any malformed or unexpected
 * notification terminates the program via error().
 *
 * Side effects on success: advances next_completion, adds the size of the
 * notified id range to completions, records in zerocopied whether the
 * kernel reported the data as copied, and prints a status line.
 */
static bool do_recv_completion(int fd)
{
	struct sock_extended_err *serr;
	struct msghdr msg = {0};
	struct cmsghdr *cm;
	uint32_t hi, lo, range;
	int ret, zerocopy;
	/* The union forces cmsghdr alignment on the control buffer. */
	union {
		char buf[100];
		struct cmsghdr align;
	} control;

	msg.msg_control = control.buf;
	msg.msg_controllen = sizeof(control.buf);

	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
	if (ret == -1 && errno == EAGAIN)
		return false;
	if (ret == -1)
		error(1, errno, "recvmsg notification");
	/* recvmsg() succeeded here, so errno carries no information. */
	if (msg.msg_flags & MSG_CTRUNC)
		error(1, 0, "recvmsg notification: truncated");

	cm = CMSG_FIRSTHDR(&msg);
	if (!cm)
		error(1, 0, "cmsg: no cmsg");
	if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
	      (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
	      (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
		error(1, 0, "serr: wrong type: %d.%d",
		      cm->cmsg_level, cm->cmsg_type);
	/* Make sure the payload is large enough before reading it. */
	if (cm->cmsg_len < CMSG_LEN(sizeof(*serr)))
		error(1, 0, "serr: short cmsg: %zu", (size_t)cm->cmsg_len);

	serr = (void *) CMSG_DATA(cm);
	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
	if (serr->ee_errno != 0)
		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);

	/* ee_info..ee_data is the inclusive range of completed send ids. */
	hi = serr->ee_data;
	lo = serr->ee_info;
	range = hi - lo + 1;

	/* Detect notification gaps. These should not happen often, if at all.
	 * Gaps can occur due to drops, reordering and retransmissions.
	 */
	if (lo != next_completion)
		fprintf(stderr, "gap: %u..%u does not append to %u\n",
			lo, hi, next_completion);
	next_completion = hi + 1;

	/* One bit test drives both the status line and the recorded state. */
	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
	if (!zerocopy)
		printf("zerocopy is not valid, but why? It is rediculous!\n");
	else
		printf("zerocopy is available\n");

	if (zerocopied == -1) {
		zerocopied = zerocopy;
	} else if (zerocopied != zerocopy) {
		fprintf(stderr, "serr: inconsistent\n");
		zerocopied = zerocopy;
	}

	completions += range;
	return true;
}
/*
 * Send a single zerocopy datagram and wait for its completion notification.
 *
 * domain, type, protocol - forwarded to do_setup_tx() to create the socket
 *
 * The socket is connected to the hard-coded IPv4 destination
 * 114.114.114.114:5000, the global payload buffer is sent once with
 * MSG_ZEROCOPY, and the function then waits until one notification has
 * been read from the error queue. A summary line is written to stderr.
 * Any setup, send, poll or close failure terminates the program via error().
 */
static void do_tx(int domain, int type, int protocol)
{
	struct sockaddr_in serv_addr;
	struct iovec iov = {0};
	struct msghdr msg = {0};
	struct pollfd pfd;
	int fd;

	fd = do_setup_tx(domain, type, protocol);

	/* Zero the address; the original filled it with the character '0'. */
	memset(&serv_addr, 0, sizeof(serv_addr));
	serv_addr.sin_family = AF_INET;
	serv_addr.sin_port = htons(5000);
	if (inet_pton(AF_INET, "114.114.114.114", &serv_addr.sin_addr) != 1)
		error(1, 0, "inet_pton");
	if (connect(fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)))
		error(1, errno, "connect");

	iov.iov_base = payload;
	iov.iov_len = sizeof(payload);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	/* If nothing was queued, no completion will ever arrive. */
	if (!do_sendmsg(fd, &msg, true))
		error(1, 0, "send: would block, nothing was sent");

	/* Sleep in poll() between attempts instead of spinning; POLLERR is
	 * reported even with an empty event mask.
	 */
	pfd.fd = fd;
	pfd.events = 0;
	while (!do_recv_completion(fd)) {
		pfd.revents = 0;
		if (poll(&pfd, 1, -1) == -1 && errno != EINTR)
			error(1, errno, "poll");
	}

	sleep(1);

	if (close(fd))
		error(1, errno, "close");

	fprintf(stderr, "tx=%ld (%ld B) txc=%ld zc=%c\n",
		packets, bytes, completions,
		zerocopied == 1 ? 'y' : 'n');
}
/*
 * Fill the global payload with a repeating 'a'..'z' pattern, then run the
 * transmit test on a socket created with the given parameters.
 *
 * domain, type, protocol - forwarded unchanged to do_tx()
 */
static void do_test(int domain, int type, int protocol)
{
	char *cursor = payload;
	int offset = 0;

	while (offset < TESTSIZE) {
		*cursor++ = 'a' + (offset % 26);
		offset++;
	}

	do_tx(domain, type, protocol);
}
/*
 * Program entry point: runs the zerocopy transmit test once over an
 * IPv4 datagram socket (AF_INET, SOCK_DGRAM, default protocol).
 *
 * Returns 0 when do_test() returns.
 */
int main(void)
{
	do_test(AF_INET, SOCK_DGRAM, 0);
	return 0;
}
在跟踪内核调用栈后,我发现是 skb_copy_ubufs 导致了这个结果,而它是由 dev_queue_xmit_nit 调用的。这意味着只要有网络抓包/监听程序(packet tap)正在使用,MSG_ZEROCOPY 的通知就会返回 SO_EE_CODE_ZEROCOPY_COPIED。在我的环境中,这些程序是 dhclient 和 lldpd.service。把它们杀掉之后,这个返回码就不再出现了。