// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <assert.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
#include <net/if.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <net/ethernet.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <locale.h>
#include <sys/types.h>
#include <poll.h>

#include "bpf_load.h"
#include "bpf_util.h"
#include <bpf/bpf.h>

#include "xdpsock.h"

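/* Fallback definitions for the AF_XDP address family and socket option
 * level, for builds against libc headers that predate AF_XDP. The values
 * match the kernel's include/linux/socket.h.
 */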
#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

#define NUM_FRAMES 131072
#define FRAME_HEADROOM 0
#define FRAME_SIZE 2048
#define NUM_DESCS 1024
#define BATCH_SIZE 16

#define FQ_NUM_DESCS 1024
#define CQ_NUM_DESCS 1024

#define DEBUG_HEXDUMP 0

typedef __u32 u32;

static unsigned long prev_time;

enum benchmark_type {
	BENCH_RXDROP = 0,
	BENCH_TXONLY = 1,
	BENCH_L2FWD = 2,
};

static enum benchmark_type opt_bench = BENCH_RXDROP;
static u32 opt_xdp_flags;
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
static int opt_poll;
static int opt_shared_packet_buffer;
static int opt_interval = 1;

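/* Userspace view of a single-producer/single-consumer UMEM ring (fill or
 * completion). cached_prod and cached_cons mirror the producer/consumer
 * indices in the shared ring, so the hot path can batch its reloads of the
 * kernel-written cache line instead of re-reading it for every descriptor.
 */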
struct xdp_umem_uqueue {
	u32 cached_prod;
	u32 cached_cons;
	u32 mask;
	u32 size;
	struct xdp_umem_ring *ring;
};

struct xdp_umem {
	char (*frames)[FRAME_SIZE];
	struct xdp_umem_uqueue fq;
	struct xdp_umem_uqueue cq;
	int fd;
};

struct xdp_uqueue {
	u32 cached_prod;
	u32 cached_cons;
	u32 mask;
	u32 size;
	struct xdp_rxtx_ring *ring;
};

struct xdpsock {
	struct xdp_uqueue rx;
	struct xdp_uqueue tx;
	int sfd;
	struct xdp_umem *umem;
	u32 outstanding_tx;
	unsigned long rx_npkts;
	unsigned long tx_npkts;
	unsigned long prev_rx_npkts;
	unsigned long prev_tx_npkts;
};

#define MAX_SOCKS 4
static int num_socks;
struct xdpsock *xsks[MAX_SOCKS];

static unsigned long get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000UL + ts.tv_nsec;
}

static void dump_stats(void);

#define lassert(expr)							\
	do {								\
		if (!(expr)) {						\
			fprintf(stderr, "%s:%s:%i: Assertion failed: "	\
				#expr ": errno: %d/\"%s\"\n",		\
				__FILE__, __func__, __LINE__,		\
				errno, strerror(errno));		\
			dump_stats();					\
			exit(EXIT_FAILURE);				\
		}							\
	} while (0)

#define barrier() __asm__ __volatile__("": : :"memory")
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

static const char pkt_data[] =
	"\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
	"\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
	"\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
	"\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";

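/* Number of slots the producer may still fill in a UMEM ring. The indices
 * are free-running u32s, so "prod - cons" is the number of in-flight
 * entries even across wraparound; the shared consumer index is only
 * re-read when the cached view does not show enough room.
 */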
static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
{
	u32 free_entries = q->size - (q->cached_prod - q->cached_cons);

	if (free_entries >= nb)
		return free_entries;

	/* Refresh the local tail pointer */
	q->cached_cons = q->ring->ptrs.consumer;

	return q->size - (q->cached_prod - q->cached_cons);
}

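/* Same idea for the Rx/Tx rings, with one twist: cached_cons is kept at
 * "consumer + size", so the free count is simply cached_cons - cached_prod
 * and no subtraction from q->size is needed on the fast path.
 */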
static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
{
	u32 free_entries = q->cached_cons - q->cached_prod;

	if (free_entries >= ndescs)
		return free_entries;

	/* Refresh the local tail pointer */
	q->cached_cons = q->ring->ptrs.consumer + q->size;
	return q->cached_cons - q->cached_prod;
}

static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
{
	u32 entries = q->cached_prod - q->cached_cons;

	if (entries == 0) {
		q->cached_prod = q->ring->ptrs.producer;
		entries = q->cached_prod - q->cached_cons;
	}

	return (entries > nb) ? nb : entries;
}

static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
{
	u32 entries = q->cached_prod - q->cached_cons;

	if (entries == 0) {
		q->cached_prod = q->ring->ptrs.producer;
		entries = q->cached_prod - q->cached_cons;
	}

	return (entries > ndescs) ? ndescs : entries;
}

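/* Producer-side ring protocol: reserve slots, write the descriptors, then
 * publish them by bumping the shared producer index. u_smp_wmb() is only a
 * compiler barrier, which is sufficient on strongly ordered (x86) CPUs to
 * keep the kernel from seeing the producer bump before the descriptors.
 */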
static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
					 struct xdp_desc *d,
					 size_t nb)
{
	u32 i;

	if (umem_nb_free(fq, nb) < nb)
		return -ENOSPC;

	for (i = 0; i < nb; i++) {
		u32 idx = fq->cached_prod++ & fq->mask;

		fq->ring->desc[idx] = d[i].idx;
	}

	u_smp_wmb();

	fq->ring->ptrs.producer = fq->cached_prod;

	return 0;
}

static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
				      size_t nb)
{
	u32 i;

	if (umem_nb_free(fq, nb) < nb)
		return -ENOSPC;

	for (i = 0; i < nb; i++) {
		u32 idx = fq->cached_prod++ & fq->mask;

		fq->ring->desc[idx] = d[i];
	}

	u_smp_wmb();

	fq->ring->ptrs.producer = fq->cached_prod;

	return 0;
}

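/* Consumer-side ring protocol: umem_nb_avail() refreshes the cached copy of
 * the shared producer index, u_smp_rmb() orders that load before the
 * descriptor reads, and the shared consumer index is advanced only after
 * the entries have been copied out.
 */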
static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
					       u32 *d, size_t nb)
{
	u32 idx, i, entries = umem_nb_avail(cq, nb);

	u_smp_rmb();

	for (i = 0; i < entries; i++) {
		idx = cq->cached_cons++ & cq->mask;
		d[i] = cq->ring->desc[idx];
	}

	if (entries > 0) {
		u_smp_wmb();

		cq->ring->ptrs.consumer = cq->cached_cons;
	}

	return entries;
}

static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off)
{
	lassert(idx < NUM_FRAMES);
	return &xsk->umem->frames[idx][off];
}

static inline int xq_enq(struct xdp_uqueue *uq,
			 const struct xdp_desc *descs,
			 unsigned int ndescs)
{
	struct xdp_rxtx_ring *r = uq->ring;
	unsigned int i;

	if (xq_nb_free(uq, ndescs) < ndescs)
		return -ENOSPC;

	for (i = 0; i < ndescs; i++) {
		u32 idx = uq->cached_prod++ & uq->mask;

		r->desc[idx].idx = descs[i].idx;
		r->desc[idx].len = descs[i].len;
		r->desc[idx].offset = descs[i].offset;
	}

	u_smp_wmb();

	r->ptrs.producer = uq->cached_prod;
	return 0;
}

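/* Tx-only fast path: post ndescs descriptors pointing at consecutive UMEM
 * frames starting at frame number idx, each carrying the canned test
 * packet. The ring slot (ridx below) and the frame number (idx + i) live
 * in different index spaces and must not be conflated.
 */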
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
				 __u32 idx, unsigned int ndescs)
{
	struct xdp_rxtx_ring *q = uq->ring;
	unsigned int i;

	if (xq_nb_free(uq, ndescs) < ndescs)
		return -ENOSPC;

	for (i = 0; i < ndescs; i++) {
		u32 ridx = uq->cached_prod++ & uq->mask;

		q->desc[ridx].idx = idx + i;
		q->desc[ridx].len = sizeof(pkt_data) - 1;
		q->desc[ridx].offset = 0;
	}

	u_smp_wmb();

	q->ptrs.producer = uq->cached_prod;
	return 0;
}

static inline int xq_deq(struct xdp_uqueue *uq,
			 struct xdp_desc *descs,
			 int ndescs)
{
	struct xdp_rxtx_ring *r = uq->ring;
	unsigned int idx;
	int i, entries;

	entries = xq_nb_avail(uq, ndescs);

	u_smp_rmb();

	for (i = 0; i < entries; i++) {
		idx = uq->cached_cons++ & uq->mask;
		descs[i] = r->desc[idx];
	}

	if (entries > 0) {
		u_smp_wmb();

		r->ptrs.consumer = uq->cached_cons;
	}

	return entries;
}

static void swap_mac_addresses(void *data)
{
	struct ether_header *eth = (struct ether_header *)data;
	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
	struct ether_addr tmp;

	tmp = *src_addr;
	*src_addr = *dst_addr;
	*dst_addr = tmp;
}

#if DEBUG_HEXDUMP
static void hex_dump(void *pkt, size_t length, const char *prefix)
{
	int i = 0;
	const unsigned char *address = (unsigned char *)pkt;
	const unsigned char *line = address;
	size_t line_size = 32;
	unsigned char c;

	printf("length = %zu\n", length);
	printf("%s | ", prefix);
	while (length-- > 0) {
		printf("%02X ", *address++);
		if (!(++i % line_size) || (length == 0 && i % line_size)) {
			if (length == 0) {
				while (i++ % line_size)
					printf("__ ");
			}
			printf(" | ");	/* right close */
			while (line < address) {
				c = *line++;
				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
			}
			printf("\n");
			if (length > 0)
				printf("%s | ", prefix);
		}
	}
	printf("\n");
}
#endif

static size_t gen_eth_frame(char *frame)
{
	memcpy(frame, pkt_data, sizeof(pkt_data) - 1);
	return sizeof(pkt_data) - 1;
}

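/* UMEM setup: allocate a page-aligned buffer of NUM_FRAMES frames, register
 * it with the socket via XDP_UMEM_REG, size the fill and completion rings,
 * and mmap() both rings at their well-known page offsets. In tx-only mode
 * every frame is pre-populated with the canned test packet.
 */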
static struct xdp_umem *xdp_umem_configure(int sfd)
{
	int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
	struct xdp_umem_reg mr;
	struct xdp_umem *umem;
	void *bufs;

	umem = calloc(1, sizeof(*umem));
	lassert(umem);

	lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
			       NUM_FRAMES * FRAME_SIZE) == 0);

	mr.addr = (__u64)bufs;
	mr.len = NUM_FRAMES * FRAME_SIZE;
	mr.frame_size = FRAME_SIZE;
	mr.frame_headroom = FRAME_HEADROOM;

	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
			   sizeof(int)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
			   sizeof(int)) == 0);

	umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
			     FQ_NUM_DESCS * sizeof(u32),
			     PROT_READ | PROT_WRITE,
			     MAP_SHARED | MAP_POPULATE, sfd,
			     XDP_UMEM_PGOFF_FILL_RING);
	lassert(umem->fq.ring != MAP_FAILED);

	umem->fq.mask = FQ_NUM_DESCS - 1;
	umem->fq.size = FQ_NUM_DESCS;

	umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
			     CQ_NUM_DESCS * sizeof(u32),
			     PROT_READ | PROT_WRITE,
			     MAP_SHARED | MAP_POPULATE, sfd,
			     XDP_UMEM_PGOFF_COMPLETION_RING);
	lassert(umem->cq.ring != MAP_FAILED);

	umem->cq.mask = CQ_NUM_DESCS - 1;
	umem->cq.size = CQ_NUM_DESCS;

	umem->frames = (char (*)[FRAME_SIZE])bufs;
	umem->fd = sfd;

	if (opt_bench == BENCH_TXONLY) {
		int i;

		for (i = 0; i < NUM_FRAMES; i++)
			(void)gen_eth_frame(&umem->frames[i][0]);
	}

	return umem;
}

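/* Socket setup: create an AF_XDP socket, attach a fresh UMEM (or share the
 * one passed in via XDP_SHARED_UMEM), size and mmap() the Rx and Tx rings,
 * and bind the socket to one queue of one interface. For the first
 * (non-shared) socket, half of the Rx descriptors are handed to the fill
 * ring up front so the kernel has somewhere to put incoming packets.
 */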
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
	struct sockaddr_xdp sxdp = {};
	int sfd, ndescs = NUM_DESCS;
	struct xdpsock *xsk;
	bool shared = true;
	u32 i;

	sfd = socket(PF_XDP, SOCK_RAW, 0);
	lassert(sfd >= 0);

	xsk = calloc(1, sizeof(*xsk));
	lassert(xsk);

	xsk->sfd = sfd;
	xsk->outstanding_tx = 0;

	if (!umem) {
		shared = false;
		xsk->umem = xdp_umem_configure(sfd);
	} else {
		xsk->umem = umem;
	}

	lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING,
			   &ndescs, sizeof(int)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
			   &ndescs, sizeof(int)) == 0);

	/* Rx */
	xsk->rx.ring = mmap(NULL,
			    sizeof(struct xdp_ring) +
			    NUM_DESCS * sizeof(struct xdp_desc),
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, sfd,
			    XDP_PGOFF_RX_RING);
	lassert(xsk->rx.ring != MAP_FAILED);

	if (!shared) {
		for (i = 0; i < NUM_DESCS / 2; i++)
			lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
				== 0);
	}

	/* Tx */
	xsk->tx.ring = mmap(NULL,
			    sizeof(struct xdp_ring) +
			    NUM_DESCS * sizeof(struct xdp_desc),
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, sfd,
			    XDP_PGOFF_TX_RING);
	lassert(xsk->tx.ring != MAP_FAILED);

	xsk->rx.mask = NUM_DESCS - 1;
	xsk->rx.size = NUM_DESCS;

	xsk->tx.mask = NUM_DESCS - 1;
	xsk->tx.size = NUM_DESCS;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = opt_ifindex;
	sxdp.sxdp_queue_id = opt_queue;
	if (shared) {
		sxdp.sxdp_flags = XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	}

	lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);

	return xsk;
}

static void print_benchmark(bool running)
{
	const char *bench_str = "INVALID";

	if (opt_bench == BENCH_RXDROP)
		bench_str = "rxdrop";
	else if (opt_bench == BENCH_TXONLY)
		bench_str = "txonly";
	else if (opt_bench == BENCH_L2FWD)
		bench_str = "l2fwd";

	printf("%s:%d %s ", opt_if, opt_queue, bench_str);
	if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
		printf("xdp-skb ");
	else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
		printf("xdp-drv ");
	else
		printf(" ");

	if (opt_poll)
		printf("poll() ");

	if (running) {
		printf("running...");
		fflush(stdout);
	}
}

static void dump_stats(void)
{
	unsigned long now = get_nsecs();
	long dt = now - prev_time;
	int i;

	prev_time = now;

	for (i = 0; i < num_socks; i++) {
		const char *fmt = "%-15s %'-11.0f %'-11lu\n";
		double rx_pps, tx_pps;

		rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
			 1000000000. / dt;
		tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
			 1000000000. / dt;

		printf("\n sock%d@", i);
		print_benchmark(false);
		printf("\n");

		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
		       dt / 1000000000.);
		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);

		xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
		xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
	}
}

static void *poller(void *arg)
{
	(void)arg;
	for (;;) {
		sleep(opt_interval);
		dump_stats();
	}

	return NULL;
}

static void int_exit(int sig)
{
	(void)sig;
	dump_stats();
	bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
	exit(EXIT_SUCCESS);
}

static struct option long_options[] = {
	{"rxdrop", no_argument, 0, 'r'},
	{"txonly", no_argument, 0, 't'},
	{"l2fwd", no_argument, 0, 'l'},
	{"interface", required_argument, 0, 'i'},
	{"queue", required_argument, 0, 'q'},
	{"poll", no_argument, 0, 'p'},
	{"shared-buffer", no_argument, 0, 's'},
	{"xdp-skb", no_argument, 0, 'S'},
	{"xdp-native", no_argument, 0, 'N'},
	{"interval", required_argument, 0, 'n'},
	{0, 0, 0, 0}
};

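/* Example invocations ("eth0" below is a placeholder for a real interface):
 *
 *   ./xdpsock -i eth0 -q 0 -r -N    # drop all Rx on queue 0, native XDP
 *   ./xdpsock -i eth0 -t -S         # packet generator, skb-mode XDP
 *   ./xdpsock -i eth0 -l -p         # MAC-swap forwarder, driven by poll()
 *
 * The program needs root (or the equivalent capabilities), since it loads
 * a BPF program and locks UMEM memory.
 */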
static void usage(const char *prog)
{
	const char *str =
		"  Usage: %s [OPTIONS]\n"
		"  Options:\n"
		"  -r, --rxdrop          Discard all incoming packets (default)\n"
		"  -t, --txonly          Only send packets\n"
		"  -l, --l2fwd           MAC swap L2 forwarding\n"
		"  -i, --interface=n     Run on interface n\n"
		"  -q, --queue=n         Use queue n (default 0)\n"
		"  -p, --poll            Use poll syscall\n"
		"  -s, --shared-buffer   Use shared packet buffer\n"
		"  -S, --xdp-skb         Use XDP skb mode\n"
		"  -N, --xdp-native      Enforce XDP native (driver) mode\n"
		"  -n, --interval=n      Specify statistics update interval (default 1 sec).\n"
		"\n";
	fprintf(stderr, str, prog);
	exit(EXIT_FAILURE);
}

static void parse_command_line(int argc, char **argv)
{
	int option_index, c;

	opterr = 0;

	for (;;) {
		c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options,
				&option_index);
		if (c == -1)
			break;

		switch (c) {
		case 'r':
			opt_bench = BENCH_RXDROP;
			break;
		case 't':
			opt_bench = BENCH_TXONLY;
			break;
		case 'l':
			opt_bench = BENCH_L2FWD;
			break;
		case 'i':
			opt_if = optarg;
			break;
		case 'q':
			opt_queue = atoi(optarg);
			break;
		case 's':
			opt_shared_packet_buffer = 1;
			break;
		case 'p':
			opt_poll = 1;
			break;
		case 'S':
			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
			break;
		case 'N':
			opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
			break;
		case 'n':
			opt_interval = atoi(optarg);
			break;
		default:
			usage(basename(argv[0]));
		}
	}

	opt_ifindex = if_nametoindex(opt_if);
	if (!opt_ifindex) {
		fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
			opt_if);
		usage(basename(argv[0]));
	}
}

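/* A zero-length sendto() is how an AF_XDP socket tells the kernel to start
 * transmitting whatever is queued on the Tx ring. ENOBUFS and EAGAIN mean
 * the kernel is temporarily out of resources; the completion path simply
 * retries on the next pass.
 */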
static void kick_tx(int fd)
{
	int ret;

	ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN)
		return;
	lassert(0);
}

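/* Reap Tx completions. In l2fwd the completed frame indices go straight
 * back onto the fill ring so they can be reused for Rx; in tx-only they
 * are only counted, since the transmit path generates descriptors itself.
 */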
static inline void complete_tx_l2fwd(struct xdpsock *xsk)
{
	u32 descs[BATCH_SIZE];
	unsigned int rcvd;
	size_t ndescs;

	if (!xsk->outstanding_tx)
		return;

	kick_tx(xsk->sfd);
	ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
		 xsk->outstanding_tx;

	/* re-add completed Tx buffers */
	rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs);
	if (rcvd > 0) {
		umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd);
		xsk->outstanding_tx -= rcvd;
		xsk->tx_npkts += rcvd;
	}
}

static inline void complete_tx_only(struct xdpsock *xsk)
{
	u32 descs[BATCH_SIZE];
	unsigned int rcvd;

	if (!xsk->outstanding_tx)
		return;

	kick_tx(xsk->sfd);

	rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE);
	if (rcvd > 0) {
		xsk->outstanding_tx -= rcvd;
		xsk->tx_npkts += rcvd;
	}
}

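/* rxdrop benchmark: dequeue a batch from the Rx ring, count (and optionally
 * hex dump) the packets, then recycle the frames to the fill ring without
 * touching the payload.
 */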
static void rx_drop(struct xdpsock *xsk)
{
	struct xdp_desc descs[BATCH_SIZE];
	unsigned int rcvd, i;

	rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
	if (!rcvd)
		return;

	for (i = 0; i < rcvd; i++) {
		u32 idx = descs[i].idx;

		lassert(idx < NUM_FRAMES);
#if DEBUG_HEXDUMP
		char *pkt;
		char buf[32];

		pkt = xq_get_data(xsk, idx, descs[i].offset);
		sprintf(buf, "idx=%d", idx);
		hex_dump(pkt, descs[i].len, buf);
#endif
	}

	xsk->rx_npkts += rcvd;

	umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
}

static void rx_drop_all(void)
{
	struct pollfd fds[MAX_SOCKS + 1];
	int i, ret, timeout;

	memset(fds, 0, sizeof(fds));

	for (i = 0; i < num_socks; i++) {
		fds[i].fd = xsks[i]->sfd;
		fds[i].events = POLLIN;
	}
	timeout = 1000; /* 1 second */

	for (;;) {
		if (opt_poll) {
			/* wait on all configured sockets, not just the first */
			ret = poll(fds, num_socks, timeout);
			if (ret <= 0)
				continue;
		}

		for (i = 0; i < num_socks; i++)
			rx_drop(xsks[i]);
	}
}

static void tx_only(struct xdpsock *xsk)
{
	int timeout, ret, nfds = 1;
	struct pollfd fds[nfds + 1];
	unsigned int idx = 0;

	memset(fds, 0, sizeof(fds));
	fds[0].fd = xsk->sfd;
	fds[0].events = POLLOUT;
	timeout = 1000; /* 1 second */

	for (;;) {
		if (opt_poll) {
			ret = poll(fds, nfds, timeout);
			if (ret <= 0)
				continue;

			if (fds[0].fd != xsk->sfd ||
			    !(fds[0].revents & POLLOUT))
				continue;
		}

		if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) {
			lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0);

			xsk->outstanding_tx += BATCH_SIZE;
			idx += BATCH_SIZE;
			idx %= NUM_FRAMES;
		}

		complete_tx_only(xsk);
	}
}

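/* l2fwd benchmark: receive a batch, swap source and destination MAC
 * addresses in place, and retransmit the same UMEM frames on the Tx ring.
 * Frames flow Rx ring -> Tx ring -> completion ring -> fill ring, so the
 * loop reaps completions first to keep the buffer pool from draining.
 */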
static void l2fwd(struct xdpsock *xsk)
{
	for (;;) {
		struct xdp_desc descs[BATCH_SIZE];
		unsigned int rcvd, i;
		int ret;

		for (;;) {
			complete_tx_l2fwd(xsk);

			rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
			if (rcvd > 0)
				break;
		}

		for (i = 0; i < rcvd; i++) {
			char *pkt = xq_get_data(xsk, descs[i].idx,
						descs[i].offset);

			swap_mac_addresses(pkt);
#if DEBUG_HEXDUMP
			char buf[32];
			u32 idx = descs[i].idx;

			sprintf(buf, "idx=%d", idx);
			hex_dump(pkt, descs[i].len, buf);
#endif
		}

		xsk->rx_npkts += rcvd;

		ret = xq_enq(&xsk->tx, descs, rcvd);
		lassert(ret == 0);
		xsk->outstanding_tx += rcvd;
	}
}

int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	char xdp_filename[256];
	int i, ret, key = 0;
	pthread_t pt;

	parse_command_line(argc, argv);

	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);

	if (load_bpf_file(xdp_filename)) {
		fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
		exit(EXIT_FAILURE);
	}

	if (!prog_fd[0]) {
		fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
		fprintf(stderr, "ERROR: link set xdp fd failed\n");
		exit(EXIT_FAILURE);
	}

	ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
	if (ret) {
		fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
		exit(EXIT_FAILURE);
	}

	/* Create sockets... */
	xsks[num_socks++] = xsk_configure(NULL);

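	/* RR_LB is not defined by default; building with -DRR_LB=1 creates
	 * MAX_SOCKS sockets sharing the first socket's UMEM, for the
	 * round-robin load-balancing variant of the companion BPF program.
	 */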
#if RR_LB
	for (i = 0; i < MAX_SOCKS - 1; i++)
		xsks[num_socks++] = xsk_configure(xsks[0]->umem);
#endif

	/* ...and insert them into the map. */
	for (i = 0; i < num_socks; i++) {
		key = i;
		ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
		if (ret) {
			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
			exit(EXIT_FAILURE);
		}
	}

	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);
	signal(SIGABRT, int_exit);

	setlocale(LC_ALL, "");

	ret = pthread_create(&pt, NULL, poller, NULL);
	lassert(ret == 0);

	prev_time = get_nsecs();

	if (opt_bench == BENCH_RXDROP)
		rx_drop_all();
	else if (opt_bench == BENCH_TXONLY)
		tx_only(xsks[0]);
	else
		l2fwd(xsks[0]);

	return 0;
}