xref: /qemu/net/l2tpv3.c (revision 7a4e543de6637cda4dcc4a060b9225a863f7c721)
1  /*
2   * QEMU System Emulator
3   *
4   * Copyright (c) 2003-2008 Fabrice Bellard
5   * Copyright (c) 2012-2014 Cisco Systems
6   *
7   * Permission is hereby granted, free of charge, to any person obtaining a copy
8   * of this software and associated documentation files (the "Software"), to deal
9   * in the Software without restriction, including without limitation the rights
10   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11   * copies of the Software, and to permit persons to whom the Software is
12   * furnished to do so, subject to the following conditions:
13   *
14   * The above copyright notice and this permission notice shall be included in
15   * all copies or substantial portions of the Software.
16   *
17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23   * THE SOFTWARE.
24   */
25  
26  #include "qemu/osdep.h"
27  #include <linux/ip.h>
28  #include <netdb.h>
29  #include "net/net.h"
30  #include "clients.h"
31  #include "qemu-common.h"
32  #include "qemu/error-report.h"
33  #include "qemu/option.h"
34  #include "qemu/sockets.h"
35  #include "qemu/iov.h"
36  #include "qemu/main-loop.h"
37  
38  
/* The buffer size needs to be investigated for optimum numbers and
 * optimum means of paging in on different systems. This size is
 * chosen to be sufficient to accommodate one packet with some headers
 */

/* Alignment requested for each receive payload buffer */
#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
/* Size in bytes of each receive payload buffer */
#define BUFFER_SIZE 2048
/* iovec entries per received message: one for the header, one for payload */
#define IOVSIZE 2
/* Number of message slots in the receive vector / ring */
#define MAX_L2TPV3_MSGCNT 64
/* Number of entries in the transmit iovec array (header + guest iovecs) */
#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)

/* Header set to 0x30000 signifies a data packet */

#define L2TPV3_DATA_PACKET 0x30000

/* IANA-assigned IP protocol ID for L2TPv3 */

#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP 0x73
#endif
59  
/*
 * Per-backend state of an l2tpv3 network client.
 *
 * The embedded NetClientState must stay the first member: the rest of
 * the file converts between the two with DO_UPCAST().
 */
typedef struct NetL2TPV3State {
    NetClientState nc;
    int fd;                     /* socket used for sendmsg()/recvmmsg() */

    /*
     * these are used for xmit - that happens a packet at a time
     * and for first sign of life packet (easier to parse that once)
     */

    uint8_t *header_buf;        /* scratch buffer the tx header is built in */
    struct iovec *vec;          /* tx iovec: [0] header, [1..] payload */

    /*
     * these are used for receive - try to "eat" up to MAX_L2TPV3_MSGCNT
     * packets at a time
     */

    struct mmsghdr *msgvec;

    /*
     * peer address
     */

    struct sockaddr_storage *dgram_dst;
    uint32_t dst_size;          /* length of the address in dgram_dst */

    /*
     * L2TPv3 parameters
     */

    uint64_t rx_cookie;         /* cookie expected on received packets */
    uint64_t tx_cookie;         /* cookie written into sent packets */
    uint32_t rx_session;        /* session id expected on received packets */
    uint32_t tx_session;        /* session id written into sent packets */
    uint32_t header_size;       /* bytes preceding the payload on receive */
    uint32_t counter;           /* tx counter, incremented unless pinned */

    /*
    * DOS avoidance in error handling
    */

    bool header_mismatch;       /* set once a bad header has been reported */

    /*
     * Ring buffer handling
     */

    int queue_head;             /* slot the next recvmmsg() batch starts at */
    int queue_tail;             /* slot of the next message to deliver */
    int queue_depth;            /* received messages not yet processed */

    /*
     * Precomputed offsets
     */

    uint32_t offset;            /* length of the header sent on transmit */
    uint32_t cookie_offset;     /* byte offset of the cookie in the header */
    uint32_t counter_offset;    /* byte offset of the counter in the header */
    uint32_t session_offset;    /* byte offset of the session id */

    /* Poll Control */

    bool read_poll;             /* read handler currently registered */
    bool write_poll;            /* write handler currently registered */

    /* Flags */

    bool ipv6;                  /* resolve addresses as AF_INET6 */
    bool udp;                   /* UDP encapsulation instead of raw IP */
    bool has_counter;           /* header carries a counter field */
    bool pin_counter;           /* counter field is always sent as zero */
    bool cookie;                /* header carries a cookie */
    bool cookie_is_64;          /* cookie is 64 bits wide, otherwise 32 */

} NetL2TPV3State;
134  
/* Forward declarations: l2tpv3_update_fd_handler() refers to both
 * handlers before their definitions appear further down.
 */
static void net_l2tpv3_send(void *opaque);
static void l2tpv3_writable(void *opaque);
137  
138  static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
139  {
140      qemu_set_fd_handler(s->fd,
141                          s->read_poll ? net_l2tpv3_send : NULL,
142                          s->write_poll ? l2tpv3_writable : NULL,
143                          s);
144  }
145  
146  static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
147  {
148      if (s->read_poll != enable) {
149          s->read_poll = enable;
150          l2tpv3_update_fd_handler(s);
151      }
152  }
153  
154  static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
155  {
156      if (s->write_poll != enable) {
157          s->write_poll = enable;
158          l2tpv3_update_fd_handler(s);
159      }
160  }
161  
162  static void l2tpv3_writable(void *opaque)
163  {
164      NetL2TPV3State *s = opaque;
165      l2tpv3_write_poll(s, false);
166      qemu_flush_queued_packets(&s->nc);
167  }
168  
169  static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
170  {
171      NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
172      l2tpv3_read_poll(s, true);
173  }
174  
175  static void l2tpv3_poll(NetClientState *nc, bool enable)
176  {
177      NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
178      l2tpv3_write_poll(s, enable);
179      l2tpv3_read_poll(s, enable);
180  }
181  
182  static void l2tpv3_form_header(NetL2TPV3State *s)
183  {
184      uint32_t *counter;
185  
186      if (s->udp) {
187          stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
188      }
189      stl_be_p(
190              (uint32_t *) (s->header_buf + s->session_offset),
191              s->tx_session
192          );
193      if (s->cookie) {
194          if (s->cookie_is_64) {
195              stq_be_p(
196                  (uint64_t *)(s->header_buf + s->cookie_offset),
197                  s->tx_cookie
198              );
199          } else {
200              stl_be_p(
201                  (uint32_t *) (s->header_buf + s->cookie_offset),
202                  s->tx_cookie
203              );
204          }
205      }
206      if (s->has_counter) {
207          counter = (uint32_t *)(s->header_buf + s->counter_offset);
208          if (s->pin_counter) {
209              *counter = 0;
210          } else {
211              stl_be_p(counter, ++s->counter);
212          }
213      }
214  }
215  
216  static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
217                      const struct iovec *iov,
218                      int iovcnt)
219  {
220      NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
221  
222      struct msghdr message;
223      int ret;
224  
225      if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
226          error_report(
227              "iovec too long %d > %d, change l2tpv3.h",
228              iovcnt, MAX_L2TPV3_IOVCNT
229          );
230          return -1;
231      }
232      l2tpv3_form_header(s);
233      memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
234      s->vec->iov_base = s->header_buf;
235      s->vec->iov_len = s->offset;
236      message.msg_name = s->dgram_dst;
237      message.msg_namelen = s->dst_size;
238      message.msg_iov = s->vec;
239      message.msg_iovlen = iovcnt + 1;
240      message.msg_control = NULL;
241      message.msg_controllen = 0;
242      message.msg_flags = 0;
243      do {
244          ret = sendmsg(s->fd, &message, 0);
245      } while ((ret == -1) && (errno == EINTR));
246      if (ret > 0) {
247          ret -= s->offset;
248      } else if (ret == 0) {
249          /* belt and braces - should not occur on DGRAM
250          * we should get an error and never a 0 send
251          */
252          ret = iov_size(iov, iovcnt);
253      } else {
254          /* signal upper layer that socket buffer is full */
255          ret = -errno;
256          if (ret == -EAGAIN || ret == -ENOBUFS) {
257              l2tpv3_write_poll(s, true);
258              ret = 0;
259          }
260      }
261      return ret;
262  }
263  
264  static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
265                      const uint8_t *buf,
266                      size_t size)
267  {
268      NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
269  
270      struct iovec *vec;
271      struct msghdr message;
272      ssize_t ret = 0;
273  
274      l2tpv3_form_header(s);
275      vec = s->vec;
276      vec->iov_base = s->header_buf;
277      vec->iov_len = s->offset;
278      vec++;
279      vec->iov_base = (void *) buf;
280      vec->iov_len = size;
281      message.msg_name = s->dgram_dst;
282      message.msg_namelen = s->dst_size;
283      message.msg_iov = s->vec;
284      message.msg_iovlen = 2;
285      message.msg_control = NULL;
286      message.msg_controllen = 0;
287      message.msg_flags = 0;
288      do {
289          ret = sendmsg(s->fd, &message, 0);
290      } while ((ret == -1) && (errno == EINTR));
291      if (ret > 0) {
292          ret -= s->offset;
293      } else if (ret == 0) {
294          /* belt and braces - should not occur on DGRAM
295          * we should get an error and never a 0 send
296          */
297          ret = size;
298      } else {
299          ret = -errno;
300          if (ret == -EAGAIN || ret == -ENOBUFS) {
301              /* signal upper layer that socket buffer is full */
302              l2tpv3_write_poll(s, true);
303              ret = 0;
304          }
305      }
306      return ret;
307  }
308  
309  static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
310  {
311  
312      uint32_t *session;
313      uint64_t cookie;
314  
315      if ((!s->udp) && (!s->ipv6)) {
316          buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
317      }
318  
319      /* we do not do a strict check for "data" packets as per
320      * the RFC spec because the pure IP spec does not have
321      * that anyway.
322      */
323  
324      if (s->cookie) {
325          if (s->cookie_is_64) {
326              cookie = ldq_be_p(buf + s->cookie_offset);
327          } else {
328              cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
329          }
330          if (cookie != s->rx_cookie) {
331              if (!s->header_mismatch) {
332                  error_report("unknown cookie id");
333              }
334              return -1;
335          }
336      }
337      session = (uint32_t *) (buf + s->session_offset);
338      if (ldl_be_p(session) != s->rx_session) {
339          if (!s->header_mismatch) {
340              error_report("session mismatch");
341          }
342          return -1;
343      }
344      return 0;
345  }
346  
347  static void net_l2tpv3_process_queue(NetL2TPV3State *s)
348  {
349      int size = 0;
350      struct iovec *vec;
351      bool bad_read;
352      int data_size;
353      struct mmsghdr *msgvec;
354  
355      /* go into ring mode only if there is a "pending" tail */
356      if (s->queue_depth > 0) {
357          do {
358              msgvec = s->msgvec + s->queue_tail;
359              if (msgvec->msg_len > 0) {
360                  data_size = msgvec->msg_len - s->header_size;
361                  vec = msgvec->msg_hdr.msg_iov;
362                  if ((data_size > 0) &&
363                      (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
364                      vec++;
365                      /* Use the legacy delivery for now, we will
366                       * switch to using our own ring as a queueing mechanism
367                       * at a later date
368                       */
369                      size = qemu_send_packet_async(
370                              &s->nc,
371                              vec->iov_base,
372                              data_size,
373                              l2tpv3_send_completed
374                          );
375                      if (size == 0) {
376                          l2tpv3_read_poll(s, false);
377                      }
378                      bad_read = false;
379                  } else {
380                      bad_read = true;
381                      if (!s->header_mismatch) {
382                          /* report error only once */
383                          error_report("l2tpv3 header verification failed");
384                          s->header_mismatch = true;
385                      }
386                  }
387              } else {
388                  bad_read = true;
389              }
390              s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
391              s->queue_depth--;
392          } while (
393                  (s->queue_depth > 0) &&
394                   qemu_can_send_packet(&s->nc) &&
395                  ((size > 0) || bad_read)
396              );
397      }
398  }
399  
400  static void net_l2tpv3_send(void *opaque)
401  {
402      NetL2TPV3State *s = opaque;
403      int target_count, count;
404      struct mmsghdr *msgvec;
405  
406      /* go into ring mode only if there is a "pending" tail */
407  
408      if (s->queue_depth) {
409  
410          /* The ring buffer we use has variable intake
411           * count of how much we can read varies - adjust accordingly
412           */
413  
414          target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
415  
416          /* Ensure we do not overrun the ring when we have
417           * a lot of enqueued packets
418           */
419  
420          if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
421              target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
422          }
423      } else {
424  
425          /* we do not have any pending packets - we can use
426          * the whole message vector linearly instead of using
427          * it as a ring
428          */
429  
430          s->queue_head = 0;
431          s->queue_tail = 0;
432          target_count = MAX_L2TPV3_MSGCNT;
433      }
434  
435      msgvec = s->msgvec + s->queue_head;
436      if (target_count > 0) {
437          do {
438              count = recvmmsg(
439                  s->fd,
440                  msgvec,
441                  target_count, MSG_DONTWAIT, NULL);
442          } while ((count == -1) && (errno == EINTR));
443          if (count < 0) {
444              /* Recv error - we still need to flush packets here,
445               * (re)set queue head to current position
446               */
447              count = 0;
448          }
449          s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
450          s->queue_depth += count;
451      }
452      net_l2tpv3_process_queue(s);
453  }
454  
455  static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
456  {
457      int i, j;
458      struct iovec *iov;
459      struct mmsghdr *cleanup = msgvec;
460      if (cleanup) {
461          for (i = 0; i < count; i++) {
462              if (cleanup->msg_hdr.msg_iov) {
463                  iov = cleanup->msg_hdr.msg_iov;
464                  for (j = 0; j < iovcount; j++) {
465                      g_free(iov->iov_base);
466                      iov++;
467                  }
468                  g_free(cleanup->msg_hdr.msg_iov);
469              }
470              cleanup++;
471          }
472          g_free(msgvec);
473      }
474  }
475  
476  static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
477  {
478      int i;
479      struct iovec *iov;
480      struct mmsghdr *msgvec, *result;
481  
482      msgvec = g_new(struct mmsghdr, count);
483      result = msgvec;
484      for (i = 0; i < count ; i++) {
485          msgvec->msg_hdr.msg_name = NULL;
486          msgvec->msg_hdr.msg_namelen = 0;
487          iov =  g_new(struct iovec, IOVSIZE);
488          msgvec->msg_hdr.msg_iov = iov;
489          iov->iov_base = g_malloc(s->header_size);
490          iov->iov_len = s->header_size;
491          iov++ ;
492          iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
493          iov->iov_len = BUFFER_SIZE;
494          msgvec->msg_hdr.msg_iovlen = 2;
495          msgvec->msg_hdr.msg_control = NULL;
496          msgvec->msg_hdr.msg_controllen = 0;
497          msgvec->msg_hdr.msg_flags = 0;
498          msgvec++;
499      }
500      return result;
501  }
502  
503  static void net_l2tpv3_cleanup(NetClientState *nc)
504  {
505      NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
506      qemu_purge_queued_packets(nc);
507      l2tpv3_read_poll(s, false);
508      l2tpv3_write_poll(s, false);
509      if (s->fd >= 0) {
510          close(s->fd);
511      }
512      destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
513      g_free(s->vec);
514      g_free(s->header_buf);
515      g_free(s->dgram_dst);
516  }
517  
518  static NetClientInfo net_l2tpv3_info = {
519      .type = NET_CLIENT_OPTIONS_KIND_L2TPV3,
520      .size = sizeof(NetL2TPV3State),
521      .receive = net_l2tpv3_receive_dgram,
522      .receive_iov = net_l2tpv3_receive_dgram_iov,
523      .poll = l2tpv3_poll,
524      .cleanup = net_l2tpv3_cleanup,
525  };
526  
527  int net_init_l2tpv3(const NetClientOptions *opts,
528                      const char *name,
529                      NetClientState *peer, Error **errp)
530  {
531      /* FIXME error_setg(errp, ...) on failure */
532      const NetdevL2TPv3Options *l2tpv3;
533      NetL2TPV3State *s;
534      NetClientState *nc;
535      int fd = -1, gairet;
536      struct addrinfo hints;
537      struct addrinfo *result = NULL;
538      char *srcport, *dstport;
539  
540      nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
541  
542      s = DO_UPCAST(NetL2TPV3State, nc, nc);
543  
544      s->queue_head = 0;
545      s->queue_tail = 0;
546      s->header_mismatch = false;
547  
548      assert(opts->type == NET_CLIENT_OPTIONS_KIND_L2TPV3);
549      l2tpv3 = opts->u.l2tpv3;
550  
551      if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
552          s->ipv6 = l2tpv3->ipv6;
553      } else {
554          s->ipv6 = false;
555      }
556  
557      if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
558          error_report("l2tpv3_open : offset must be less than 256 bytes");
559          goto outerr;
560      }
561  
562      if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
563          if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
564              s->cookie = true;
565          } else {
566              goto outerr;
567          }
568      } else {
569          s->cookie = false;
570      }
571  
572      if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
573          s->cookie_is_64  = true;
574      } else {
575          s->cookie_is_64  = false;
576      }
577  
578      if (l2tpv3->has_udp && l2tpv3->udp) {
579          s->udp = true;
580          if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
581              error_report("l2tpv3_open : need both src and dst port for udp");
582              goto outerr;
583          } else {
584              srcport = l2tpv3->srcport;
585              dstport = l2tpv3->dstport;
586          }
587      } else {
588          s->udp = false;
589          srcport = NULL;
590          dstport = NULL;
591      }
592  
593  
594      s->offset = 4;
595      s->session_offset = 0;
596      s->cookie_offset = 4;
597      s->counter_offset = 4;
598  
599      s->tx_session = l2tpv3->txsession;
600      if (l2tpv3->has_rxsession) {
601          s->rx_session = l2tpv3->rxsession;
602      } else {
603          s->rx_session = s->tx_session;
604      }
605  
606      if (s->cookie) {
607          s->rx_cookie = l2tpv3->rxcookie;
608          s->tx_cookie = l2tpv3->txcookie;
609          if (s->cookie_is_64 == true) {
610              /* 64 bit cookie */
611              s->offset += 8;
612              s->counter_offset += 8;
613          } else {
614              /* 32 bit cookie */
615              s->offset += 4;
616              s->counter_offset += 4;
617          }
618      }
619  
620      memset(&hints, 0, sizeof(hints));
621  
622      if (s->ipv6) {
623          hints.ai_family = AF_INET6;
624      } else {
625          hints.ai_family = AF_INET;
626      }
627      if (s->udp) {
628          hints.ai_socktype = SOCK_DGRAM;
629          hints.ai_protocol = 0;
630          s->offset += 4;
631          s->counter_offset += 4;
632          s->session_offset += 4;
633          s->cookie_offset += 4;
634      } else {
635          hints.ai_socktype = SOCK_RAW;
636          hints.ai_protocol = IPPROTO_L2TP;
637      }
638  
639      gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
640  
641      if ((gairet != 0) || (result == NULL)) {
642          error_report(
643              "l2tpv3_open : could not resolve src, errno = %s",
644              gai_strerror(gairet)
645          );
646          goto outerr;
647      }
648      fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
649      if (fd == -1) {
650          fd = -errno;
651          error_report("l2tpv3_open : socket creation failed, errno = %d", -fd);
652          goto outerr;
653      }
654      if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
655          error_report("l2tpv3_open :  could not bind socket err=%i", errno);
656          goto outerr;
657      }
658      if (result) {
659          freeaddrinfo(result);
660      }
661  
662      memset(&hints, 0, sizeof(hints));
663  
664      if (s->ipv6) {
665          hints.ai_family = AF_INET6;
666      } else {
667          hints.ai_family = AF_INET;
668      }
669      if (s->udp) {
670          hints.ai_socktype = SOCK_DGRAM;
671          hints.ai_protocol = 0;
672      } else {
673          hints.ai_socktype = SOCK_RAW;
674          hints.ai_protocol = IPPROTO_L2TP;
675      }
676  
677      result = NULL;
678      gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
679      if ((gairet != 0) || (result == NULL)) {
680          error_report(
681              "l2tpv3_open : could not resolve dst, error = %s",
682              gai_strerror(gairet)
683          );
684          goto outerr;
685      }
686  
687      s->dgram_dst = g_new0(struct sockaddr_storage, 1);
688      memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
689      s->dst_size = result->ai_addrlen;
690  
691      if (result) {
692          freeaddrinfo(result);
693      }
694  
695      if (l2tpv3->has_counter && l2tpv3->counter) {
696          s->has_counter = true;
697          s->offset += 4;
698      } else {
699          s->has_counter = false;
700      }
701  
702      if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
703          s->has_counter = true;  /* pin counter implies that there is counter */
704          s->pin_counter = true;
705      } else {
706          s->pin_counter = false;
707      }
708  
709      if (l2tpv3->has_offset) {
710          /* extra offset */
711          s->offset += l2tpv3->offset;
712      }
713  
714      if ((s->ipv6) || (s->udp)) {
715          s->header_size = s->offset;
716      } else {
717          s->header_size = s->offset + sizeof(struct iphdr);
718      }
719  
720      s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
721      s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
722      s->header_buf = g_malloc(s->header_size);
723  
724      qemu_set_nonblock(fd);
725  
726      s->fd = fd;
727      s->counter = 0;
728  
729      l2tpv3_read_poll(s, true);
730  
731      snprintf(s->nc.info_str, sizeof(s->nc.info_str),
732               "l2tpv3: connected");
733      return 0;
734  outerr:
735      qemu_del_net_client(nc);
736      if (fd >= 0) {
737          close(fd);
738      }
739      if (result) {
740          freeaddrinfo(result);
741      }
742      return -1;
743  }
744  
745