13df5d593SAsias He #include "kvm/uip.h"
23df5d593SAsias He
3a4d8c55eSSasha Levin #include <kvm/kvm.h>
43df5d593SAsias He #include <linux/virtio_net.h>
53df5d593SAsias He #include <linux/kernel.h>
63df5d593SAsias He #include <linux/list.h>
7195544b7SAsias He #include <arpa/inet.h>
83df5d593SAsias He
uip_tcp_socket_close(struct uip_tcp_socket * sk,int how)93df5d593SAsias He static int uip_tcp_socket_close(struct uip_tcp_socket *sk, int how)
103df5d593SAsias He {
113df5d593SAsias He shutdown(sk->fd, how);
123df5d593SAsias He
133df5d593SAsias He if (sk->write_done && sk->read_done) {
143df5d593SAsias He shutdown(sk->fd, SHUT_RDWR);
153df5d593SAsias He close(sk->fd);
163df5d593SAsias He
173df5d593SAsias He mutex_lock(sk->lock);
183df5d593SAsias He list_del(&sk->list);
193df5d593SAsias He mutex_unlock(sk->lock);
203df5d593SAsias He
21*d87b503fSJean-Philippe Brucker free(sk->buf);
223df5d593SAsias He free(sk);
233df5d593SAsias He }
243df5d593SAsias He
253df5d593SAsias He return 0;
263df5d593SAsias He }
273df5d593SAsias He
/*
 * Search arg->info's TCP socket list for the socket whose source/destination
 * address and port all match the given values. The list is walked with
 * tcp_socket_lock held; the lock is released before returning.
 *
 * Returns the matching socket, or NULL when there is none.
 */
static struct uip_tcp_socket *uip_tcp_socket_find(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport)
{
	struct uip_info *info = arg->info;
	struct uip_tcp_socket *found = NULL;
	struct uip_tcp_socket *pos;

	mutex_lock(&info->tcp_socket_lock);
	list_for_each_entry(pos, &info->tcp_socket_head, list) {
		if (pos->sip != sip || pos->dip != dip)
			continue;
		if (pos->sport != sport || pos->dport != dport)
			continue;

		found = pos;
		break;
	}
	mutex_unlock(&info->tcp_socket_lock);

	return found;
}
483df5d593SAsias He
/*
 * Create the host-side socket for a new guest TCP connection and append it
 * to arg->info's TCP socket list.
 *
 * The host socket connects to dip:dport; both values are stored into the
 * sockaddr unchanged. When ntohl(dip) equals info->host_ip the connection is
 * redirected to 127.0.0.1. The connection tuple recorded in the socket is
 * taken from the IP/TCP headers of the frame in arg->eth.
 *
 * Returns the new socket, or NULL if the allocation, socket() or connect()
 * fails. On failure nothing stays allocated and no descriptor stays open.
 */
static struct uip_tcp_socket *uip_tcp_socket_alloc(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport)
{
	struct list_head *sk_head;
	struct uip_tcp_socket *sk;
	struct mutex *sk_lock;
	struct uip_tcp *tcp;
	struct uip_ip *ip;
	int ret;

	tcp = (struct uip_tcp *)arg->eth;
	ip = (struct uip_ip *)arg->eth;

	sk_head = &arg->info->tcp_socket_head;
	sk_lock = &arg->info->tcp_socket_lock;

	/* Zeroed allocation: thread, buf and the *_done flags start out as 0 */
	sk = calloc(1, sizeof(*sk));
	if (!sk)
		return NULL;

	sk->lock = sk_lock;
	sk->info = arg->info;

	sk->fd = socket(AF_INET, SOCK_STREAM, 0);
	if (sk->fd < 0)
		goto err_free;

	sk->addr.sin_family = AF_INET;
	sk->addr.sin_port = dport;
	sk->addr.sin_addr.s_addr = dip;

	/* Traffic addressed to the emulated host goes to the local machine */
	if (ntohl(dip) == arg->info->host_ip)
		sk->addr.sin_addr.s_addr = inet_addr("127.0.0.1");

	ret = connect(sk->fd, (struct sockaddr *)&sk->addr, sizeof(sk->addr));
	if (ret)
		goto err_close;

	/*
	 * Nothing uses the condition variable before the socket is on the
	 * list, so it is only set up once the connection is established.
	 */
	pthread_cond_init(&sk->cond, NULL);

	sk->sip = ip->sip;
	sk->dip = ip->dip;
	sk->sport = tcp->sport;
	sk->dport = tcp->dport;

	mutex_lock(sk_lock);
	list_add_tail(&sk->list, sk_head);
	mutex_unlock(sk_lock);

	return sk;

err_close:
	/* Do not leak the descriptor when the connection cannot be made */
	close(sk->fd);
err_free:
	free(sk);
	return NULL;
}
973df5d593SAsias He
98*d87b503fSJean-Philippe Brucker /* Caller holds the sk lock */
uip_tcp_socket_free(struct uip_tcp_socket * sk)99*d87b503fSJean-Philippe Brucker static void uip_tcp_socket_free(struct uip_tcp_socket *sk)
100*d87b503fSJean-Philippe Brucker {
101*d87b503fSJean-Philippe Brucker /*
102*d87b503fSJean-Philippe Brucker * Here we assume that the virtqueues are already inactive so we don't
103*d87b503fSJean-Philippe Brucker * race with uip_tx_do_ipv4_tcp. We are racing with
104*d87b503fSJean-Philippe Brucker * uip_tcp_socket_thread though, but holding the sk lock ensures that it
105*d87b503fSJean-Philippe Brucker * cannot free data concurrently.
106*d87b503fSJean-Philippe Brucker */
107*d87b503fSJean-Philippe Brucker if (sk->thread) {
108*d87b503fSJean-Philippe Brucker pthread_cancel(sk->thread);
109*d87b503fSJean-Philippe Brucker pthread_join(sk->thread, NULL);
110*d87b503fSJean-Philippe Brucker }
111*d87b503fSJean-Philippe Brucker
112*d87b503fSJean-Philippe Brucker sk->write_done = sk->read_done = 1;
113*d87b503fSJean-Philippe Brucker uip_tcp_socket_close(sk, SHUT_RDWR);
114*d87b503fSJean-Philippe Brucker }
115*d87b503fSJean-Philippe Brucker
/*
 * Build one TCP segment addressed to the guest and queue it for delivery.
 *
 * The frame is written into a buffer obtained from uip_buf_get_free(). It
 * carries the given TCP flags and payload_len bytes copied from sk->payload,
 * uses sk->seq_server / sk->ack_server as sequence and ack numbers, and has
 * source and destination swapped relative to the tuple stored in sk.
 * sk->seq_server is advanced by payload_len before the buffer is handed to
 * uip_buf_set_used().
 *
 * Always returns 0.
 */
static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_len)
{
	struct uip_info *info = sk->info;
	struct uip_eth *eth_hdr;
	struct uip_tcp *tcp_hdr;
	struct uip_ip *ip_hdr;
	struct uip_buf *buf;

	/* Get a free buffer to send data to the guest */
	buf = uip_buf_get_free(info);

	/* All three header views start at the beginning of the frame */
	eth_hdr = (struct uip_eth *)buf->eth;
	ip_hdr = (struct uip_ip *)buf->eth;
	tcp_hdr = (struct uip_tcp *)buf->eth;

	/* Ethernet header: host -> guest */
	eth_hdr->src = info->host_mac;
	eth_hdr->dst = info->guest_mac;
	eth_hdr->type = htons(UIP_ETH_P_IP);

	/* IP header, addresses swapped with respect to the guest's view */
	ip_hdr->vhl = UIP_IP_VER_4 | UIP_IP_HDR_LEN;
	ip_hdr->tos = 0;
	ip_hdr->id = 0;
	ip_hdr->flgfrag = 0;
	ip_hdr->ttl = UIP_IP_TTL;
	ip_hdr->proto = UIP_IP_P_TCP;
	ip_hdr->csum = 0;
	ip_hdr->sip = sk->dip;
	ip_hdr->dip = sk->sip;

	/* TCP header, ports swapped likewise */
	tcp_hdr->sport = sk->dport;
	tcp_hdr->dport = sk->sport;
	tcp_hdr->seq = htonl(sk->seq_server);
	tcp_hdr->ack = htonl(sk->ack_server);
	/* Disable TCP options, so the TCP header length equals 20 bytes */
	tcp_hdr->off = UIP_TCP_HDR_LEN;
	tcp_hdr->flg = flag;
	tcp_hdr->win = htons(UIP_TCP_WIN_SIZE);
	tcp_hdr->csum = 0;
	tcp_hdr->urgent = 0;

	if (payload_len > 0)
		memcpy(uip_tcp_payload(tcp_hdr), sk->payload, payload_len);

	/* The total length has to be in place before the checksums are taken */
	ip_hdr->len = htons(uip_tcp_hdrlen(tcp_hdr) + payload_len + uip_ip_hdrlen(ip_hdr));
	ip_hdr->csum = uip_csum_ip(ip_hdr);
	tcp_hdr->csum = uip_csum_tcp(tcp_hdr);

	/* virtio_net_hdr: all zeroes */
	buf->vnet_len = info->vnet_hdr_len;
	memset(buf->vnet, 0, buf->vnet_len);

	buf->eth_len = ntohs(ip_hdr->len) + uip_eth_hdrlen(&ip_hdr->eth);

	/* Increase the server sequence number by what was just sent */
	sk->seq_server += payload_len;

	/* Send the data received from the socket to the guest */
	uip_buf_set_used(info, buf);

	return 0;
}
1923df5d593SAsias He
/*
 * Receive thread for one socket: reads data from sk->fd into sk->buf and
 * forwards it to the guest with uip_tcp_payload_send(). When read() stops
 * returning data, the read side is shut down and a FIN|ACK is sent to the
 * guest. p is the struct uip_tcp_socket this thread serves.
 */
static void *uip_tcp_socket_thread(void *p)
{
	struct uip_tcp_socket *sk;
	int len, left, ret;
	u8 *pos;

	kvm__set_thread_name("uip-tcp");

	sk = p;

	while (1) {
		/* Every read starts filling sk->buf from the beginning */
		pos = sk->buf;

		ret = read(sk->fd, sk->buf, UIP_MAX_TCP_PAYLOAD);

		/* Error, end of stream or an implausible length: stop reading */
		if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD)
			goto out;

		left = ret;

		/* Hand the data to the guest in chunks */
		while (left > 0) {
			/*
			 * len is how far seq_server may still advance beyond
			 * what the guest has acked, given its advertised
			 * window. Sleep on sk->cond until that is positive;
			 * uip_tx_do_ipv4_tcp signals the condition whenever it
			 * updates window_size and guest_acked.
			 */
			mutex_lock(sk->lock);
			while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0)
				pthread_cond_wait(&sk->cond, &sk->lock->mutex);
			mutex_unlock(sk->lock);

			/* The next chunk is capped by len, left and UIP_MAX_TCP_PAYLOAD */
			sk->payload = pos;
			if (len > left)
				len = left;
			if (len > UIP_MAX_TCP_PAYLOAD)
				len = UIP_MAX_TCP_PAYLOAD;
			left -= len;
			pos += len;

			uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, len);
		}
	}

out:
	/*
	 * Close server to guest TCP connection
	 *
	 * NOTE(review): read_done is only set further down, after this call,
	 * so uip_tcp_socket_close() releases sk here only if read_done was
	 * already set elsewhere - confirm that this ordering is intended.
	 */
	uip_tcp_socket_close(sk, SHUT_RD);

	/* The FIN takes up one sequence number */
	uip_tcp_payload_send(sk, UIP_TCP_FLAG_FIN | UIP_TCP_FLAG_ACK, 0);
	sk->seq_server += 1;

	sk->read_done = 1;

	pthread_exit(NULL);

	/* Not reached: pthread_exit() does not return */
	return NULL;
}
2463df5d593SAsias He
uip_tcp_socket_receive(struct uip_tcp_socket * sk)2473df5d593SAsias He static int uip_tcp_socket_receive(struct uip_tcp_socket *sk)
2483df5d593SAsias He {
249*d87b503fSJean-Philippe Brucker int ret;
250*d87b503fSJean-Philippe Brucker
251*d87b503fSJean-Philippe Brucker if (sk->thread == 0) {
252*d87b503fSJean-Philippe Brucker sk->buf = malloc(UIP_MAX_TCP_PAYLOAD);
253*d87b503fSJean-Philippe Brucker if (!sk->buf)
254*d87b503fSJean-Philippe Brucker return -ENOMEM;
255*d87b503fSJean-Philippe Brucker ret = pthread_create(&sk->thread, NULL, uip_tcp_socket_thread,
256*d87b503fSJean-Philippe Brucker (void *)sk);
257*d87b503fSJean-Philippe Brucker if (ret)
258*d87b503fSJean-Philippe Brucker free(sk->buf);
259*d87b503fSJean-Philippe Brucker return ret;
260*d87b503fSJean-Philippe Brucker }
2613df5d593SAsias He
2623df5d593SAsias He return 0;
2633df5d593SAsias He }
2643df5d593SAsias He
uip_tcp_socket_send(struct uip_tcp_socket * sk,struct uip_tcp * tcp)2653df5d593SAsias He static int uip_tcp_socket_send(struct uip_tcp_socket *sk, struct uip_tcp *tcp)
2663df5d593SAsias He {
2673df5d593SAsias He int len;
2683df5d593SAsias He int ret;
2693df5d593SAsias He u8 *payload;
2703df5d593SAsias He
2713df5d593SAsias He if (sk->write_done)
2723df5d593SAsias He return 0;
2733df5d593SAsias He
2743df5d593SAsias He payload = uip_tcp_payload(tcp);
2753df5d593SAsias He len = uip_tcp_payloadlen(tcp);
2763df5d593SAsias He
2773df5d593SAsias He ret = write(sk->fd, payload, len);
2783df5d593SAsias He if (ret != len)
2793df5d593SAsias He pr_warning("tcp send error");
2803df5d593SAsias He
2813df5d593SAsias He return ret;
2823df5d593SAsias He }
2833df5d593SAsias He
uip_tx_do_ipv4_tcp(struct uip_tx_arg * arg)2843df5d593SAsias He int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
2853df5d593SAsias He {
2863df5d593SAsias He struct uip_tcp_socket *sk;
2873df5d593SAsias He struct uip_tcp *tcp;
2883df5d593SAsias He struct uip_ip *ip;
2893df5d593SAsias He int ret;
2903df5d593SAsias He
2913df5d593SAsias He tcp = (struct uip_tcp *)arg->eth;
2923df5d593SAsias He ip = (struct uip_ip *)arg->eth;
2933df5d593SAsias He
2943df5d593SAsias He /*
2953df5d593SAsias He * Guest is trying to start a TCP session, let's fake SYN-ACK to guest
2963df5d593SAsias He */
2973df5d593SAsias He if (uip_tcp_is_syn(tcp)) {
2983df5d593SAsias He sk = uip_tcp_socket_alloc(arg, ip->sip, ip->dip, tcp->sport, tcp->dport);
2993df5d593SAsias He if (!sk)
3003df5d593SAsias He return -1;
3013df5d593SAsias He
3023909f9b5SAsias He sk->window_size = ntohs(tcp->win);
3033909f9b5SAsias He
3043df5d593SAsias He /*
3053df5d593SAsias He * Setup ISN number
3063df5d593SAsias He */
3073df5d593SAsias He sk->isn_guest = uip_tcp_isn(tcp);
3083df5d593SAsias He sk->isn_server = uip_tcp_isn_alloc();
3093df5d593SAsias He
3103df5d593SAsias He sk->seq_server = sk->isn_server;
3113df5d593SAsias He sk->ack_server = sk->isn_guest + 1;
3123df5d593SAsias He uip_tcp_payload_send(sk, UIP_TCP_FLAG_SYN | UIP_TCP_FLAG_ACK, 0);
3133df5d593SAsias He sk->seq_server += 1;
3143df5d593SAsias He
3153df5d593SAsias He /*
3163df5d593SAsias He * Start receive thread for data from remote to guest
3173df5d593SAsias He */
3183df5d593SAsias He uip_tcp_socket_receive(sk);
3193df5d593SAsias He
3203df5d593SAsias He goto out;
3213df5d593SAsias He }
3223df5d593SAsias He
3233df5d593SAsias He /*
3243df5d593SAsias He * Find socket we have allocated
3253df5d593SAsias He */
3263df5d593SAsias He sk = uip_tcp_socket_find(arg, ip->sip, ip->dip, tcp->sport, tcp->dport);
3273df5d593SAsias He if (!sk)
3283df5d593SAsias He return -1;
3293df5d593SAsias He
3303909f9b5SAsias He mutex_lock(sk->lock);
3313909f9b5SAsias He sk->window_size = ntohs(tcp->win);
3323df5d593SAsias He sk->guest_acked = ntohl(tcp->ack);
3333909f9b5SAsias He pthread_cond_signal(&sk->cond);
3343909f9b5SAsias He mutex_unlock(sk->lock);
3353df5d593SAsias He
3363df5d593SAsias He if (uip_tcp_is_fin(tcp)) {
3373df5d593SAsias He if (sk->write_done)
3383df5d593SAsias He goto out;
3393df5d593SAsias He
3403df5d593SAsias He sk->write_done = 1;
3413df5d593SAsias He sk->ack_server += 1;
3423df5d593SAsias He uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0);
3433df5d593SAsias He
3443df5d593SAsias He /*
3453df5d593SAsias He * Close guest to server TCP connection
3463df5d593SAsias He */
3473df5d593SAsias He uip_tcp_socket_close(sk, SHUT_WR);
3483df5d593SAsias He
3493df5d593SAsias He goto out;
3503df5d593SAsias He }
3513df5d593SAsias He
3523df5d593SAsias He /*
3533df5d593SAsias He * Ignore guest to server frames with zero tcp payload
3543df5d593SAsias He */
3553df5d593SAsias He if (uip_tcp_payloadlen(tcp) == 0)
3563df5d593SAsias He goto out;
3573df5d593SAsias He
3583df5d593SAsias He /*
3593df5d593SAsias He * Sent out TCP data to remote host
3603df5d593SAsias He */
3613df5d593SAsias He ret = uip_tcp_socket_send(sk, tcp);
3623df5d593SAsias He if (ret < 0)
3633df5d593SAsias He return -1;
3643df5d593SAsias He /*
3653df5d593SAsias He * Send ACK to guest imediately
3663df5d593SAsias He */
3673df5d593SAsias He sk->ack_server += ret;
3683df5d593SAsias He uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0);
3693df5d593SAsias He
3703df5d593SAsias He out:
3713df5d593SAsias He return 0;
3723df5d593SAsias He }
373*d87b503fSJean-Philippe Brucker
uip_tcp_exit(struct uip_info * info)374*d87b503fSJean-Philippe Brucker void uip_tcp_exit(struct uip_info *info)
375*d87b503fSJean-Philippe Brucker {
376*d87b503fSJean-Philippe Brucker struct uip_tcp_socket *sk, *next;
377*d87b503fSJean-Philippe Brucker
378*d87b503fSJean-Philippe Brucker mutex_lock(&info->tcp_socket_lock);
379*d87b503fSJean-Philippe Brucker list_for_each_entry_safe(sk, next, &info->tcp_socket_head, list)
380*d87b503fSJean-Philippe Brucker uip_tcp_socket_free(sk);
381*d87b503fSJean-Philippe Brucker mutex_unlock(&info->tcp_socket_lock);
382*d87b503fSJean-Philippe Brucker }
383