13df5d593SAsias He #include "kvm/uip.h" 23df5d593SAsias He 3a4d8c55eSSasha Levin #include <kvm/kvm.h> 43df5d593SAsias He #include <linux/virtio_net.h> 53df5d593SAsias He #include <linux/kernel.h> 63df5d593SAsias He #include <linux/list.h> 7195544b7SAsias He #include <arpa/inet.h> 83df5d593SAsias He 93df5d593SAsias He static int uip_tcp_socket_close(struct uip_tcp_socket *sk, int how) 103df5d593SAsias He { 113df5d593SAsias He shutdown(sk->fd, how); 123df5d593SAsias He 133df5d593SAsias He if (sk->write_done && sk->read_done) { 143df5d593SAsias He shutdown(sk->fd, SHUT_RDWR); 153df5d593SAsias He close(sk->fd); 163df5d593SAsias He 173df5d593SAsias He mutex_lock(sk->lock); 183df5d593SAsias He list_del(&sk->list); 193df5d593SAsias He mutex_unlock(sk->lock); 203df5d593SAsias He 213df5d593SAsias He free(sk); 223df5d593SAsias He } 233df5d593SAsias He 243df5d593SAsias He return 0; 253df5d593SAsias He } 263df5d593SAsias He 273df5d593SAsias He static struct uip_tcp_socket *uip_tcp_socket_find(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport) 283df5d593SAsias He { 293df5d593SAsias He struct list_head *sk_head; 30*d3476f7dSSasha Levin struct mutex *sk_lock; 313df5d593SAsias He struct uip_tcp_socket *sk; 323df5d593SAsias He 333df5d593SAsias He sk_head = &arg->info->tcp_socket_head; 343df5d593SAsias He sk_lock = &arg->info->tcp_socket_lock; 353df5d593SAsias He 363df5d593SAsias He mutex_lock(sk_lock); 373df5d593SAsias He list_for_each_entry(sk, sk_head, list) { 383df5d593SAsias He if (sk->sip == sip && sk->dip == dip && sk->sport == sport && sk->dport == dport) { 393df5d593SAsias He mutex_unlock(sk_lock); 403df5d593SAsias He return sk; 413df5d593SAsias He } 423df5d593SAsias He } 433df5d593SAsias He mutex_unlock(sk_lock); 443df5d593SAsias He 453df5d593SAsias He return NULL; 463df5d593SAsias He } 473df5d593SAsias He 483df5d593SAsias He static struct uip_tcp_socket *uip_tcp_socket_alloc(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport) 493df5d593SAsias He { 503df5d593SAsias He struct list_head *sk_head; 513df5d593SAsias He struct uip_tcp_socket *sk; 52*d3476f7dSSasha Levin struct mutex *sk_lock; 533df5d593SAsias He struct uip_tcp *tcp; 543df5d593SAsias He struct uip_ip *ip; 553df5d593SAsias He int ret; 563df5d593SAsias He 573df5d593SAsias He tcp = (struct uip_tcp *)arg->eth; 583df5d593SAsias He ip = (struct uip_ip *)arg->eth; 593df5d593SAsias He 603df5d593SAsias He sk_head = &arg->info->tcp_socket_head; 613df5d593SAsias He sk_lock = &arg->info->tcp_socket_lock; 623df5d593SAsias He 633df5d593SAsias He sk = malloc(sizeof(*sk)); 643df5d593SAsias He memset(sk, 0, sizeof(*sk)); 653df5d593SAsias He 663df5d593SAsias He sk->lock = sk_lock; 673df5d593SAsias He sk->info = arg->info; 683df5d593SAsias He 693df5d593SAsias He sk->fd = socket(AF_INET, SOCK_STREAM, 0); 703df5d593SAsias He sk->addr.sin_family = AF_INET; 713df5d593SAsias He sk->addr.sin_port = dport; 72195544b7SAsias He sk->addr.sin_addr.s_addr = dip; 73195544b7SAsias He 743909f9b5SAsias He pthread_cond_init(&sk->cond, NULL); 753909f9b5SAsias He 76195544b7SAsias He if (ntohl(dip) == arg->info->host_ip) 77195544b7SAsias He sk->addr.sin_addr.s_addr = inet_addr("127.0.0.1"); 783df5d593SAsias He 793df5d593SAsias He ret = connect(sk->fd, (struct sockaddr *)&sk->addr, sizeof(sk->addr)); 803df5d593SAsias He if (ret) { 813df5d593SAsias He free(sk); 823df5d593SAsias He return NULL; 833df5d593SAsias He } 843df5d593SAsias He 853df5d593SAsias He sk->sip = ip->sip; 863df5d593SAsias He sk->dip = ip->dip; 873df5d593SAsias He sk->sport = tcp->sport; 883df5d593SAsias He sk->dport = tcp->dport; 893df5d593SAsias He 903df5d593SAsias He mutex_lock(sk_lock); 913df5d593SAsias He list_add_tail(&sk->list, sk_head); 923df5d593SAsias He mutex_unlock(sk_lock); 933df5d593SAsias He 943df5d593SAsias He return sk; 953df5d593SAsias He } 963df5d593SAsias He 973df5d593SAsias He static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_len) 983df5d593SAsias He { 993df5d593SAsias He struct uip_info *info; 1003df5d593SAsias He struct uip_eth *eth2; 1013df5d593SAsias He struct uip_tcp *tcp2; 1023df5d593SAsias He struct uip_buf *buf; 1033df5d593SAsias He struct uip_ip *ip2; 1043df5d593SAsias He 1053df5d593SAsias He info = sk->info; 1063df5d593SAsias He 1073df5d593SAsias He /* 1083df5d593SAsias He * Get free buffer to send data to guest 1093df5d593SAsias He */ 1103df5d593SAsias He buf = uip_buf_get_free(info); 1113df5d593SAsias He 1123df5d593SAsias He /* 1133df5d593SAsias He * Cook a ethernet frame 1143df5d593SAsias He */ 1153df5d593SAsias He tcp2 = (struct uip_tcp *)buf->eth; 1163df5d593SAsias He eth2 = (struct uip_eth *)buf->eth; 1173df5d593SAsias He ip2 = (struct uip_ip *)buf->eth; 1183df5d593SAsias He 1193df5d593SAsias He eth2->src = info->host_mac; 1203df5d593SAsias He eth2->dst = info->guest_mac; 1213df5d593SAsias He eth2->type = htons(UIP_ETH_P_IP); 1223df5d593SAsias He 1233df5d593SAsias He ip2->vhl = UIP_IP_VER_4 | UIP_IP_HDR_LEN; 1243df5d593SAsias He ip2->tos = 0; 1253df5d593SAsias He ip2->id = 0; 1263df5d593SAsias He ip2->flgfrag = 0; 1273df5d593SAsias He ip2->ttl = UIP_IP_TTL; 1283df5d593SAsias He ip2->proto = UIP_IP_P_TCP; 1293df5d593SAsias He ip2->csum = 0; 1303df5d593SAsias He ip2->sip = sk->dip; 1313df5d593SAsias He ip2->dip = sk->sip; 1323df5d593SAsias He 1333df5d593SAsias He tcp2->sport = sk->dport; 1343df5d593SAsias He tcp2->dport = sk->sport; 1353df5d593SAsias He tcp2->seq = htonl(sk->seq_server); 1363df5d593SAsias He tcp2->ack = htonl(sk->ack_server); 1373df5d593SAsias He /* 1383df5d593SAsias He * Diable TCP options, tcp hdr len equals 20 bytes 1393df5d593SAsias He */ 1403df5d593SAsias He tcp2->off = UIP_TCP_HDR_LEN; 1413df5d593SAsias He tcp2->flg = flag; 1423df5d593SAsias He tcp2->win = htons(UIP_TCP_WIN_SIZE); 1433df5d593SAsias He tcp2->csum = 0; 1443df5d593SAsias He tcp2->urgent = 0; 1453df5d593SAsias He 1463df5d593SAsias He if (payload_len > 0) 1473df5d593SAsias He memcpy(uip_tcp_payload(tcp2), sk->payload, payload_len); 1483df5d593SAsias He 1493df5d593SAsias He ip2->len = htons(uip_tcp_hdrlen(tcp2) + payload_len + uip_ip_hdrlen(ip2)); 1503df5d593SAsias He ip2->csum = uip_csum_ip(ip2); 1513df5d593SAsias He tcp2->csum = uip_csum_tcp(tcp2); 1523df5d593SAsias He 1533df5d593SAsias He /* 1543df5d593SAsias He * virtio_net_hdr 1553df5d593SAsias He */ 1563df5d593SAsias He buf->vnet_len = sizeof(struct virtio_net_hdr); 1573df5d593SAsias He memset(buf->vnet, 0, buf->vnet_len); 1583df5d593SAsias He 1593df5d593SAsias He buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); 1603df5d593SAsias He 1613df5d593SAsias He /* 1623df5d593SAsias He * Increase server seq 1633df5d593SAsias He */ 1643df5d593SAsias He sk->seq_server += payload_len; 1653df5d593SAsias He 1663df5d593SAsias He /* 1673df5d593SAsias He * Send data received from socket to guest 1683df5d593SAsias He */ 1693df5d593SAsias He uip_buf_set_used(info, buf); 1703df5d593SAsias He 1713df5d593SAsias He return 0; 1723df5d593SAsias He } 1733df5d593SAsias He 1743df5d593SAsias He static void *uip_tcp_socket_thread(void *p) 1753df5d593SAsias He { 1763df5d593SAsias He struct uip_tcp_socket *sk; 1773909f9b5SAsias He int len, left, ret; 1783909f9b5SAsias He u8 *payload, *pos; 1793df5d593SAsias He 180a4d8c55eSSasha Levin kvm__set_thread_name("uip-tcp"); 181a4d8c55eSSasha Levin 1823df5d593SAsias He sk = p; 1833df5d593SAsias He 1843df5d593SAsias He payload = malloc(UIP_MAX_TCP_PAYLOAD); 1853909f9b5SAsias He if (!payload) 1863df5d593SAsias He goto out; 1873df5d593SAsias He 1883df5d593SAsias He while (1) { 1893909f9b5SAsias He pos = payload; 1903df5d593SAsias He 1913df5d593SAsias He ret = read(sk->fd, payload, UIP_MAX_TCP_PAYLOAD); 1923df5d593SAsias He 1933df5d593SAsias He if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD) 1943df5d593SAsias He goto out; 1953df5d593SAsias He 1963909f9b5SAsias He left = ret; 1973df5d593SAsias He 1983909f9b5SAsias He while (left > 0) { 1993909f9b5SAsias He mutex_lock(sk->lock); 2003909f9b5SAsias He while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0) 201*d3476f7dSSasha Levin pthread_cond_wait(&sk->cond, &sk->lock->mutex); 2023909f9b5SAsias He mutex_unlock(sk->lock); 2033909f9b5SAsias He 2043909f9b5SAsias He sk->payload = pos; 2053909f9b5SAsias He if (len > left) 2063909f9b5SAsias He len = left; 2073909f9b5SAsias He if (len > UIP_MAX_TCP_PAYLOAD) 2083909f9b5SAsias He len = UIP_MAX_TCP_PAYLOAD; 2093909f9b5SAsias He left -= len; 2103909f9b5SAsias He pos += len; 2113909f9b5SAsias He 2123909f9b5SAsias He uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, len); 2133909f9b5SAsias He } 2143df5d593SAsias He } 2153df5d593SAsias He 2163df5d593SAsias He out: 2173df5d593SAsias He /* 2183df5d593SAsias He * Close server to guest TCP connection 2193df5d593SAsias He */ 2203df5d593SAsias He uip_tcp_socket_close(sk, SHUT_RD); 2213df5d593SAsias He 2223df5d593SAsias He uip_tcp_payload_send(sk, UIP_TCP_FLAG_FIN | UIP_TCP_FLAG_ACK, 0); 2233df5d593SAsias He sk->seq_server += 1; 2243df5d593SAsias He 2253df5d593SAsias He sk->read_done = 1; 2263df5d593SAsias He 2273909f9b5SAsias He free(payload); 2283df5d593SAsias He pthread_exit(NULL); 2293df5d593SAsias He 2303df5d593SAsias He return NULL; 2313df5d593SAsias He } 2323df5d593SAsias He 2333df5d593SAsias He static int uip_tcp_socket_receive(struct uip_tcp_socket *sk) 2343df5d593SAsias He { 2353df5d593SAsias He if (sk->thread == 0) 2363df5d593SAsias He return pthread_create(&sk->thread, NULL, uip_tcp_socket_thread, (void *)sk); 2373df5d593SAsias He 2383df5d593SAsias He return 0; 2393df5d593SAsias He } 2403df5d593SAsias He 2413df5d593SAsias He static int uip_tcp_socket_send(struct uip_tcp_socket *sk, struct uip_tcp *tcp) 2423df5d593SAsias He { 2433df5d593SAsias He int len; 2443df5d593SAsias He int ret; 2453df5d593SAsias He u8 *payload; 2463df5d593SAsias He 2473df5d593SAsias He if (sk->write_done) 2483df5d593SAsias He return 0; 2493df5d593SAsias He 2503df5d593SAsias He payload = uip_tcp_payload(tcp); 2513df5d593SAsias He len = uip_tcp_payloadlen(tcp); 2523df5d593SAsias He 2533df5d593SAsias He ret = write(sk->fd, payload, len); 2543df5d593SAsias He if (ret != len) 2553df5d593SAsias He pr_warning("tcp send error"); 2563df5d593SAsias He 2573df5d593SAsias He return ret; 2583df5d593SAsias He } 2593df5d593SAsias He 2603df5d593SAsias He int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg) 2613df5d593SAsias He { 2623df5d593SAsias He struct uip_tcp_socket *sk; 2633df5d593SAsias He struct uip_tcp *tcp; 2643df5d593SAsias He struct uip_ip *ip; 2653df5d593SAsias He int ret; 2663df5d593SAsias He 2673df5d593SAsias He tcp = (struct uip_tcp *)arg->eth; 2683df5d593SAsias He ip = (struct uip_ip *)arg->eth; 2693df5d593SAsias He 2703df5d593SAsias He /* 2713df5d593SAsias He * Guest is trying to start a TCP session, let's fake SYN-ACK to guest 2723df5d593SAsias He */ 2733df5d593SAsias He if (uip_tcp_is_syn(tcp)) { 2743df5d593SAsias He sk = uip_tcp_socket_alloc(arg, ip->sip, ip->dip, tcp->sport, tcp->dport); 2753df5d593SAsias He if (!sk) 2763df5d593SAsias He return -1; 2773df5d593SAsias He 2783909f9b5SAsias He sk->window_size = ntohs(tcp->win); 2793909f9b5SAsias He 2803df5d593SAsias He /* 2813df5d593SAsias He * Setup ISN number 2823df5d593SAsias He */ 2833df5d593SAsias He sk->isn_guest = uip_tcp_isn(tcp); 2843df5d593SAsias He sk->isn_server = uip_tcp_isn_alloc(); 2853df5d593SAsias He 2863df5d593SAsias He sk->seq_server = sk->isn_server; 2873df5d593SAsias He sk->ack_server = sk->isn_guest + 1; 2883df5d593SAsias He uip_tcp_payload_send(sk, UIP_TCP_FLAG_SYN | UIP_TCP_FLAG_ACK, 0); 2893df5d593SAsias He sk->seq_server += 1; 2903df5d593SAsias He 2913df5d593SAsias He /* 2923df5d593SAsias He * Start receive thread for data from remote to guest 2933df5d593SAsias He */ 2943df5d593SAsias He uip_tcp_socket_receive(sk); 2953df5d593SAsias He 2963df5d593SAsias He goto out; 2973df5d593SAsias He } 2983df5d593SAsias He 2993df5d593SAsias He /* 3003df5d593SAsias He * Find socket we have allocated 3013df5d593SAsias He */ 3023df5d593SAsias He sk = uip_tcp_socket_find(arg, ip->sip, ip->dip, tcp->sport, tcp->dport); 3033df5d593SAsias He if (!sk) 3043df5d593SAsias He return -1; 3053df5d593SAsias He 3063909f9b5SAsias He mutex_lock(sk->lock); 3073909f9b5SAsias He sk->window_size = ntohs(tcp->win); 3083df5d593SAsias He sk->guest_acked = ntohl(tcp->ack); 3093909f9b5SAsias He pthread_cond_signal(&sk->cond); 3103909f9b5SAsias He mutex_unlock(sk->lock); 3113df5d593SAsias He 3123df5d593SAsias He if (uip_tcp_is_fin(tcp)) { 3133df5d593SAsias He if (sk->write_done) 3143df5d593SAsias He goto out; 3153df5d593SAsias He 3163df5d593SAsias He sk->write_done = 1; 3173df5d593SAsias He sk->ack_server += 1; 3183df5d593SAsias He uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0); 3193df5d593SAsias He 3203df5d593SAsias He /* 3213df5d593SAsias He * Close guest to server TCP connection 3223df5d593SAsias He */ 3233df5d593SAsias He uip_tcp_socket_close(sk, SHUT_WR); 3243df5d593SAsias He 3253df5d593SAsias He goto out; 3263df5d593SAsias He } 3273df5d593SAsias He 3283df5d593SAsias He /* 3293df5d593SAsias He * Ignore guest to server frames with zero tcp payload 3303df5d593SAsias He */ 3313df5d593SAsias He if (uip_tcp_payloadlen(tcp) == 0) 3323df5d593SAsias He goto out; 3333df5d593SAsias He 3343df5d593SAsias He /* 3353df5d593SAsias He * Sent out TCP data to remote host 3363df5d593SAsias He */ 3373df5d593SAsias He ret = uip_tcp_socket_send(sk, tcp); 3383df5d593SAsias He if (ret < 0) 3393df5d593SAsias He return -1; 3403df5d593SAsias He /* 3413df5d593SAsias He * Send ACK to guest imediately 3423df5d593SAsias He */ 3433df5d593SAsias He sk->ack_server += ret; 3443df5d593SAsias He uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0); 3453df5d593SAsias He 3463df5d593SAsias He out: 3473df5d593SAsias He return 0; 3483df5d593SAsias He } 349