1 #include "kvm/uip.h"
2
3 #include <kvm/kvm.h>
4 #include <linux/virtio_net.h>
5 #include <linux/kernel.h>
6 #include <linux/list.h>
7 #include <arpa/inet.h>
8
uip_tcp_socket_close(struct uip_tcp_socket * sk,int how)9 static int uip_tcp_socket_close(struct uip_tcp_socket *sk, int how)
10 {
11 shutdown(sk->fd, how);
12
13 if (sk->write_done && sk->read_done) {
14 shutdown(sk->fd, SHUT_RDWR);
15 close(sk->fd);
16
17 mutex_lock(sk->lock);
18 list_del(&sk->list);
19 mutex_unlock(sk->lock);
20
21 free(sk->buf);
22 free(sk);
23 }
24
25 return 0;
26 }
27
/*
 * Look up an existing proxy socket matching the guest/remote address and
 * port 4-tuple.  Returns the socket, or NULL when none matches.
 */
static struct uip_tcp_socket *uip_tcp_socket_find(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport)
{
	struct uip_tcp_socket *sk, *found = NULL;
	struct list_head *sk_head = &arg->info->tcp_socket_head;
	struct mutex *sk_lock = &arg->info->tcp_socket_lock;

	mutex_lock(sk_lock);
	list_for_each_entry(sk, sk_head, list) {
		if (sk->sip != sip || sk->dip != dip ||
		    sk->sport != sport || sk->dport != dport)
			continue;
		found = sk;
		break;
	}
	mutex_unlock(sk_lock);

	return found;
}
48
/*
 * Allocate a proxy socket for a new guest TCP session and connect it to
 * the remote host (or to 127.0.0.1 when the destination is the host
 * itself).  On success the socket is linked into the global list and
 * returned; on any failure all partially acquired resources are released
 * and NULL is returned.
 */
static struct uip_tcp_socket *uip_tcp_socket_alloc(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport)
{
	struct list_head *sk_head;
	struct uip_tcp_socket *sk;
	struct mutex *sk_lock;
	struct uip_tcp *tcp;
	struct uip_ip *ip;
	int ret;

	tcp = (struct uip_tcp *)arg->eth;
	ip = (struct uip_ip *)arg->eth;

	sk_head = &arg->info->tcp_socket_head;
	sk_lock = &arg->info->tcp_socket_lock;

	sk = malloc(sizeof(*sk));
	if (!sk)
		return NULL;
	memset(sk, 0, sizeof(*sk));

	sk->lock = sk_lock;
	sk->info = arg->info;

	sk->fd = socket(AF_INET, SOCK_STREAM, 0);
	if (sk->fd < 0)
		goto out_free;

	sk->addr.sin_family = AF_INET;
	sk->addr.sin_port = dport;
	sk->addr.sin_addr.s_addr = dip;

	pthread_cond_init(&sk->cond, NULL);

	/* Redirect traffic aimed at the host's own IP to loopback */
	if (ntohl(dip) == arg->info->host_ip)
		sk->addr.sin_addr.s_addr = inet_addr("127.0.0.1");

	ret = connect(sk->fd, (struct sockaddr *)&sk->addr, sizeof(sk->addr));
	if (ret)
		goto out_close;

	sk->sip = ip->sip;
	sk->dip = ip->dip;
	sk->sport = tcp->sport;
	sk->dport = tcp->dport;

	mutex_lock(sk_lock);
	list_add_tail(&sk->list, sk_head);
	mutex_unlock(sk_lock);

	return sk;

out_close:
	pthread_cond_destroy(&sk->cond);
	close(sk->fd);
out_free:
	free(sk);
	return NULL;
}
97
98 /* Caller holds the sk lock */
static void uip_tcp_socket_free(struct uip_tcp_socket *sk)
{
	/*
	 * Here we assume that the virtqueues are already inactive so we don't
	 * race with uip_tx_do_ipv4_tcp. We are racing with
	 * uip_tcp_socket_thread though, but holding the sk lock ensures that it
	 * cannot free data concurrently.
	 */
	if (sk->thread) {
		/* Stop the receive thread and wait for it to exit */
		pthread_cancel(sk->thread);
		pthread_join(sk->thread, NULL);
	}

	/* Mark both directions finished so close() below tears down fully */
	sk->write_done = sk->read_done = 1;
	uip_tcp_socket_close(sk, SHUT_RDWR);
}
115
/*
 * Build a TCP/IP ethernet frame from the server towards the guest,
 * carrying @payload_len bytes of sk->payload and the TCP flags in @flag,
 * and hand it to the guest via a free uip buffer.  sk->seq_server is
 * advanced by the payload length.  Always returns 0.
 */
static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_len)
{
	struct uip_info *info;
	struct uip_eth *eth2;
	struct uip_tcp *tcp2;
	struct uip_buf *buf;
	struct uip_ip *ip2;

	info = sk->info;

	/*
	 * Get free buffer to send data to guest
	 */
	buf = uip_buf_get_free(info);

	/*
	 * Cook a ethernet frame
	 */
	tcp2 = (struct uip_tcp *)buf->eth;
	eth2 = (struct uip_eth *)buf->eth;
	ip2 = (struct uip_ip *)buf->eth;

	eth2->src = info->host_mac;
	eth2->dst = info->guest_mac;
	eth2->type = htons(UIP_ETH_P_IP);

	ip2->vhl = UIP_IP_VER_4 | UIP_IP_HDR_LEN;
	ip2->tos = 0;
	ip2->id = 0;
	ip2->flgfrag = 0;
	ip2->ttl = UIP_IP_TTL;
	ip2->proto = UIP_IP_P_TCP;
	ip2->csum = 0;		/* zeroed before checksum computation below */
	ip2->sip = sk->dip;	/* frame goes server->guest: swap endpoints */
	ip2->dip = sk->sip;

	tcp2->sport = sk->dport;
	tcp2->dport = sk->sport;
	tcp2->seq = htonl(sk->seq_server);
	tcp2->ack = htonl(sk->ack_server);
	/*
	 * Disable TCP options, tcp hdr len equals 20 bytes
	 */
	tcp2->off = UIP_TCP_HDR_LEN;
	tcp2->flg = flag;
	tcp2->win = htons(UIP_TCP_WIN_SIZE);
	tcp2->csum = 0;
	tcp2->urgent = 0;

	if (payload_len > 0)
		memcpy(uip_tcp_payload(tcp2), sk->payload, payload_len);

	/* Fill in total length first, then compute checksums over the frame */
	ip2->len = htons(uip_tcp_hdrlen(tcp2) + payload_len + uip_ip_hdrlen(ip2));
	ip2->csum = uip_csum_ip(ip2);
	tcp2->csum = uip_csum_tcp(tcp2);

	/*
	 * virtio_net_hdr
	 */
	buf->vnet_len = info->vnet_hdr_len;
	memset(buf->vnet, 0, buf->vnet_len);

	buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth);

	/*
	 * Increase server seq
	 */
	sk->seq_server += payload_len;

	/*
	 * Send data received from socket to guest
	 */
	uip_buf_set_used(info, buf);

	return 0;
}
192
/*
 * Per-connection receive thread: reads data from the remote host's socket
 * and forwards it to the guest in window-sized TCP segments, blocking on
 * sk->cond until the guest's acknowledged window opens up.  On EOF/error
 * it sends FIN|ACK to the guest and marks the read side done.
 */
static void *uip_tcp_socket_thread(void *p)
{
	struct uip_tcp_socket *sk;
	int len, left, ret;
	u8 *pos;

	kvm__set_thread_name("uip-tcp");

	sk = p;

	while (1) {
		pos = sk->buf;

		ret = read(sk->fd, sk->buf, UIP_MAX_TCP_PAYLOAD);

		/* EOF, read error, or an impossible over-long read: bail out */
		if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD)
			goto out;

		left = ret;

		while (left > 0) {
			/*
			 * Wait until the guest has acked enough data that the
			 * usable window (guest_acked + window_size -
			 * seq_server) is positive.  uip_tx_do_ipv4_tcp
			 * signals sk->cond on every incoming ACK.
			 */
			mutex_lock(sk->lock);
			while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0)
				pthread_cond_wait(&sk->cond, &sk->lock->mutex);
			mutex_unlock(sk->lock);

			sk->payload = pos;
			/* Clamp the segment to remaining data and max payload */
			if (len > left)
				len = left;
			if (len > UIP_MAX_TCP_PAYLOAD)
				len = UIP_MAX_TCP_PAYLOAD;
			left -= len;
			pos += len;

			uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, len);
		}
	}

out:
	/*
	 * Close server to guest TCP connection
	 */
	uip_tcp_socket_close(sk, SHUT_RD);

	uip_tcp_payload_send(sk, UIP_TCP_FLAG_FIN | UIP_TCP_FLAG_ACK, 0);
	sk->seq_server += 1;	/* FIN consumes one sequence number */

	sk->read_done = 1;

	pthread_exit(NULL);

	return NULL;
}
246
uip_tcp_socket_receive(struct uip_tcp_socket * sk)247 static int uip_tcp_socket_receive(struct uip_tcp_socket *sk)
248 {
249 int ret;
250
251 if (sk->thread == 0) {
252 sk->buf = malloc(UIP_MAX_TCP_PAYLOAD);
253 if (!sk->buf)
254 return -ENOMEM;
255 ret = pthread_create(&sk->thread, NULL, uip_tcp_socket_thread,
256 (void *)sk);
257 if (ret)
258 free(sk->buf);
259 return ret;
260 }
261
262 return 0;
263 }
264
uip_tcp_socket_send(struct uip_tcp_socket * sk,struct uip_tcp * tcp)265 static int uip_tcp_socket_send(struct uip_tcp_socket *sk, struct uip_tcp *tcp)
266 {
267 int len;
268 int ret;
269 u8 *payload;
270
271 if (sk->write_done)
272 return 0;
273
274 payload = uip_tcp_payload(tcp);
275 len = uip_tcp_payloadlen(tcp);
276
277 ret = write(sk->fd, payload, len);
278 if (ret != len)
279 pr_warning("tcp send error");
280
281 return ret;
282 }
283
/*
 * Handle a TCP frame transmitted by the guest: fake the server side of
 * the handshake on SYN, ACK and tear down on FIN, and relay payload data
 * to the remote host, acking it back to the guest immediately.
 *
 * Returns 0 on success, -1 when no socket can be allocated or found.
 */
int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
{
	struct uip_tcp_socket *sk;
	struct uip_tcp *tcp;
	struct uip_ip *ip;
	int ret;

	tcp = (struct uip_tcp *)arg->eth;
	ip = (struct uip_ip *)arg->eth;

	/*
	 * Guest is trying to start a TCP session, let's fake SYN-ACK to guest
	 */
	if (uip_tcp_is_syn(tcp)) {
		sk = uip_tcp_socket_alloc(arg, ip->sip, ip->dip, tcp->sport, tcp->dport);
		if (!sk)
			return -1;

		sk->window_size = ntohs(tcp->win);

		/*
		 * Setup ISN number
		 */
		sk->isn_guest = uip_tcp_isn(tcp);
		sk->isn_server = uip_tcp_isn_alloc();

		sk->seq_server = sk->isn_server;
		sk->ack_server = sk->isn_guest + 1;	/* SYN consumes one seq */
		uip_tcp_payload_send(sk, UIP_TCP_FLAG_SYN | UIP_TCP_FLAG_ACK, 0);
		sk->seq_server += 1;	/* our SYN consumes one seq too */

		/*
		 * Start receive thread for data from remote to guest
		 */
		uip_tcp_socket_receive(sk);

		goto out;
	}

	/*
	 * Find socket we have allocated
	 */
	sk = uip_tcp_socket_find(arg, ip->sip, ip->dip, tcp->sport, tcp->dport);
	if (!sk)
		return -1;

	/* Update window state and wake the receive thread (see cond_wait) */
	mutex_lock(sk->lock);
	sk->window_size = ntohs(tcp->win);
	sk->guest_acked = ntohl(tcp->ack);
	pthread_cond_signal(&sk->cond);
	mutex_unlock(sk->lock);

	if (uip_tcp_is_fin(tcp)) {
		if (sk->write_done)
			goto out;

		sk->write_done = 1;
		sk->ack_server += 1;	/* ack the guest's FIN */
		uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0);

		/*
		 * Close guest to server TCP connection
		 */
		uip_tcp_socket_close(sk, SHUT_WR);

		goto out;
	}

	/*
	 * Ignore guest to server frames with zero tcp payload
	 */
	if (uip_tcp_payloadlen(tcp) == 0)
		goto out;

	/*
	 * Sent out TCP data to remote host
	 */
	ret = uip_tcp_socket_send(sk, tcp);
	if (ret < 0)
		return -1;
	/*
	 * Send ACK to guest immediately
	 */
	sk->ack_server += ret;
	uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0);

out:
	return 0;
}
373
uip_tcp_exit(struct uip_info * info)374 void uip_tcp_exit(struct uip_info *info)
375 {
376 struct uip_tcp_socket *sk, *next;
377
378 mutex_lock(&info->tcp_socket_lock);
379 list_for_each_entry_safe(sk, next, &info->tcp_socket_head, list)
380 uip_tcp_socket_free(sk);
381 mutex_unlock(&info->tcp_socket_lock);
382 }
383