1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 11 #include <linux/virtio_net.h> 12 #include <linux/if_tun.h> 13 #include <net/if.h> 14 #include <sys/ioctl.h> 15 #include <assert.h> 16 #include <fcntl.h> 17 #include <arpa/inet.h> 18 #include <sys/types.h> 19 #include <sys/socket.h> 20 21 #define VIRTIO_NET_IRQ 14 22 #define VIRTIO_NET_QUEUE_SIZE 128 23 #define VIRTIO_NET_NUM_QUEUES 2 24 #define VIRTIO_NET_RX_QUEUE 0 25 #define VIRTIO_NET_TX_QUEUE 1 26 #define PCI_VIRTIO_NET_DEVNUM 3 27 28 struct net_device { 29 pthread_mutex_t mutex; 30 31 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 32 struct virtio_net_config net_config; 33 uint32_t host_features; 34 uint32_t guest_features; 35 uint16_t config_vector; 36 uint8_t status; 37 uint16_t queue_selector; 38 39 pthread_t io_rx_thread; 40 pthread_mutex_t io_rx_mutex; 41 pthread_cond_t io_rx_cond; 42 43 pthread_t io_tx_thread; 44 pthread_mutex_t io_tx_mutex; 45 pthread_cond_t io_tx_cond; 46 47 int tap_fd; 48 char tap_name[IFNAMSIZ]; 49 }; 50 51 static struct net_device net_device = { 52 .mutex = PTHREAD_MUTEX_INITIALIZER, 53 54 .net_config = { 55 .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}, 56 .status = VIRTIO_NET_S_LINK_UP, 57 }, 58 59 .host_features = 1UL << VIRTIO_NET_F_MAC, 60 }; 61 62 static void *virtio_net_rx_thread(void *p) 63 { 64 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 65 struct virt_queue *vq; 66 struct kvm *self; 67 uint16_t out, in; 68 uint16_t head; 69 int len; 70 71 self = p; 72 vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE]; 73 74 while (1) { 75 mutex_lock(&net_device.io_rx_mutex); 76 if (!virt_queue__available(vq)) 77 pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex); 78 mutex_unlock(&net_device.io_rx_mutex); 79 80 while (virt_queue__available(vq)) { 81 head = virt_queue__get_iov(vq, iov, &out, &in, self); 82 83 /* We do not specify GSO or CSUM features, So we can ignore virtio_net_hdr */ 84 len = readv(net_device.tap_fd, iov + 1, in - 1); 85 86 /* However, We have to tell guest we have write the virtio_net_hdr */ 87 virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len); 88 89 /* We should interrupt guest right now, otherwise latency is huge. */ 90 kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 91 } 92 93 } 94 95 pthread_exit(NULL); 96 return NULL; 97 98 } 99 100 static void *virtio_net_tx_thread(void *p) 101 { 102 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 103 struct virt_queue *vq; 104 struct kvm *self; 105 uint16_t out, in; 106 uint16_t head; 107 int len; 108 109 self = p; 110 vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE]; 111 112 while (1) { 113 mutex_lock(&net_device.io_tx_mutex); 114 if (!virt_queue__available(vq)) 115 pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex); 116 mutex_unlock(&net_device.io_tx_mutex); 117 118 while (virt_queue__available(vq)) { 119 head = virt_queue__get_iov(vq, iov, &out, &in, self); 120 len = writev(net_device.tap_fd, iov + 1, out - 1); 121 virt_queue__set_used_elem(vq, head, len); 122 } 123 124 kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 125 } 126 127 pthread_exit(NULL); 128 return NULL; 129 130 } 131 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count) 132 { 133 uint8_t *config_space = (uint8_t *) &net_device.net_config; 134 135 if (size != 1 || count != 1) 136 return false; 137 138 if ((offset - VIRTIO_PCI_CONFIG_NOMSI) > sizeof(struct virtio_net_config)) 139 error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI); 140 141 ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 142 143 return true; 144 } 145 146 static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 147 { 148 unsigned long offset = port - IOPORT_VIRTIO_NET; 149 bool ret = true; 150 151 mutex_lock(&net_device.mutex); 152 153 switch (offset) { 154 case VIRTIO_PCI_HOST_FEATURES: 155 ioport__write32(data, net_device.host_features); 156 break; 157 case VIRTIO_PCI_GUEST_FEATURES: 158 ret = false; 159 break; 160 case VIRTIO_PCI_QUEUE_PFN: 161 ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 162 break; 163 case VIRTIO_PCI_QUEUE_NUM: 164 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 165 break; 166 case VIRTIO_PCI_QUEUE_SEL: 167 case VIRTIO_PCI_QUEUE_NOTIFY: 168 ret = false; 169 break; 170 case VIRTIO_PCI_STATUS: 171 ioport__write8(data, net_device.status); 172 break; 173 case VIRTIO_PCI_ISR: 174 ioport__write8(data, 0x1); 175 kvm__irq_line(self, VIRTIO_NET_IRQ, 0); 176 break; 177 case VIRTIO_MSI_CONFIG_VECTOR: 178 ioport__write16(data, net_device.config_vector); 179 break; 180 default: 181 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 182 }; 183 184 mutex_unlock(&net_device.mutex); 185 186 return ret; 187 } 188 189 static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index) 190 { 191 if (queue_index == VIRTIO_NET_TX_QUEUE) { 192 193 mutex_lock(&net_device.io_tx_mutex); 194 pthread_cond_signal(&net_device.io_tx_cond); 195 mutex_unlock(&net_device.io_tx_mutex); 196 197 } else if (queue_index == VIRTIO_NET_RX_QUEUE) { 198 199 mutex_lock(&net_device.io_rx_mutex); 200 pthread_cond_signal(&net_device.io_rx_cond); 201 mutex_unlock(&net_device.io_rx_mutex); 202 203 } 204 } 205 206 static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 207 { 208 unsigned long offset = port - IOPORT_VIRTIO_NET; 209 bool ret = true; 210 211 mutex_lock(&net_device.mutex); 212 213 switch (offset) { 214 case VIRTIO_PCI_GUEST_FEATURES: 215 net_device.guest_features = ioport__read32(data); 216 break; 217 case VIRTIO_PCI_QUEUE_PFN: { 218 struct virt_queue *queue; 219 void *p; 220 221 assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 222 223 queue = &net_device.vqs[net_device.queue_selector]; 224 queue->pfn = ioport__read32(data); 225 p = guest_flat_to_host(self, queue->pfn << 12); 226 227 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096); 228 229 break; 230 } 231 case VIRTIO_PCI_QUEUE_SEL: 232 net_device.queue_selector = ioport__read16(data); 233 break; 234 case VIRTIO_PCI_QUEUE_NOTIFY: { 235 uint16_t queue_index; 236 queue_index = ioport__read16(data); 237 virtio_net_handle_callback(self, queue_index); 238 break; 239 } 240 case VIRTIO_PCI_STATUS: 241 net_device.status = ioport__read8(data); 242 break; 243 case VIRTIO_MSI_CONFIG_VECTOR: 244 net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 245 break; 246 case VIRTIO_MSI_QUEUE_VECTOR: 247 break; 248 default: 249 ret = false; 250 }; 251 252 mutex_unlock(&net_device.mutex); 253 return ret; 254 } 255 256 static struct ioport_operations virtio_net_io_ops = { 257 .io_in = virtio_net_pci_io_in, 258 .io_out = virtio_net_pci_io_out, 259 }; 260 261 #define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 262 #define PCI_DEVICE_ID_VIRTIO_NET 0x1000 263 #define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4 264 #define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001 265 266 static struct pci_device_header virtio_net_pci_device = { 267 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 268 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 269 .header_type = PCI_HEADER_TYPE_NORMAL, 270 .revision_id = 0, 271 .class = 0x020000, 272 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 273 .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 274 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 275 .irq_pin = 3, 276 .irq_line = VIRTIO_NET_IRQ, 277 }; 278 279 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 280 { 281 struct ifreq ifr; 282 int sock = socket(AF_INET, SOCK_STREAM, 0); 283 struct sockaddr_in sin = {0}; 284 285 net_device.tap_fd = open("/dev/net/tun", O_RDWR); 286 if (net_device.tap_fd < 0) { 287 warning("Unable to open /dev/net/tun\n"); 288 goto fail; 289 } 290 291 memset(&ifr, 0, sizeof(ifr)); 292 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 293 294 if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) { 295 warning("Config tap device error. Are you root?"); 296 goto fail; 297 } 298 299 strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 300 301 ioctl(net_device.tap_fd, TUNSETNOCSUM, 1); 302 303 304 memset(&ifr, 0, sizeof(ifr)); 305 306 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 307 308 sin.sin_addr.s_addr = inet_addr(params->host_ip); 309 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 310 ifr.ifr_addr.sa_family = AF_INET; 311 312 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 313 warning("Can not set ip address on tap device"); 314 goto fail; 315 } 316 317 memset(&ifr, 0, sizeof(ifr)); 318 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 319 ioctl(sock, SIOCGIFFLAGS, &ifr); 320 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 321 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 322 warning("Could not bring tap device up"); 323 324 close(sock); 325 326 return 1; 327 328 fail: 329 if (sock >= 0) 330 close(sock); 331 if (net_device.tap_fd >= 0) 332 close(net_device.tap_fd); 333 334 return 0; 335 } 336 337 static void virtio_net__io_thread_init(struct kvm *self) 338 { 339 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 340 pthread_cond_init(&net_device.io_tx_cond, NULL); 341 342 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 343 pthread_cond_init(&net_device.io_tx_cond, NULL); 344 345 pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self); 346 pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self); 347 } 348 349 void virtio_net__init(const struct virtio_net_parameters *params) 350 { 351 if (virtio_net__tap_init(params)) { 352 pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM); 353 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 354 355 virtio_net__io_thread_init(params->self); 356 } 357 } 358