1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 #include "kvm/threadpool.h" 11 12 #include <linux/virtio_net.h> 13 #include <linux/if_tun.h> 14 #include <net/if.h> 15 #include <sys/ioctl.h> 16 #include <assert.h> 17 #include <fcntl.h> 18 #include <arpa/inet.h> 19 #include <sys/types.h> 20 #include <sys/socket.h> 21 #include <unistd.h> 22 #include <sys/wait.h> 23 24 #define VIRTIO_NET_IRQ 14 25 #define VIRTIO_NET_PIN 3 26 27 #define VIRTIO_NET_QUEUE_SIZE 128 28 #define VIRTIO_NET_NUM_QUEUES 2 29 #define VIRTIO_NET_RX_QUEUE 0 30 #define VIRTIO_NET_TX_QUEUE 1 31 #define PCI_VIRTIO_NET_DEVNUM 3 32 33 struct net_device { 34 pthread_mutex_t mutex; 35 36 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 37 struct virtio_net_config net_config; 38 uint32_t host_features; 39 uint32_t guest_features; 40 uint16_t config_vector; 41 uint8_t status; 42 uint16_t queue_selector; 43 44 int tap_fd; 45 char tap_name[IFNAMSIZ]; 46 void *jobs[VIRTIO_NET_NUM_QUEUES]; 47 }; 48 49 static struct net_device net_device = { 50 .mutex = PTHREAD_MUTEX_INITIALIZER, 51 52 .net_config = { 53 .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}, 54 .status = VIRTIO_NET_S_LINK_UP, 55 }, 56 .host_features = 1UL << VIRTIO_NET_F_MAC | 57 1UL << VIRTIO_NET_F_CSUM | 58 1UL << VIRTIO_NET_F_HOST_UFO | 59 1UL << VIRTIO_NET_F_HOST_TSO4 | 60 1UL << VIRTIO_NET_F_HOST_TSO6 | 61 1UL << VIRTIO_NET_F_GUEST_UFO | 62 1UL << VIRTIO_NET_F_GUEST_TSO4 | 63 1UL << VIRTIO_NET_F_GUEST_TSO6, 64 }; 65 66 static void virtio_net_rx_callback(struct kvm *self, void *param) 67 { 68 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 69 struct virt_queue *vq; 70 uint16_t out, in; 71 uint16_t head; 72 int len; 73 74 vq = param; 75 76 while (virt_queue__available(vq)) { 77 head = virt_queue__get_iov(vq, iov, &out, &in, self); 78 len = readv(net_device.tap_fd, iov, in); 79 virt_queue__set_used_elem(vq, head, len); 80 } 81 82 kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 83 } 84 85 static void virtio_net_tx_callback(struct kvm *self, void *param) 86 { 87 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 88 struct virt_queue *vq; 89 uint16_t out, in; 90 uint16_t head; 91 int len; 92 93 vq = param; 94 95 while (virt_queue__available(vq)) { 96 head = virt_queue__get_iov(vq, iov, &out, &in, self); 97 len = writev(net_device.tap_fd, iov, out); 98 virt_queue__set_used_elem(vq, head, len); 99 } 100 101 kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 102 } 103 104 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count) 105 { 106 uint8_t *config_space = (uint8_t *) &net_device.net_config; 107 108 if (size != 1 || count != 1) 109 return false; 110 111 if ((offset - VIRTIO_PCI_CONFIG_NOMSI) > sizeof(struct virtio_net_config)) 112 error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI); 113 114 ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 115 116 return true; 117 } 118 119 static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 120 { 121 unsigned long offset = port - IOPORT_VIRTIO_NET; 122 bool ret = true; 123 124 mutex_lock(&net_device.mutex); 125 126 switch (offset) { 127 case VIRTIO_PCI_HOST_FEATURES: 128 ioport__write32(data, net_device.host_features); 129 break; 130 case VIRTIO_PCI_GUEST_FEATURES: 131 ret = false; 132 break; 133 case VIRTIO_PCI_QUEUE_PFN: 134 ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 135 break; 136 case VIRTIO_PCI_QUEUE_NUM: 137 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 138 break; 139 case VIRTIO_PCI_QUEUE_SEL: 140 case VIRTIO_PCI_QUEUE_NOTIFY: 141 ret = false; 142 break; 143 case VIRTIO_PCI_STATUS: 144 ioport__write8(data, net_device.status); 145 break; 146 case VIRTIO_PCI_ISR: 147 ioport__write8(data, 0x1); 148 kvm__irq_line(self, VIRTIO_NET_IRQ, 0); 149 break; 150 case VIRTIO_MSI_CONFIG_VECTOR: 151 ioport__write16(data, net_device.config_vector); 152 break; 153 default: 154 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 155 }; 156 157 mutex_unlock(&net_device.mutex); 158 159 return ret; 160 } 161 162 static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index) 163 { 164 thread_pool__signal_work(net_device.jobs[queue_index]); 165 } 166 167 static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 168 { 169 unsigned long offset = port - IOPORT_VIRTIO_NET; 170 bool ret = true; 171 172 mutex_lock(&net_device.mutex); 173 174 switch (offset) { 175 case VIRTIO_PCI_GUEST_FEATURES: 176 net_device.guest_features = ioport__read32(data); 177 break; 178 case VIRTIO_PCI_QUEUE_PFN: { 179 struct virt_queue *queue; 180 void *p; 181 182 assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 183 184 queue = &net_device.vqs[net_device.queue_selector]; 185 queue->pfn = ioport__read32(data); 186 p = guest_flat_to_host(self, queue->pfn << 12); 187 188 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096); 189 190 if (net_device.queue_selector == VIRTIO_NET_TX_QUEUE) 191 net_device.jobs[net_device.queue_selector] = 192 thread_pool__add_jobtype(self, virtio_net_tx_callback, queue); 193 else if (net_device.queue_selector == VIRTIO_NET_RX_QUEUE) 194 net_device.jobs[net_device.queue_selector] = 195 thread_pool__add_jobtype(self, virtio_net_rx_callback, queue); 196 197 break; 198 } 199 case VIRTIO_PCI_QUEUE_SEL: 200 net_device.queue_selector = ioport__read16(data); 201 break; 202 case VIRTIO_PCI_QUEUE_NOTIFY: { 203 uint16_t queue_index; 204 queue_index = ioport__read16(data); 205 virtio_net_handle_callback(self, queue_index); 206 break; 207 } 208 case VIRTIO_PCI_STATUS: 209 net_device.status = ioport__read8(data); 210 break; 211 case VIRTIO_MSI_CONFIG_VECTOR: 212 net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 213 break; 214 case VIRTIO_MSI_QUEUE_VECTOR: 215 break; 216 default: 217 ret = false; 218 }; 219 220 mutex_unlock(&net_device.mutex); 221 return ret; 222 } 223 224 static struct ioport_operations virtio_net_io_ops = { 225 .io_in = virtio_net_pci_io_in, 226 .io_out = virtio_net_pci_io_out, 227 }; 228 229 #define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 230 #define PCI_DEVICE_ID_VIRTIO_NET 0x1000 231 #define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4 232 #define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001 233 234 static struct pci_device_header virtio_net_pci_device = { 235 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 236 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 237 .header_type = PCI_HEADER_TYPE_NORMAL, 238 .revision_id = 0, 239 .class = 0x020000, 240 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 241 .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 242 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 243 .irq_pin = VIRTIO_NET_PIN, 244 .irq_line = VIRTIO_NET_IRQ, 245 }; 246 247 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 248 { 249 int sock = socket(AF_INET, SOCK_STREAM, 0); 250 int i, pid, status, offload, hdr_len; 251 struct sockaddr_in sin = {0}; 252 struct ifreq ifr; 253 254 for (i = 0 ; i < 6 ; i++) 255 net_device.net_config.mac[i] = params->guest_mac[i]; 256 257 net_device.tap_fd = open("/dev/net/tun", O_RDWR); 258 if (net_device.tap_fd < 0) { 259 warning("Unable to open /dev/net/tun"); 260 goto fail; 261 } 262 263 memset(&ifr, 0, sizeof(ifr)); 264 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 265 if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) { 266 warning("Config tap device error. Are you root?"); 267 goto fail; 268 } 269 270 strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 271 272 if (ioctl(net_device.tap_fd, TUNSETNOCSUM, 1) < 0) { 273 warning("Config tap device TUNSETNOCSUM error"); 274 goto fail; 275 } 276 277 hdr_len = sizeof(struct virtio_net_hdr); 278 if (ioctl(net_device.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 279 warning("Config tap device TUNSETVNETHDRSZ error"); 280 goto fail; 281 } 282 283 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 284 if (ioctl(net_device.tap_fd, TUNSETOFFLOAD, offload) < 0) { 285 warning("Config tap device TUNSETOFFLOAD error"); 286 goto fail; 287 } 288 289 if (strcmp(params->script, "none")) { 290 pid = fork(); 291 if (pid == 0) { 292 execl(params->script, params->script, net_device.tap_name, NULL); 293 _exit(1); 294 } else { 295 waitpid(pid, &status, 0); 296 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 297 warning("Fail to setup tap by %s", params->script); 298 goto fail; 299 } 300 } 301 } else { 302 memset(&ifr, 0, sizeof(ifr)); 303 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 304 sin.sin_addr.s_addr = inet_addr(params->host_ip); 305 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 306 ifr.ifr_addr.sa_family = AF_INET; 307 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 308 warning("Could not set ip address on tap device"); 309 goto fail; 310 } 311 } 312 313 memset(&ifr, 0, sizeof(ifr)); 314 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 315 ioctl(sock, SIOCGIFFLAGS, &ifr); 316 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 317 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 318 warning("Could not bring tap device up"); 319 320 close(sock); 321 322 return 1; 323 324 fail: 325 if (sock >= 0) 326 close(sock); 327 if (net_device.tap_fd >= 0) 328 close(net_device.tap_fd); 329 330 return 0; 331 } 332 333 void virtio_net__init(const struct virtio_net_parameters *params) 334 { 335 if (virtio_net__tap_init(params)) { 336 pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM); 337 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 338 } 339 } 340