1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2021 Google, Inc.
5 */
6
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/sched.h>
15 #include <linux/timer.h>
16 #include <linux/workqueue.h>
17 #include <linux/utsname.h>
18 #include <linux/version.h>
19 #include <net/sch_generic.h>
20 #include <net/xdp_sock_drv.h>
21 #include "gve.h"
22 #include "gve_dqo.h"
23 #include "gve_adminq.h"
24 #include "gve_register.h"
25
26 #define GVE_DEFAULT_RX_COPYBREAK (256)
27
28 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
29 #define GVE_VERSION "1.0.0"
30 #define GVE_VERSION_PREFIX "GVE-"
31
32 // Minimum amount of time between queue kicks in msec (10 seconds)
33 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
34
35 char gve_driver_name[] = "gve";
36 const char gve_version_str[] = GVE_VERSION;
37 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
38
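/* Report driver and OS version information to the device over the admin
 * queue so it can check compatibility. Devices that do not implement this
 * command return -EOPNOTSUPP, which is treated as success.
 */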
39 static int gve_verify_driver_compatibility(struct gve_priv *priv)
40 {
41 int err;
42 struct gve_driver_info *driver_info;
43 dma_addr_t driver_info_bus;
44
45 driver_info = dma_alloc_coherent(&priv->pdev->dev,
46 sizeof(struct gve_driver_info),
47 &driver_info_bus, GFP_KERNEL);
48 if (!driver_info)
49 return -ENOMEM;
50
51 *driver_info = (struct gve_driver_info) {
52 .os_type = 1, /* Linux */
53 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
54 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
55 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
56 .driver_capability_flags = {
57 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
61 },
62 };
63 strscpy(driver_info->os_version_str1, utsname()->release,
64 sizeof(driver_info->os_version_str1));
65 strscpy(driver_info->os_version_str2, utsname()->version,
66 sizeof(driver_info->os_version_str2));
67
68 err = gve_adminq_verify_driver_compatibility(priv,
69 sizeof(struct gve_driver_info),
70 driver_info_bus);
71
72 /* It's ok if the device doesn't support this */
73 if (err == -EOPNOTSUPP)
74 err = 0;
75
76 dma_free_coherent(&priv->pdev->dev,
77 sizeof(struct gve_driver_info),
78 driver_info, driver_info_bus);
79 return err;
80 }
81
82 static netdev_features_t gve_features_check(struct sk_buff *skb,
83 struct net_device *dev,
84 netdev_features_t features)
85 {
86 struct gve_priv *priv = netdev_priv(dev);
87
88 if (!gve_is_gqi(priv))
89 return gve_features_check_dqo(skb, dev, features);
90
91 return features;
92 }
93
94 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
95 {
96 struct gve_priv *priv = netdev_priv(dev);
97
98 if (gve_is_gqi(priv))
99 return gve_tx(skb, dev);
100 else
101 return gve_tx_dqo(skb, dev);
102 }
103
104 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
105 {
106 struct gve_priv *priv = netdev_priv(dev);
107 unsigned int start;
108 u64 packets, bytes;
109 int num_tx_queues;
110 int ring;
111
112 num_tx_queues = gve_num_tx_queues(priv);
113 if (priv->rx) {
114 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
115 do {
116 start =
117 u64_stats_fetch_begin(&priv->rx[ring].statss);
118 packets = priv->rx[ring].rpackets;
119 bytes = priv->rx[ring].rbytes;
120 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
121 start));
122 s->rx_packets += packets;
123 s->rx_bytes += bytes;
124 }
125 }
126 if (priv->tx) {
127 for (ring = 0; ring < num_tx_queues; ring++) {
128 do {
129 start =
130 u64_stats_fetch_begin(&priv->tx[ring].statss);
131 packets = priv->tx[ring].pkt_done;
132 bytes = priv->tx[ring].bytes_done;
133 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
134 start));
135 s->tx_packets += packets;
136 s->tx_bytes += bytes;
137 }
138 }
139 }
140
141 static int gve_alloc_counter_array(struct gve_priv *priv)
142 {
143 priv->counter_array =
144 dma_alloc_coherent(&priv->pdev->dev,
145 priv->num_event_counters *
146 sizeof(*priv->counter_array),
147 &priv->counter_array_bus, GFP_KERNEL);
148 if (!priv->counter_array)
149 return -ENOMEM;
150
151 return 0;
152 }
153
154 static void gve_free_counter_array(struct gve_priv *priv)
155 {
156 if (!priv->counter_array)
157 return;
158
159 dma_free_coherent(&priv->pdev->dev,
160 priv->num_event_counters *
161 sizeof(*priv->counter_array),
162 priv->counter_array, priv->counter_array_bus);
163 priv->counter_array = NULL;
164 }
165
166 /* NIC requests to report stats */
167 static void gve_stats_report_task(struct work_struct *work)
168 {
169 struct gve_priv *priv = container_of(work, struct gve_priv,
170 stats_report_task);
171 if (gve_get_do_report_stats(priv)) {
172 gve_handle_report_stats(priv);
173 gve_clear_do_report_stats(priv);
174 }
175 }
176
177 static void gve_stats_report_schedule(struct gve_priv *priv)
178 {
179 if (!gve_get_probe_in_progress(priv) &&
180 !gve_get_reset_in_progress(priv)) {
181 gve_set_do_report_stats(priv);
182 queue_work(priv->gve_wq, &priv->stats_report_task);
183 }
184 }
185
186 static void gve_stats_report_timer(struct timer_list *t)
187 {
188 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
189
190 mod_timer(&priv->stats_report_timer,
191 round_jiffies(jiffies +
192 msecs_to_jiffies(priv->stats_report_timer_period)));
193 gve_stats_report_schedule(priv);
194 }
195
196 static int gve_alloc_stats_report(struct gve_priv *priv)
197 {
198 int tx_stats_num, rx_stats_num;
199
200 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
201 gve_num_tx_queues(priv);
202 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
203 priv->rx_cfg.num_queues;
204 priv->stats_report_len = struct_size(priv->stats_report, stats,
205 size_add(tx_stats_num, rx_stats_num));
206 priv->stats_report =
207 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
208 &priv->stats_report_bus, GFP_KERNEL);
209 if (!priv->stats_report)
210 return -ENOMEM;
211 /* Set up timer for the report-stats task */
212 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
213 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
214 return 0;
215 }
216
217 static void gve_free_stats_report(struct gve_priv *priv)
218 {
219 if (!priv->stats_report)
220 return;
221
222 del_timer_sync(&priv->stats_report_timer);
223 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
224 priv->stats_report, priv->stats_report_bus);
225 priv->stats_report = NULL;
226 }
227
228 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
229 {
230 struct gve_priv *priv = arg;
231
232 queue_work(priv->gve_wq, &priv->service_task);
233 return IRQ_HANDLED;
234 }
235
236 static irqreturn_t gve_intr(int irq, void *arg)
237 {
238 struct gve_notify_block *block = arg;
239 struct gve_priv *priv = block->priv;
240
241 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
242 napi_schedule_irqoff(&block->napi);
243 return IRQ_HANDLED;
244 }
245
246 static irqreturn_t gve_intr_dqo(int irq, void *arg)
247 {
248 struct gve_notify_block *block = arg;
249
250 /* Interrupts are automatically masked */
251 napi_schedule_irqoff(&block->napi);
252 return IRQ_HANDLED;
253 }
254
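/* NAPI poll handler for the GQI queue formats. TX (and XDP TX) work is
 * cleaned first; if TX or RX still has work once the budget is used the poll
 * is rescheduled, otherwise the IRQ is re-armed via the doorbell and any work
 * that raced with the ACK is picked up by rescheduling NAPI.
 */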
255 static int gve_napi_poll(struct napi_struct *napi, int budget)
256 {
257 struct gve_notify_block *block;
258 __be32 __iomem *irq_doorbell;
259 bool reschedule = false;
260 struct gve_priv *priv;
261 int work_done = 0;
262
263 block = container_of(napi, struct gve_notify_block, napi);
264 priv = block->priv;
265
266 if (block->tx) {
267 if (block->tx->q_num < priv->tx_cfg.num_queues)
268 reschedule |= gve_tx_poll(block, budget);
269 else if (budget)
270 reschedule |= gve_xdp_poll(block, budget);
271 }
272
273 if (!budget)
274 return 0;
275
276 if (block->rx) {
277 work_done = gve_rx_poll(block, budget);
278 reschedule |= work_done == budget;
279 }
280
281 if (reschedule)
282 return budget;
283
284 /* Complete processing - don't unmask irq if busy polling is enabled */
285 if (likely(napi_complete_done(napi, work_done))) {
286 irq_doorbell = gve_irq_doorbell(priv, block);
287 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
288
289 /* Ensure IRQ ACK is visible before we check pending work.
290 * If queue had issued updates, it would be truly visible.
291 */
292 mb();
293
294 if (block->tx)
295 reschedule |= gve_tx_clean_pending(priv, block->tx);
296 if (block->rx)
297 reschedule |= gve_rx_work_pending(block->rx);
298
299 if (reschedule && napi_schedule(napi))
300 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
301 }
302 return work_done;
303 }
304
305 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
306 {
307 struct gve_notify_block *block =
308 container_of(napi, struct gve_notify_block, napi);
309 struct gve_priv *priv = block->priv;
310 bool reschedule = false;
311 int work_done = 0;
312
313 if (block->tx)
314 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
315
316 if (!budget)
317 return 0;
318
319 if (block->rx) {
320 work_done = gve_rx_poll_dqo(block, budget);
321 reschedule |= work_done == budget;
322 }
323
324 if (reschedule)
325 return budget;
326
327 if (likely(napi_complete_done(napi, work_done))) {
328 /* Enable interrupts again.
329 *
330 * We don't need to repoll afterwards because HW supports the
331 * PCI MSI-X PBA feature.
332 *
333 * Another interrupt would be triggered if a new event came in
334 * since the last one.
335 */
336 gve_write_irq_doorbell_dqo(priv, block,
337 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
338 }
339
340 return work_done;
341 }
342
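/* Allocate and request MSI-X vectors: one per notification block plus one
 * management vector. If fewer vectors are granted than requested, the TX and
 * RX max queue counts are scaled down to fit the vectors that were enabled.
 */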
343 static int gve_alloc_notify_blocks(struct gve_priv *priv)
344 {
345 int num_vecs_requested = priv->num_ntfy_blks + 1;
346 unsigned int active_cpus;
347 int vecs_enabled;
348 int i, j;
349 int err;
350
351 priv->msix_vectors = kvcalloc(num_vecs_requested,
352 sizeof(*priv->msix_vectors), GFP_KERNEL);
353 if (!priv->msix_vectors)
354 return -ENOMEM;
355 for (i = 0; i < num_vecs_requested; i++)
356 priv->msix_vectors[i].entry = i;
357 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
358 GVE_MIN_MSIX, num_vecs_requested);
359 if (vecs_enabled < 0) {
360 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
361 GVE_MIN_MSIX, vecs_enabled);
362 err = vecs_enabled;
363 goto abort_with_msix_vectors;
364 }
365 if (vecs_enabled != num_vecs_requested) {
366 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
367 int vecs_per_type = new_num_ntfy_blks / 2;
368 int vecs_left = new_num_ntfy_blks % 2;
369
370 priv->num_ntfy_blks = new_num_ntfy_blks;
371 priv->mgmt_msix_idx = priv->num_ntfy_blks;
372 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
373 vecs_per_type);
374 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
375 vecs_per_type + vecs_left);
376 dev_err(&priv->pdev->dev,
377 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
378 vecs_enabled, priv->tx_cfg.max_queues,
379 priv->rx_cfg.max_queues);
380 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
381 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
382 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
383 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
384 }
385 /* Half the notification blocks go to TX and half to RX */
386 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
387
388 /* Setup Management Vector - the last vector */
389 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
390 pci_name(priv->pdev));
391 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
392 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
393 if (err) {
394 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
395 goto abort_with_msix_enabled;
396 }
397 priv->irq_db_indices =
398 dma_alloc_coherent(&priv->pdev->dev,
399 priv->num_ntfy_blks *
400 sizeof(*priv->irq_db_indices),
401 &priv->irq_db_indices_bus, GFP_KERNEL);
402 if (!priv->irq_db_indices) {
403 err = -ENOMEM;
404 goto abort_with_mgmt_vector;
405 }
406
407 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
408 sizeof(*priv->ntfy_blocks), GFP_KERNEL);
409 if (!priv->ntfy_blocks) {
410 err = -ENOMEM;
411 goto abort_with_irq_db_indices;
412 }
413
414 /* Setup the other blocks - the first n-1 vectors */
415 for (i = 0; i < priv->num_ntfy_blks; i++) {
416 struct gve_notify_block *block = &priv->ntfy_blocks[i];
417 int msix_idx = i;
418
419 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
420 i, pci_name(priv->pdev));
421 block->priv = priv;
422 err = request_irq(priv->msix_vectors[msix_idx].vector,
423 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
424 0, block->name, block);
425 if (err) {
426 dev_err(&priv->pdev->dev,
427 "Failed to receive msix vector %d\n", i);
428 goto abort_with_some_ntfy_blocks;
429 }
430 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
431 get_cpu_mask(i % active_cpus));
432 block->irq_db_index = &priv->irq_db_indices[i].index;
433 }
434 return 0;
435 abort_with_some_ntfy_blocks:
436 for (j = 0; j < i; j++) {
437 struct gve_notify_block *block = &priv->ntfy_blocks[j];
438 int msix_idx = j;
439
440 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
441 NULL);
442 free_irq(priv->msix_vectors[msix_idx].vector, block);
443 }
444 kvfree(priv->ntfy_blocks);
445 priv->ntfy_blocks = NULL;
446 abort_with_irq_db_indices:
447 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
448 sizeof(*priv->irq_db_indices),
449 priv->irq_db_indices, priv->irq_db_indices_bus);
450 priv->irq_db_indices = NULL;
451 abort_with_mgmt_vector:
452 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
453 abort_with_msix_enabled:
454 pci_disable_msix(priv->pdev);
455 abort_with_msix_vectors:
456 kvfree(priv->msix_vectors);
457 priv->msix_vectors = NULL;
458 return err;
459 }
460
461 static void gve_free_notify_blocks(struct gve_priv *priv)
462 {
463 int i;
464
465 if (!priv->msix_vectors)
466 return;
467
468 /* Free the irqs */
469 for (i = 0; i < priv->num_ntfy_blks; i++) {
470 struct gve_notify_block *block = &priv->ntfy_blocks[i];
471 int msix_idx = i;
472
473 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
474 NULL);
475 free_irq(priv->msix_vectors[msix_idx].vector, block);
476 }
477 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
478 kvfree(priv->ntfy_blocks);
479 priv->ntfy_blocks = NULL;
480 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
481 sizeof(*priv->irq_db_indices),
482 priv->irq_db_indices, priv->irq_db_indices_bus);
483 priv->irq_db_indices = NULL;
484 pci_disable_msix(priv->pdev);
485 kvfree(priv->msix_vectors);
486 priv->msix_vectors = NULL;
487 }
488
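/* Allocate the event counter array, notification blocks and stats report,
 * then hand their addresses to the device over the admin queue. DQO formats
 * additionally fetch the packet type lookup table.
 */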
489 static int gve_setup_device_resources(struct gve_priv *priv)
490 {
491 int err;
492
493 err = gve_alloc_counter_array(priv);
494 if (err)
495 return err;
496 err = gve_alloc_notify_blocks(priv);
497 if (err)
498 goto abort_with_counter;
499 err = gve_alloc_stats_report(priv);
500 if (err)
501 goto abort_with_ntfy_blocks;
502 err = gve_adminq_configure_device_resources(priv,
503 priv->counter_array_bus,
504 priv->num_event_counters,
505 priv->irq_db_indices_bus,
506 priv->num_ntfy_blks);
507 if (unlikely(err)) {
508 dev_err(&priv->pdev->dev,
509 "could not setup device_resources: err=%d\n", err);
510 err = -ENXIO;
511 goto abort_with_stats_report;
512 }
513
514 if (!gve_is_gqi(priv)) {
515 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
516 GFP_KERNEL);
517 if (!priv->ptype_lut_dqo) {
518 err = -ENOMEM;
519 goto abort_with_stats_report;
520 }
521 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
522 if (err) {
523 dev_err(&priv->pdev->dev,
524 "Failed to get ptype map: err=%d\n", err);
525 goto abort_with_ptype_lut;
526 }
527 }
528
529 err = gve_adminq_report_stats(priv, priv->stats_report_len,
530 priv->stats_report_bus,
531 GVE_STATS_REPORT_TIMER_PERIOD);
532 if (err)
533 dev_err(&priv->pdev->dev,
534 "Failed to report stats: err=%d\n", err);
535 gve_set_device_resources_ok(priv);
536 return 0;
537
538 abort_with_ptype_lut:
539 kvfree(priv->ptype_lut_dqo);
540 priv->ptype_lut_dqo = NULL;
541 abort_with_stats_report:
542 gve_free_stats_report(priv);
543 abort_with_ntfy_blocks:
544 gve_free_notify_blocks(priv);
545 abort_with_counter:
546 gve_free_counter_array(priv);
547
548 return err;
549 }
550
551 static void gve_trigger_reset(struct gve_priv *priv);
552
553 static void gve_teardown_device_resources(struct gve_priv *priv)
554 {
555 int err;
556
557 /* Tell device its resources are being freed */
558 if (gve_get_device_resources_ok(priv)) {
559 /* detach the stats report */
560 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
561 if (err) {
562 dev_err(&priv->pdev->dev,
563 "Failed to detach stats report: err=%d\n", err);
564 gve_trigger_reset(priv);
565 }
566 err = gve_adminq_deconfigure_device_resources(priv);
567 if (err) {
568 dev_err(&priv->pdev->dev,
569 "Could not deconfigure device resources: err=%d\n",
570 err);
571 gve_trigger_reset(priv);
572 }
573 }
574
575 kvfree(priv->ptype_lut_dqo);
576 priv->ptype_lut_dqo = NULL;
577
578 gve_free_counter_array(priv);
579 gve_free_notify_blocks(priv);
580 gve_free_stats_report(priv);
581 gve_clear_device_resources_ok(priv);
582 }
583
584 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
585 int (*gve_poll)(struct napi_struct *, int))
586 {
587 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
588
589 netif_napi_add(priv->dev, &block->napi, gve_poll);
590 }
591
592 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
593 {
594 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
595
596 netif_napi_del(&block->napi);
597 }
598
599 static int gve_register_xdp_qpls(struct gve_priv *priv)
600 {
601 int start_id;
602 int err;
603 int i;
604
605 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
606 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
607 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
608 if (err) {
609 netif_err(priv, drv, priv->dev,
610 "failed to register queue page list %d\n",
611 priv->qpls[i].id);
612 /* This failure will trigger a reset - no need to clean
613 * up
614 */
615 return err;
616 }
617 }
618 return 0;
619 }
620
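/* Register the TX and RX queue page lists with the device. A failure here
 * will trigger a reset, so no unwinding is done.
 */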
621 static int gve_register_qpls(struct gve_priv *priv)
622 {
623 int start_id;
624 int err;
625 int i;
626
627 start_id = gve_tx_start_qpl_id(priv);
628 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
629 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
630 if (err) {
631 netif_err(priv, drv, priv->dev,
632 "failed to register queue page list %d\n",
633 priv->qpls[i].id);
634 /* This failure will trigger a reset - no need to clean
635 * up
636 */
637 return err;
638 }
639 }
640
641 start_id = gve_rx_start_qpl_id(priv);
642 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
643 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
644 if (err) {
645 netif_err(priv, drv, priv->dev,
646 "failed to register queue page list %d\n",
647 priv->qpls[i].id);
648 /* This failure will trigger a reset - no need to clean
649 * up
650 */
651 return err;
652 }
653 }
654 return 0;
655 }
656
657 static int gve_unregister_xdp_qpls(struct gve_priv *priv)
658 {
659 int start_id;
660 int err;
661 int i;
662
663 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
664 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
665 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
666 /* This failure will trigger a reset - no need to clean up */
667 if (err) {
668 netif_err(priv, drv, priv->dev,
669 "Failed to unregister queue page list %d\n",
670 priv->qpls[i].id);
671 return err;
672 }
673 }
674 return 0;
675 }
676
677 static int gve_unregister_qpls(struct gve_priv *priv)
678 {
679 int start_id;
680 int err;
681 int i;
682
683 start_id = gve_tx_start_qpl_id(priv);
684 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
685 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
686 /* This failure will trigger a reset - no need to clean up */
687 if (err) {
688 netif_err(priv, drv, priv->dev,
689 "Failed to unregister queue page list %d\n",
690 priv->qpls[i].id);
691 return err;
692 }
693 }
694
695 start_id = gve_rx_start_qpl_id(priv);
696 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
697 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
698 /* This failure will trigger a reset - no need to clean up */
699 if (err) {
700 netif_err(priv, drv, priv->dev,
701 "Failed to unregister queue page list %d\n",
702 priv->qpls[i].id);
703 return err;
704 }
705 }
706 return 0;
707 }
708
709 static int gve_create_xdp_rings(struct gve_priv *priv)
710 {
711 int err;
712
713 err = gve_adminq_create_tx_queues(priv,
714 gve_xdp_tx_start_queue_id(priv),
715 priv->num_xdp_queues);
716 if (err) {
717 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
718 priv->num_xdp_queues);
719 /* This failure will trigger a reset - no need to clean
720 * up
721 */
722 return err;
723 }
724 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
725 priv->num_xdp_queues);
726
727 return 0;
728 }
729
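/* Create the TX and RX queues on the device and make the initial RX buffers
 * available to the NIC (via the doorbell for GQI, by posting buffers for DQO).
 */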
730 static int gve_create_rings(struct gve_priv *priv)
731 {
732 int num_tx_queues = gve_num_tx_queues(priv);
733 int err;
734 int i;
735
736 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
737 if (err) {
738 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
739 num_tx_queues);
740 /* This failure will trigger a reset - no need to clean
741 * up
742 */
743 return err;
744 }
745 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
746 num_tx_queues);
747
748 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
749 if (err) {
750 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
751 priv->rx_cfg.num_queues);
752 /* This failure will trigger a reset - no need to clean
753 * up
754 */
755 return err;
756 }
757 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
758 priv->rx_cfg.num_queues);
759
760 if (gve_is_gqi(priv)) {
761 /* Rx data ring has been prefilled with packet buffers at queue
762 * allocation time.
763 *
764 * Write the doorbell to provide descriptor slots and packet
765 * buffers to the NIC.
766 */
767 for (i = 0; i < priv->rx_cfg.num_queues; i++)
768 gve_rx_write_doorbell(priv, &priv->rx[i]);
769 } else {
770 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
771 /* Post buffers and ring doorbell. */
772 gve_rx_post_buffers_dqo(&priv->rx[i]);
773 }
774 }
775
776 return 0;
777 }
778
779 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
780 int (*napi_poll)(struct napi_struct *napi,
781 int budget))
782 {
783 int start_id = gve_xdp_tx_start_queue_id(priv);
784 int i;
785
786 /* Add xdp tx napi & init sync stats*/
787 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
788 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
789
790 u64_stats_init(&priv->tx[i].statss);
791 priv->tx[i].ntfy_id = ntfy_idx;
792 gve_add_napi(priv, ntfy_idx, napi_poll);
793 }
794 }
795
796 static void add_napi_init_sync_stats(struct gve_priv *priv,
797 int (*napi_poll)(struct napi_struct *napi,
798 int budget))
799 {
800 int i;
801
802 /* Add tx napi & init sync stats*/
803 for (i = 0; i < gve_num_tx_queues(priv); i++) {
804 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
805
806 u64_stats_init(&priv->tx[i].statss);
807 priv->tx[i].ntfy_id = ntfy_idx;
808 gve_add_napi(priv, ntfy_idx, napi_poll);
809 }
810 /* Add rx napi & init sync stats*/
811 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
812 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
813
814 u64_stats_init(&priv->rx[i].statss);
815 priv->rx[i].ntfy_id = ntfy_idx;
816 gve_add_napi(priv, ntfy_idx, napi_poll);
817 }
818 }
819
820 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
821 {
822 if (gve_is_gqi(priv)) {
823 gve_tx_free_rings_gqi(priv, start_id, num_rings);
824 } else {
825 gve_tx_free_rings_dqo(priv);
826 }
827 }
828
829 static int gve_alloc_xdp_rings(struct gve_priv *priv)
830 {
831 int start_id;
832 int err = 0;
833
834 if (!priv->num_xdp_queues)
835 return 0;
836
837 start_id = gve_xdp_tx_start_queue_id(priv);
838 err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
839 if (err)
840 return err;
841 add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
842
843 return 0;
844 }
845
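/* Allocate host-side state for all TX and RX rings and register a NAPI
 * instance for each queue's notification block.
 */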
846 static int gve_alloc_rings(struct gve_priv *priv)
847 {
848 int err;
849
850 /* Setup tx rings */
851 priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
852 GFP_KERNEL);
853 if (!priv->tx)
854 return -ENOMEM;
855
856 if (gve_is_gqi(priv))
857 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
858 else
859 err = gve_tx_alloc_rings_dqo(priv);
860 if (err)
861 goto free_tx;
862
863 /* Setup rx rings */
864 priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
865 GFP_KERNEL);
866 if (!priv->rx) {
867 err = -ENOMEM;
868 goto free_tx_queue;
869 }
870
871 if (gve_is_gqi(priv))
872 err = gve_rx_alloc_rings(priv);
873 else
874 err = gve_rx_alloc_rings_dqo(priv);
875 if (err)
876 goto free_rx;
877
878 if (gve_is_gqi(priv))
879 add_napi_init_sync_stats(priv, gve_napi_poll);
880 else
881 add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
882
883 return 0;
884
885 free_rx:
886 kvfree(priv->rx);
887 priv->rx = NULL;
888 free_tx_queue:
889 gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
890 free_tx:
891 kvfree(priv->tx);
892 priv->tx = NULL;
893 return err;
894 }
895
896 static int gve_destroy_xdp_rings(struct gve_priv *priv)
897 {
898 int start_id;
899 int err;
900
901 start_id = gve_xdp_tx_start_queue_id(priv);
902 err = gve_adminq_destroy_tx_queues(priv,
903 start_id,
904 priv->num_xdp_queues);
905 if (err) {
906 netif_err(priv, drv, priv->dev,
907 "failed to destroy XDP queues\n");
908 /* This failure will trigger a reset - no need to clean up */
909 return err;
910 }
911 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
912
913 return 0;
914 }
915
916 static int gve_destroy_rings(struct gve_priv *priv)
917 {
918 int num_tx_queues = gve_num_tx_queues(priv);
919 int err;
920
921 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
922 if (err) {
923 netif_err(priv, drv, priv->dev,
924 "failed to destroy tx queues\n");
925 /* This failure will trigger a reset - no need to clean up */
926 return err;
927 }
928 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
929 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
930 if (err) {
931 netif_err(priv, drv, priv->dev,
932 "failed to destroy rx queues\n");
933 /* This failure will trigger a reset - no need to clean up */
934 return err;
935 }
936 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
937 return 0;
938 }
939
940 static void gve_rx_free_rings(struct gve_priv *priv)
941 {
942 if (gve_is_gqi(priv))
943 gve_rx_free_rings_gqi(priv);
944 else
945 gve_rx_free_rings_dqo(priv);
946 }
947
948 static void gve_free_xdp_rings(struct gve_priv *priv)
949 {
950 int ntfy_idx, start_id;
951 int i;
952
953 start_id = gve_xdp_tx_start_queue_id(priv);
954 if (priv->tx) {
955 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
956 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
957 gve_remove_napi(priv, ntfy_idx);
958 }
959 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
960 }
961 }
962
963 static void gve_free_rings(struct gve_priv *priv)
964 {
965 int num_tx_queues = gve_num_tx_queues(priv);
966 int ntfy_idx;
967 int i;
968
969 if (priv->tx) {
970 for (i = 0; i < num_tx_queues; i++) {
971 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
972 gve_remove_napi(priv, ntfy_idx);
973 }
974 gve_tx_free_rings(priv, 0, num_tx_queues);
975 kvfree(priv->tx);
976 priv->tx = NULL;
977 }
978 if (priv->rx) {
979 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
980 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
981 gve_remove_napi(priv, ntfy_idx);
982 }
983 gve_rx_free_rings(priv);
984 kvfree(priv->rx);
985 priv->rx = NULL;
986 }
987 }
988
989 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
990 struct page **page, dma_addr_t *dma,
991 enum dma_data_direction dir, gfp_t gfp_flags)
992 {
993 *page = alloc_page(gfp_flags);
994 if (!*page) {
995 priv->page_alloc_fail++;
996 return -ENOMEM;
997 }
998 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
999 if (dma_mapping_error(dev, *dma)) {
1000 priv->dma_mapping_error++;
1001 put_page(*page);
1002 return -ENOMEM;
1003 }
1004 return 0;
1005 }
1006
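/* Allocate and DMA-map the pages backing queue page list @id. On failure the
 * caller is responsible for freeing whatever was allocated.
 */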
1007 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
1008 int pages)
1009 {
1010 struct gve_queue_page_list *qpl = &priv->qpls[id];
1011 int err;
1012 int i;
1013
1014 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
1015 netif_err(priv, drv, priv->dev,
1016 "Reached max number of registered pages %llu > %llu\n",
1017 pages + priv->num_registered_pages,
1018 priv->max_registered_pages);
1019 return -EINVAL;
1020 }
1021
1022 qpl->id = id;
1023 qpl->num_entries = 0;
1024 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1025 /* caller handles clean up */
1026 if (!qpl->pages)
1027 return -ENOMEM;
1028 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1029 /* caller handles clean up */
1030 if (!qpl->page_buses)
1031 return -ENOMEM;
1032
1033 for (i = 0; i < pages; i++) {
1034 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1035 &qpl->page_buses[i],
1036 gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1037 /* caller handles clean up */
1038 if (err)
1039 return -ENOMEM;
1040 qpl->num_entries++;
1041 }
1042 priv->num_registered_pages += pages;
1043
1044 return 0;
1045 }
1046
1047 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1048 enum dma_data_direction dir)
1049 {
1050 if (!dma_mapping_error(dev, dma))
1051 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1052 if (page)
1053 put_page(page);
1054 }
1055
1056 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
1057 {
1058 struct gve_queue_page_list *qpl = &priv->qpls[id];
1059 int i;
1060
1061 if (!qpl->pages)
1062 return;
1063 if (!qpl->page_buses)
1064 goto free_pages;
1065
1066 for (i = 0; i < qpl->num_entries; i++)
1067 gve_free_page(&priv->pdev->dev, qpl->pages[i],
1068 qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1069
1070 kvfree(qpl->page_buses);
1071 qpl->page_buses = NULL;
1072 free_pages:
1073 kvfree(qpl->pages);
1074 qpl->pages = NULL;
1075 priv->num_registered_pages -= qpl->num_entries;
1076 }
1077
1078 static int gve_alloc_xdp_qpls(struct gve_priv *priv)
1079 {
1080 int start_id;
1081 int i, j;
1082 int err;
1083
1084 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1085 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
1086 err = gve_alloc_queue_page_list(priv, i,
1087 priv->tx_pages_per_qpl);
1088 if (err)
1089 goto free_qpls;
1090 }
1091
1092 return 0;
1093
1094 free_qpls:
1095 for (j = start_id; j <= i; j++)
1096 gve_free_queue_page_list(priv, j);
1097 return err;
1098 }
1099
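/* Allocate the queue page lists for all queues when running in a QPL queue
 * format: TX QPLs first, then RX QPLs, plus the bitmap used to hand out QPL
 * IDs to queues.
 */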
1100 static int gve_alloc_qpls(struct gve_priv *priv)
1101 {
1102 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1103 int page_count;
1104 int start_id;
1105 int i, j;
1106 int err;
1107
1108 if (!gve_is_qpl(priv))
1109 return 0;
1110
1111 priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
1112 if (!priv->qpls)
1113 return -ENOMEM;
1114
1115 start_id = gve_tx_start_qpl_id(priv);
1116 page_count = priv->tx_pages_per_qpl;
1117 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
1118 err = gve_alloc_queue_page_list(priv, i,
1119 page_count);
1120 if (err)
1121 goto free_qpls;
1122 }
1123
1124 start_id = gve_rx_start_qpl_id(priv);
1125
1126 /* For GQI_QPL number of pages allocated have 1:1 relationship with
1127 * number of descriptors. For DQO, number of pages required are
1128 * more than descriptors (because of out of order completions).
1129 */
1130 page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
1131 priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
1132 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
1133 err = gve_alloc_queue_page_list(priv, i,
1134 page_count);
1135 if (err)
1136 goto free_qpls;
1137 }
1138
1139 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
1140 sizeof(unsigned long) * BITS_PER_BYTE;
1141 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
1142 sizeof(unsigned long), GFP_KERNEL);
1143 if (!priv->qpl_cfg.qpl_id_map) {
1144 err = -ENOMEM;
1145 goto free_qpls;
1146 }
1147
1148 return 0;
1149
1150 free_qpls:
1151 for (j = 0; j <= i; j++)
1152 gve_free_queue_page_list(priv, j);
1153 kvfree(priv->qpls);
1154 priv->qpls = NULL;
1155 return err;
1156 }
1157
1158 static void gve_free_xdp_qpls(struct gve_priv *priv)
1159 {
1160 int start_id;
1161 int i;
1162
1163 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1164 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
1165 gve_free_queue_page_list(priv, i);
1166 }
1167
1168 static void gve_free_qpls(struct gve_priv *priv)
1169 {
1170 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1171 int i;
1172
1173 if (!priv->qpls)
1174 return;
1175
1176 kvfree(priv->qpl_cfg.qpl_id_map);
1177 priv->qpl_cfg.qpl_id_map = NULL;
1178
1179 for (i = 0; i < max_queues; i++)
1180 gve_free_queue_page_list(priv, i);
1181
1182 kvfree(priv->qpls);
1183 priv->qpls = NULL;
1184 }
1185
1186 /* Use this to schedule a reset when the device is capable of continuing
1187 * to handle other requests in its current state. If it is not, do a reset
1188 * in thread instead.
1189 */
1190 void gve_schedule_reset(struct gve_priv *priv)
1191 {
1192 gve_set_do_reset(priv);
1193 queue_work(priv->gve_wq, &priv->service_task);
1194 }
1195
1196 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1197 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1198 static void gve_turndown(struct gve_priv *priv);
1199 static void gve_turnup(struct gve_priv *priv);
1200
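/* Register XDP RX queue info and the memory model for every RX queue, and
 * wire up any AF_XDP buffer pools already bound to a queue id.
 */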
1201 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1202 {
1203 struct napi_struct *napi;
1204 struct gve_rx_ring *rx;
1205 int err = 0;
1206 int i, j;
1207 u32 tx_qid;
1208
1209 if (!priv->num_xdp_queues)
1210 return 0;
1211
1212 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1213 rx = &priv->rx[i];
1214 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1215
1216 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1217 napi->napi_id);
1218 if (err)
1219 goto err;
1220 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1221 MEM_TYPE_PAGE_SHARED, NULL);
1222 if (err)
1223 goto err;
1224 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1225 if (rx->xsk_pool) {
1226 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1227 napi->napi_id);
1228 if (err)
1229 goto err;
1230 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1231 MEM_TYPE_XSK_BUFF_POOL, NULL);
1232 if (err)
1233 goto err;
1234 xsk_pool_set_rxq_info(rx->xsk_pool,
1235 &rx->xsk_rxq);
1236 }
1237 }
1238
1239 for (i = 0; i < priv->num_xdp_queues; i++) {
1240 tx_qid = gve_xdp_tx_queue_id(priv, i);
1241 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1242 }
1243 return 0;
1244
1245 err:
1246 for (j = i; j >= 0; j--) {
1247 rx = &priv->rx[j];
1248 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1249 xdp_rxq_info_unreg(&rx->xdp_rxq);
1250 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1251 xdp_rxq_info_unreg(&rx->xsk_rxq);
1252 }
1253 return err;
1254 }
1255
1256 static void gve_unreg_xdp_info(struct gve_priv *priv)
1257 {
1258 int i, tx_qid;
1259
1260 if (!priv->num_xdp_queues)
1261 return;
1262
1263 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1264 struct gve_rx_ring *rx = &priv->rx[i];
1265
1266 xdp_rxq_info_unreg(&rx->xdp_rxq);
1267 if (rx->xsk_pool) {
1268 xdp_rxq_info_unreg(&rx->xsk_rxq);
1269 rx->xsk_pool = NULL;
1270 }
1271 }
1272
1273 for (i = 0; i < priv->num_xdp_queues; i++) {
1274 tx_qid = gve_xdp_tx_queue_id(priv, i);
1275 priv->tx[tx_qid].xsk_pool = NULL;
1276 }
1277 }
1278
1279 static void gve_drain_page_cache(struct gve_priv *priv)
1280 {
1281 struct page_frag_cache *nc;
1282 int i;
1283
1284 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1285 nc = &priv->rx[i].page_cache;
1286 if (nc->va) {
1287 __page_frag_cache_drain(virt_to_page(nc->va),
1288 nc->pagecnt_bias);
1289 nc->va = NULL;
1290 }
1291 }
1292 }
1293
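/* ndo_open: allocate QPLs and rings, register them with the device, create
 * the queues and bring traffic up. On failure the device is reset unless a
 * reset is already in progress.
 */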
1294 static int gve_open(struct net_device *dev)
1295 {
1296 struct gve_priv *priv = netdev_priv(dev);
1297 int err;
1298
1299 if (priv->xdp_prog)
1300 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1301 else
1302 priv->num_xdp_queues = 0;
1303
1304 err = gve_alloc_qpls(priv);
1305 if (err)
1306 return err;
1307
1308 err = gve_alloc_rings(priv);
1309 if (err)
1310 goto free_qpls;
1311
1312 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1313 if (err)
1314 goto free_rings;
1315 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1316 if (err)
1317 goto free_rings;
1318
1319 err = gve_reg_xdp_info(priv, dev);
1320 if (err)
1321 goto free_rings;
1322
1323 err = gve_register_qpls(priv);
1324 if (err)
1325 goto reset;
1326
1327 if (!gve_is_gqi(priv)) {
1328 /* Hard code this for now. This may be tuned in the future for
1329 * performance.
1330 */
1331 priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
1332 }
1333 err = gve_create_rings(priv);
1334 if (err)
1335 goto reset;
1336
1337 gve_set_device_rings_ok(priv);
1338
1339 if (gve_get_report_stats(priv))
1340 mod_timer(&priv->stats_report_timer,
1341 round_jiffies(jiffies +
1342 msecs_to_jiffies(priv->stats_report_timer_period)));
1343
1344 gve_turnup(priv);
1345 queue_work(priv->gve_wq, &priv->service_task);
1346 priv->interface_up_cnt++;
1347 return 0;
1348
1349 free_rings:
1350 gve_free_rings(priv);
1351 free_qpls:
1352 gve_free_qpls(priv);
1353 return err;
1354
1355 reset:
1356 /* This must have been called from a reset due to the rtnl lock
1357 * so just return at this point.
1358 */
1359 if (gve_get_reset_in_progress(priv))
1360 return err;
1361 /* Otherwise reset before returning */
1362 gve_reset_and_teardown(priv, true);
1363 /* if this fails there is nothing we can do so just ignore the return */
1364 gve_reset_recovery(priv, false);
1365 /* return the original error */
1366 return err;
1367 }
1368
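/* ndo_stop: quiesce traffic, destroy the device queues, unregister the QPLs
 * and free all ring and QPL state.
 */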
1369 static int gve_close(struct net_device *dev)
1370 {
1371 struct gve_priv *priv = netdev_priv(dev);
1372 int err;
1373
1374 netif_carrier_off(dev);
1375 if (gve_get_device_rings_ok(priv)) {
1376 gve_turndown(priv);
1377 gve_drain_page_cache(priv);
1378 err = gve_destroy_rings(priv);
1379 if (err)
1380 goto err;
1381 err = gve_unregister_qpls(priv);
1382 if (err)
1383 goto err;
1384 gve_clear_device_rings_ok(priv);
1385 }
1386 del_timer_sync(&priv->stats_report_timer);
1387
1388 gve_unreg_xdp_info(priv);
1389 gve_free_rings(priv);
1390 gve_free_qpls(priv);
1391 priv->interface_down_cnt++;
1392 return 0;
1393
1394 err:
1395 /* This must have been called from a reset due to the rtnl lock
1396 * so just return at this point.
1397 */
1398 if (gve_get_reset_in_progress(priv))
1399 return err;
1400 /* Otherwise reset before returning */
1401 gve_reset_and_teardown(priv, true);
1402 return gve_reset_recovery(priv, false);
1403 }
1404
1405 static int gve_remove_xdp_queues(struct gve_priv *priv)
1406 {
1407 int err;
1408
1409 err = gve_destroy_xdp_rings(priv);
1410 if (err)
1411 return err;
1412
1413 err = gve_unregister_xdp_qpls(priv);
1414 if (err)
1415 return err;
1416
1417 gve_unreg_xdp_info(priv);
1418 gve_free_xdp_rings(priv);
1419 gve_free_xdp_qpls(priv);
1420 priv->num_xdp_queues = 0;
1421 return 0;
1422 }
1423
1424 static int gve_add_xdp_queues(struct gve_priv *priv)
1425 {
1426 int err;
1427
1428 priv->num_xdp_queues = priv->tx_cfg.num_queues;
1429
1430 err = gve_alloc_xdp_qpls(priv);
1431 if (err)
1432 goto err;
1433
1434 err = gve_alloc_xdp_rings(priv);
1435 if (err)
1436 goto free_xdp_qpls;
1437
1438 err = gve_reg_xdp_info(priv, priv->dev);
1439 if (err)
1440 goto free_xdp_rings;
1441
1442 err = gve_register_xdp_qpls(priv);
1443 if (err)
1444 goto free_xdp_rings;
1445
1446 err = gve_create_xdp_rings(priv);
1447 if (err)
1448 goto free_xdp_rings;
1449
1450 return 0;
1451
1452 free_xdp_rings:
1453 gve_free_xdp_rings(priv);
1454 free_xdp_qpls:
1455 gve_free_xdp_qpls(priv);
1456 err:
1457 priv->num_xdp_queues = 0;
1458 return err;
1459 }
1460
1461 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1462 {
1463 if (!gve_get_napi_enabled(priv))
1464 return;
1465
1466 if (link_status == netif_carrier_ok(priv->dev))
1467 return;
1468
1469 if (link_status) {
1470 netdev_info(priv->dev, "Device link is up.\n");
1471 netif_carrier_on(priv->dev);
1472 } else {
1473 netdev_info(priv->dev, "Device link is down.\n");
1474 netif_carrier_off(priv->dev);
1475 }
1476 }
1477
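/* Install or remove an XDP program. If the link is up, traffic is turned
 * down while the dedicated XDP TX queues are added or removed, then turned
 * back up.
 */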
1478 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1479 struct netlink_ext_ack *extack)
1480 {
1481 struct bpf_prog *old_prog;
1482 int err = 0;
1483 u32 status;
1484
1485 old_prog = READ_ONCE(priv->xdp_prog);
1486 if (!netif_carrier_ok(priv->dev)) {
1487 WRITE_ONCE(priv->xdp_prog, prog);
1488 if (old_prog)
1489 bpf_prog_put(old_prog);
1490 return 0;
1491 }
1492
1493 gve_turndown(priv);
1494 if (!old_prog && prog) {
1495 // Allocate XDP TX queues if an XDP program is
1496 // being installed
1497 err = gve_add_xdp_queues(priv);
1498 if (err)
1499 goto out;
1500 } else if (old_prog && !prog) {
1501 // Remove XDP TX queues if an XDP program is
1502 // being uninstalled
1503 err = gve_remove_xdp_queues(priv);
1504 if (err)
1505 goto out;
1506 }
1507 WRITE_ONCE(priv->xdp_prog, prog);
1508 if (old_prog)
1509 bpf_prog_put(old_prog);
1510
1511 out:
1512 gve_turnup(priv);
1513 status = ioread32be(&priv->reg_bar0->device_status);
1514 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1515 return err;
1516 }
1517
1518 static int gve_xsk_pool_enable(struct net_device *dev,
1519 struct xsk_buff_pool *pool,
1520 u16 qid)
1521 {
1522 struct gve_priv *priv = netdev_priv(dev);
1523 struct napi_struct *napi;
1524 struct gve_rx_ring *rx;
1525 int tx_qid;
1526 int err;
1527
1528 if (qid >= priv->rx_cfg.num_queues) {
1529 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1530 return -EINVAL;
1531 }
1532 if (xsk_pool_get_rx_frame_size(pool) <
1533 priv->dev->max_mtu + sizeof(struct ethhdr)) {
1534 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1535 return -EINVAL;
1536 }
1537
1538 err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1539 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1540 if (err)
1541 return err;
1542
1543 /* If XDP prog is not installed, return */
1544 if (!priv->xdp_prog)
1545 return 0;
1546
1547 rx = &priv->rx[qid];
1548 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1549 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1550 if (err)
1551 goto err;
1552
1553 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1554 MEM_TYPE_XSK_BUFF_POOL, NULL);
1555 if (err)
1556 goto err;
1557
1558 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1559 rx->xsk_pool = pool;
1560
1561 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1562 priv->tx[tx_qid].xsk_pool = pool;
1563
1564 return 0;
1565 err:
1566 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1567 xdp_rxq_info_unreg(&rx->xsk_rxq);
1568
1569 xsk_pool_dma_unmap(pool,
1570 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1571 return err;
1572 }
1573
1574 static int gve_xsk_pool_disable(struct net_device *dev,
1575 u16 qid)
1576 {
1577 struct gve_priv *priv = netdev_priv(dev);
1578 struct napi_struct *napi_rx;
1579 struct napi_struct *napi_tx;
1580 struct xsk_buff_pool *pool;
1581 int tx_qid;
1582
1583 pool = xsk_get_pool_from_qid(dev, qid);
1584 if (!pool)
1585 return -EINVAL;
1586 if (qid >= priv->rx_cfg.num_queues)
1587 return -EINVAL;
1588
1589 /* If XDP prog is not installed, unmap DMA and return */
1590 if (!priv->xdp_prog)
1591 goto done;
1592
1593 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1594 if (!netif_running(dev)) {
1595 priv->rx[qid].xsk_pool = NULL;
1596 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1597 priv->tx[tx_qid].xsk_pool = NULL;
1598 goto done;
1599 }
1600
1601 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1602 napi_disable(napi_rx); /* make sure current rx poll is done */
1603
1604 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1605 napi_disable(napi_tx); /* make sure current tx poll is done */
1606
1607 priv->rx[qid].xsk_pool = NULL;
1608 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1609 priv->tx[tx_qid].xsk_pool = NULL;
1610 smp_mb(); /* Make sure it is visible to the workers on datapath */
1611
1612 napi_enable(napi_rx);
1613 if (gve_rx_work_pending(&priv->rx[qid]))
1614 napi_schedule(napi_rx);
1615
1616 napi_enable(napi_tx);
1617 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1618 napi_schedule(napi_tx);
1619
1620 done:
1621 xsk_pool_dma_unmap(pool,
1622 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1623 return 0;
1624 }
1625
1626 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1627 {
1628 struct gve_priv *priv = netdev_priv(dev);
1629 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1630
1631 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1632 return -EINVAL;
1633
1634 if (flags & XDP_WAKEUP_TX) {
1635 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1636 struct napi_struct *napi =
1637 &priv->ntfy_blocks[tx->ntfy_id].napi;
1638
1639 if (!napi_if_scheduled_mark_missed(napi)) {
1640 /* Call local_bh_enable to trigger SoftIRQ processing */
1641 local_bh_disable();
1642 napi_schedule(napi);
1643 local_bh_enable();
1644 }
1645
1646 tx->xdp_xsk_wakeup++;
1647 }
1648
1649 return 0;
1650 }
1651
1652 static int verify_xdp_configuration(struct net_device *dev)
1653 {
1654 struct gve_priv *priv = netdev_priv(dev);
1655
1656 if (dev->features & NETIF_F_LRO) {
1657 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1658 return -EOPNOTSUPP;
1659 }
1660
1661 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1662 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1663 priv->queue_format);
1664 return -EOPNOTSUPP;
1665 }
1666
1667 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1668 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1669 dev->mtu);
1670 return -EOPNOTSUPP;
1671 }
1672
1673 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1674 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1675 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1676 priv->rx_cfg.num_queues,
1677 priv->tx_cfg.num_queues,
1678 priv->tx_cfg.max_queues);
1679 return -EINVAL;
1680 }
1681 return 0;
1682 }
1683
1684 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1685 {
1686 struct gve_priv *priv = netdev_priv(dev);
1687 int err;
1688
1689 err = verify_xdp_configuration(dev);
1690 if (err)
1691 return err;
1692 switch (xdp->command) {
1693 case XDP_SETUP_PROG:
1694 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1695 case XDP_SETUP_XSK_POOL:
1696 if (xdp->xsk.pool)
1697 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1698 else
1699 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1700 default:
1701 return -EINVAL;
1702 }
1703 }
1704
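/* Apply a new TX/RX queue configuration. If the interface is up this is done
 * by closing and reopening it; otherwise the new config simply takes effect
 * on the next open.
 */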
1705 int gve_adjust_queues(struct gve_priv *priv,
1706 struct gve_queue_config new_rx_config,
1707 struct gve_queue_config new_tx_config)
1708 {
1709 int err;
1710
1711 if (netif_carrier_ok(priv->dev)) {
1712 /* To make this process as simple as possible we teardown the
1713 * device, set the new configuration, and then bring the device
1714 * up again.
1715 */
1716 err = gve_close(priv->dev);
1717 /* we have already tried to reset in close,
1718 * just fail at this point
1719 */
1720 if (err)
1721 return err;
1722 priv->tx_cfg = new_tx_config;
1723 priv->rx_cfg = new_rx_config;
1724
1725 err = gve_open(priv->dev);
1726 if (err)
1727 goto err;
1728
1729 return 0;
1730 }
1731 /* Set the config for the next up. */
1732 priv->tx_cfg = new_tx_config;
1733 priv->rx_cfg = new_rx_config;
1734
1735 return 0;
1736 err:
1737 netif_err(priv, drv, priv->dev,
1738 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1739 gve_turndown(priv);
1740 return err;
1741 }
1742
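/* Stop all traffic without freeing any resources: disable NAPI on every
 * notification block and stop the TX queues. gve_turnup() is the inverse.
 */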
1743 static void gve_turndown(struct gve_priv *priv)
1744 {
1745 int idx;
1746
1747 if (netif_carrier_ok(priv->dev))
1748 netif_carrier_off(priv->dev);
1749
1750 if (!gve_get_napi_enabled(priv))
1751 return;
1752
1753 /* Disable napi to prevent more work from coming in */
1754 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1755 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1756 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1757
1758 napi_disable(&block->napi);
1759 }
1760 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1761 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1762 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1763
1764 napi_disable(&block->napi);
1765 }
1766
1767 /* Stop tx queues */
1768 netif_tx_disable(priv->dev);
1769
1770 gve_clear_napi_enabled(priv);
1771 gve_clear_report_stats(priv);
1772 }
1773
1774 static void gve_turnup(struct gve_priv *priv)
1775 {
1776 int idx;
1777
1778 /* Start the tx queues */
1779 netif_tx_start_all_queues(priv->dev);
1780
1781 /* Enable napi and unmask interrupts for all queues */
1782 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1783 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1784 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1785
1786 napi_enable(&block->napi);
1787 if (gve_is_gqi(priv)) {
1788 iowrite32be(0, gve_irq_doorbell(priv, block));
1789 } else {
1790 gve_set_itr_coalesce_usecs_dqo(priv, block,
1791 priv->tx_coalesce_usecs);
1792 }
1793 }
1794 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1795 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1796 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1797
1798 napi_enable(&block->napi);
1799 if (gve_is_gqi(priv)) {
1800 iowrite32be(0, gve_irq_doorbell(priv, block));
1801 } else {
1802 gve_set_itr_coalesce_usecs_dqo(priv, block,
1803 priv->rx_coalesce_usecs);
1804 }
1805 }
1806
1807 gve_set_napi_enabled(priv);
1808 }
1809
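/* ndo_tx_timeout: if the NIC has reported completions the driver has not yet
 * processed, kick the queue by masking its IRQ and rescheduling NAPI (at most
 * once per MIN_TX_TIMEOUT_GAP); otherwise schedule a full reset.
 */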
1810 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1811 {
1812 struct gve_notify_block *block;
1813 struct gve_tx_ring *tx = NULL;
1814 struct gve_priv *priv;
1815 u32 last_nic_done;
1816 u32 current_time;
1817 u32 ntfy_idx;
1818
1819 netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1820 priv = netdev_priv(dev);
1821 if (txqueue > priv->tx_cfg.num_queues)
1822 goto reset;
1823
1824 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1825 if (ntfy_idx >= priv->num_ntfy_blks)
1826 goto reset;
1827
1828 block = &priv->ntfy_blocks[ntfy_idx];
1829 tx = block->tx;
1830
1831 current_time = jiffies_to_msecs(jiffies);
1832 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1833 goto reset;
1834
1835 /* Check to see if there are missed completions, which will allow us to
1836 * kick the queue.
1837 */
1838 last_nic_done = gve_tx_load_event_counter(priv, tx);
1839 if (last_nic_done - tx->done) {
1840 netdev_info(dev, "Kicking queue %d", txqueue);
1841 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1842 napi_schedule(&block->napi);
1843 tx->last_kick_msec = current_time;
1844 goto out;
1845 } // Else reset.
1846
1847 reset:
1848 gve_schedule_reset(priv);
1849
1850 out:
1851 if (tx)
1852 tx->queue_timeout++;
1853 priv->tx_timeo_cnt++;
1854 }
1855
1856 static int gve_set_features(struct net_device *netdev,
1857 netdev_features_t features)
1858 {
1859 const netdev_features_t orig_features = netdev->features;
1860 struct gve_priv *priv = netdev_priv(netdev);
1861 int err;
1862
1863 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1864 netdev->features ^= NETIF_F_LRO;
1865 if (netif_carrier_ok(netdev)) {
1866 /* To make this process as simple as possible we
1867 * teardown the device, set the new configuration,
1868 * and then bring the device up again.
1869 */
1870 err = gve_close(netdev);
1871 /* We have already tried to reset in close, just fail
1872 * at this point.
1873 */
1874 if (err)
1875 goto err;
1876
1877 err = gve_open(netdev);
1878 if (err)
1879 goto err;
1880 }
1881 }
1882
1883 return 0;
1884 err:
1885 /* Reverts the change on error. */
1886 netdev->features = orig_features;
1887 netif_err(priv, drv, netdev,
1888 "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1889 return err;
1890 }
1891
1892 static const struct net_device_ops gve_netdev_ops = {
1893 .ndo_start_xmit = gve_start_xmit,
1894 .ndo_features_check = gve_features_check,
1895 .ndo_open = gve_open,
1896 .ndo_stop = gve_close,
1897 .ndo_get_stats64 = gve_get_stats,
1898 .ndo_tx_timeout = gve_tx_timeout,
1899 .ndo_set_features = gve_set_features,
1900 .ndo_bpf = gve_xdp,
1901 .ndo_xdp_xmit = gve_xdp_xmit,
1902 .ndo_xsk_wakeup = gve_xsk_wakeup,
1903 };
1904
1905 static void gve_handle_status(struct gve_priv *priv, u32 status)
1906 {
1907 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1908 dev_info(&priv->pdev->dev, "Device requested reset.\n");
1909 gve_set_do_reset(priv);
1910 }
1911 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1912 priv->stats_report_trigger_cnt++;
1913 gve_set_do_report_stats(priv);
1914 }
1915 }
1916
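/* If a reset has been requested (and probe is not in progress), perform it
 * under the RTNL lock. gve_reset() is called with attempt_teardown == false,
 * i.e. turn the device down and reset right away.
 */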
1917 static void gve_handle_reset(struct gve_priv *priv)
1918 {
1919 /* A service task will be scheduled at the end of probe to catch any
1920 * resets that need to happen, and we don't want to reset until
1921 * probe is done.
1922 */
1923 if (gve_get_probe_in_progress(priv))
1924 return;
1925
1926 if (gve_get_do_reset(priv)) {
1927 rtnl_lock();
1928 gve_reset(priv, false);
1929 rtnl_unlock();
1930 }
1931 }
1932
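/* Fill the stats report shared with the device: bump written_count, then
 * write six per-TX-queue entries (frames sent and last completion are only
 * available on GQI) and two per-RX-queue entries, all in big-endian.
 */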
1933 void gve_handle_report_stats(struct gve_priv *priv)
1934 {
1935 struct stats *stats = priv->stats_report->stats;
1936 int idx, stats_idx = 0;
1937 unsigned int start = 0;
1938 u64 tx_bytes;
1939
1940 if (!gve_get_report_stats(priv))
1941 return;
1942
1943 be64_add_cpu(&priv->stats_report->written_count, 1);
1944 /* tx stats */
1945 if (priv->tx) {
1946 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1947 u32 last_completion = 0;
1948 u32 tx_frames = 0;
1949
1950 /* DQO doesn't currently support these metrics. */
1951 if (gve_is_gqi(priv)) {
1952 last_completion = priv->tx[idx].done;
1953 tx_frames = priv->tx[idx].req;
1954 }
1955
1956 do {
1957 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1958 tx_bytes = priv->tx[idx].bytes_done;
1959 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1960 stats[stats_idx++] = (struct stats) {
1961 .stat_name = cpu_to_be32(TX_WAKE_CNT),
1962 .value = cpu_to_be64(priv->tx[idx].wake_queue),
1963 .queue_id = cpu_to_be32(idx),
1964 };
1965 stats[stats_idx++] = (struct stats) {
1966 .stat_name = cpu_to_be32(TX_STOP_CNT),
1967 .value = cpu_to_be64(priv->tx[idx].stop_queue),
1968 .queue_id = cpu_to_be32(idx),
1969 };
1970 stats[stats_idx++] = (struct stats) {
1971 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1972 .value = cpu_to_be64(tx_frames),
1973 .queue_id = cpu_to_be32(idx),
1974 };
1975 stats[stats_idx++] = (struct stats) {
1976 .stat_name = cpu_to_be32(TX_BYTES_SENT),
1977 .value = cpu_to_be64(tx_bytes),
1978 .queue_id = cpu_to_be32(idx),
1979 };
1980 stats[stats_idx++] = (struct stats) {
1981 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1982 .value = cpu_to_be64(last_completion),
1983 .queue_id = cpu_to_be32(idx),
1984 };
1985 stats[stats_idx++] = (struct stats) {
1986 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1987 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
1988 .queue_id = cpu_to_be32(idx),
1989 };
1990 }
1991 }
1992 /* rx stats */
1993 if (priv->rx) {
1994 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1995 stats[stats_idx++] = (struct stats) {
1996 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1997 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1998 .queue_id = cpu_to_be32(idx),
1999 };
2000 stats[stats_idx++] = (struct stats) {
2001 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
2002 				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
2003 .queue_id = cpu_to_be32(idx),
2004 };
2005 }
2006 }
2007 }
2008
2009 /* Handle NIC status register changes, reset requests and report stats */
2010 static void gve_service_task(struct work_struct *work)
2011 {
2012 struct gve_priv *priv = container_of(work, struct gve_priv,
2013 service_task);
2014 u32 status = ioread32be(&priv->reg_bar0->device_status);
2015
2016 gve_handle_status(priv, status);
2017
2018 gve_handle_reset(priv);
2019 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2020 }
2021
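/* XDP (basic, redirect, ndo_xmit and XSK zero-copy) is only advertised for
 * the GQI-QPL queue format; all other formats report no XDP features.
 */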
2022 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2023 {
2024 if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2025 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
2026 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2027 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2028 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2029 } else {
2030 priv->dev->xdp_features = 0;
2031 }
2032 }
2033
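/* Initial (or post-reset) priv setup: allocate the admin queue, verify
 * driver compatibility, and, unless skip_describe_device, query the device
 * description, size the MSI-X notify blocks and TX/RX queue counts, and set
 * DQO interrupt coalescing defaults before setting up device resources.
 */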
2034 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2035 {
2036 int num_ntfy;
2037 int err;
2038
2039 /* Set up the adminq */
2040 err = gve_adminq_alloc(&priv->pdev->dev, priv);
2041 if (err) {
2042 dev_err(&priv->pdev->dev,
2043 "Failed to alloc admin queue: err=%d\n", err);
2044 return err;
2045 }
2046
2047 err = gve_verify_driver_compatibility(priv);
2048 if (err) {
2049 dev_err(&priv->pdev->dev,
2050 "Could not verify driver compatibility: err=%d\n", err);
2051 goto err;
2052 }
2053
2054 if (skip_describe_device)
2055 goto setup_device;
2056
2057 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2058 /* Get the initial information we need from the device */
2059 err = gve_adminq_describe_device(priv);
2060 if (err) {
2061 dev_err(&priv->pdev->dev,
2062 "Could not get device information: err=%d\n", err);
2063 goto err;
2064 }
2065 priv->dev->mtu = priv->dev->max_mtu;
2066 num_ntfy = pci_msix_vec_count(priv->pdev);
2067 if (num_ntfy <= 0) {
2068 dev_err(&priv->pdev->dev,
2069 "could not count MSI-x vectors: err=%d\n", num_ntfy);
2070 err = num_ntfy;
2071 goto err;
2072 } else if (num_ntfy < GVE_MIN_MSIX) {
2073 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2074 GVE_MIN_MSIX, num_ntfy);
2075 err = -EINVAL;
2076 goto err;
2077 }
2078
2079 	/* Big TCP is only supported on DQ */
2080 if (!gve_is_gqi(priv))
2081 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2082
2083 priv->num_registered_pages = 0;
2084 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2085 /* gvnic has one Notification Block per MSI-x vector, except for the
2086 * management vector
2087 */
2088 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2089 priv->mgmt_msix_idx = priv->num_ntfy_blks;
2090
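	/* TX and RX can each use at most half of the notification blocks. */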
2091 priv->tx_cfg.max_queues =
2092 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2093 priv->rx_cfg.max_queues =
2094 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2095
2096 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2097 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2098 if (priv->default_num_queues > 0) {
2099 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2100 priv->tx_cfg.num_queues);
2101 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2102 priv->rx_cfg.num_queues);
2103 }
2104
2105 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2106 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2107 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2108 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2109
2110 if (!gve_is_gqi(priv)) {
2111 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2112 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2113 }
2114
2115 setup_device:
2116 gve_set_netdev_xdp_features(priv);
2117 err = gve_setup_device_resources(priv);
2118 if (!err)
2119 return 0;
2120 err:
2121 gve_adminq_free(&priv->pdev->dev, priv);
2122 return err;
2123 }
2124
2125 static void gve_teardown_priv_resources(struct gve_priv *priv)
2126 {
2127 gve_teardown_device_resources(priv);
2128 gve_adminq_free(&priv->pdev->dev, priv);
2129 }
2130
2131 static void gve_trigger_reset(struct gve_priv *priv)
2132 {
2133 /* Reset the device by releasing the AQ */
2134 gve_adminq_release(priv);
2135 }
2136
2137 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2138 {
2139 gve_trigger_reset(priv);
2140 /* With the reset having already happened, close cannot fail */
2141 if (was_up)
2142 gve_close(priv->dev);
2143 gve_teardown_priv_resources(priv);
2144 }
2145
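/* Bring the device back after a reset: re-init priv (skipping the device
 * re-describe) and reopen the interface if it was previously up. On failure
 * all queues are turned down.
 */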
2146 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2147 {
2148 int err;
2149
2150 err = gve_init_priv(priv, true);
2151 if (err)
2152 goto err;
2153 if (was_up) {
2154 err = gve_open(priv->dev);
2155 if (err)
2156 goto err;
2157 }
2158 return 0;
2159 err:
2160 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2161 gve_turndown(priv);
2162 return err;
2163 }
2164
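/* Full device reset. Optionally attempt a clean close first; otherwise (or if
 * the close fails) turn the device down and reset by releasing the admin
 * queue. Then rebuild priv resources, reopen the interface if it was up, bump
 * reset_cnt and clear the interface/stats bookkeeping counters.
 */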
2165 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2166 {
2167 bool was_up = netif_carrier_ok(priv->dev);
2168 int err;
2169
2170 dev_info(&priv->pdev->dev, "Performing reset\n");
2171 gve_clear_do_reset(priv);
2172 gve_set_reset_in_progress(priv);
2173 /* If we aren't attempting to teardown normally, just go turndown and
2174 * reset right away.
2175 */
2176 if (!attempt_teardown) {
2177 gve_turndown(priv);
2178 gve_reset_and_teardown(priv, was_up);
2179 } else {
2180 /* Otherwise attempt to close normally */
2181 if (was_up) {
2182 err = gve_close(priv->dev);
2183 /* If that fails reset as we did above */
2184 if (err)
2185 gve_reset_and_teardown(priv, was_up);
2186 }
2187 /* Clean up any remaining resources */
2188 gve_teardown_priv_resources(priv);
2189 }
2190
2191 /* Set it all back up */
2192 err = gve_reset_recovery(priv, was_up);
2193 gve_clear_reset_in_progress(priv);
2194 priv->reset_cnt++;
2195 priv->interface_up_cnt = 0;
2196 priv->interface_down_cnt = 0;
2197 priv->stats_report_trigger_cnt = 0;
2198 return err;
2199 }
2200
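/* Report the driver version to the device by writing gve_version_prefix
 * followed by gve_version_str, one byte at a time, terminated by a newline.
 */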
2201 static void gve_write_version(u8 __iomem *driver_version_register)
2202 {
2203 const char *c = gve_version_prefix;
2204
2205 while (*c) {
2206 writeb(*c, driver_version_register);
2207 c++;
2208 }
2209
2210 c = gve_version_str;
2211 while (*c) {
2212 writeb(*c, driver_version_register);
2213 c++;
2214 }
2215 writeb('\n', driver_version_register);
2216 }
2217
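/* PCI probe: enable the device, map the register and doorbell BARs, read the
 * max queue counts from the register BAR, allocate and configure the netdev
 * and priv, create the service workqueue, init priv, and register the netdev.
 * Error paths unwind in reverse order.
 */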
2218 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2219 {
2220 int max_tx_queues, max_rx_queues;
2221 struct net_device *dev;
2222 __be32 __iomem *db_bar;
2223 struct gve_registers __iomem *reg_bar;
2224 struct gve_priv *priv;
2225 int err;
2226
2227 err = pci_enable_device(pdev);
2228 if (err)
2229 return err;
2230
2231 err = pci_request_regions(pdev, gve_driver_name);
2232 if (err)
2233 goto abort_with_enabled;
2234
2235 pci_set_master(pdev);
2236
2237 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2238 if (err) {
2239 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2240 goto abort_with_pci_region;
2241 }
2242
2243 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2244 if (!reg_bar) {
2245 dev_err(&pdev->dev, "Failed to map pci bar!\n");
2246 err = -ENOMEM;
2247 goto abort_with_pci_region;
2248 }
2249
2250 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2251 if (!db_bar) {
2252 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2253 err = -ENOMEM;
2254 goto abort_with_reg_bar;
2255 }
2256
2257 	gve_write_version(&reg_bar->driver_version);
2258 	/* Get max queues to alloc etherdev */
2259 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2260 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2261 /* Alloc and setup the netdev and priv */
2262 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2263 if (!dev) {
2264 dev_err(&pdev->dev, "could not allocate netdev\n");
2265 err = -ENOMEM;
2266 goto abort_with_db_bar;
2267 }
2268 SET_NETDEV_DEV(dev, &pdev->dev);
2269 pci_set_drvdata(pdev, dev);
2270 dev->ethtool_ops = &gve_ethtool_ops;
2271 dev->netdev_ops = &gve_netdev_ops;
2272
2273 /* Set default and supported features.
2274 *
2275 * Features might be set in other locations as well (such as
2276 * `gve_adminq_describe_device`).
2277 */
2278 dev->hw_features = NETIF_F_HIGHDMA;
2279 dev->hw_features |= NETIF_F_SG;
2280 dev->hw_features |= NETIF_F_HW_CSUM;
2281 dev->hw_features |= NETIF_F_TSO;
2282 dev->hw_features |= NETIF_F_TSO6;
2283 dev->hw_features |= NETIF_F_TSO_ECN;
2284 dev->hw_features |= NETIF_F_RXCSUM;
2285 dev->hw_features |= NETIF_F_RXHASH;
2286 dev->features = dev->hw_features;
2287 dev->watchdog_timeo = 5 * HZ;
2288 dev->min_mtu = ETH_MIN_MTU;
2289 netif_carrier_off(dev);
2290
2291 priv = netdev_priv(dev);
2292 priv->dev = dev;
2293 priv->pdev = pdev;
2294 priv->msg_enable = DEFAULT_MSG_LEVEL;
2295 priv->reg_bar0 = reg_bar;
2296 priv->db_bar2 = db_bar;
2297 priv->service_task_flags = 0x0;
2298 priv->state_flags = 0x0;
2299 priv->ethtool_flags = 0x0;
2300
2301 gve_set_probe_in_progress(priv);
2302 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2303 if (!priv->gve_wq) {
2304 dev_err(&pdev->dev, "Could not allocate workqueue");
2305 err = -ENOMEM;
2306 goto abort_with_netdev;
2307 }
2308 INIT_WORK(&priv->service_task, gve_service_task);
2309 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2310 priv->tx_cfg.max_queues = max_tx_queues;
2311 priv->rx_cfg.max_queues = max_rx_queues;
2312
2313 err = gve_init_priv(priv, false);
2314 if (err)
2315 goto abort_with_wq;
2316
2317 err = register_netdev(dev);
2318 if (err)
2319 goto abort_with_gve_init;
2320
2321 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2322 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2323 gve_clear_probe_in_progress(priv);
2324 queue_work(priv->gve_wq, &priv->service_task);
2325 return 0;
2326
2327 abort_with_gve_init:
2328 gve_teardown_priv_resources(priv);
2329
2330 abort_with_wq:
2331 destroy_workqueue(priv->gve_wq);
2332
2333 abort_with_netdev:
2334 free_netdev(dev);
2335
2336 abort_with_db_bar:
2337 pci_iounmap(pdev, db_bar);
2338
2339 abort_with_reg_bar:
2340 pci_iounmap(pdev, reg_bar);
2341
2342 abort_with_pci_region:
2343 pci_release_regions(pdev);
2344
2345 abort_with_enabled:
2346 pci_disable_device(pdev);
2347 return err;
2348 }
2349
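/* Unwind probe: unregister the netdev, free priv resources and the service
 * workqueue, then unmap the BARs and release/disable the PCI device.
 */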
2350 static void gve_remove(struct pci_dev *pdev)
2351 {
2352 struct net_device *netdev = pci_get_drvdata(pdev);
2353 struct gve_priv *priv = netdev_priv(netdev);
2354 __be32 __iomem *db_bar = priv->db_bar2;
2355 void __iomem *reg_bar = priv->reg_bar0;
2356
2357 unregister_netdev(netdev);
2358 gve_teardown_priv_resources(priv);
2359 destroy_workqueue(priv->gve_wq);
2360 free_netdev(netdev);
2361 pci_iounmap(pdev, db_bar);
2362 pci_iounmap(pdev, reg_bar);
2363 pci_release_regions(pdev);
2364 pci_disable_device(pdev);
2365 }
2366
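/* Shutdown hook: under the RTNL lock, close the interface if it was up
 * (falling back to reset-and-teardown if the close fails) and free priv
 * resources.
 */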
2367 static void gve_shutdown(struct pci_dev *pdev)
2368 {
2369 struct net_device *netdev = pci_get_drvdata(pdev);
2370 struct gve_priv *priv = netdev_priv(netdev);
2371 bool was_up = netif_carrier_ok(priv->dev);
2372
2373 rtnl_lock();
2374 if (was_up && gve_close(priv->dev)) {
2375 /* If the dev was up, attempt to close, if close fails, reset */
2376 gve_reset_and_teardown(priv, was_up);
2377 } else {
2378 /* If the dev wasn't up or close worked, finish tearing down */
2379 gve_teardown_priv_resources(priv);
2380 }
2381 rtnl_unlock();
2382 }
2383
2384 #ifdef CONFIG_PM
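/* Legacy PCI power-management hooks. Suspend tears the device down (closing
 * it first if it was up) and remembers whether it was up; resume re-runs the
 * reset recovery path to bring it back.
 */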
2385 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2386 {
2387 struct net_device *netdev = pci_get_drvdata(pdev);
2388 struct gve_priv *priv = netdev_priv(netdev);
2389 bool was_up = netif_carrier_ok(priv->dev);
2390
2391 priv->suspend_cnt++;
2392 rtnl_lock();
2393 if (was_up && gve_close(priv->dev)) {
2394 /* If the dev was up, attempt to close, if close fails, reset */
2395 gve_reset_and_teardown(priv, was_up);
2396 } else {
2397 /* If the dev wasn't up or close worked, finish tearing down */
2398 gve_teardown_priv_resources(priv);
2399 }
2400 priv->up_before_suspend = was_up;
2401 rtnl_unlock();
2402 return 0;
2403 }
2404
2405 static int gve_resume(struct pci_dev *pdev)
2406 {
2407 struct net_device *netdev = pci_get_drvdata(pdev);
2408 struct gve_priv *priv = netdev_priv(netdev);
2409 int err;
2410
2411 priv->resume_cnt++;
2412 rtnl_lock();
2413 err = gve_reset_recovery(priv, priv->up_before_suspend);
2414 rtnl_unlock();
2415 return err;
2416 }
2417 #endif /* CONFIG_PM */
2418
2419 static const struct pci_device_id gve_id_table[] = {
2420 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2421 { }
2422 };
2423
2424 static struct pci_driver gve_driver = {
2425 .name = gve_driver_name,
2426 .id_table = gve_id_table,
2427 .probe = gve_probe,
2428 .remove = gve_remove,
2429 .shutdown = gve_shutdown,
2430 #ifdef CONFIG_PM
2431 .suspend = gve_suspend,
2432 .resume = gve_resume,
2433 #endif
2434 };
2435
2436 module_pci_driver(gve_driver);
2437
2438 MODULE_DEVICE_TABLE(pci, gve_id_table);
2439 MODULE_AUTHOR("Google, Inc.");
2440 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2441 MODULE_LICENSE("Dual MIT/GPL");
2442 MODULE_VERSION(GVE_VERSION);
2443