// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2024-2025 Intel Corporation */

#define DEFAULT_SYMBOL_NAMESPACE "LIBETH"

#include <linux/export.h>

#include <net/libeth/rx.h>

/* Rx buffer management */

/**
 * libeth_rx_hw_len_mtu - get the actual buffer size to be passed to HW
 * @pp: &page_pool_params of the netdev to calculate the size for
 * @max_len: maximum buffer size for a single descriptor
 *
 * Return: HW-writeable length per one buffer to pass to the HW, accounting
 * for: the MTU of the netdev, the HW-required alignment, the minimum and
 * maximum allowed values, and the system page size.
 */
static u32 libeth_rx_hw_len_mtu(const struct page_pool_params *pp, u32 max_len)
{
	u32 len;

	len = READ_ONCE(pp->netdev->mtu) + LIBETH_RX_LL_LEN;
	len = ALIGN(len, LIBETH_RX_BUF_STRIDE);
	len = min3(len, ALIGN_DOWN(max_len ? : U32_MAX, LIBETH_RX_BUF_STRIDE),
		   pp->max_len);

	return len;
}
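
/*
 * Worked example (a sketch, not part of the API): assuming LIBETH_RX_LL_LEN
 * covers the Ethernet header plus FCS (18 bytes) and LIBETH_RX_BUF_STRIDE is
 * 128 bytes, a netdev with a 1500-byte MTU gives:
 *
 *	len = 1500 + 18 = 1518
 *	len = ALIGN(1518, 128) = 1536
 *
 * which is then capped by the per-descriptor @max_len (if non-zero) and by
 * the HW-writeable space of one page (pp->max_len).
 */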

/**
 * libeth_rx_hw_len_truesize - get the short buffer size to be passed to HW
 * @pp: &page_pool_params of the netdev to calculate the size for
 * @max_len: maximum buffer size for a single descriptor
 * @truesize: desired truesize for the buffers
 *
 * Return: HW-writeable length per one buffer to pass to the HW, ignoring the
 * MTU and as close as possible to the passed truesize. Can be used for
 * "short" buffer queues to fragment pages more efficiently.
 */
static u32 libeth_rx_hw_len_truesize(const struct page_pool_params *pp,
				     u32 max_len, u32 truesize)
{
	u32 min, len;

	min = SKB_HEAD_ALIGN(pp->offset + LIBETH_RX_BUF_STRIDE);
	truesize = clamp(roundup_pow_of_two(truesize), roundup_pow_of_two(min),
			 PAGE_SIZE << LIBETH_RX_PAGE_ORDER);

	len = SKB_WITH_OVERHEAD(truesize - pp->offset);
	len = ALIGN_DOWN(len, LIBETH_RX_BUF_STRIDE) ? : LIBETH_RX_BUF_STRIDE;
	len = min3(len, ALIGN_DOWN(max_len ? : U32_MAX, LIBETH_RX_BUF_STRIDE),
		   pp->max_len);

	return len;
}
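
/*
 * Example (a sketch, assuming 4 KiB pages): a "short" queue asking for a
 * 2 KiB truesize ends up with a HW length of roughly 2 KiB minus the headroom
 * and skb_shared_info overhead, rounded down to the buffer stride, so each
 * page can be fragmented into two buffers instead of holding a single
 * MTU-sized one.
 */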

/**
 * libeth_rx_page_pool_params - calculate params with the stack overhead
 * @fq: buffer queue to calculate the size for
 * @pp: &page_pool_params of the netdev
 *
 * Set the PP params to fit all the needed stack overhead (headroom, tailroom)
 * and both the HW buffer length and the truesize for all types of buffers.
 * For "short" buffers, the truesize never exceeds the "wanted" one; for the
 * rest, it can be up to the page size.
 *
 * Return: true on success, false on invalid input params.
 */
static bool libeth_rx_page_pool_params(struct libeth_fq *fq,
				       struct page_pool_params *pp)
{
	pp->offset = fq->xdp ? LIBETH_XDP_HEADROOM : LIBETH_SKB_HEADROOM;
	/* HW-writeable / syncable length per one page */
	pp->max_len = LIBETH_RX_PAGE_LEN(pp->offset);

	/* HW-writeable length per buffer */
	switch (fq->type) {
	case LIBETH_FQE_MTU:
		fq->buf_len = libeth_rx_hw_len_mtu(pp, fq->buf_len);
		break;
	case LIBETH_FQE_SHORT:
		fq->buf_len = libeth_rx_hw_len_truesize(pp, fq->buf_len,
							fq->truesize);
		break;
	case LIBETH_FQE_HDR:
		fq->buf_len = ALIGN(LIBETH_MAX_HEAD, LIBETH_RX_BUF_STRIDE);
		break;
	default:
		return false;
	}

	/* Buffer size to allocate */
	fq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp->offset +
							 fq->buf_len));

	return true;
}

/**
 * libeth_rx_page_pool_params_zc - calculate params without the stack overhead
 * @fq: buffer queue to calculate the size for
 * @pp: &page_pool_params of the netdev
 *
 * Set the PP params to exclude the stack overhead; the buffer length and the
 * truesize are equal for the data buffers. Note that this requires separate
 * header buffers to always be active and account for the overhead.
 * With the MTU == ``PAGE_SIZE``, this allows the kernel to enable the
 * zerocopy mode.
 *
 * Return: true on success, false on invalid input params.
 */
static bool libeth_rx_page_pool_params_zc(struct libeth_fq *fq,
					  struct page_pool_params *pp)
{
	u32 mtu, max;

	pp->offset = 0;
	pp->max_len = PAGE_SIZE << LIBETH_RX_PAGE_ORDER;

	switch (fq->type) {
	case LIBETH_FQE_MTU:
		mtu = READ_ONCE(pp->netdev->mtu);
		break;
	case LIBETH_FQE_SHORT:
		mtu = fq->truesize;
		break;
	default:
		return false;
	}

	mtu = roundup_pow_of_two(mtu);
	max = min(rounddown_pow_of_two(fq->buf_len ? : U32_MAX),
		  pp->max_len);

	fq->buf_len = clamp(mtu, LIBETH_RX_BUF_STRIDE, max);
	fq->truesize = fq->buf_len;

	return true;
}
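
/*
 * Example (a sketch, assuming 4 KiB pages and active header split): with the
 * MTU raised to PAGE_SIZE, the MTU queue gets buf_len == truesize == 4096, so
 * every data buffer is a whole page with no headroom or tailroom, which is
 * what allows the zerocopy mode mentioned above.
 */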

/**
 * libeth_rx_fq_create - create a PP with the default libeth settings
 * @fq: buffer queue struct to fill
 * @napi: &napi_struct covering this PP (no usage outside its poll loops)
 *
 * Return: %0 on success, -%errno on failure.
 */
int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi)
{
	struct page_pool_params pp = {
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
		.order		= LIBETH_RX_PAGE_ORDER,
		.pool_size	= fq->count,
		.nid		= fq->nid,
		.dev		= napi->dev->dev.parent,
		.netdev		= napi->dev,
		.napi		= napi,
	};
	struct libeth_fqe *fqes;
	struct page_pool *pool;
	int ret;

	pp.dma_dir = fq->xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;

	if (!fq->hsplit)
		ret = libeth_rx_page_pool_params(fq, &pp);
	else
		ret = libeth_rx_page_pool_params_zc(fq, &pp);
	if (!ret)
		return -EINVAL;

	pool = page_pool_create(&pp);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	fqes = kvcalloc_node(fq->count, sizeof(*fqes), GFP_KERNEL, fq->nid);
	if (!fqes) {
		ret = -ENOMEM;
		goto err_buf;
	}

	ret = xdp_reg_page_pool(pool);
	if (ret)
		goto err_mem;

	fq->fqes = fqes;
	fq->pp = pool;

	return 0;

err_mem:
	kvfree(fqes);
err_buf:
	page_pool_destroy(pool);

	return ret;
}
EXPORT_SYMBOL_GPL(libeth_rx_fq_create);
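
/*
 * Usage sketch (hypothetical driver; num_descs and the NAPI pointer are
 * illustrative only, the &libeth_fq fields are the ones filled above):
 *
 *	struct libeth_fq fq = {
 *		.count	= num_descs,
 *		.type	= LIBETH_FQE_MTU,
 *		.hsplit	= false,
 *		.xdp	= false,
 *		.nid	= NUMA_NO_NODE,
 *	};
 *	int err;
 *
 *	err = libeth_rx_fq_create(&fq, napi);
 *	if (err)
 *		return err;
 *
 *	// fq.pp, fq.fqes, fq.buf_len and fq.truesize are now filled in;
 *	// tear down later with libeth_rx_fq_destroy(&fq).
 */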

/**
 * libeth_rx_fq_destroy - destroy a &page_pool created by libeth
 * @fq: buffer queue to process
 */
void libeth_rx_fq_destroy(struct libeth_fq *fq)
{
	xdp_unreg_page_pool(fq->pp);
	kvfree(fq->fqes);
	page_pool_destroy(fq->pp);
}
EXPORT_SYMBOL_GPL(libeth_rx_fq_destroy);

/**
 * libeth_rx_recycle_slow - recycle libeth netmem
 * @netmem: network memory to recycle
 *
 * To be used on exceptions or rare cases not requiring fast inline recycling.
 */
void __cold libeth_rx_recycle_slow(netmem_ref netmem)
{
	page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
}
EXPORT_SYMBOL_GPL(libeth_rx_recycle_slow);
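
/*
 * Example (a sketch; assumes the &libeth_fqe buffer member is called netmem):
 * releasing a buffer outside the hot path, e.g. when tearing down a queue
 * with buffers still posted to the HW:
 *
 *	libeth_rx_recycle_slow(fqe->netmem);
 */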

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

static const u16 libeth_rx_pt_xdp_oip[] = {
	[LIBETH_RX_PT_OUTER_L2]		= XDP_RSS_TYPE_NONE,
	[LIBETH_RX_PT_OUTER_IPV4]	= XDP_RSS_L3_IPV4,
	[LIBETH_RX_PT_OUTER_IPV6]	= XDP_RSS_L3_IPV6,
};

static const u16 libeth_rx_pt_xdp_iprot[] = {
	[LIBETH_RX_PT_INNER_NONE]	= XDP_RSS_TYPE_NONE,
	[LIBETH_RX_PT_INNER_UDP]	= XDP_RSS_L4_UDP,
	[LIBETH_RX_PT_INNER_TCP]	= XDP_RSS_L4_TCP,
	[LIBETH_RX_PT_INNER_SCTP]	= XDP_RSS_L4_SCTP,
	[LIBETH_RX_PT_INNER_ICMP]	= XDP_RSS_L4_ICMP,
	[LIBETH_RX_PT_INNER_TIMESYNC]	= XDP_RSS_TYPE_NONE,
};

static const u16 libeth_rx_pt_xdp_pl[] = {
	[LIBETH_RX_PT_PAYLOAD_NONE]	= XDP_RSS_TYPE_NONE,
	[LIBETH_RX_PT_PAYLOAD_L2]	= XDP_RSS_TYPE_NONE,
	[LIBETH_RX_PT_PAYLOAD_L3]	= XDP_RSS_TYPE_NONE,
	[LIBETH_RX_PT_PAYLOAD_L4]	= XDP_RSS_L4,
};

/**
 * libeth_rx_pt_gen_hash_type - generate an XDP RSS hash type for a PT
 * @pt: PT structure to evaluate
 *
 * Generates the ``hash_type`` field with XDP RSS type values from the parsed
 * packet parameters when they're obtained dynamically at runtime.
 */
void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt)
{
	pt->hash_type = 0;
	pt->hash_type |= libeth_rx_pt_xdp_oip[pt->outer_ip];
	pt->hash_type |= libeth_rx_pt_xdp_iprot[pt->inner_prot];
	pt->hash_type |= libeth_rx_pt_xdp_pl[pt->payload_layer];
}
EXPORT_SYMBOL_GPL(libeth_rx_pt_gen_hash_type);
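
/*
 * Example (a sketch): a parsed IPv4/TCP packet type
 *
 *	struct libeth_rx_pt pt = {
 *		.outer_ip	= LIBETH_RX_PT_OUTER_IPV4,
 *		.inner_prot	= LIBETH_RX_PT_INNER_TCP,
 *		.payload_layer	= LIBETH_RX_PT_PAYLOAD_L4,
 *	};
 *
 *	libeth_rx_pt_gen_hash_type(&pt);
 *
 * ends up with pt.hash_type == XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
 * i.e. the XDP_RSS_TYPE_L4_IPV4_TCP RSS hash type.
 */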

/* Module */

MODULE_DESCRIPTION("Common Ethernet library");
MODULE_LICENSE("GPL");