1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2018 Chelsio Communications, Inc.
4 *
5 * Written by: Atul Gupta (atul.gupta@chelsio.com)
6 */
7
8 #include <linux/module.h>
9 #include <linux/list.h>
10 #include <linux/workqueue.h>
11 #include <linux/skbuff.h>
12 #include <linux/timer.h>
13 #include <linux/notifier.h>
14 #include <linux/inetdevice.h>
15 #include <linux/ip.h>
16 #include <linux/tcp.h>
17 #include <linux/sched/signal.h>
18 #include <net/tcp.h>
19 #include <net/busy_poll.h>
20 #include <crypto/aes.h>
21
22 #include "chtls.h"
23 #include "chtls_cm.h"
24
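/*
 * TLS offload is considered active in a given direction once a
 * non-negative hardware key index has been programmed for it.
 */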
25 static bool is_tls_tx(struct chtls_sock *csk)
26 {
27 return csk->tlshws.txkey >= 0;
28 }
29
30 static bool is_tls_rx(struct chtls_sock *csk)
31 {
32 return csk->tlshws.rxkey >= 0;
33 }
34
35 static int data_sgl_len(const struct sk_buff *skb)
36 {
37 unsigned int cnt;
38
39 cnt = skb_shinfo(skb)->nr_frags;
40 return sgl_len(cnt) * 8;
41 }
42
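/*
 * Number of IVs needed to carry @size bytes of payload: one IV per TLS
 * record, with records limited to the session's maximum fragment size.
 */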
43 static int nos_ivs(struct sock *sk, unsigned int size)
44 {
45 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
46
47 return DIV_ROUND_UP(size, csk->tlshws.mfs);
48 }
49
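/*
 * Decide whether the per-record IVs can travel as immediate data in the
 * work request (the WR header, key reference and IVs must fit within the
 * immediate WR limit) and record the choice in the skb's ULP control block.
 */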
50 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
51 {
52 int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
53 int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
54
55 if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
56 MAX_IMM_OFLD_TX_DATA_WR_LEN) {
57 ULP_SKB_CB(skb)->ulp.tls.iv = 1;
58 return 1;
59 }
60 ULP_SKB_CB(skb)->ulp.tls.iv = 0;
61 return 0;
62 }
63
64 static int max_ivs_size(struct sock *sk, int size)
65 {
66 return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
67 }
68
69 static int ivs_size(struct sock *sk, const struct sk_buff *skb)
70 {
71 return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
72 CIPHER_BLOCK_SIZE) : 0;
73 }
74
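/*
 * Size of a FLOWC work request carrying @nparams mnemonics, expressed in
 * 16-byte credits; optionally also returns the padded length in bytes.
 */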
75 static int flowc_wr_credits(int nparams, int *flowclenp)
76 {
77 int flowclen16, flowclen;
78
79 flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
80 flowclen16 = DIV_ROUND_UP(flowclen, 16);
81 flowclen = flowclen16 * 16;
82
83 if (flowclenp)
84 *flowclenp = flowclen;
85
86 return flowclen16;
87 }
88
89 static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
90 struct fw_flowc_wr *flowc,
91 int flowclen)
92 {
93 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
94 struct sk_buff *skb;
95
96 skb = alloc_skb(flowclen, GFP_ATOMIC);
97 if (!skb)
98 return NULL;
99
100 __skb_put_data(skb, flowc, flowclen);
101 skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
102
103 return skb;
104 }
105
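/*
 * Transmit a FLOWC work request. If data has already been sent on the
 * connection the FLOWC must be queued in order behind it; otherwise it is
 * sent directly, falling back to the deferred send path on failure.
 * Returns the number of 16-byte credits consumed (0 when the request is
 * only queued behind pending data), or a negative errno.
 */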
106 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
107 int flowclen)
108 {
109 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
110 struct tcp_sock *tp = tcp_sk(sk);
111 struct sk_buff *skb;
112 int flowclen16;
113 int ret;
114
115 flowclen16 = flowclen / 16;
116
117 if (csk_flag(sk, CSK_TX_DATA_SENT)) {
118 skb = create_flowc_wr_skb(sk, flowc, flowclen);
119 if (!skb)
120 return -ENOMEM;
121
122 skb_entail(sk, skb,
123 ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
124 return 0;
125 }
126
127 ret = cxgb4_immdata_send(csk->egress_dev,
128 csk->txq_idx,
129 flowc, flowclen);
130 if (!ret)
131 return flowclen16;
132 skb = create_flowc_wr_skb(sk, flowc, flowclen);
133 if (!skb)
134 return -ENOMEM;
135 send_or_defer(sk, tp, skb, 0);
136 return flowclen16;
137 }
138
139 static u8 tcp_state_to_flowc_state(u8 state)
140 {
141 switch (state) {
142 case TCP_ESTABLISHED:
143 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
144 case TCP_CLOSE_WAIT:
145 return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
146 case TCP_FIN_WAIT1:
147 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
148 case TCP_CLOSING:
149 return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
150 case TCP_LAST_ACK:
151 return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
152 case TCP_FIN_WAIT2:
153 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
154 }
155
156 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
157 }
158
159 int send_tx_flowc_wr(struct sock *sk, int compl,
160 u32 snd_nxt, u32 rcv_nxt)
161 {
162 DEFINE_RAW_FLEX(struct fw_flowc_wr, flowc, mnemval, FW_FLOWC_MNEM_MAX);
163 int nparams, paramidx, flowclen16, flowclen;
164 struct chtls_sock *csk;
165 struct tcp_sock *tp;
166
167 csk = rcu_dereference_sk_user_data(sk);
168 tp = tcp_sk(sk);
169
170 #define FLOWC_PARAM(__m, __v) \
171 do { \
172 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
173 flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
174 paramidx++; \
175 } while (0)
176
177 paramidx = 0;
178
179 FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
180 FLOWC_PARAM(CH, csk->tx_chan);
181 FLOWC_PARAM(PORT, csk->tx_chan);
182 FLOWC_PARAM(IQID, csk->rss_qid);
183 FLOWC_PARAM(SNDNXT, tp->snd_nxt);
184 FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
185 FLOWC_PARAM(SNDBUF, csk->sndbuf);
186 FLOWC_PARAM(MSS, tp->mss_cache);
187 FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));
188
189 if (SND_WSCALE(tp))
190 FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));
191
192 if (csk->ulp_mode == ULP_MODE_TLS)
193 FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);
194
195 if (csk->tlshws.fcplenmax)
196 FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);
197
198 nparams = paramidx;
199 #undef FLOWC_PARAM
200
201 flowclen16 = flowc_wr_credits(nparams, &flowclen);
202 flowc->op_to_nparams =
203 cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
204 FW_WR_COMPL_V(compl) |
205 FW_FLOWC_WR_NPARAMS_V(nparams));
206 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
207 FW_WR_FLOWID_V(csk->tid));
208
209 return send_flowc_wr(sk, flowc, flowclen);
210 }
211
212 /* Copy IVs to WR */
213 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
214
215 {
216 struct chtls_sock *csk;
217 unsigned char *iv_loc;
218 struct chtls_hws *hws;
219 unsigned char *ivs;
220 u16 number_of_ivs;
221 struct page *page;
222 int err = 0;
223
224 csk = rcu_dereference_sk_user_data(sk);
225 hws = &csk->tlshws;
226 number_of_ivs = nos_ivs(sk, skb->len);
227
228 if (number_of_ivs > MAX_IVS_PAGE) {
229 pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
230 return -ENOMEM;
231 }
232
233 /* generate the IVs */
234 ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
235 if (!ivs)
236 return -ENOMEM;
237 get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
238
239 if (skb_ulp_tls_iv_imm(skb)) {
240 /* send the IVs as immediate data in the WR */
241 iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
242 CIPHER_BLOCK_SIZE);
243 if (iv_loc)
244 memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
245
246 hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
247 } else {
248 /* Send the IVs as sgls */
249 /* Already accounted IV DSGL for credits */
250 skb_shinfo(skb)->nr_frags--;
251 page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
252 if (!page) {
253 pr_info("%s : Page allocation for IVs failed\n",
254 __func__);
255 err = -ENOMEM;
256 goto out;
257 }
258 memcpy(page_address(page), ivs, number_of_ivs *
259 CIPHER_BLOCK_SIZE);
260 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
261 number_of_ivs * CIPHER_BLOCK_SIZE);
262 hws->ivsize = 0;
263 }
264 out:
265 kfree(ivs);
266 return err;
267 }
268
269 /* Copy Key to WR */
270 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
271 {
272 struct ulptx_sc_memrd *sc_memrd;
273 struct chtls_sock *csk;
274 struct chtls_dev *cdev;
275 struct ulptx_idata *sc;
276 struct chtls_hws *hws;
277 u32 immdlen;
278 int kaddr;
279
280 csk = rcu_dereference_sk_user_data(sk);
281 hws = &csk->tlshws;
282 cdev = csk->cdev;
283
284 immdlen = sizeof(*sc) + sizeof(*sc_memrd);
285 kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
286 sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
287 if (sc) {
288 sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
289 sc->len = htonl(0);
290 sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
291 sc_memrd->cmd_to_len =
292 htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
293 ULP_TX_SC_MORE_V(1) |
294 ULPTX_LEN16_V(hws->keylen >> 4));
295 sc_memrd->addr = htonl(kaddr);
296 }
297 }
298
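/* Return the current TLS Tx record sequence number and advance it. */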
299 static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
300 {
301 return hws->tx_seq_no++;
302 }
303
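/*
 * True if the skb cannot be sent as immediate data (it is flagged or
 * exceeds the immediate ULP_TX limit) and must go out as a
 * scatter/gather list instead.
 */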
304 static bool is_sg_request(const struct sk_buff *skb)
305 {
306 return skb->peeked ||
307 (skb->len > MAX_IMM_ULPTX_WR_LEN);
308 }
309
310 /*
311 * Returns true if an sk_buff carries urgent data.
312 */
313 static bool skb_urgent(struct sk_buff *skb)
314 {
315 return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
316 }
317
318 /* TLS content type for CPL SFO */
319 static unsigned char tls_content_type(unsigned char content_type)
320 {
321 switch (content_type) {
322 case TLS_HDR_TYPE_CCS:
323 return CPL_TX_TLS_SFO_TYPE_CCS;
324 case TLS_HDR_TYPE_ALERT:
325 return CPL_TX_TLS_SFO_TYPE_ALERT;
326 case TLS_HDR_TYPE_HANDSHAKE:
327 return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
328 case TLS_HDR_TYPE_HEARTBEAT:
329 return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
330 }
331 return CPL_TX_TLS_SFO_TYPE_DATA;
332 }
333
334 static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
335 int dlen, int tls_immd, u32 credits,
336 int expn, int pdus)
337 {
338 struct fw_tlstx_data_wr *req_wr;
339 struct cpl_tx_tls_sfo *req_cpl;
340 unsigned int wr_ulp_mode_force;
341 struct tls_scmd *updated_scmd;
342 unsigned char data_type;
343 struct chtls_sock *csk;
344 struct net_device *dev;
345 struct chtls_hws *hws;
346 struct tls_scmd *scmd;
347 struct adapter *adap;
348 unsigned char *req;
349 int immd_len;
350 int iv_imm;
351 int len;
352
353 csk = rcu_dereference_sk_user_data(sk);
354 iv_imm = skb_ulp_tls_iv_imm(skb);
355 dev = csk->egress_dev;
356 adap = netdev2adap(dev);
357 hws = &csk->tlshws;
358 scmd = &hws->scmd;
359 len = dlen + expn;
360
361 dlen = (dlen < hws->mfs) ? dlen : hws->mfs;
362 atomic_inc(&adap->chcr_stats.tls_pdu_tx);
363
364 updated_scmd = scmd;
365 updated_scmd->seqno_numivs &= 0xffffff80;
366 updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus);
367 hws->scmd = *updated_scmd;
368
369 req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
370 req_cpl = (struct cpl_tx_tls_sfo *)req;
371 req = (unsigned char *)__skb_push(skb, (sizeof(struct
372 fw_tlstx_data_wr)));
373
374 req_wr = (struct fw_tlstx_data_wr *)req;
375 immd_len = (tls_immd ? dlen : 0);
376 req_wr->op_to_immdlen =
377 htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
378 FW_TLSTX_DATA_WR_COMPL_V(1) |
379 FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
380 req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
381 FW_TLSTX_DATA_WR_LEN16_V(credits));
382 wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS);
383
384 if (is_sg_request(skb))
385 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
386 ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
387 FW_OFLD_TX_DATA_WR_SHOVE_F);
388
389 req_wr->lsodisable_to_flags =
390 htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
391 TX_URG_V(skb_urgent(skb)) |
392 T6_TX_FORCE_F | wr_ulp_mode_force |
393 TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
394 skb_queue_empty(&csk->txq)));
395
396 req_wr->ctxloc_to_exp =
397 htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
398 FW_TLSTX_DATA_WR_EXP_V(expn) |
399 FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
400 FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
401 FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));
402
403 /* Fill in the length */
404 req_wr->plen = htonl(len);
405 req_wr->mfs = htons(hws->mfs);
406 req_wr->adjustedplen_pkd =
407 htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
408 req_wr->expinplenmax_pkd =
409 htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
410 req_wr->pdusinplenmax_pkd =
411 FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
412 req_wr->r10 = 0;
413
414 data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
415 req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
416 CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
417 CPL_TX_TLS_SFO_CPL_LEN_V(2) |
418 CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
419 req_cpl->pld_len = htonl(len - expn);
420
421 req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
422 ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
423 TLS_HDR_TYPE_HEARTBEAT : 0) |
424 CPL_TX_TLS_SFO_PROTOVER_V(0));
425
426 /* create the s-command */
427 req_cpl->r1_lo = 0;
428 req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs);
429 req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
430 req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
431 }
432
433 /*
434 * Calculate the TLS data expansion size
435 */
436 static int chtls_expansion_size(struct sock *sk, int data_len,
437 int fullpdu,
438 unsigned short *pducnt)
439 {
440 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
441 struct chtls_hws *hws = &csk->tlshws;
442 struct tls_scmd *scmd = &hws->scmd;
443 int fragsize = hws->mfs;
444 int expnsize = 0;
445 int fragleft;
446 int fragcnt;
447 int expppdu;
448
449 if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
450 SCMD_CIPH_MODE_AES_GCM) {
451 expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
452 TLS_HEADER_LENGTH;
453
454 if (fullpdu) {
455 *pducnt = data_len / (expppdu + fragsize);
456 if (*pducnt > 32)
457 *pducnt = 32;
458 else if (!*pducnt)
459 *pducnt = 1;
460 expnsize = (*pducnt) * expppdu;
461 return expnsize;
462 }
463 fragcnt = (data_len / fragsize);
464 expnsize = fragcnt * expppdu;
465 fragleft = data_len % fragsize;
466 if (fragleft > 0)
467 expnsize += expppdu;
468 }
469 return expnsize;
470 }
471
472 /* WR with IV, KEY and CPL SFO added */
473 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
474 int tls_tx_imm, int tls_len, u32 credits)
475 {
476 unsigned short pdus_per_ulp = 0;
477 struct chtls_sock *csk;
478 struct chtls_hws *hws;
479 int expn_sz;
480 int pdus;
481
482 csk = rcu_dereference_sk_user_data(sk);
483 hws = &csk->tlshws;
484 pdus = DIV_ROUND_UP(tls_len, hws->mfs);
485 expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
486 if (!hws->compute) {
487 hws->expansion = chtls_expansion_size(sk,
488 hws->fcplenmax,
489 1, &pdus_per_ulp);
490 hws->pdus = pdus_per_ulp;
491 hws->adjustlen = hws->pdus *
492 ((hws->expansion / hws->pdus) + hws->mfs);
493 hws->compute = 1;
494 }
495 if (tls_copy_ivs(sk, skb))
496 return;
497 tls_copy_tx_key(sk, skb);
498 tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
499 hws->tx_seq_no += (pdus - 1);
500 }
501
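/* Prepend a plain (non-TLS) FW_OFLD_TX_DATA_WR header to the payload. */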
502 static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
503 unsigned int immdlen, int len,
504 u32 credits, u32 compl)
505 {
506 struct fw_ofld_tx_data_wr *req;
507 unsigned int wr_ulp_mode_force;
508 struct chtls_sock *csk;
509 unsigned int opcode;
510
511 csk = rcu_dereference_sk_user_data(sk);
512 opcode = FW_OFLD_TX_DATA_WR;
513
514 req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
515 req->op_to_immdlen = htonl(WR_OP_V(opcode) |
516 FW_WR_COMPL_V(compl) |
517 FW_WR_IMMDLEN_V(immdlen));
518 req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
519 FW_WR_LEN16_V(credits));
520
521 wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode);
522 if (is_sg_request(skb))
523 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
524 ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
525 FW_OFLD_TX_DATA_WR_SHOVE_F);
526
527 req->tunnel_to_proxy = htonl(wr_ulp_mode_force |
528 TX_URG_V(skb_urgent(skb)) |
529 TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
530 skb_queue_empty(&csk->txq)));
531 req->plen = htonl(len);
532 }
533
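/*
 * Work request overhead for a TLS skb: CPLs, key reference and IVs.
 * When @size is false, additionally reserve a fragment slot for the
 * IV DSGL if the IVs are not sent as immediate data.
 */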
534 static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
535 bool size)
536 {
537 int wr_size;
538
539 wr_size = TLS_WR_CPL_LEN;
540 wr_size += KEY_ON_MEM_SZ;
541 wr_size += ivs_size(csk->sk, skb);
542
543 if (size)
544 return wr_size;
545
546 /* frags counted for IV dsgl */
547 if (!skb_ulp_tls_iv_imm(skb))
548 skb_shinfo(skb)->nr_frags++;
549
550 return wr_size;
551 }
552
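/*
 * Can the skb, including its work request header, be sent entirely as
 * immediate data?
 */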
553 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
554 {
555 int length = skb->len;
556
557 if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
558 return false;
559
560 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
561 /* Check TLS header len for Immediate */
562 if (csk->ulp_mode == ULP_MODE_TLS &&
563 skb_ulp_tls_inline(skb))
564 length += chtls_wr_size(csk, skb, true);
565 else
566 length += sizeof(struct fw_ofld_tx_data_wr);
567
568 return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
569 }
570 return true;
571 }
572
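/*
 * Number of 8-byte flits needed to send an skb: the immediate header
 * portion plus SGL entries for the page fragments (and any linear
 * payload beyond the transport header).
 */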
573 static unsigned int calc_tx_flits(const struct sk_buff *skb,
574 unsigned int immdlen)
575 {
576 unsigned int flits, cnt;
577
578 flits = immdlen / 8; /* headers */
579 cnt = skb_shinfo(skb)->nr_frags;
580 if (skb_tail_pointer(skb) != skb_transport_header(skb))
581 cnt++;
582 return flits + sgl_len(cnt);
583 }
584
585 static void arp_failure_discard(void *handle, struct sk_buff *skb)
586 {
587 kfree_skb(skb);
588 }
589
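/*
 * Main Tx path: drain skbs from the offload Tx queue while work request
 * credits allow, sending a FLOWC on the first transmission and prepending
 * a TLS or plain TX_DATA work request to each skb before handing it to
 * the L2T layer. Returns the total truesize of the skbs sent.
 */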
590 int chtls_push_frames(struct chtls_sock *csk, int comp)
591 {
592 struct chtls_hws *hws = &csk->tlshws;
593 struct tcp_sock *tp;
594 struct sk_buff *skb;
595 int total_size = 0;
596 struct sock *sk;
597 int wr_size;
598
599 wr_size = sizeof(struct fw_ofld_tx_data_wr);
600 sk = csk->sk;
601 tp = tcp_sk(sk);
602
603 if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
604 return 0;
605
606 if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
607 return 0;
608
609 while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
610 (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
611 skb_queue_len(&csk->txq) > 1)) {
612 unsigned int credit_len = skb->len;
613 unsigned int credits_needed;
614 unsigned int completion = 0;
615 int tls_len = skb->len;/* TLS data len before IV/key */
616 unsigned int immdlen;
617 int len = skb->len; /* length [ulp bytes] inserted by hw */
618 int flowclen16 = 0;
619 int tls_tx_imm = 0;
620
621 immdlen = skb->len;
622 if (!is_ofld_imm(csk, skb)) {
623 immdlen = skb_transport_offset(skb);
624 if (skb_ulp_tls_inline(skb))
625 wr_size = chtls_wr_size(csk, skb, false);
626 credit_len = 8 * calc_tx_flits(skb, immdlen);
627 } else {
628 if (skb_ulp_tls_inline(skb)) {
629 wr_size = chtls_wr_size(csk, skb, false);
630 tls_tx_imm = 1;
631 }
632 }
633 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
634 credit_len += wr_size;
635 credits_needed = DIV_ROUND_UP(credit_len, 16);
636 if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
637 flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
638 tp->rcv_nxt);
639 if (flowclen16 <= 0)
640 break;
641 csk->wr_credits -= flowclen16;
642 csk->wr_unacked += flowclen16;
643 csk->wr_nondata += flowclen16;
644 csk_set_flag(csk, CSK_TX_DATA_SENT);
645 }
646
647 if (csk->wr_credits < credits_needed) {
648 if (skb_ulp_tls_inline(skb) &&
649 !skb_ulp_tls_iv_imm(skb))
650 skb_shinfo(skb)->nr_frags--;
651 break;
652 }
653
654 __skb_unlink(skb, &csk->txq);
655 skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
656 CPL_PRIORITY_DATA);
657 if (hws->ofld)
658 hws->txqid = (skb->queue_mapping >> 1);
659 skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
660 csk->wr_credits -= credits_needed;
661 csk->wr_unacked += credits_needed;
662 csk->wr_nondata = 0;
663 enqueue_wr(csk, skb);
664
665 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
666 if ((comp && csk->wr_unacked == credits_needed) ||
667 (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
668 csk->wr_unacked >= csk->wr_max_credits / 2) {
669 completion = 1;
670 csk->wr_unacked = 0;
671 }
672 if (skb_ulp_tls_inline(skb))
673 make_tlstx_data_wr(sk, skb, tls_tx_imm,
674 tls_len, credits_needed);
675 else
676 make_tx_data_wr(sk, skb, immdlen, len,
677 credits_needed, completion);
678 tp->snd_nxt += len;
679 tp->lsndtime = tcp_jiffies32;
680 if (completion)
681 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
682 } else {
683 struct cpl_close_con_req *req = cplhdr(skb);
684 unsigned int cmd = CPL_OPCODE_G(ntohl
685 (OPCODE_TID(req)));
686
687 if (cmd == CPL_CLOSE_CON_REQ)
688 csk_set_flag(csk,
689 CSK_CLOSE_CON_REQUESTED);
690
691 if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
692 (csk->wr_unacked >= csk->wr_max_credits / 2)) {
693 req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
694 csk->wr_unacked = 0;
695 }
696 }
697 total_size += skb->truesize;
698 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
699 csk_set_flag(csk, CSK_TX_WAIT_IDLE);
700 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
701 cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
702 }
703 sk->sk_wmem_queued -= total_size;
704 return total_size;
705 }
706
707 static void mark_urg(struct tcp_sock *tp, int flags,
708 struct sk_buff *skb)
709 {
710 if (unlikely(flags & MSG_OOB)) {
711 tp->snd_up = tp->write_seq;
712 ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
713 ULPCB_FLAG_BARRIER |
714 ULPCB_FLAG_NO_APPEND |
715 ULPCB_FLAG_NEED_HDR;
716 }
717 }
718
719 /*
720 * Returns true if a connection should send more data to the TCP engine
721 */
722 static bool should_push(struct sock *sk)
723 {
724 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
725 struct chtls_dev *cdev = csk->cdev;
726 struct tcp_sock *tp = tcp_sk(sk);
727
728 /*
729 * If we've released our offload resources there's nothing to do ...
730 */
731 if (!cdev)
732 return false;
733
734 /*
735 * If there aren't any work requests in flight, or there isn't enough
736 * data in flight, or Nagle is off, then send the current TX_DATA;
737 * otherwise hold it and wait to accumulate more data.
738 */
739 return csk->wr_credits == csk->wr_max_credits ||
740 (tp->nonagle & TCP_NAGLE_OFF);
741 }
742
743 /*
744 * Returns true if a TCP socket is corked.
745 */
746 static bool corked(const struct tcp_sock *tp, int flags)
747 {
748 return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
749 }
750
751 /*
752 * Returns true if a send should try to push new data.
753 */
754 static bool send_should_push(struct sock *sk, int flags)
755 {
756 return should_push(sk) && !corked(tcp_sk(sk), flags);
757 }
758
759 void chtls_tcp_push(struct sock *sk, int flags)
760 {
761 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
762 int qlen = skb_queue_len(&csk->txq);
763
764 if (likely(qlen)) {
765 struct sk_buff *skb = skb_peek_tail(&csk->txq);
766 struct tcp_sock *tp = tcp_sk(sk);
767
768 mark_urg(tp, flags, skb);
769
770 if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
771 corked(tp, flags)) {
772 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
773 return;
774 }
775
776 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
777 if (qlen == 1 &&
778 ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
779 should_push(sk)))
780 chtls_push_frames(csk, 1);
781 }
782 }
783
784 /*
785 * Calculate the size for a new send sk_buff. It is of maximum size so we can
786 * pack lots of data into it, unless we plan to send it immediately, in which
787 * case we size it more tightly.
788 *
789 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't
790 * arise in normal cases and when it does we are just wasting memory.
791 */
792 static int select_size(struct sock *sk, int io_len, int flags, int len)
793 {
794 const int pgbreak = SKB_MAX_HEAD(len);
795
796 /*
797 * If the data wouldn't fit in the main body anyway, put only the
798 * header in the main body so it can use immediate data and place all
799 * the payload in page fragments.
800 */
801 if (io_len > pgbreak)
802 return 0;
803
804 /*
805 * If we will be accumulating payload get a large main body.
806 */
807 if (!send_should_push(sk, flags))
808 return pgbreak;
809
810 return io_len;
811 }
812
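/*
 * Append an skb to the offload Tx queue, stamping it with the current
 * write sequence and the given ULP flags, and charge its memory to the
 * socket. Any cached partial sendmsg page is released.
 */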
813 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
814 {
815 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
816 struct tcp_sock *tp = tcp_sk(sk);
817
818 ULP_SKB_CB(skb)->seq = tp->write_seq;
819 ULP_SKB_CB(skb)->flags = flags;
820 __skb_queue_tail(&csk->txq, skb);
821 sk->sk_wmem_queued += skb->truesize;
822
823 if (TCP_PAGE(sk) && TCP_OFF(sk)) {
824 put_page(TCP_PAGE(sk));
825 TCP_PAGE(sk) = NULL;
826 TCP_OFF(sk) = 0;
827 }
828 }
829
830 static struct sk_buff *get_tx_skb(struct sock *sk, int size)
831 {
832 struct sk_buff *skb;
833
834 skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
835 if (likely(skb)) {
836 skb_reserve(skb, TX_HEADER_LEN);
837 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
838 skb_reset_transport_header(skb);
839 }
840 return skb;
841 }
842
843 static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
844 {
845 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
846 struct sk_buff *skb;
847
848 skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
849 KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
850 sk->sk_allocation);
851 if (likely(skb)) {
852 skb_reserve(skb, (TX_TLSHDR_LEN +
853 KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
854 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
855 skb_reset_transport_header(skb);
856 ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
857 ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;
858 }
859 return skb;
860 }
861
862 static void tx_skb_finalize(struct sk_buff *skb)
863 {
864 struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
865
866 if (!(cb->flags & ULPCB_FLAG_NO_HDR))
867 cb->flags = ULPCB_FLAG_NEED_HDR;
868 cb->flags |= ULPCB_FLAG_NO_APPEND;
869 }
870
871 static void push_frames_if_head(struct sock *sk)
872 {
873 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
874
875 if (skb_queue_len(&csk->txq) == 1)
876 chtls_push_frames(csk, 1);
877 }
878
879 static int chtls_skb_copy_to_page_nocache(struct sock *sk,
880 struct iov_iter *from,
881 struct sk_buff *skb,
882 struct page *page,
883 int off, int copy)
884 {
885 int err;
886
887 err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
888 off, copy, skb->len);
889 if (err)
890 return err;
891
892 skb->len += copy;
893 skb->data_len += copy;
894 skb->truesize += copy;
895 sk->sk_wmem_queued += copy;
896 return 0;
897 }
898
899 static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
900 {
901 return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
902 }
903
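/*
 * Wait until send-buffer space is available or the timeout expires.
 * Returns 0 on success, -EAGAIN for a non-blocking socket, -EPIPE on
 * error/shutdown, or a signal-derived errno if interrupted.
 */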
904 static int csk_wait_memory(struct chtls_dev *cdev,
905 struct sock *sk, long *timeo_p)
906 {
907 DEFINE_WAIT_FUNC(wait, woken_wake_function);
908 int ret, err = 0;
909 long current_timeo;
910 long vm_wait = 0;
911 bool noblock;
912
913 current_timeo = *timeo_p;
914 noblock = (*timeo_p ? false : true);
915 if (csk_mem_free(cdev, sk)) {
916 current_timeo = get_random_u32_below(HZ / 5) + 2;
917 vm_wait = get_random_u32_below(HZ / 5) + 2;
918 }
919
920 add_wait_queue(sk_sleep(sk), &wait);
921 while (1) {
922 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
923
924 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
925 goto do_error;
926 if (!*timeo_p) {
927 if (noblock)
928 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
929 goto do_nonblock;
930 }
931 if (signal_pending(current))
932 goto do_interrupted;
933 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
934 if (csk_mem_free(cdev, sk) && !vm_wait)
935 break;
936
937 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
938 sk->sk_write_pending++;
939 ret = sk_wait_event(sk, &current_timeo, sk->sk_err ||
940 (sk->sk_shutdown & SEND_SHUTDOWN) ||
941 (csk_mem_free(cdev, sk) && !vm_wait),
942 &wait);
943 sk->sk_write_pending--;
944 if (ret < 0)
945 goto do_error;
946
947 if (vm_wait) {
948 vm_wait -= current_timeo;
949 current_timeo = *timeo_p;
950 if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
951 current_timeo -= vm_wait;
952 if (current_timeo < 0)
953 current_timeo = 0;
954 }
955 vm_wait = 0;
956 }
957 *timeo_p = current_timeo;
958 }
959 do_rm_wq:
960 remove_wait_queue(sk_sleep(sk), &wait);
961 return err;
962 do_error:
963 err = -EPIPE;
964 goto do_rm_wq;
965 do_nonblock:
966 err = -EAGAIN;
967 goto do_rm_wq;
968 do_interrupted:
969 err = sock_intr_errno(*timeo_p);
970 goto do_rm_wq;
971 }
972
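/*
 * Parse SOL_TLS control messages to pick up the record type for the next
 * TLS record; unsupported TLS cmsg types are rejected with -EINVAL.
 */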
973 static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
974 unsigned char *record_type)
975 {
976 struct cmsghdr *cmsg;
977 int rc = -EINVAL;
978
979 for_each_cmsghdr(cmsg, msg) {
980 if (!CMSG_OK(msg, cmsg))
981 return -EINVAL;
982 if (cmsg->cmsg_level != SOL_TLS)
983 continue;
984
985 switch (cmsg->cmsg_type) {
986 case TLS_SET_RECORD_TYPE:
987 if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
988 return -EINVAL;
989
990 if (msg->msg_flags & MSG_MORE)
991 return -EINVAL;
992
993 *record_type = *(unsigned char *)CMSG_DATA(cmsg);
994 rc = 0;
995 break;
996 default:
997 return -EINVAL;
998 }
999 }
1000
1001 return rc;
1002 }
1003
1004 int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1005 {
1006 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1007 struct chtls_dev *cdev = csk->cdev;
1008 struct tcp_sock *tp = tcp_sk(sk);
1009 struct sk_buff *skb;
1010 int mss, flags, err;
1011 int recordsz = 0;
1012 int copied = 0;
1013 long timeo;
1014
1015 lock_sock(sk);
1016 flags = msg->msg_flags;
1017 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1018
1019 if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
1020 err = sk_stream_wait_connect(sk, &timeo);
1021 if (err)
1022 goto out_err;
1023 }
1024
1025 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1026 err = -EPIPE;
1027 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1028 goto out_err;
1029
1030 mss = csk->mss;
1031 csk_set_flag(csk, CSK_TX_MORE_DATA);
1032
1033 while (msg_data_left(msg)) {
1034 int copy = 0;
1035
1036 skb = skb_peek_tail(&csk->txq);
1037 if (skb) {
1038 copy = mss - skb->len;
1039 skb->ip_summed = CHECKSUM_UNNECESSARY;
1040 }
1041 if (!csk_mem_free(cdev, sk))
1042 goto wait_for_sndbuf;
1043
1044 if (is_tls_tx(csk) && !csk->tlshws.txleft) {
1045 unsigned char record_type = TLS_RECORD_TYPE_DATA;
1046
1047 if (unlikely(msg->msg_controllen)) {
1048 err = chtls_proccess_cmsg(sk, msg,
1049 &record_type);
1050 if (err)
1051 goto out_err;
1052
1053 /* Avoid appending a TLS handshake or alert record to TLS data */
1054 if (skb)
1055 tx_skb_finalize(skb);
1056 }
1057
1058 recordsz = size;
1059 csk->tlshws.txleft = recordsz;
1060 csk->tlshws.type = record_type;
1061 }
1062
1063 if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
1064 copy <= 0) {
1065 new_buf:
1066 if (skb) {
1067 tx_skb_finalize(skb);
1068 push_frames_if_head(sk);
1069 }
1070
1071 if (is_tls_tx(csk)) {
1072 skb = get_record_skb(sk,
1073 select_size(sk,
1074 recordsz,
1075 flags,
1076 TX_TLSHDR_LEN),
1077 false);
1078 } else {
1079 skb = get_tx_skb(sk,
1080 select_size(sk, size, flags,
1081 TX_HEADER_LEN));
1082 }
1083 if (unlikely(!skb))
1084 goto wait_for_memory;
1085
1086 skb->ip_summed = CHECKSUM_UNNECESSARY;
1087 copy = mss;
1088 }
1089 if (copy > size)
1090 copy = size;
1091
1092 if (msg->msg_flags & MSG_SPLICE_PAGES) {
1093 err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
1094 if (err < 0) {
1095 if (err == -EMSGSIZE)
1096 goto new_buf;
1097 goto do_fault;
1098 }
1099 copy = err;
1100 sk_wmem_queued_add(sk, copy);
1101 } else if (skb_tailroom(skb) > 0) {
1102 copy = min(copy, skb_tailroom(skb));
1103 if (is_tls_tx(csk))
1104 copy = min_t(int, copy, csk->tlshws.txleft);
1105 err = skb_add_data_nocache(sk, skb,
1106 &msg->msg_iter, copy);
1107 if (err)
1108 goto do_fault;
1109 } else {
1110 int i = skb_shinfo(skb)->nr_frags;
1111 struct page *page = TCP_PAGE(sk);
1112 int pg_size = PAGE_SIZE;
1113 int off = TCP_OFF(sk);
1114 bool merge;
1115
1116 if (page)
1117 pg_size = page_size(page);
1118 if (off < pg_size &&
1119 skb_can_coalesce(skb, i, page, off)) {
1120 merge = true;
1121 goto copy;
1122 }
1123 merge = false;
1124 if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
1125 MAX_SKB_FRAGS))
1126 goto new_buf;
1127
1128 if (page && off == pg_size) {
1129 put_page(page);
1130 TCP_PAGE(sk) = page = NULL;
1131 pg_size = PAGE_SIZE;
1132 }
1133
1134 if (!page) {
1135 gfp_t gfp = sk->sk_allocation;
1136 int order = cdev->send_page_order;
1137
1138 if (order) {
1139 page = alloc_pages(gfp | __GFP_COMP |
1140 __GFP_NOWARN |
1141 __GFP_NORETRY,
1142 order);
1143 if (page)
1144 pg_size <<= order;
1145 }
1146 if (!page) {
1147 page = alloc_page(gfp);
1148 pg_size = PAGE_SIZE;
1149 }
1150 if (!page)
1151 goto wait_for_memory;
1152 off = 0;
1153 }
1154 copy:
1155 if (copy > pg_size - off)
1156 copy = pg_size - off;
1157 if (is_tls_tx(csk))
1158 copy = min_t(int, copy, csk->tlshws.txleft);
1159
1160 err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
1161 skb, page,
1162 off, copy);
1163 if (unlikely(err)) {
1164 if (!TCP_PAGE(sk)) {
1165 TCP_PAGE(sk) = page;
1166 TCP_OFF(sk) = 0;
1167 }
1168 goto do_fault;
1169 }
1170 /* Update the skb. */
1171 if (merge) {
1172 skb_frag_size_add(
1173 &skb_shinfo(skb)->frags[i - 1],
1174 copy);
1175 } else {
1176 skb_fill_page_desc(skb, i, page, off, copy);
1177 if (off + copy < pg_size) {
1178 /* space left, keep the page */
1179 get_page(page);
1180 TCP_PAGE(sk) = page;
1181 } else {
1182 TCP_PAGE(sk) = NULL;
1183 }
1184 }
1185 TCP_OFF(sk) = off + copy;
1186 }
1187 if (unlikely(skb->len == mss))
1188 tx_skb_finalize(skb);
1189 tp->write_seq += copy;
1190 copied += copy;
1191 size -= copy;
1192
1193 if (is_tls_tx(csk))
1194 csk->tlshws.txleft -= copy;
1195
1196 if (corked(tp, flags) &&
1197 (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
1198 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
1199
1200 if (size == 0)
1201 goto out;
1202
1203 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
1204 push_frames_if_head(sk);
1205 continue;
1206 wait_for_sndbuf:
1207 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1208 wait_for_memory:
1209 err = csk_wait_memory(cdev, sk, &timeo);
1210 if (err)
1211 goto do_error;
1212 }
1213 out:
1214 csk_reset_flag(csk, CSK_TX_MORE_DATA);
1215 if (copied)
1216 chtls_tcp_push(sk, flags);
1217 done:
1218 release_sock(sk);
1219 return copied;
1220 do_fault:
1221 if (!skb->len) {
1222 __skb_unlink(skb, &csk->txq);
1223 sk->sk_wmem_queued -= skb->truesize;
1224 __kfree_skb(skb);
1225 }
1226 do_error:
1227 if (copied)
1228 goto out;
1229 out_err:
1230 if (csk_conn_inline(csk))
1231 csk_reset_flag(csk, CSK_TX_MORE_DATA);
1232 copied = sk_stream_error(sk, flags, err);
1233 goto done;
1234 }
1235
1236 void chtls_splice_eof(struct socket *sock)
1237 {
1238 struct sock *sk = sock->sk;
1239
1240 lock_sock(sk);
1241 chtls_tcp_push(sk, 0);
1242 release_sock(sk);
1243 }
1244
1245 static void chtls_select_window(struct sock *sk)
1246 {
1247 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1248 struct tcp_sock *tp = tcp_sk(sk);
1249 unsigned int wnd = tp->rcv_wnd;
1250
1251 wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
1252 wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
1253
1254 if (wnd > MAX_RCV_WND)
1255 wnd = MAX_RCV_WND;
1256
1257 /*
1258 * Check if we need to grow the receive window in response to an increase in
1259 * the socket's receive buffer size. Some applications increase the buffer
1260 * size dynamically and rely on the window to grow accordingly.
1261 */
1262
1263 if (wnd > tp->rcv_wnd) {
1264 tp->rcv_wup -= wnd - tp->rcv_wnd;
1265 tp->rcv_wnd = wnd;
1266 /* Mark the receive window as updated */
1267 csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
1268 }
1269 }
1270
1271 /*
1272 * Send RX credits through an RX_DATA_ACK CPL message. We are permitted
1273 * to return without sending the message in case we cannot allocate
1274 * an sk_buff. Returns the number of credits sent.
1275 */
1276 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
1277 {
1278 struct cpl_rx_data_ack *req;
1279 struct sk_buff *skb;
1280
1281 skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
1282 if (!skb)
1283 return 0;
1284 __skb_put(skb, sizeof(*req));
1285 req = (struct cpl_rx_data_ack *)skb->head;
1286
1287 set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
1288 INIT_TP_WR(req, csk->tid);
1289 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1290 csk->tid));
1291 req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
1292 RX_FORCE_ACK_F);
1293 cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
1294 return credits;
1295 }
1296
1297 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
1298 TCPF_FIN_WAIT1 | \
1299 TCPF_FIN_WAIT2)
1300
1301 /*
1302 * Called after some received data has been read. It returns RX credits
1303 * to the HW for the amount of data processed.
1304 */
1305 static void chtls_cleanup_rbuf(struct sock *sk, int copied)
1306 {
1307 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1308 struct tcp_sock *tp;
1309 int must_send;
1310 u32 credits;
1311 u32 thres;
1312
1313 thres = 15 * 1024;
1314
1315 if (!sk_in_state(sk, CREDIT_RETURN_STATE))
1316 return;
1317
1318 chtls_select_window(sk);
1319 tp = tcp_sk(sk);
1320 credits = tp->copied_seq - tp->rcv_wup;
1321 if (unlikely(!credits))
1322 return;
1323
1324 /*
1325 * For coalescing to work effectively ensure the receive window has
1326 * at least 16KB left.
1327 */
1328 must_send = credits + 16384 >= tp->rcv_wnd;
1329
1330 if (must_send || credits >= thres)
1331 tp->rcv_wup += send_rx_credits(csk, credits);
1332 }
1333
1334 static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1335 int flags, int *addr_len)
1336 {
1337 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1338 struct chtls_hws *hws = &csk->tlshws;
1339 struct net_device *dev = csk->egress_dev;
1340 struct adapter *adap = netdev2adap(dev);
1341 struct tcp_sock *tp = tcp_sk(sk);
1342 unsigned long avail;
1343 int buffers_freed;
1344 int copied = 0;
1345 int target;
1346 long timeo;
1347 int ret;
1348
1349 buffers_freed = 0;
1350
1351 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1352 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1353
1354 if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1355 chtls_cleanup_rbuf(sk, copied);
1356
1357 do {
1358 struct sk_buff *skb;
1359 u32 offset = 0;
1360
1361 if (unlikely(tp->urg_data &&
1362 tp->urg_seq == tp->copied_seq)) {
1363 if (copied)
1364 break;
1365 if (signal_pending(current)) {
1366 copied = timeo ? sock_intr_errno(timeo) :
1367 -EAGAIN;
1368 break;
1369 }
1370 }
1371 skb = skb_peek(&sk->sk_receive_queue);
1372 if (skb)
1373 goto found_ok_skb;
1374 if (csk->wr_credits &&
1375 skb_queue_len(&csk->txq) &&
1376 chtls_push_frames(csk, csk->wr_credits ==
1377 csk->wr_max_credits))
1378 sk->sk_write_space(sk);
1379
1380 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
1381 break;
1382
1383 if (copied) {
1384 if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1385 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1386 signal_pending(current))
1387 break;
1388
1389 if (!timeo)
1390 break;
1391 } else {
1392 if (sock_flag(sk, SOCK_DONE))
1393 break;
1394 if (sk->sk_err) {
1395 copied = sock_error(sk);
1396 break;
1397 }
1398 if (sk->sk_shutdown & RCV_SHUTDOWN)
1399 break;
1400 if (sk->sk_state == TCP_CLOSE) {
1401 copied = -ENOTCONN;
1402 break;
1403 }
1404 if (!timeo) {
1405 copied = -EAGAIN;
1406 break;
1407 }
1408 if (signal_pending(current)) {
1409 copied = sock_intr_errno(timeo);
1410 break;
1411 }
1412 }
1413 if (READ_ONCE(sk->sk_backlog.tail)) {
1414 release_sock(sk);
1415 lock_sock(sk);
1416 chtls_cleanup_rbuf(sk, copied);
1417 continue;
1418 }
1419
1420 if (copied >= target)
1421 break;
1422 chtls_cleanup_rbuf(sk, copied);
1423 ret = sk_wait_data(sk, &timeo, NULL);
1424 if (ret < 0) {
1425 copied = copied ? : ret;
1426 goto unlock;
1427 }
1428 continue;
1429 found_ok_skb:
1430 if (!skb->len) {
1431 skb_dstref_steal(skb);
1432 __skb_unlink(skb, &sk->sk_receive_queue);
1433 kfree_skb(skb);
1434
1435 if (!copied && !timeo) {
1436 copied = -EAGAIN;
1437 break;
1438 }
1439
1440 if (copied < target) {
1441 release_sock(sk);
1442 lock_sock(sk);
1443 continue;
1444 }
1445 break;
1446 }
1447 offset = hws->copied_seq;
1448 avail = skb->len - offset;
1449 if (len < avail)
1450 avail = len;
1451
1452 if (unlikely(tp->urg_data)) {
1453 u32 urg_offset = tp->urg_seq - tp->copied_seq;
1454
1455 if (urg_offset < avail) {
1456 if (urg_offset) {
1457 avail = urg_offset;
1458 } else if (!sock_flag(sk, SOCK_URGINLINE)) {
1459 /* First byte is urgent, skip */
1460 tp->copied_seq++;
1461 offset++;
1462 avail--;
1463 if (!avail)
1464 goto skip_copy;
1465 }
1466 }
1467 }
1468 /* Set record type if not already done. For a non-data record,
1469 * do not proceed if record type could not be copied.
1470 */
1471 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1472 struct tls_hdr *thdr = (struct tls_hdr *)skb->data;
1473 int cerr = 0;
1474
1475 cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
1476 sizeof(thdr->type), &thdr->type);
1477
1478 if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
1479 copied = -EIO;
1480 break;
1481 }
1482 /* don't send tls header, skip copy */
1483 goto skip_copy;
1484 }
1485
1486 if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
1487 if (!copied) {
1488 copied = -EFAULT;
1489 break;
1490 }
1491 }
1492
1493 copied += avail;
1494 len -= avail;
1495 hws->copied_seq += avail;
1496 skip_copy:
1497 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1498 tp->urg_data = 0;
1499
1500 if ((avail + offset) >= skb->len) {
1501 struct sk_buff *next_skb;
1502 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1503 tp->copied_seq += skb->len;
1504 hws->rcvpld = skb->hdr_len;
1505 } else {
1506 atomic_inc(&adap->chcr_stats.tls_pdu_rx);
1507 tp->copied_seq += hws->rcvpld;
1508 }
1509 chtls_free_skb(sk, skb);
1510 buffers_freed++;
1511 hws->copied_seq = 0;
1512 next_skb = skb_peek(&sk->sk_receive_queue);
1513 if (copied >= target && !next_skb)
1514 break;
1515 if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
1516 break;
1517 }
1518 } while (len > 0);
1519
1520 if (buffers_freed)
1521 chtls_cleanup_rbuf(sk, copied);
1522
1523 unlock:
1524 release_sock(sk);
1525 return copied;
1526 }
1527
1528 /*
1529 * Peek at data in a socket's receive buffer.
1530 */
1531 static int peekmsg(struct sock *sk, struct msghdr *msg,
1532 size_t len, int flags)
1533 {
1534 struct tcp_sock *tp = tcp_sk(sk);
1535 u32 peek_seq, offset;
1536 struct sk_buff *skb;
1537 int copied = 0;
1538 size_t avail; /* amount of available data in current skb */
1539 long timeo;
1540 int ret;
1541
1542 lock_sock(sk);
1543 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1544 peek_seq = tp->copied_seq;
1545
1546 do {
1547 if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
1548 if (copied)
1549 break;
1550 if (signal_pending(current)) {
1551 copied = timeo ? sock_intr_errno(timeo) :
1552 -EAGAIN;
1553 break;
1554 }
1555 }
1556
1557 skb_queue_walk(&sk->sk_receive_queue, skb) {
1558 offset = peek_seq - ULP_SKB_CB(skb)->seq;
1559 if (offset < skb->len)
1560 goto found_ok_skb;
1561 }
1562
1563 /* empty receive queue */
1564 if (copied)
1565 break;
1566 if (sock_flag(sk, SOCK_DONE))
1567 break;
1568 if (sk->sk_err) {
1569 copied = sock_error(sk);
1570 break;
1571 }
1572 if (sk->sk_shutdown & RCV_SHUTDOWN)
1573 break;
1574 if (sk->sk_state == TCP_CLOSE) {
1575 copied = -ENOTCONN;
1576 break;
1577 }
1578 if (!timeo) {
1579 copied = -EAGAIN;
1580 break;
1581 }
1582 if (signal_pending(current)) {
1583 copied = sock_intr_errno(timeo);
1584 break;
1585 }
1586
1587 if (READ_ONCE(sk->sk_backlog.tail)) {
1588 /* Do not sleep, just process backlog. */
1589 release_sock(sk);
1590 lock_sock(sk);
1591 } else {
1592 ret = sk_wait_data(sk, &timeo, NULL);
1593 if (ret < 0) {
1594 /* here 'copied' is 0 due to previous checks */
1595 copied = ret;
1596 break;
1597 }
1598 }
1599
1600 if (unlikely(peek_seq != tp->copied_seq)) {
1601 if (net_ratelimit())
1602 pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
1603 current->comm, current->pid);
1604 peek_seq = tp->copied_seq;
1605 }
1606 continue;
1607
1608 found_ok_skb:
1609 avail = skb->len - offset;
1610 if (len < avail)
1611 avail = len;
1612 /*
1613 * Do we have urgent data here? We need to skip over the
1614 * urgent byte.
1615 */
1616 if (unlikely(tp->urg_data)) {
1617 u32 urg_offset = tp->urg_seq - peek_seq;
1618
1619 if (urg_offset < avail) {
1620 /*
1621 * The amount of data we are preparing to copy
1622 * contains urgent data.
1623 */
1624 if (!urg_offset) { /* First byte is urgent */
1625 if (!sock_flag(sk, SOCK_URGINLINE)) {
1626 peek_seq++;
1627 offset++;
1628 avail--;
1629 }
1630 if (!avail)
1631 continue;
1632 } else {
1633 /* stop short of the urgent data */
1634 avail = urg_offset;
1635 }
1636 }
1637 }
1638
1639 /*
1640 * If MSG_TRUNC is specified the data is discarded.
1641 */
1642 if (likely(!(flags & MSG_TRUNC)))
1643 if (skb_copy_datagram_msg(skb, offset, msg, len)) {
1644 if (!copied) {
1645 copied = -EFAULT;
1646 break;
1647 }
1648 }
1649 peek_seq += avail;
1650 copied += avail;
1651 len -= avail;
1652 } while (len > 0);
1653
1654 release_sock(sk);
1655 return copied;
1656 }
1657
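/*
 * recvmsg entry point: out-of-band requests fall back to the regular TCP
 * recvmsg, MSG_PEEK is handled by peekmsg(), TLS-offloaded sockets go
 * through chtls_pt_recvmsg(), and everything else is serviced from the
 * offloaded receive queue here.
 */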
1658 int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1659 int flags, int *addr_len)
1660 {
1661 struct tcp_sock *tp = tcp_sk(sk);
1662 struct chtls_sock *csk;
1663 unsigned long avail; /* amount of available data in current skb */
1664 int buffers_freed;
1665 int copied = 0;
1666 long timeo;
1667 int target; /* Read at least this many bytes */
1668 int ret;
1669
1670 buffers_freed = 0;
1671
1672 if (unlikely(flags & MSG_OOB))
1673 return tcp_prot.recvmsg(sk, msg, len, flags, addr_len);
1674
1675 if (unlikely(flags & MSG_PEEK))
1676 return peekmsg(sk, msg, len, flags);
1677
1678 if (sk_can_busy_loop(sk) &&
1679 skb_queue_empty_lockless(&sk->sk_receive_queue) &&
1680 sk->sk_state == TCP_ESTABLISHED)
1681 sk_busy_loop(sk, flags & MSG_DONTWAIT);
1682
1683 lock_sock(sk);
1684 csk = rcu_dereference_sk_user_data(sk);
1685
1686 if (is_tls_rx(csk))
1687 return chtls_pt_recvmsg(sk, msg, len, flags, addr_len);
1688
1689 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1690 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1691
1692 if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1693 chtls_cleanup_rbuf(sk, copied);
1694
1695 do {
1696 struct sk_buff *skb;
1697 u32 offset;
1698
1699 if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
1700 if (copied)
1701 break;
1702 if (signal_pending(current)) {
1703 copied = timeo ? sock_intr_errno(timeo) :
1704 -EAGAIN;
1705 break;
1706 }
1707 }
1708
1709 skb = skb_peek(&sk->sk_receive_queue);
1710 if (skb)
1711 goto found_ok_skb;
1712
1713 if (csk->wr_credits &&
1714 skb_queue_len(&csk->txq) &&
1715 chtls_push_frames(csk, csk->wr_credits ==
1716 csk->wr_max_credits))
1717 sk->sk_write_space(sk);
1718
1719 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
1720 break;
1721
1722 if (copied) {
1723 if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1724 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1725 signal_pending(current))
1726 break;
1727 } else {
1728 if (sock_flag(sk, SOCK_DONE))
1729 break;
1730 if (sk->sk_err) {
1731 copied = sock_error(sk);
1732 break;
1733 }
1734 if (sk->sk_shutdown & RCV_SHUTDOWN)
1735 break;
1736 if (sk->sk_state == TCP_CLOSE) {
1737 copied = -ENOTCONN;
1738 break;
1739 }
1740 if (!timeo) {
1741 copied = -EAGAIN;
1742 break;
1743 }
1744 if (signal_pending(current)) {
1745 copied = sock_intr_errno(timeo);
1746 break;
1747 }
1748 }
1749
1750 if (READ_ONCE(sk->sk_backlog.tail)) {
1751 release_sock(sk);
1752 lock_sock(sk);
1753 chtls_cleanup_rbuf(sk, copied);
1754 continue;
1755 }
1756
1757 if (copied >= target)
1758 break;
1759 chtls_cleanup_rbuf(sk, copied);
1760 ret = sk_wait_data(sk, &timeo, NULL);
1761 if (ret < 0) {
1762 copied = copied ? : ret;
1763 goto unlock;
1764 }
1765 continue;
1766
1767 found_ok_skb:
1768 if (!skb->len) {
1769 chtls_kfree_skb(sk, skb);
1770 if (!copied && !timeo) {
1771 copied = -EAGAIN;
1772 break;
1773 }
1774
1775 if (copied < target)
1776 continue;
1777
1778 break;
1779 }
1780
1781 offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
1782 avail = skb->len - offset;
1783 if (len < avail)
1784 avail = len;
1785
1786 if (unlikely(tp->urg_data)) {
1787 u32 urg_offset = tp->urg_seq - tp->copied_seq;
1788
1789 if (urg_offset < avail) {
1790 if (urg_offset) {
1791 avail = urg_offset;
1792 } else if (!sock_flag(sk, SOCK_URGINLINE)) {
1793 tp->copied_seq++;
1794 offset++;
1795 avail--;
1796 if (!avail)
1797 goto skip_copy;
1798 }
1799 }
1800 }
1801
1802 if (likely(!(flags & MSG_TRUNC))) {
1803 if (skb_copy_datagram_msg(skb, offset,
1804 msg, avail)) {
1805 if (!copied) {
1806 copied = -EFAULT;
1807 break;
1808 }
1809 }
1810 }
1811
1812 tp->copied_seq += avail;
1813 copied += avail;
1814 len -= avail;
1815
1816 skip_copy:
1817 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1818 tp->urg_data = 0;
1819
1820 if (avail + offset >= skb->len) {
1821 chtls_free_skb(sk, skb);
1822 buffers_freed++;
1823
1824 if (copied >= target &&
1825 !skb_peek(&sk->sk_receive_queue))
1826 break;
1827 }
1828 } while (len > 0);
1829
1830 if (buffers_freed)
1831 chtls_cleanup_rbuf(sk, copied);
1832
1833 unlock:
1834 release_sock(sk);
1835 return copied;
1836 }
1837