// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static char *resp_state_name[] = {
	[RESPST_NONE] = "NONE",
	[RESPST_GET_REQ] = "GET_REQ",
	[RESPST_CHK_PSN] = "CHK_PSN",
	[RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
	[RESPST_CHK_LENGTH] = "CHK_LENGTH",
	[RESPST_CHK_RKEY] = "CHK_RKEY",
	[RESPST_EXECUTE] = "EXECUTE",
	[RESPST_READ_REPLY] = "READ_REPLY",
	[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
	[RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
	[RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
	[RESPST_COMPLETE] = "COMPLETE",
	[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
	[RESPST_CLEANUP] = "CLEANUP",
	[RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR] = "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
	[RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH] = "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
	[RESPST_ERROR] = "ERROR",
	[RESPST_DONE] = "DONE",
	[RESPST_EXIT] = "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
	skb_queue_tail(&qp->req_pkts, skb);
	rxe_sched_task(&qp->recv_task);
}

static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}
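
/* Note on PSN ordering (illustrative only, not used by the code below):
 * PSNs are 24-bit serial numbers, so comparisons must be wraparound aware.
 * psn_compare() (from the rxe headers) returns a signed value whose sign
 * gives the circular ordering, roughly:
 *
 *	psn_compare(0x000002, 0xfffffe) > 0	// 2 is "after" 0xfffffe
 *	psn_compare(0xfffffe, 0x000002) < 0
 *
 * check_psn() below depends only on the sign of this comparison.
 */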

static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}

static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}
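
/* Illustrative example (not compiled): an RC QP whose qp_access_flags were
 * set with only IB_ACCESS_REMOTE_WRITE passes RDMA WRITE and ATOMIC WRITE
 * requests through check_qp_attr_access() below, while RDMA READ and ATOMIC
 * requests fail the check and are completed via
 * RESPST_ERR_UNSUPPORTED_OPCODE (a Class C NAK in rxe_receiver()).
 */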

static bool check_qp_attr_access(struct rxe_qp *qp,
				 struct rxe_pkt_info *pkt)
{
	if (((pkt->mask & RXE_READ_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
	    ((pkt->mask & RXE_ATOMIC_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
		return false;

	if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if ((flush_type & IB_FLUSH_GLOBAL &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
		    (flush_type & IB_FLUSH_PERSISTENT &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
			return false;
	}

	return true;
}

static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (!check_qp_attr_access(qp, pkt))
			return RESPST_ERR_UNSUPPORTED_OPCODE;

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}

static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;
	unsigned int count;
	size_t size;
	unsigned long flags;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_irqsave(&srq->rq.consumer_lock, flags);

	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
	if (!wqe) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		return RESPST_ERR_RNR;
	}

	/* don't trust user space data */
	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
		return RESPST_ERR_MALFORMED_WQE;
	}
	size = sizeof(*wqe) + wqe->dma.num_sge * sizeof(struct rxe_sge);
	memcpy(&qp->resp.srq_wqe, wqe, size);

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);

	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
		srq->limit = 0;
		goto event;
	}

	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	return RESPST_CHK_LENGTH;

event:
	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}

static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
		/* it is the requester's job not to send
		 * too many read/atomic ops; we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue,
					  QUEUE_TYPE_FROM_CLIENT);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}
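
/* Worked example for the C9-92 check below (illustrative only): a UD packet
 * with a 512-byte payload is delivered together with a 40-byte
 * union rdma_network_hdr, so the receive WQE's scatter list must provide at
 * least 552 bytes or the packet is rejected with RESPST_ERR_LENGTH.
 */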

static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
					      struct rxe_pkt_info *pkt)
{
	/*
	 * See IBA C9-92
	 * For UD QPs we only check if the packet will fit in the
	 * receive buffer later. For RDMA operations additional
	 * length checks are performed in check_rkey.
	 */
	if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) {
		unsigned int payload = payload_size(pkt);
		unsigned int recv_buffer_len = 0;
		int i;

		for (i = 0; i < qp->resp.wqe->dma.num_sge; i++)
			recv_buffer_len += qp->resp.wqe->dma.sge[i].length;
		if (payload + sizeof(union rdma_network_hdr) > recv_buffer_len) {
			rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
					     (qp_type(qp) == IB_QPT_UC))) {
		unsigned int mtu = qp->mtu;
		unsigned int payload = payload_size(pkt);

		if ((pkt->mask & RXE_START_MASK) &&
		    (pkt->mask & RXE_END_MASK)) {
			if (unlikely(payload > mtu)) {
				rxe_dbg_qp(qp, "only packet too long\n");
				return RESPST_ERR_LENGTH;
			}
		} else if ((pkt->mask & RXE_START_MASK) ||
			   (pkt->mask & RXE_MIDDLE_MASK)) {
			if (unlikely(payload != mtu)) {
				rxe_dbg_qp(qp, "first or middle packet not mtu\n");
				return RESPST_ERR_LENGTH;
			}
		} else if (pkt->mask & RXE_END_MASK) {
			if (unlikely((payload == 0) || (payload > mtu))) {
				rxe_dbg_qp(qp, "last packet zero or too long\n");
				return RESPST_ERR_LENGTH;
			}
		}
	}

	/* See IBA C9-94 */
	if (pkt->mask & RXE_RETH_MASK) {
		if (reth_len(pkt) > (1U << 31)) {
			rxe_dbg_qp(qp, "dma length too long\n");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_RDMA_OP_MASK)
		return RESPST_CHK_RKEY;
	else
		return RESPST_EXECUTE;
}

/* if the reth length field is zero we can assume nothing
 * about the rkey value and should not validate or use it.
 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 * value since the minimum index part is 1.
 */
static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	unsigned int length = reth_len(pkt);

	qp->resp.va = reth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.resid = length;
	qp->resp.length = length;
	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
		qp->resp.rkey = 0;
	else
		qp->resp.rkey = reth_rkey(pkt);
}

static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->resp.va = atmeth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.rkey = atmeth_rkey(pkt);
	qp->resp.resid = sizeof(u64);
}

/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 * if an invalid rkey is received or the rdma length is zero. For middle
 * or last packets use the stored value of mr.
 */
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = NULL;
	struct rxe_mw *mw = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access = 0;

	/* parse RETH or ATMETH header for first/only packets
	 * for va, length, rkey, etc. or use current value for
	 * middle/last packets.
	 */
	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		if (flush_type & IB_FLUSH_GLOBAL)
			access |= IB_ACCESS_FLUSH_GLOBAL;
		if (flush_type & IB_FLUSH_PERSISTENT)
			access |= IB_ACCESS_FLUSH_PERSISTENT;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp_resp_from_atmeth(qp, pkt);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		/* shouldn't happen */
		WARN_ON(1);
	}

	/* A zero-byte read or write op is not required to
	 * set an addr or rkey. See C9-88
	 */
	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
		qp->resp.mr = NULL;
		return RESPST_EXECUTE;
	}

	va = qp->resp.va;
	rkey = qp->resp.rkey;
	resid = qp->resp.resid;
	pktlen = payload_size(pkt);

	if (rkey_is_mw(rkey)) {
		mw = rxe_lookup_mw(qp, access, rkey);
		if (!mw) {
			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		mr = mw->mr;
		if (!mr) {
			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		if (mw->access & IB_ZERO_BASED)
			qp->resp.offset = mw->addr;

		rxe_get(mr);
		rxe_put(mw);
		mw = NULL;
	} else {
		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
		if (!mr) {
			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}
	}

	if (pkt->mask & RXE_FLUSH_MASK) {
		/* FLUSH MR may not set va or resid;
		 * no need to check range since we will flush the whole MR
		 */
		if (feth_sel(pkt) == IB_FLUSH_MR)
			goto skip_check_range;
	}

	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

skip_check_range:
	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mr;
	return RESPST_EXECUTE;

err:
	qp->resp.mr = NULL;
	if (mr)
		rxe_put(mr);
	if (mw)
		rxe_put(mw);

	return state;
}
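
/* Worked example for the padding check in check_rkey() above (illustrative
 * only): a last or only write packet with resid = 13 must be padded to a
 * 4-byte boundary, so the expected pad count is (-13) & 0x3 = 3; any other
 * bth_pad() value results in RESPST_ERR_LENGTH.
 */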

static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;

	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, RXE_TO_MR_OBJ);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}

static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int err;
	int data_len = payload_size(pkt);

	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}

static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					int type)
{
	struct resp_res *res;
	u32 pkts;

	res = &qp->resp.resources[qp->resp.res_head];
	rxe_advance_resp_resource(qp);
	free_rd_atomic_resource(res);

	res->type = type;
	res->replay = 0;

	switch (type) {
	case RXE_READ_MASK:
		res->read.va = qp->resp.va + qp->resp.offset;
		res->read.va_org = qp->resp.va + qp->resp.offset;
		res->read.resid = qp->resp.resid;
		res->read.length = qp->resp.resid;
		res->read.rkey = qp->resp.rkey;

		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1) / qp->mtu, 1);
		res->first_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;

		res->state = rdatm_res_state_new;
		break;
	case RXE_ATOMIC_MASK:
	case RXE_ATOMIC_WRITE_MASK:
		res->first_psn = pkt->psn;
		res->last_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		break;
	case RXE_FLUSH_MASK:
		res->flush.va = qp->resp.va + qp->resp.offset;
		res->flush.length = qp->resp.length;
		res->flush.type = feth_plt(pkt);
		res->flush.level = feth_sel(pkt);
	}

	return res;
}

static enum resp_states process_flush(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	u64 length, start;
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
	else if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
		qp->resp.res = res;
	}

	if (res->flush.level == IB_FLUSH_RANGE) {
		start = res->flush.va;
		length = res->flush.length;
	} else { /* level == IB_FLUSH_MR */
		start = mr->ibmr.iova;
		length = mr->ibmr.length;
	}

	if (res->flush.type & IB_FLUSH_PERSISTENT) {
		if (rxe_flush_pmem_iova(mr, start, length))
			return RESPST_ERR_RKEY_VIOLATION;
		/* Make data persistent. */
		wmb();
	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
		/* Make data globally visible. */
		wmb();
	}

	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_reply(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
		qp->resp.res = res;
	}

	if (!res->replay) {
		u64 iova = qp->resp.va + qp->resp.offset;

		if (is_odp_mr(mr))
			err = rxe_odp_atomic_op(mr, iova, pkt->opcode,
						atmeth_comp(pkt),
						atmeth_swap_add(pkt),
						&res->atomic.orig_val);
		else
			err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
						  atmeth_comp(pkt),
						  atmeth_swap_add(pkt),
						  &res->atomic.orig_val);
		if (err)
			return err;

		qp->resp.msn++;

		/* next expected psn, read handles this separately */
		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
		qp->resp.ack_psn = qp->resp.psn;

		qp->resp.opcode = pkt->opcode;
		qp->resp.status = IB_WC_SUCCESS;
	}

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_write_reply(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt)
{
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;
	u64 value;
	u64 iova;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
		qp->resp.res = res;
	}

	if (res->replay)
		return RESPST_ACKNOWLEDGE;

	mr = qp->resp.mr;
	value = *(u64 *)payload_addr(pkt);
	iova = qp->resp.va + qp->resp.offset;

	/* See IBA oA19-28 */
	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
		rxe_dbg_mr(mr, "mr not in valid state\n");
		return RESPST_ERR_RKEY_VIOLATION;
	}

	if (is_odp_mr(mr))
		err = rxe_odp_do_atomic_write(mr, iova, value);
	else
		err = rxe_mr_do_atomic_write(mr, iova, value);
	if (err)
		return err;

	qp->resp.resid = 0;
	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	int paylen;
	int pad;
	int err;

	/*
	 * allocate packet
	 */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->qp = qp;
	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->paylen = paylen;
	ack->psn = psn;

	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
		 qp->attr.dest_qp_num, 0, psn);

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);

	err = rxe_prepare(&qp->pri_av, ack, skb);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}
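
/* Background sketch for the lookups below (based on the rkey encoding used
 * elsewhere in rxe, see rxe_mr.c/rxe_pool.c for the authoritative layout):
 * an rkey carries the object's pool index in its upper 24 bits and an 8-bit
 * variant key in the low byte. That is why rxe_recheck_mr() resolves
 * "rkey >> 8" through the MR or MW pool and then re-checks that the full
 * rkey still matches the object.
 */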

/**
 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 * @qp: the qp
 * @rkey: the rkey
 *
 * This code allows the MR to be invalidated or deregistered, or
 * the MW (if one was used) to be invalidated or deallocated, while
 * a read response is in progress. It is assumed that the access
 * permissions, if originally good, are still OK and that the
 * mappings are unchanged.
 *
 * TODO: If someone reregisters an MR to change its size or
 * access permissions during the processing of an RDMA read
 * we should kill the responder resource and complete the
 * operation with an error.
 *
 * Return: mr on success else NULL
 */
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	struct rxe_mw *mw;

	if (rkey_is_mw(rkey)) {
		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;

		mr = mw->mr;
		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
		    !mr || mr->state != RXE_MR_STATE_VALID) {
			rxe_put(mw);
			return NULL;
		}

		rxe_get(mr);
		rxe_put(mw);

		return mr;
	}

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr)
		return NULL;

	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
		rxe_put(mr);
		return NULL;
	}

	return mr;
}
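
/* Worked example for the read reply state machine below (illustrative only):
 * with a 4096-byte MTU, a 10000-byte RDMA READ is answered with FIRST
 * (4096 bytes), MIDDLE (4096 bytes) and LAST (1808 bytes) response packets.
 * rxe_prepare_res() computed pkts = 3, so last_psn = first_psn + 2, and
 * res->read.resid counts down to zero across three calls to read_reply().
 */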

/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;

	if (!res) {
		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
		qp->resp.res = res;
	}

	if (res->state == rdatm_res_state_new) {
		if (!res->replay || qp->resp.length == 0) {
			/* if length == 0 mr will be NULL (which is OK);
			 * otherwise qp->resp.mr holds a ref on the MR
			 * which we transfer to mr and drop below.
			 */
			mr = qp->resp.mr;
			qp->resp.mr = NULL;
		} else {
			mr = rxe_recheck_mr(qp, res->read.rkey);
			if (!mr)
				return RESPST_ERR_RKEY_VIOLATION;
		}

		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		/* re-lookup mr from rkey on all later packets.
		 * length will be non-zero. This can fail if someone
		 * has modified or destroyed the mr since the first packet.
		 */
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;

		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED);
	if (!skb) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, RXE_FROM_MR_OBJ);
	if (err) {
		kfree_skb(skb);
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err_out;
	}

	if (bth_pad(&ack_pkt)) {
		u8 *pad = payload_addr(&ack_pkt) + payload;

		memset(pad, 0, bth_pad(&ack_pkt));
	}

	/* rxe_xmit_packet always consumes the skb */
	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		if (!res->replay)
			qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

err_out:
	if (mr)
		rxe_put(mr);
	return state;
}

static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
{
	if (rkey_is_mw(rkey))
		return rxe_invalidate_mw(qp, rkey);
	else
		return rxe_invalidate_mr(qp, rkey);
}

/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;
	struct sk_buff *skb = PKT_TO_SKB(pkt);
	union rdma_network_hdr hdr;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_GSI) {
			if (skb->protocol == htons(ETH_P_IP)) {
				memset(&hdr.reserved, 0,
				       sizeof(hdr.reserved));
				memcpy(&hdr.roce4grh, ip_hdr(skb),
				       sizeof(hdr.roce4grh));
				err = send_data_in(qp, &hdr, sizeof(hdr));
			} else {
				err = send_data_in(qp, ipv6_hdr(skb),
						   sizeof(hdr));
			}
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		return RESPST_ATOMIC_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
		return RESPST_ATOMIC_WRITE_REPLY;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		return RESPST_PROCESS_FLUSH;
	} else {
		/* Unreachable */
		WARN_ON_ONCE(1);
	}

	if (pkt->mask & RXE_IETH_MASK) {
		u32 rkey = ieth_rkey(pkt);

		err = invalidate_rkey(qp, rkey);
		if (err)
			return RESPST_ERR_INVALIDATE_RKEY;
	}

	if (pkt->mask & RXE_END_MASK)
		/* We successfully processed this new request. */
		qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK)
		return RESPST_COMPLETE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	unsigned long flags;

	if (!wqe)
		goto finish;

	memset(&cqe, 0, sizeof(cqe));

	if (qp->rcq->is_user) {
		uwc->status = qp->resp.status;
		uwc->qp_num = qp->ibqp.qp_num;
		uwc->wr_id = wqe->wr_id;
	} else {
		wc->status = qp->resp.status;
		wc->qp = &qp->ibqp;
		wc->wr_id = wqe->wr_id;
	}

	if (wc->status == IB_WC_SUCCESS) {
		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
			      pkt->mask & RXE_WRITE_MASK) ?
				IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
				qp->resp.length : wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num = qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (is_vlan_dev(skb->dev)) {
				wc->wc_flags |= IB_WC_WITH_VLAN;
				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
			}

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num = qp->attr.port_num;
		}
	} else {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			rxe_err_qp(qp, "non-flush error status = %d\n",
				   wc->status);
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

finish:
	spin_lock_irqsave(&qp->state_lock, flags);
	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		return RESPST_CHK_RESOURCE;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	if (unlikely(!pkt))
		return RESPST_DONE;
	if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}
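
/* Note on the completion above (descriptive only): the last argument to
 * rxe_cq_post() is the solicited flag. For responder completions it is taken
 * from the BTH solicited-event bit of the packet, or forced to 1 when there
 * is no packet (e.g. when flushing), so that an armed IB_CQ_SOLICITED
 * notification can fire.
 */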

static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
			   int opcode, const char *msg)
{
	int err;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
	if (!skb)
		return -ENOMEM;

	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err)
		rxe_dbg_qp(qp, "Failed sending %s\n", msg);

	return err;
}

static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	return send_common_ack(qp, syndrome, psn,
			       IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
}

static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE,
				  "ATOMIC ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
				  "RDMA READ response of length zero ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (bth_ack(pkt))
		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}

static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (qp->resp.mr) {
		rxe_put(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}

static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}

static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
		return RESPST_CLEANUP;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = RESPST_PROCESS_FLUSH;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;
			res->replay = 1;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = pkt->mask & RXE_ATOMIC_MASK ?
					RESPST_ATOMIC_REPLY :
					RESPST_ATOMIC_WRITE_REPLY;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}

/* Process a class A or C error. Both are treated the same in this
 * implementation.
 */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome = syndrome;
	qp->resp.status = status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error = 1;
}

static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. Reset the
		 * recv wr to its original state.
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_put(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}

/* drain incoming request packet queue */
static void drain_req_pkts(struct rxe_qp *qp)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}
}

/* complete receive wqe with flush error */
static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
{
	struct rxe_cqe cqe = {};
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	int err;

	if (qp->rcq->is_user) {
		uwc->wr_id = wqe->wr_id;
		uwc->status = IB_WC_WR_FLUSH_ERR;
		uwc->qp_num = qp_num(qp);
	} else {
		wc->wr_id = wqe->wr_id;
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->qp = &qp->ibqp;
	}

	err = rxe_cq_post(qp->rcq, &cqe, 0);
	if (err)
		rxe_dbg_cq(qp->rcq, "post cq failed err = %d\n", err);

	return err;
}

/* drain and optionally complete the receive queue;
 * if unable to complete a wqe stop completing and
 * just flush the remaining wqes
 */
static void flush_recv_queue(struct rxe_qp *qp, bool notify)
{
	struct rxe_queue *q = qp->rq.queue;
	struct rxe_recv_wqe *wqe;
	int err;

	if (qp->srq) {
		if (notify && qp->ibqp.event_handler) {
			struct ib_event ev;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
		return;
	}
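
	/* Note (descriptive only): WQEs sitting on a shared receive queue are
	 * not flushed here because the SRQ may be serving other QPs; the
	 * IB_EVENT_QP_LAST_WQE_REACHED event above tells the consumer to
	 * clean up any SRQ WQEs still associated with this QP, per the usual
	 * IBA last-WQE-reached semantics.
	 */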

	/* recv queue not created. nothing to do. */
	if (!qp->rq.queue)
		return;

	while ((wqe = queue_head(q, q->type))) {
		if (notify) {
			err = flush_recv_wqe(qp, wqe);
			if (err)
				notify = 0;
		}
		queue_advance_consumer(q, q->type);
	}

	qp->resp.wqe = NULL;
}

int rxe_receiver(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
	    qp_state(qp) == IB_QPS_RESET) {
		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);

		drain_req_pkts(qp);
		flush_recv_queue(qp, notify);
		spin_unlock_irqrestore(&qp->state_lock, flags);
		goto exit;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	state = RESPST_GET_REQ;

	while (1) {
		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = rxe_resp_check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_REPLY:
			state = atomic_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_WRITE_REPLY:
			state = atomic_write_reply(qp, pkt);
			break;
		case RESPST_PROCESS_FLUSH:
			state = process_flush(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;
		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					 qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_INVALIDATE_RKEY:
			/* RC - Class J. */
			qp->resp.goto_error = 1;
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All, Class A. */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			rxe_dbg_qp(qp, "moved to error state\n");
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the work item. A zero return
	 * will continue looping and return to rxe_receiver.
	 */
done:
	ret = 0;
	goto out;
exit:
	ret = -EAGAIN;
out:
	return ret;
}
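
/* Usage note (descriptive only): rxe_receiver() runs from the qp->recv_task
 * work item that rxe_resp_queue_pkt() schedules for each arriving request
 * packet. A return of 0 asks the task layer to keep calling back in so more
 * queued packets are consumed; -EAGAIN ends the current invocation until the
 * task is scheduled again.
 */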