1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Common code for the NVMe target.
4 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5 */
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 #include <linux/module.h>
8 #include <linux/random.h>
9 #include <linux/rculist.h>
10 #include <linux/pci-p2pdma.h>
11 #include <linux/scatterlist.h>
12
13 #include <generated/utsrelease.h>
14
15 #define CREATE_TRACE_POINTS
16 #include "trace.h"
17
18 #include "nvmet.h"
19 #include "debugfs.h"
20
21 struct kmem_cache *nvmet_bvec_cache;
22 struct workqueue_struct *buffered_io_wq;
23 struct workqueue_struct *zbd_wq;
24 static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
25 static DEFINE_IDA(cntlid_ida);
26
27 struct workqueue_struct *nvmet_wq;
28 EXPORT_SYMBOL_GPL(nvmet_wq);
29
30 /*
31 * This read/write semaphore is used to synchronize access to configuration
32 * information on a target system that will result in discovery log page
33 * information change for at least one host.
34 * The full list of resources protected by this semaphore is:
35 *
36 * - subsystems list
37 * - per-subsystem allowed hosts list
38 * - allow_any_host subsystem attribute
39 * - nvmet_genctr
40 * - the nvmet_transports array
41 *
42 * When updating any of those lists/structures, the write lock should be
43 * obtained, while when reading (populating the discovery log page or checking
44 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
45 */
46 DECLARE_RWSEM(nvmet_config_sem);
47
48 u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
49 u64 nvmet_ana_chgcnt;
50 DECLARE_RWSEM(nvmet_ana_sem);
51
52 inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
53 {
54 switch (errno) {
55 case 0:
56 return NVME_SC_SUCCESS;
57 case -ENOSPC:
58 req->error_loc = offsetof(struct nvme_rw_command, length);
59 return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
60 case -EREMOTEIO:
61 req->error_loc = offsetof(struct nvme_rw_command, slba);
62 return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
63 case -EOPNOTSUPP:
64 req->error_loc = offsetof(struct nvme_common_command, opcode);
65 switch (req->cmd->common.opcode) {
66 case nvme_cmd_dsm:
67 case nvme_cmd_write_zeroes:
68 return NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR;
69 default:
70 return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
71 }
72 break;
73 case -ENODATA:
74 req->error_loc = offsetof(struct nvme_rw_command, nsid);
75 return NVME_SC_ACCESS_DENIED;
76 case -EIO:
77 fallthrough;
78 default:
79 req->error_loc = offsetof(struct nvme_common_command, opcode);
80 return NVME_SC_INTERNAL | NVME_STATUS_DNR;
81 }
82 }
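/*
 * Backend completion paths typically feed a kernel errno straight into
 * this helper, roughly (as the file-backed I/O path does):
 *
 *	nvmet_req_complete(req, ret < 0 ?
 *			errno_to_nvme_status(req, ret) : NVME_SC_SUCCESS);
 *
 * The call site shown here is illustrative only.
 */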
83
84 u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
85 {
86 pr_debug("unhandled cmd %d on qid %d\n", req->cmd->common.opcode,
87 req->sq->qid);
88
89 req->error_loc = offsetof(struct nvme_common_command, opcode);
90 return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
91 }
92
93 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
94 const char *subsysnqn);
95
96 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
97 size_t len)
98 {
99 if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
100 req->error_loc = offsetof(struct nvme_common_command, dptr);
101 return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
102 }
103 return 0;
104 }
105
106 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
107 {
108 if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
109 req->error_loc = offsetof(struct nvme_common_command, dptr);
110 return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
111 }
112 return 0;
113 }
114
115 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
116 {
117 if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
118 req->error_loc = offsetof(struct nvme_common_command, dptr);
119 return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
120 }
121 return 0;
122 }
123
124 static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
125 {
126 struct nvmet_ns *cur;
127 unsigned long idx;
128 u32 nsid = 0;
129
130 nvmet_for_each_enabled_ns(&subsys->namespaces, idx, cur)
131 nsid = cur->nsid;
132
133 return nsid;
134 }
135
136 static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
137 {
138 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
139 }
140
141 static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
142 {
143 struct nvmet_req *req;
144
145 mutex_lock(&ctrl->lock);
146 while (ctrl->nr_async_event_cmds) {
147 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
148 mutex_unlock(&ctrl->lock);
149 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
150 mutex_lock(&ctrl->lock);
151 }
152 mutex_unlock(&ctrl->lock);
153 }
154
155 static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
156 {
157 struct nvmet_async_event *aen;
158 struct nvmet_req *req;
159
160 mutex_lock(&ctrl->lock);
161 while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
162 aen = list_first_entry(&ctrl->async_events,
163 struct nvmet_async_event, entry);
164 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
165 nvmet_set_result(req, nvmet_async_event_result(aen));
166
167 list_del(&aen->entry);
168 kfree(aen);
169
170 mutex_unlock(&ctrl->lock);
171 trace_nvmet_async_event(ctrl, req->cqe->result.u32);
172 nvmet_req_complete(req, 0);
173 mutex_lock(&ctrl->lock);
174 }
175 mutex_unlock(&ctrl->lock);
176 }
177
178 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
179 {
180 struct nvmet_async_event *aen, *tmp;
181
182 mutex_lock(&ctrl->lock);
183 list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
184 list_del(&aen->entry);
185 kfree(aen);
186 }
187 mutex_unlock(&ctrl->lock);
188 }
189
190 static void nvmet_async_event_work(struct work_struct *work)
191 {
192 struct nvmet_ctrl *ctrl =
193 container_of(work, struct nvmet_ctrl, async_event_work);
194
195 nvmet_async_events_process(ctrl);
196 }
197
198 void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
199 u8 event_info, u8 log_page)
200 {
201 struct nvmet_async_event *aen;
202
203 aen = kmalloc(sizeof(*aen), GFP_KERNEL);
204 if (!aen)
205 return;
206
207 aen->event_type = event_type;
208 aen->event_info = event_info;
209 aen->log_page = log_page;
210
211 mutex_lock(&ctrl->lock);
212 list_add_tail(&aen->entry, &ctrl->async_events);
213 mutex_unlock(&ctrl->lock);
214
215 queue_work(nvmet_wq, &ctrl->async_event_work);
216 }
217
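/*
 * Track a changed namespace for the Changed Namespace List log page.
 * Per the NVMe spec, once more than NVME_MAX_CHANGED_NAMESPACES entries
 * have changed, the log collapses to a single 0xffffffff entry;
 * nr_changed_ns is parked at U32_MAX to remember that state until the
 * host reads (and thereby clears) the log.
 */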
218 static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
219 {
220 u32 i;
221
222 mutex_lock(&ctrl->lock);
223 if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
224 goto out_unlock;
225
226 for (i = 0; i < ctrl->nr_changed_ns; i++) {
227 if (ctrl->changed_ns_list[i] == nsid)
228 goto out_unlock;
229 }
230
231 if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
232 ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
233 ctrl->nr_changed_ns = U32_MAX;
234 goto out_unlock;
235 }
236
237 ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
238 out_unlock:
239 mutex_unlock(&ctrl->lock);
240 }
241
242 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
243 {
244 struct nvmet_ctrl *ctrl;
245
246 lockdep_assert_held(&subsys->lock);
247
248 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
249 nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
250 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
251 continue;
252 nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
253 NVME_AER_NOTICE_NS_CHANGED,
254 NVME_LOG_CHANGED_NS);
255 }
256 }
257
258 void nvmet_send_ana_event(struct nvmet_subsys *subsys,
259 struct nvmet_port *port)
260 {
261 struct nvmet_ctrl *ctrl;
262
263 mutex_lock(&subsys->lock);
264 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
265 if (port && ctrl->port != port)
266 continue;
267 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
268 continue;
269 nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
270 NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
271 }
272 mutex_unlock(&subsys->lock);
273 }
274
275 void nvmet_port_send_ana_event(struct nvmet_port *port)
276 {
277 struct nvmet_subsys_link *p;
278
279 down_read(&nvmet_config_sem);
280 list_for_each_entry(p, &port->subsystems, entry)
281 nvmet_send_ana_event(p->subsys, port);
282 up_read(&nvmet_config_sem);
283 }
284
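/*
 * Transport drivers (loop, RDMA, TCP, FC, PCI endpoint) register a
 * nvmet_fabrics_ops at module init. A sketch, with hypothetical
 * nvmet_foo_* callbacks:
 *
 *	static const struct nvmet_fabrics_ops nvmet_foo_ops = {
 *		.owner		= THIS_MODULE,
 *		.type		= NVMF_TRTYPE_TCP,
 *		.add_port	= nvmet_foo_add_port,
 *		.remove_port	= nvmet_foo_remove_port,
 *		.queue_response	= nvmet_foo_queue_response,
 *		.delete_ctrl	= nvmet_foo_delete_ctrl,
 *	};
 *
 *	ret = nvmet_register_transport(&nvmet_foo_ops);
 */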
285 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
286 {
287 int ret = 0;
288
289 down_write(&nvmet_config_sem);
290 if (nvmet_transports[ops->type])
291 ret = -EINVAL;
292 else
293 nvmet_transports[ops->type] = ops;
294 up_write(&nvmet_config_sem);
295
296 return ret;
297 }
298 EXPORT_SYMBOL_GPL(nvmet_register_transport);
299
300 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
301 {
302 down_write(&nvmet_config_sem);
303 nvmet_transports[ops->type] = NULL;
304 up_write(&nvmet_config_sem);
305 }
306 EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
307
308 void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
309 {
310 struct nvmet_ctrl *ctrl;
311
312 mutex_lock(&subsys->lock);
313 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
314 if (ctrl->port == port)
315 ctrl->ops->delete_ctrl(ctrl);
316 }
317 mutex_unlock(&subsys->lock);
318 }
319
320 int nvmet_enable_port(struct nvmet_port *port)
321 {
322 const struct nvmet_fabrics_ops *ops;
323 int ret;
324
325 lockdep_assert_held(&nvmet_config_sem);
326
327 if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
328 return -EINVAL;
329
330 ops = nvmet_transports[port->disc_addr.trtype];
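/*
 * Drop nvmet_config_sem around request_module(): the transport's module
 * init calls nvmet_register_transport(), which needs the write lock we
 * are currently holding.
 */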
331 if (!ops) {
332 up_write(&nvmet_config_sem);
333 request_module("nvmet-transport-%d", port->disc_addr.trtype);
334 down_write(&nvmet_config_sem);
335 ops = nvmet_transports[port->disc_addr.trtype];
336 if (!ops) {
337 pr_err("transport type %d not supported\n",
338 port->disc_addr.trtype);
339 return -EINVAL;
340 }
341 }
342
343 if (!try_module_get(ops->owner))
344 return -EINVAL;
345
346 /*
347 * If the user requested PI support and the transport isn't PI-capable,
348 * don't enable the port.
349 */
350 if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
351 pr_err("T10-PI is not supported by transport type %d\n",
352 port->disc_addr.trtype);
353 ret = -EINVAL;
354 goto out_put;
355 }
356
357 ret = ops->add_port(port);
358 if (ret)
359 goto out_put;
360
361 /* If the transport didn't set inline_data_size, then disable it. */
362 if (port->inline_data_size < 0)
363 port->inline_data_size = 0;
364
365 /*
366 * If the transport didn't set the max_queue_size properly, then clamp
367 * it to the target limits. Also set default values in case the
368 * transport didn't set it at all.
369 */
370 if (port->max_queue_size < 0)
371 port->max_queue_size = NVMET_MAX_QUEUE_SIZE;
372 else
373 port->max_queue_size = clamp_t(int, port->max_queue_size,
374 NVMET_MIN_QUEUE_SIZE,
375 NVMET_MAX_QUEUE_SIZE);
376
377 port->enabled = true;
378 port->tr_ops = ops;
379 return 0;
380
381 out_put:
382 module_put(ops->owner);
383 return ret;
384 }
385
386 void nvmet_disable_port(struct nvmet_port *port)
387 {
388 const struct nvmet_fabrics_ops *ops;
389
390 lockdep_assert_held(&nvmet_config_sem);
391
392 port->enabled = false;
393 port->tr_ops = NULL;
394
395 ops = nvmet_transports[port->disc_addr.trtype];
396 ops->remove_port(port);
397 module_put(ops->owner);
398 }
399
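/*
 * Keep-alive expiry handler. With Traffic Based Keep Alive (TBKAS), any
 * command processed since the last expiry has set ctrl->reset_tbkas
 * (see nvmet_req_init()), so the timer is simply re-armed; otherwise the
 * host is presumed dead and the controller is torn down.
 */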
400 static void nvmet_keep_alive_timer(struct work_struct *work)
401 {
402 struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
403 struct nvmet_ctrl, ka_work);
404 bool reset_tbkas = ctrl->reset_tbkas;
405
406 ctrl->reset_tbkas = false;
407 if (reset_tbkas) {
408 pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
409 ctrl->cntlid);
410 queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
411 return;
412 }
413
414 pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
415 ctrl->cntlid, ctrl->kato);
416
417 nvmet_ctrl_fatal_error(ctrl);
418 }
419
420 void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
421 {
422 if (unlikely(ctrl->kato == 0))
423 return;
424
425 pr_debug("ctrl %d start keep-alive timer for %d secs\n",
426 ctrl->cntlid, ctrl->kato);
427
428 queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
429 }
430
431 void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
432 {
433 if (unlikely(ctrl->kato == 0))
434 return;
435
436 pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
437
438 cancel_delayed_work_sync(&ctrl->ka_work);
439 }
440
441 u16 nvmet_req_find_ns(struct nvmet_req *req)
442 {
443 u32 nsid = le32_to_cpu(req->cmd->common.nsid);
444 struct nvmet_subsys *subsys = nvmet_req_subsys(req);
445
446 req->ns = xa_load(&subsys->namespaces, nsid);
447 if (unlikely(!req->ns || !req->ns->enabled)) {
448 req->error_loc = offsetof(struct nvme_common_command, nsid);
449 if (!req->ns) /* ns doesn't exist! */
450 return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
451
452 /* ns exists but it's disabled */
453 req->ns = NULL;
454 return NVME_SC_INTERNAL_PATH_ERROR;
455 }
456
457 percpu_ref_get(&req->ns->ref);
458 return NVME_SC_SUCCESS;
459 }
460
461 static void nvmet_destroy_namespace(struct percpu_ref *ref)
462 {
463 struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
464
465 complete(&ns->disable_done);
466 }
467
468 void nvmet_put_namespace(struct nvmet_ns *ns)
469 {
470 percpu_ref_put(&ns->ref);
471 }
472
473 static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
474 {
475 nvmet_bdev_ns_disable(ns);
476 nvmet_file_ns_disable(ns);
477 }
478
479 static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
480 {
481 int ret;
482 struct pci_dev *p2p_dev;
483
484 if (!ns->use_p2pmem)
485 return 0;
486
487 if (!ns->bdev) {
488 pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
489 return -EINVAL;
490 }
491
492 if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
493 pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
494 ns->device_path);
495 return -EINVAL;
496 }
497
498 if (ns->p2p_dev) {
499 ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
500 if (ret < 0)
501 return -EINVAL;
502 } else {
503 /*
504 * Right now we just check that there is p2pmem available so
505 * we can report an error to the user right away if there
506 * is not. We'll find the actual device to use once we set up
507 * the controller when the port's device is available.
508 */
509
510 p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
511 if (!p2p_dev) {
512 pr_err("no peer-to-peer memory is available for %s\n",
513 ns->device_path);
514 return -EINVAL;
515 }
516
517 pci_dev_put(p2p_dev);
518 }
519
520 return 0;
521 }
522
523 /*
524 * Note: ctrl->subsys->lock should be held when calling this function
525 */
526 static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
527 struct nvmet_ns *ns)
528 {
529 struct device *clients[2];
530 struct pci_dev *p2p_dev;
531 int ret;
532
533 if (!ctrl->p2p_client || !ns->use_p2pmem)
534 return;
535
536 if (ns->p2p_dev) {
537 ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
538 if (ret < 0)
539 return;
540
541 p2p_dev = pci_dev_get(ns->p2p_dev);
542 } else {
543 clients[0] = ctrl->p2p_client;
544 clients[1] = nvmet_ns_dev(ns);
545
546 p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
547 if (!p2p_dev) {
548 pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
549 dev_name(ctrl->p2p_client), ns->device_path);
550 return;
551 }
552 }
553
554 ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
555 if (ret < 0)
556 pci_dev_put(p2p_dev);
557
558 pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
559 ns->nsid);
560 }
561
562 bool nvmet_ns_revalidate(struct nvmet_ns *ns)
563 {
564 loff_t oldsize = ns->size;
565
566 if (ns->bdev)
567 nvmet_bdev_ns_revalidate(ns);
568 else
569 nvmet_file_ns_revalidate(ns);
570
571 return oldsize != ns->size;
572 }
573
574 int nvmet_ns_enable(struct nvmet_ns *ns)
575 {
576 struct nvmet_subsys *subsys = ns->subsys;
577 struct nvmet_ctrl *ctrl;
578 int ret;
579
580 mutex_lock(&subsys->lock);
581 ret = 0;
582
583 if (nvmet_is_passthru_subsys(subsys)) {
584 pr_info("cannot enable both passthru and regular namespaces for a single subsystem\n");
585 goto out_unlock;
586 }
587
588 if (ns->enabled)
589 goto out_unlock;
590
591 ret = -EMFILE;
592
593 ret = nvmet_bdev_ns_enable(ns);
594 if (ret == -ENOTBLK)
595 ret = nvmet_file_ns_enable(ns);
596 if (ret)
597 goto out_unlock;
598
599 ret = nvmet_p2pmem_ns_enable(ns);
600 if (ret)
601 goto out_dev_disable;
602
603 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
604 nvmet_p2pmem_ns_add_p2p(ctrl, ns);
605
606 if (ns->pr.enable) {
607 ret = nvmet_pr_init_ns(ns);
608 if (ret)
609 goto out_dev_put;
610 }
611
612 if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL))
613 goto out_pr_exit;
614
615 nvmet_ns_changed(subsys, ns->nsid);
616 ns->enabled = true;
617 xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
618 ret = 0;
619 out_unlock:
620 mutex_unlock(&subsys->lock);
621 return ret;
622 out_pr_exit:
623 if (ns->pr.enable)
624 nvmet_pr_exit_ns(ns);
625 out_dev_put:
626 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
627 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
628 out_dev_disable:
629 nvmet_ns_dev_disable(ns);
630 goto out_unlock;
631 }
632
633 void nvmet_ns_disable(struct nvmet_ns *ns)
634 {
635 struct nvmet_subsys *subsys = ns->subsys;
636 struct nvmet_ctrl *ctrl;
637
638 mutex_lock(&subsys->lock);
639 if (!ns->enabled)
640 goto out_unlock;
641
642 ns->enabled = false;
643 xa_clear_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
644
645 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
646 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
647
648 mutex_unlock(&subsys->lock);
649
650 /*
651 * Now that we removed the namespaces from the lookup list, we
652 * can kill the per_cpu ref and wait for any remaining references
653 * to be dropped, as well as a RCU grace period for anyone only
654 * using the namespace under rcu_read_lock(). Note that we can't
655 * use call_rcu here as we need to ensure the namespaces have
656 * been fully destroyed before unloading the module.
657 */
658 percpu_ref_kill(&ns->ref);
659 synchronize_rcu();
660 wait_for_completion(&ns->disable_done);
661 percpu_ref_exit(&ns->ref);
662
663 if (ns->pr.enable)
664 nvmet_pr_exit_ns(ns);
665
666 mutex_lock(&subsys->lock);
667 nvmet_ns_changed(subsys, ns->nsid);
668 nvmet_ns_dev_disable(ns);
669 out_unlock:
670 mutex_unlock(&subsys->lock);
671 }
672
673 void nvmet_ns_free(struct nvmet_ns *ns)
674 {
675 struct nvmet_subsys *subsys = ns->subsys;
676
677 nvmet_ns_disable(ns);
678
679 mutex_lock(&subsys->lock);
680
681 xa_erase(&subsys->namespaces, ns->nsid);
682 if (ns->nsid == subsys->max_nsid)
683 subsys->max_nsid = nvmet_max_nsid(subsys);
684
685 subsys->nr_namespaces--;
686 mutex_unlock(&subsys->lock);
687
688 down_write(&nvmet_ana_sem);
689 nvmet_ana_group_enabled[ns->anagrpid]--;
690 up_write(&nvmet_ana_sem);
691
692 kfree(ns->device_path);
693 kfree(ns);
694 }
695
696 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
697 {
698 struct nvmet_ns *ns;
699
700 mutex_lock(&subsys->lock);
701
702 if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
703 goto out_unlock;
704
705 ns = kzalloc(sizeof(*ns), GFP_KERNEL);
706 if (!ns)
707 goto out_unlock;
708
709 init_completion(&ns->disable_done);
710
711 ns->nsid = nsid;
712 ns->subsys = subsys;
713
714 if (ns->nsid > subsys->max_nsid)
715 subsys->max_nsid = nsid;
716
717 if (xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL))
718 goto out_exit;
719
720 subsys->nr_namespaces++;
721
722 mutex_unlock(&subsys->lock);
723
724 down_write(&nvmet_ana_sem);
725 ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
726 nvmet_ana_group_enabled[ns->anagrpid]++;
727 up_write(&nvmet_ana_sem);
728
729 uuid_gen(&ns->uuid);
730 ns->buffered_io = false;
731 ns->csi = NVME_CSI_NVM;
732
733 return ns;
734 out_exit:
735 subsys->max_nsid = nvmet_max_nsid(subsys);
736 kfree(ns);
737 out_unlock:
738 mutex_unlock(&subsys->lock);
739 return NULL;
740 }
741
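/*
 * Advance the virtual SQ head reported in the completion. The
 * try_cmpxchg() loop applies the wrap-around increment without a lock,
 * retrying if another completion updated sqhd concurrently.
 */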
742 static void nvmet_update_sq_head(struct nvmet_req *req)
743 {
744 if (req->sq->size) {
745 u32 old_sqhd, new_sqhd;
746
747 old_sqhd = READ_ONCE(req->sq->sqhd);
748 do {
749 new_sqhd = (old_sqhd + 1) % req->sq->size;
750 } while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd));
751 }
752 req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
753 }
754
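/*
 * Record a failed command in the Error Information log. The slots form
 * a ring indexed by err_counter % NVMET_ERROR_LOG_SLOTS, and the More
 * bit in the completion status tells the host a log entry is available.
 */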
755 static void nvmet_set_error(struct nvmet_req *req, u16 status)
756 {
757 struct nvmet_ctrl *ctrl = req->sq->ctrl;
758 struct nvme_error_slot *new_error_slot;
759 unsigned long flags;
760
761 req->cqe->status = cpu_to_le16(status << 1);
762
763 if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
764 return;
765
766 spin_lock_irqsave(&ctrl->error_lock, flags);
767 ctrl->err_counter++;
768 new_error_slot =
769 &ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];
770
771 new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
772 new_error_slot->sqid = cpu_to_le16(req->sq->qid);
773 new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
774 new_error_slot->status_field = cpu_to_le16(status << 1);
775 new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
776 new_error_slot->lba = cpu_to_le64(req->error_slba);
777 new_error_slot->nsid = req->cmd->common.nsid;
778 spin_unlock_irqrestore(&ctrl->error_lock, flags);
779
780 /* set the more bit for this request */
781 req->cqe->status |= cpu_to_le16(1 << 14);
782 }
783
784 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
785 {
786 struct nvmet_ns *ns = req->ns;
787 struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;
788
789 if (!req->sq->sqhd_disabled)
790 nvmet_update_sq_head(req);
791 req->cqe->sq_id = cpu_to_le16(req->sq->qid);
792 req->cqe->command_id = req->cmd->common.command_id;
793
794 if (unlikely(status))
795 nvmet_set_error(req, status);
796
797 trace_nvmet_req_complete(req);
798
799 req->ops->queue_response(req);
800
801 if (pc_ref)
802 nvmet_pr_put_ns_pc_ref(pc_ref);
803 if (ns)
804 nvmet_put_namespace(ns);
805 }
806
807 void nvmet_req_complete(struct nvmet_req *req, u16 status)
808 {
809 struct nvmet_sq *sq = req->sq;
810
811 __nvmet_req_complete(req, status);
812 percpu_ref_put(&sq->ref);
813 }
814 EXPORT_SYMBOL_GPL(nvmet_req_complete);
815
816 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
817 u16 qid, u16 size)
818 {
819 cq->qid = qid;
820 cq->size = size;
821 }
822
823 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
824 u16 qid, u16 size)
825 {
826 sq->sqhd = 0;
827 sq->qid = qid;
828 sq->size = size;
829
830 ctrl->sqs[qid] = sq;
831 }
832
833 static void nvmet_confirm_sq(struct percpu_ref *ref)
834 {
835 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
836
837 complete(&sq->confirm_done);
838 }
839
840 u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid)
841 {
842 if (!ctrl->sqs)
843 return NVME_SC_INTERNAL | NVME_STATUS_DNR;
844
845 if (cqid > ctrl->subsys->max_qid)
846 return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
847
848 /*
849 * Note: For PCI controllers, the NVMe specification allows multiple
850 * SQs to share a single CQ. However, we do not support this yet, so
851 * check that there is no SQ defined for a CQ. If one exists, then the
852 * CQ ID is invalid for creation as well as when the CQ is being
853 * deleted (as that would mean that the SQ was not deleted before the
854 * CQ).
855 */
856 if (ctrl->sqs[cqid])
857 return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
858
859 return NVME_SC_SUCCESS;
860 }
861
862 u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
863 u16 qid, u16 size)
864 {
865 u16 status;
866
867 status = nvmet_check_cqid(ctrl, qid);
868 if (status != NVME_SC_SUCCESS)
869 return status;
870
871 nvmet_cq_setup(ctrl, cq, qid, size);
872
873 return NVME_SC_SUCCESS;
874 }
875 EXPORT_SYMBOL_GPL(nvmet_cq_create);
876
877 u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid,
878 bool create)
879 {
880 if (!ctrl->sqs)
881 return NVME_SC_INTERNAL | NVME_STATUS_DNR;
882
883 if (sqid > ctrl->subsys->max_qid)
884 return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
885
886 if ((create && ctrl->sqs[sqid]) ||
887 (!create && !ctrl->sqs[sqid]))
888 return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
889
890 return NVME_SC_SUCCESS;
891 }
892
893 u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
894 u16 sqid, u16 size)
895 {
896 u16 status;
897 int ret;
898
899 if (!kref_get_unless_zero(&ctrl->ref))
900 return NVME_SC_INTERNAL | NVME_STATUS_DNR;
901
902 status = nvmet_check_sqid(ctrl, sqid, true);
903 if (status != NVME_SC_SUCCESS)
904 return status;
905
906 ret = nvmet_sq_init(sq);
907 if (ret) {
908 status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
909 goto ctrl_put;
910 }
911
912 nvmet_sq_setup(ctrl, sq, sqid, size);
913 sq->ctrl = ctrl;
914
915 return NVME_SC_SUCCESS;
916
917 ctrl_put:
918 nvmet_ctrl_put(ctrl);
919 return status;
920 }
921 EXPORT_SYMBOL_GPL(nvmet_sq_create);
922
923 void nvmet_sq_destroy(struct nvmet_sq *sq)
924 {
925 struct nvmet_ctrl *ctrl = sq->ctrl;
926
927 /*
928 * If this is the admin queue, complete all AERs so that our
929 * queue doesn't have outstanding requests on it.
930 */
931 if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
932 nvmet_async_events_failall(ctrl);
933 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
934 wait_for_completion(&sq->confirm_done);
935 wait_for_completion(&sq->free_done);
936 percpu_ref_exit(&sq->ref);
937 nvmet_auth_sq_free(sq);
938
939 /*
940 * We must reference the ctrl again after waiting for inflight IO
941 * to complete, because an admin connect may have sneaked in after we
942 * stored sq->ctrl locally but before we killed the percpu_ref. The
943 * admin connect allocates and assigns sq->ctrl, which now needs a
944 * final ref put, as this ctrl is going away.
945 */
946 ctrl = sq->ctrl;
947
948 if (ctrl) {
949 /*
950 * The teardown flow may take some time, and the host may not
951 * send us keep-alive during this period, hence reset the
952 * traffic based keep-alive timer so we don't trigger a
953 * controller teardown as a result of a keep-alive expiration.
954 */
955 ctrl->reset_tbkas = true;
956 sq->ctrl->sqs[sq->qid] = NULL;
957 nvmet_ctrl_put(ctrl);
958 sq->ctrl = NULL; /* allows reusing the queue later */
959 }
960 }
961 EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
962
963 static void nvmet_sq_free(struct percpu_ref *ref)
964 {
965 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
966
967 complete(&sq->free_done);
968 }
969
970 int nvmet_sq_init(struct nvmet_sq *sq)
971 {
972 int ret;
973
974 ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
975 if (ret) {
976 pr_err("percpu_ref init failed!\n");
977 return ret;
978 }
979 init_completion(&sq->free_done);
980 init_completion(&sq->confirm_done);
981 nvmet_auth_sq_init(sq);
982
983 return 0;
984 }
985 EXPORT_SYMBOL_GPL(nvmet_sq_init);
986
987 static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
988 struct nvmet_ns *ns)
989 {
990 enum nvme_ana_state state = port->ana_state[ns->anagrpid];
991
992 if (unlikely(state == NVME_ANA_INACCESSIBLE))
993 return NVME_SC_ANA_INACCESSIBLE;
994 if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
995 return NVME_SC_ANA_PERSISTENT_LOSS;
996 if (unlikely(state == NVME_ANA_CHANGE))
997 return NVME_SC_ANA_TRANSITION;
998 return 0;
999 }
1000
1001 static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
1002 {
1003 if (unlikely(req->ns->readonly)) {
1004 switch (req->cmd->common.opcode) {
1005 case nvme_cmd_read:
1006 case nvme_cmd_flush:
1007 break;
1008 default:
1009 return NVME_SC_NS_WRITE_PROTECTED;
1010 }
1011 }
1012
1013 return 0;
1014 }
1015
1016 static u32 nvmet_io_cmd_transfer_len(struct nvmet_req *req)
1017 {
1018 struct nvme_command *cmd = req->cmd;
1019 u32 metadata_len = 0;
1020
1021 if (nvme_is_fabrics(cmd))
1022 return nvmet_fabrics_io_cmd_data_len(req);
1023
1024 if (!req->ns)
1025 return 0;
1026
1027 switch (req->cmd->common.opcode) {
1028 case nvme_cmd_read:
1029 case nvme_cmd_write:
1030 case nvme_cmd_zone_append:
1031 if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
1032 metadata_len = nvmet_rw_metadata_len(req);
1033 return nvmet_rw_data_len(req) + metadata_len;
1034 case nvme_cmd_dsm:
1035 return nvmet_dsm_len(req);
1036 case nvme_cmd_zone_mgmt_recv:
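/* NUMD is a 0-based count of dwords: convert to bytes */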
1037 return (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
1038 default:
1039 return 0;
1040 }
1041 }
1042
1043 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
1044 {
1045 struct nvme_command *cmd = req->cmd;
1046 u16 ret;
1047
1048 if (nvme_is_fabrics(cmd))
1049 return nvmet_parse_fabrics_io_cmd(req);
1050
1051 if (unlikely(!nvmet_check_auth_status(req)))
1052 return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
1053
1054 ret = nvmet_check_ctrl_status(req);
1055 if (unlikely(ret))
1056 return ret;
1057
1058 if (nvmet_is_passthru_req(req))
1059 return nvmet_parse_passthru_io_cmd(req);
1060
1061 ret = nvmet_req_find_ns(req);
1062 if (unlikely(ret))
1063 return ret;
1064
1065 ret = nvmet_check_ana_state(req->port, req->ns);
1066 if (unlikely(ret)) {
1067 req->error_loc = offsetof(struct nvme_common_command, nsid);
1068 return ret;
1069 }
1070 ret = nvmet_io_cmd_check_access(req);
1071 if (unlikely(ret)) {
1072 req->error_loc = offsetof(struct nvme_common_command, nsid);
1073 return ret;
1074 }
1075
1076 if (req->ns->pr.enable) {
1077 ret = nvmet_parse_pr_cmd(req);
1078 if (!ret)
1079 return ret;
1080 }
1081
1082 switch (req->ns->csi) {
1083 case NVME_CSI_NVM:
1084 if (req->ns->file)
1085 ret = nvmet_file_parse_io_cmd(req);
1086 else
1087 ret = nvmet_bdev_parse_io_cmd(req);
1088 break;
1089 case NVME_CSI_ZNS:
1090 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
1091 ret = nvmet_bdev_zns_parse_io_cmd(req);
1092 else
1093 ret = NVME_SC_INVALID_IO_CMD_SET;
1094 break;
1095 default:
1096 ret = NVME_SC_INVALID_IO_CMD_SET;
1097 }
1098 if (ret)
1099 return ret;
1100
1101 if (req->ns->pr.enable) {
1102 ret = nvmet_pr_check_cmd_access(req);
1103 if (ret)
1104 return ret;
1105
1106 ret = nvmet_pr_get_ns_pc_ref(req);
1107 }
1108 return ret;
1109 }
1110
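/*
 * Entry point for transport drivers: initialize and parse a freshly
 * received command. A rough sketch of the per-command flow in a
 * transport (helper names here are illustrative):
 *
 *	if (!nvmet_req_init(req, cq, sq, &nvmet_foo_ops))
 *		return;	(the request was already completed with an error)
 *	... map req->transfer_len bytes, e.g. via nvmet_req_alloc_sgls() ...
 *	req->execute(req);	(ends in nvmet_req_complete())
 */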
1111 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
1112 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
1113 {
1114 u8 flags = req->cmd->common.flags;
1115 u16 status;
1116
1117 req->cq = cq;
1118 req->sq = sq;
1119 req->ops = ops;
1120 req->sg = NULL;
1121 req->metadata_sg = NULL;
1122 req->sg_cnt = 0;
1123 req->metadata_sg_cnt = 0;
1124 req->transfer_len = 0;
1125 req->metadata_len = 0;
1126 req->cqe->result.u64 = 0;
1127 req->cqe->status = 0;
1128 req->cqe->sq_head = 0;
1129 req->ns = NULL;
1130 req->error_loc = NVMET_NO_ERROR_LOC;
1131 req->error_slba = 0;
1132 req->pc_ref = NULL;
1133
1134 /* no support for fused commands yet */
1135 if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
1136 req->error_loc = offsetof(struct nvme_common_command, flags);
1137 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
1138 goto fail;
1139 }
1140
1141 /*
1142 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
1143 * contains an address of a single contiguous physical buffer that is
1144 * byte aligned. For PCI controllers, this is optional so not enforced.
1145 */
1146 if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
1147 if (!req->sq->ctrl || !nvmet_is_pci_ctrl(req->sq->ctrl)) {
1148 req->error_loc =
1149 offsetof(struct nvme_common_command, flags);
1150 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
1151 goto fail;
1152 }
1153 }
1154
1155 if (unlikely(!req->sq->ctrl))
1156 /* will return an error for any non-connect command: */
1157 status = nvmet_parse_connect_cmd(req);
1158 else if (likely(req->sq->qid != 0))
1159 status = nvmet_parse_io_cmd(req);
1160 else
1161 status = nvmet_parse_admin_cmd(req);
1162
1163 if (status)
1164 goto fail;
1165
1166 trace_nvmet_req_init(req, req->cmd);
1167
1168 if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
1169 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
1170 goto fail;
1171 }
1172
1173 if (sq->ctrl)
1174 sq->ctrl->reset_tbkas = true;
1175
1176 return true;
1177
1178 fail:
1179 __nvmet_req_complete(req, status);
1180 return false;
1181 }
1182 EXPORT_SYMBOL_GPL(nvmet_req_init);
1183
1184 void nvmet_req_uninit(struct nvmet_req *req)
1185 {
1186 percpu_ref_put(&req->sq->ref);
1187 if (req->pc_ref)
1188 nvmet_pr_put_ns_pc_ref(req->pc_ref);
1189 if (req->ns)
1190 nvmet_put_namespace(req->ns);
1191 }
1192 EXPORT_SYMBOL_GPL(nvmet_req_uninit);
1193
1194 size_t nvmet_req_transfer_len(struct nvmet_req *req)
1195 {
1196 if (likely(req->sq->qid != 0))
1197 return nvmet_io_cmd_transfer_len(req);
1198 if (unlikely(!req->sq->ctrl))
1199 return nvmet_connect_cmd_data_len(req);
1200 return nvmet_admin_cmd_data_len(req);
1201 }
1202 EXPORT_SYMBOL_GPL(nvmet_req_transfer_len);
1203
1204 bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
1205 {
1206 if (unlikely(len != req->transfer_len)) {
1207 u16 status;
1208
1209 req->error_loc = offsetof(struct nvme_common_command, dptr);
1210 if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
1211 status = NVME_SC_SGL_INVALID_DATA;
1212 else
1213 status = NVME_SC_INVALID_FIELD;
1214 nvmet_req_complete(req, status | NVME_STATUS_DNR);
1215 return false;
1216 }
1217
1218 return true;
1219 }
1220 EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);
1221
1222 bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
1223 {
1224 if (unlikely(data_len > req->transfer_len)) {
1225 u16 status;
1226
1227 req->error_loc = offsetof(struct nvme_common_command, dptr);
1228 if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
1229 status = NVME_SC_SGL_INVALID_DATA;
1230 else
1231 status = NVME_SC_INVALID_FIELD;
1232 nvmet_req_complete(req, status | NVME_STATUS_DNR);
1233 return false;
1234 }
1235
1236 return true;
1237 }
1238
1239 static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
1240 {
1241 return req->transfer_len - req->metadata_len;
1242 }
1243
1244 static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
1245 struct nvmet_req *req)
1246 {
1247 req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
1248 nvmet_data_transfer_len(req));
1249 if (!req->sg)
1250 goto out_err;
1251
1252 if (req->metadata_len) {
1253 req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
1254 &req->metadata_sg_cnt, req->metadata_len);
1255 if (!req->metadata_sg)
1256 goto out_free_sg;
1257 }
1258
1259 req->p2p_dev = p2p_dev;
1260
1261 return 0;
1262 out_free_sg:
1263 pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
1264 out_err:
1265 return -ENOMEM;
1266 }
1267
1268 static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
1269 {
1270 if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
1271 !req->sq->ctrl || !req->sq->qid || !req->ns)
1272 return NULL;
1273 return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
1274 }
1275
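/*
 * Allocate the data (and metadata) scatterlists for a request,
 * preferring P2P memory when the namespace is mapped to a p2pdma device
 * for this controller, and falling back to regular sgl_alloc().
 */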
1276 int nvmet_req_alloc_sgls(struct nvmet_req *req)
1277 {
1278 struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);
1279
1280 if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
1281 return 0;
1282
1283 req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
1284 &req->sg_cnt);
1285 if (unlikely(!req->sg))
1286 goto out;
1287
1288 if (req->metadata_len) {
1289 req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
1290 &req->metadata_sg_cnt);
1291 if (unlikely(!req->metadata_sg))
1292 goto out_free;
1293 }
1294
1295 return 0;
1296 out_free:
1297 sgl_free(req->sg);
1298 out:
1299 return -ENOMEM;
1300 }
1301 EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);
1302
1303 void nvmet_req_free_sgls(struct nvmet_req *req)
1304 {
1305 if (req->p2p_dev) {
1306 pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
1307 if (req->metadata_sg)
1308 pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
1309 req->p2p_dev = NULL;
1310 } else {
1311 sgl_free(req->sg);
1312 if (req->metadata_sg)
1313 sgl_free(req->metadata_sg);
1314 }
1315
1316 req->sg = NULL;
1317 req->metadata_sg = NULL;
1318 req->sg_cnt = 0;
1319 req->metadata_sg_cnt = 0;
1320 }
1321 EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);
1322
1323 static inline bool nvmet_css_supported(u8 cc_css)
1324 {
1325 switch (cc_css << NVME_CC_CSS_SHIFT) {
1326 case NVME_CC_CSS_NVM:
1327 case NVME_CC_CSS_CSI:
1328 return true;
1329 default:
1330 return false;
1331 }
1332 }
1333
1334 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
1335 {
1336 lockdep_assert_held(&ctrl->lock);
1337
1338 /*
1339 * Only I/O controllers should verify iosqes,iocqes.
1340 * Strictly speaking, the spec says a discovery controller
1341 * should verify that iosqes and iocqes are zeroed; however, that
1342 * would break backwards compatibility, so don't enforce it.
1343 */
1344 if (!nvmet_is_disc_subsys(ctrl->subsys) &&
1345 (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
1346 nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) {
1347 ctrl->csts = NVME_CSTS_CFS;
1348 return;
1349 }
1350
1351 if (nvmet_cc_mps(ctrl->cc) != 0 ||
1352 nvmet_cc_ams(ctrl->cc) != 0 ||
1353 !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) {
1354 ctrl->csts = NVME_CSTS_CFS;
1355 return;
1356 }
1357
1358 ctrl->csts = NVME_CSTS_RDY;
1359
1360 /*
1361 * Controllers that are not yet enabled should not really enforce the
1362 * keep alive timeout, but we still want to track a timeout and cleanup
1363 * in case a host died before it enabled the controller. Hence, simply
1364 * reset the keep alive timer when the controller is enabled.
1365 */
1366 if (ctrl->kato)
1367 mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
1368 }
1369
1370 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
1371 {
1372 lockdep_assert_held(&ctrl->lock);
1373
1374 /* XXX: tear down queues? */
1375 ctrl->csts &= ~NVME_CSTS_RDY;
1376 ctrl->cc = 0;
1377 }
1378
1379 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
1380 {
1381 u32 old;
1382
1383 mutex_lock(&ctrl->lock);
1384 old = ctrl->cc;
1385 ctrl->cc = new;
1386
1387 if (nvmet_cc_en(new) && !nvmet_cc_en(old))
1388 nvmet_start_ctrl(ctrl);
1389 if (!nvmet_cc_en(new) && nvmet_cc_en(old))
1390 nvmet_clear_ctrl(ctrl);
1391 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
1392 nvmet_clear_ctrl(ctrl);
1393 ctrl->csts |= NVME_CSTS_SHST_CMPLT;
1394 }
1395 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
1396 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
1397 mutex_unlock(&ctrl->lock);
1398 }
1399 EXPORT_SYMBOL_GPL(nvmet_update_cc);
1400
1401 static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
1402 {
1403 /* command sets supported: NVMe command set: */
1404 ctrl->cap = (1ULL << 37);
1405 /* Controller supports one or more I/O Command Sets */
1406 ctrl->cap |= (1ULL << 43);
1407 /* CC.EN timeout in 500msec units: */
1408 ctrl->cap |= (15ULL << 24);
1409 /* maximum queue entries supported: */
1410 if (ctrl->ops->get_max_queue_size)
1411 ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl),
1412 ctrl->port->max_queue_size) - 1;
1413 else
1414 ctrl->cap |= ctrl->port->max_queue_size - 1;
1415
1416 if (nvmet_is_passthru_subsys(ctrl->subsys))
1417 nvmet_passthrough_override_cap(ctrl);
1418 }
1419
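/*
 * Look up an existing controller for an I/O queue connect command.
 * On success a reference is taken on the returned ctrl; the caller is
 * responsible for dropping it with nvmet_ctrl_put().
 */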
1420 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
1421 const char *hostnqn, u16 cntlid,
1422 struct nvmet_req *req)
1423 {
1424 struct nvmet_ctrl *ctrl = NULL;
1425 struct nvmet_subsys *subsys;
1426
1427 subsys = nvmet_find_get_subsys(req->port, subsysnqn);
1428 if (!subsys) {
1429 pr_warn("connect request for invalid subsystem %s!\n",
1430 subsysnqn);
1431 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
1432 goto out;
1433 }
1434
1435 mutex_lock(&subsys->lock);
1436 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
1437 if (ctrl->cntlid == cntlid) {
1438 if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
1439 pr_warn("hostnqn mismatch.\n");
1440 continue;
1441 }
1442 if (!kref_get_unless_zero(&ctrl->ref))
1443 continue;
1444
1445 /* ctrl found */
1446 goto found;
1447 }
1448 }
1449
1450 ctrl = NULL; /* ctrl not found */
1451 pr_warn("could not find controller %d for subsys %s / host %s\n",
1452 cntlid, subsysnqn, hostnqn);
1453 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
1454
1455 found:
1456 mutex_unlock(&subsys->lock);
1457 nvmet_subsys_put(subsys);
1458 out:
1459 return ctrl;
1460 }
1461
1462 u16 nvmet_check_ctrl_status(struct nvmet_req *req)
1463 {
1464 if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
1465 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
1466 req->cmd->common.opcode, req->sq->qid);
1467 return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
1468 }
1469
1470 if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
1471 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
1472 req->cmd->common.opcode, req->sq->qid);
1473 return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
1474 }
1475
1476 if (unlikely(!nvmet_check_auth_status(req))) {
1477 pr_warn("qid %d not authenticated\n", req->sq->qid);
1478 return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
1479 }
1480 return 0;
1481 }
1482
1483 bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
1484 {
1485 struct nvmet_host_link *p;
1486
1487 lockdep_assert_held(&nvmet_config_sem);
1488
1489 if (subsys->allow_any_host)
1490 return true;
1491
1492 if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */
1493 return true;
1494
1495 list_for_each_entry(p, &subsys->hosts, entry) {
1496 if (!strcmp(nvmet_host_name(p->host), hostnqn))
1497 return true;
1498 }
1499
1500 return false;
1501 }
1502
1503 /*
1504 * Note: ctrl->subsys->lock should be held when calling this function
1505 */
1506 static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
1507 struct device *p2p_client)
1508 {
1509 struct nvmet_ns *ns;
1510 unsigned long idx;
1511
1512 if (!p2p_client)
1513 return;
1514
1515 ctrl->p2p_client = get_device(p2p_client);
1516
1517 nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns)
1518 nvmet_p2pmem_ns_add_p2p(ctrl, ns);
1519 }
1520
1521 /*
1522 * Note: ctrl->subsys->lock should be held when calling this function
1523 */
1524 static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
1525 {
1526 struct radix_tree_iter iter;
1527 void __rcu **slot;
1528
1529 radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
1530 pci_dev_put(radix_tree_deref_slot(slot));
1531
1532 put_device(ctrl->p2p_client);
1533 }
1534
1535 static void nvmet_fatal_error_handler(struct work_struct *work)
1536 {
1537 struct nvmet_ctrl *ctrl =
1538 container_of(work, struct nvmet_ctrl, fatal_err_work);
1539
1540 pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
1541 ctrl->ops->delete_ctrl(ctrl);
1542 }
1543
1544 struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
1545 {
1546 struct nvmet_subsys *subsys;
1547 struct nvmet_ctrl *ctrl;
1548 u32 kato = args->kato;
1549 u8 dhchap_status;
1550 int ret;
1551
1552 args->status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
1553 subsys = nvmet_find_get_subsys(args->port, args->subsysnqn);
1554 if (!subsys) {
1555 pr_warn("connect request for invalid subsystem %s!\n",
1556 args->subsysnqn);
1557 args->result = IPO_IATTR_CONNECT_DATA(subsysnqn);
1558 args->error_loc = offsetof(struct nvme_common_command, dptr);
1559 return NULL;
1560 }
1561
1562 down_read(&nvmet_config_sem);
1563 if (!nvmet_host_allowed(subsys, args->hostnqn)) {
1564 pr_info("connect by host %s for subsystem %s not allowed\n",
1565 args->hostnqn, args->subsysnqn);
1566 args->result = IPO_IATTR_CONNECT_DATA(hostnqn);
1567 up_read(&nvmet_config_sem);
1568 args->status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
1569 args->error_loc = offsetof(struct nvme_common_command, dptr);
1570 goto out_put_subsystem;
1571 }
1572 up_read(&nvmet_config_sem);
1573
1574 args->status = NVME_SC_INTERNAL;
1575 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
1576 if (!ctrl)
1577 goto out_put_subsystem;
1578 mutex_init(&ctrl->lock);
1579
1580 ctrl->port = args->port;
1581 ctrl->ops = args->ops;
1582
1583 #ifdef CONFIG_NVME_TARGET_PASSTHRU
1584 /* By default, loop targets clear the IDs reported by the passthru device */
1585 if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP)
1586 subsys->clear_ids = 1;
1587 #endif
1588
1589 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
1590 INIT_LIST_HEAD(&ctrl->async_events);
1591 INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
1592 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
1593 INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
1594
1595 memcpy(ctrl->subsysnqn, args->subsysnqn, NVMF_NQN_SIZE);
1596 memcpy(ctrl->hostnqn, args->hostnqn, NVMF_NQN_SIZE);
1597
1598 kref_init(&ctrl->ref);
1599 ctrl->subsys = subsys;
1600 ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
1601 nvmet_init_cap(ctrl);
1602 WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
1603
1604 ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
1605 sizeof(__le32), GFP_KERNEL);
1606 if (!ctrl->changed_ns_list)
1607 goto out_free_ctrl;
1608
1609 ctrl->sqs = kcalloc(subsys->max_qid + 1,
1610 sizeof(struct nvmet_sq *),
1611 GFP_KERNEL);
1612 if (!ctrl->sqs)
1613 goto out_free_changed_ns_list;
1614
1615 ret = ida_alloc_range(&cntlid_ida,
1616 subsys->cntlid_min, subsys->cntlid_max,
1617 GFP_KERNEL);
1618 if (ret < 0) {
1619 args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
1620 goto out_free_sqs;
1621 }
1622 ctrl->cntlid = ret;
1623
1624 /*
1625 * Discovery controllers may use some arbitrary high value
1626 * in order to clean up stale discovery sessions
1627 */
1628 if (nvmet_is_disc_subsys(ctrl->subsys) && !kato)
1629 kato = NVMET_DISC_KATO_MS;
1630
1631 /* keep-alive timeout in seconds */
1632 ctrl->kato = DIV_ROUND_UP(kato, 1000);
1633
1634 ctrl->err_counter = 0;
1635 spin_lock_init(&ctrl->error_lock);
1636
1637 nvmet_start_keep_alive_timer(ctrl);
1638
1639 mutex_lock(&subsys->lock);
1640 ret = nvmet_ctrl_init_pr(ctrl);
1641 if (ret)
1642 goto init_pr_fail;
1643 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
1644 nvmet_setup_p2p_ns_map(ctrl, args->p2p_client);
1645 nvmet_debugfs_ctrl_setup(ctrl);
1646 mutex_unlock(&subsys->lock);
1647
1648 if (args->hostid)
1649 uuid_copy(&ctrl->hostid, args->hostid);
1650
1651 dhchap_status = nvmet_setup_auth(ctrl, args->sq);
1652 if (dhchap_status) {
1653 pr_err("Failed to setup authentication, dhchap status %u\n",
1654 dhchap_status);
1655 nvmet_ctrl_put(ctrl);
1656 if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED)
1657 args->status =
1658 NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
1659 else
1660 args->status = NVME_SC_INTERNAL;
1661 return NULL;
1662 }
1663
1664 args->status = NVME_SC_SUCCESS;
1665
1666 pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n",
1667 nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm",
1668 ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
1669 ctrl->pi_support ? " T10-PI is enabled" : "",
1670 nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "",
1671 nvmet_queue_tls_keyid(args->sq) ? ", TLS" : "");
1672
1673 return ctrl;
1674
1675 init_pr_fail:
1676 mutex_unlock(&subsys->lock);
1677 nvmet_stop_keep_alive_timer(ctrl);
1678 ida_free(&cntlid_ida, ctrl->cntlid);
1679 out_free_sqs:
1680 kfree(ctrl->sqs);
1681 out_free_changed_ns_list:
1682 kfree(ctrl->changed_ns_list);
1683 out_free_ctrl:
1684 kfree(ctrl);
1685 out_put_subsystem:
1686 nvmet_subsys_put(subsys);
1687 return NULL;
1688 }
1689 EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
1690
1691 static void nvmet_ctrl_free(struct kref *ref)
1692 {
1693 struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
1694 struct nvmet_subsys *subsys = ctrl->subsys;
1695
1696 mutex_lock(&subsys->lock);
1697 nvmet_ctrl_destroy_pr(ctrl);
1698 nvmet_release_p2p_ns_map(ctrl);
1699 list_del(&ctrl->subsys_entry);
1700 mutex_unlock(&subsys->lock);
1701
1702 nvmet_stop_keep_alive_timer(ctrl);
1703
1704 flush_work(&ctrl->async_event_work);
1705 cancel_work_sync(&ctrl->fatal_err_work);
1706
1707 nvmet_destroy_auth(ctrl);
1708
1709 nvmet_debugfs_ctrl_free(ctrl);
1710
1711 ida_free(&cntlid_ida, ctrl->cntlid);
1712
1713 nvmet_async_events_free(ctrl);
1714 kfree(ctrl->sqs);
1715 kfree(ctrl->changed_ns_list);
1716 kfree(ctrl);
1717
1718 nvmet_subsys_put(subsys);
1719 }
1720
1721 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
1722 {
1723 kref_put(&ctrl->ref, nvmet_ctrl_free);
1724 }
1725 EXPORT_SYMBOL_GPL(nvmet_ctrl_put);
1726
1727 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
1728 {
1729 mutex_lock(&ctrl->lock);
1730 if (!(ctrl->csts & NVME_CSTS_CFS)) {
1731 ctrl->csts |= NVME_CSTS_CFS;
1732 queue_work(nvmet_wq, &ctrl->fatal_err_work);
1733 }
1734 mutex_unlock(&ctrl->lock);
1735 }
1736 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
1737
1738 ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
1739 char *traddr, size_t traddr_len)
1740 {
1741 if (!ctrl->ops->host_traddr)
1742 return -EOPNOTSUPP;
1743 return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
1744 }
1745
1746 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
1747 const char *subsysnqn)
1748 {
1749 struct nvmet_subsys_link *p;
1750
1751 if (!port)
1752 return NULL;
1753
1754 if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
1755 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
1756 return NULL;
1757 return nvmet_disc_subsys;
1758 }
1759
1760 down_read(&nvmet_config_sem);
1761 if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn,
1762 NVMF_NQN_SIZE)) {
1763 if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) {
1764 up_read(&nvmet_config_sem);
1765 return nvmet_disc_subsys;
1766 }
1767 }
1768 list_for_each_entry(p, &port->subsystems, entry) {
1769 if (!strncmp(p->subsys->subsysnqn, subsysnqn,
1770 NVMF_NQN_SIZE)) {
1771 if (!kref_get_unless_zero(&p->subsys->ref))
1772 break;
1773 up_read(&nvmet_config_sem);
1774 return p->subsys;
1775 }
1776 }
1777 up_read(&nvmet_config_sem);
1778 return NULL;
1779 }
1780
1781 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
1782 enum nvme_subsys_type type)
1783 {
1784 struct nvmet_subsys *subsys;
1785 char serial[NVMET_SN_MAX_SIZE / 2];
1786 int ret;
1787
1788 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
1789 if (!subsys)
1790 return ERR_PTR(-ENOMEM);
1791
1792 subsys->ver = NVMET_DEFAULT_VS;
1793 /* generate a random serial number as our controllers are ephemeral: */
1794 get_random_bytes(&serial, sizeof(serial));
1795 bin2hex(subsys->serial, &serial, sizeof(serial));
1796
1797 subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
1798 if (!subsys->model_number) {
1799 ret = -ENOMEM;
1800 goto free_subsys;
1801 }
1802
1803 subsys->ieee_oui = 0;
1804
1805 subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL);
1806 if (!subsys->firmware_rev) {
1807 ret = -ENOMEM;
1808 goto free_mn;
1809 }
1810
1811 switch (type) {
1812 case NVME_NQN_NVME:
1813 subsys->max_qid = NVMET_NR_QUEUES;
1814 break;
1815 case NVME_NQN_DISC:
1816 case NVME_NQN_CURR:
1817 subsys->max_qid = 0;
1818 break;
1819 default:
1820 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
1821 ret = -EINVAL;
1822 goto free_fr;
1823 }
1824 subsys->type = type;
1825 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
1826 GFP_KERNEL);
1827 if (!subsys->subsysnqn) {
1828 ret = -ENOMEM;
1829 goto free_fr;
1830 }
1831 subsys->cntlid_min = NVME_CNTLID_MIN;
1832 subsys->cntlid_max = NVME_CNTLID_MAX;
1833 kref_init(&subsys->ref);
1834
1835 mutex_init(&subsys->lock);
1836 xa_init(&subsys->namespaces);
1837 INIT_LIST_HEAD(&subsys->ctrls);
1838 INIT_LIST_HEAD(&subsys->hosts);
1839
1840 ret = nvmet_debugfs_subsys_setup(subsys);
1841 if (ret)
1842 goto free_subsysnqn;
1843
1844 return subsys;
1845
1846 free_subsysnqn:
1847 kfree(subsys->subsysnqn);
1848 free_fr:
1849 kfree(subsys->firmware_rev);
1850 free_mn:
1851 kfree(subsys->model_number);
1852 free_subsys:
1853 kfree(subsys);
1854 return ERR_PTR(ret);
1855 }
1856
1857 static void nvmet_subsys_free(struct kref *ref)
1858 {
1859 struct nvmet_subsys *subsys =
1860 container_of(ref, struct nvmet_subsys, ref);
1861
1862 WARN_ON_ONCE(!xa_empty(&subsys->namespaces));
1863
1864 nvmet_debugfs_subsys_free(subsys);
1865
1866 xa_destroy(&subsys->namespaces);
1867 nvmet_passthru_subsys_free(subsys);
1868
1869 kfree(subsys->subsysnqn);
1870 kfree(subsys->model_number);
1871 kfree(subsys->firmware_rev);
1872 kfree(subsys);
1873 }
1874
1875 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
1876 {
1877 struct nvmet_ctrl *ctrl;
1878
1879 mutex_lock(&subsys->lock);
1880 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
1881 ctrl->ops->delete_ctrl(ctrl);
1882 mutex_unlock(&subsys->lock);
1883 }
1884
1885 void nvmet_subsys_put(struct nvmet_subsys *subsys)
1886 {
1887 kref_put(&subsys->ref, nvmet_subsys_free);
1888 }
1889
1890 static int __init nvmet_init(void)
1891 {
1892 int error = -ENOMEM;
1893
1894 nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
1895
1896 nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
1897 NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
1898 SLAB_HWCACHE_ALIGN, NULL);
1899 if (!nvmet_bvec_cache)
1900 return -ENOMEM;
1901
1902 zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
1903 if (!zbd_wq)
1904 goto out_destroy_bvec_cache;
1905
1906 buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
1907 WQ_MEM_RECLAIM, 0);
1908 if (!buffered_io_wq)
1909 goto out_free_zbd_work_queue;
1910
1911 nvmet_wq = alloc_workqueue("nvmet-wq",
1912 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0);
1913 if (!nvmet_wq)
1914 goto out_free_buffered_work_queue;
1915
1916 error = nvmet_init_discovery();
1917 if (error)
1918 goto out_free_nvmet_work_queue;
1919
1920 error = nvmet_init_debugfs();
1921 if (error)
1922 goto out_exit_discovery;
1923
1924 error = nvmet_init_configfs();
1925 if (error)
1926 goto out_exit_debugfs;
1927
1928 return 0;
1929
1930 out_exit_debugfs:
1931 nvmet_exit_debugfs();
1932 out_exit_discovery:
1933 nvmet_exit_discovery();
1934 out_free_nvmet_work_queue:
1935 destroy_workqueue(nvmet_wq);
1936 out_free_buffered_work_queue:
1937 destroy_workqueue(buffered_io_wq);
1938 out_free_zbd_work_queue:
1939 destroy_workqueue(zbd_wq);
1940 out_destroy_bvec_cache:
1941 kmem_cache_destroy(nvmet_bvec_cache);
1942 return error;
1943 }
1944
1945 static void __exit nvmet_exit(void)
1946 {
1947 nvmet_exit_configfs();
1948 nvmet_exit_debugfs();
1949 nvmet_exit_discovery();
1950 ida_destroy(&cntlid_ida);
1951 destroy_workqueue(nvmet_wq);
1952 destroy_workqueue(buffered_io_wq);
1953 destroy_workqueue(zbd_wq);
1954 kmem_cache_destroy(nvmet_bvec_cache);
1955
1956 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1957 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1958 }
1959
1960 module_init(nvmet_init);
1961 module_exit(nvmet_exit);
1962
1963 MODULE_DESCRIPTION("NVMe target core framework");
1964 MODULE_LICENSE("GPL v2");
1965