Lines Matching +full:v +full:- +full:pos +full:- +full:supply
3 rbd.c -- Export ceph rados objects as a Linux block device
27 Documentation/ABI/testing/sysfs-bus-rbd
43 #include <linux/blk-mq.h>
58 * -EINVAL without updating it.
60 static int atomic_inc_return_safe(atomic_t *v)
64 counter = (unsigned int)atomic_fetch_add_unless(v, 1, 0);
68 atomic_dec(v);
70 return -EINVAL;
73 /* Decrement the counter. Return the resulting value, or -EINVAL */
74 static int atomic_dec_return_safe(atomic_t *v)
78 counter = atomic_dec_return(v);
82 atomic_inc(v);
84 return -EINVAL;
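
Taken together, these helpers implement a saturating counter: the increment refuses to move the counter off 0 (the "already torn down" state) and backs off on overflow, while the decrement backs off on underflow. A minimal usage sketch (editor's illustration; use_parent() is hypothetical, but the pairing mirrors rbd_dev_parent_get()/rbd_dev_parent_put() on rbd_dev->parent_ref further down):

    /* Take a reference iff the parent has not been torn down:
     * atomic_inc_return_safe() will not move the counter off 0. */
    if (atomic_inc_return_safe(&rbd_dev->parent_ref) > 0) {
            use_parent(rbd_dev);    /* hypothetical consumer */

            /* Drop the reference; 0 means we were the last user. */
            if (atomic_dec_return_safe(&rbd_dev->parent_ref) == 0)
                    rbd_dev_unparent(rbd_dev);
    }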
96 (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
100 #define RBD_SNAP_HEAD_NAME "-"
105 #define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1)
144 * block device image metadata (in-memory version)
170 * user-mapped image, the names are supplied and the id's associated
175 * non-null if the image it represents is a child in a layered
249-258 (fragment of the copyup state-machine ASCII diagram: only its "v" arrow lines matched, together with the stray labels "deep-copyup", "flattened)" and "not needed)"; the diagram is not reconstructible from these lines)
352 list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item)
354 list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item)
455 * Flag bits for rbd_dev->flags:
456 * - REMOVING (which is coupled with rbd_dev->open_count) is protected
457 * by rbd_dev->lock
462 RBD_DEV_FLAG_READONLY, /* -o ro or snapshot */
473 /* Slab caches for frequently-allocated structures */
488 * single-major requires >= 0.75 version of userspace rbd utility.
515 return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
520 return rbd_dev->spec->snap_id != CEPH_NOSNAP;
525 lockdep_assert_held(&rbd_dev->lock_rwsem);
527 return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
528 rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING;
535 down_read(&rbd_dev->lock_rwsem);
537 up_read(&rbd_dev->lock_rwsem);
569 return attr->mode;
604 else if (rbd_dev->disk)
606 RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf);
607 else if (rbd_dev->spec && rbd_dev->spec->image_name)
609 RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf);
610 else if (rbd_dev->spec && rbd_dev->spec->image_id)
612 RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf);
651 rbd_assert(pending->num_pending > 0);
653 if (*result && !pending->result)
654 pending->result = *result;
655 if (--pending->num_pending)
658 *result = pending->result;
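
The pattern behind struct pending_result: the submitter bumps num_pending once per sub-request it actually sends, and every completion funnels through pending_result_dec(), which latches the first non-zero result and returns true only when the last sub-request finishes. A consumer-side sketch (editor's illustration; send_subreq() is hypothetical, the shape follows the copyup loops around lines 3148 and 3182):

    /* submit side */
    for (i = 0; i < n; i++) {
            ret = send_subreq(obj_req, i);          /* hypothetical */
            if (ret) {
                    obj_req->pending.result = ret;  /* latch error */
                    break;
            }
            obj_req->pending.num_pending++;
    }

    /* completion side, run once per finished sub-request */
    if (!pending_result_dec(&obj_req->pending, &result))
            return;         /* others still in flight */
    /* all done: 'result' is 0 or the first error seen */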
664 struct rbd_device *rbd_dev = disk->private_data;
667 spin_lock_irq(&rbd_dev->lock);
668 if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
671 rbd_dev->open_count++;
672 spin_unlock_irq(&rbd_dev->lock);
674 return -ENOENT;
676 (void) get_device(&rbd_dev->dev);
683 struct rbd_device *rbd_dev = disk->private_data;
686 spin_lock_irq(&rbd_dev->lock);
687 open_count_before = rbd_dev->open_count--;
688 spin_unlock_irq(&rbd_dev->lock);
691 put_device(&rbd_dev->dev);
707 int ret = -ENOMEM;
714 kref_init(&rbdc->kref);
715 INIT_LIST_HEAD(&rbdc->node);
717 rbdc->client = ceph_create_client(ceph_opts, rbdc);
718 if (IS_ERR(rbdc->client))
720 ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
722 ret = ceph_open_session(rbdc->client);
727 list_add_tail(&rbdc->node, &rbd_client_list);
734 ceph_destroy_client(rbdc->client);
747 kref_get(&rbdc->kref);
760 if (ceph_opts->flags & CEPH_OPT_NOSHARE)
765 if (!ceph_compare_options(ceph_opts, iter->client)) {
878 list_del(&rbdc->node);
881 ceph_destroy_client(rbdc->client);
892 kref_put(&rbdc->kref, rbd_client_release);
911 * Using an existing client. Make sure ->pg_pools is up to
914 ret = ceph_wait_for_latest_osdmap(rbdc->client,
915 rbdc->client->options->mount_timeout);
940 if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
943 /* The bio layer requires at least sector-sized I/O */
945 if (ondisk->options.order < SECTOR_SHIFT)
950 if (ondisk->options.order > 8 * sizeof (int) - 1)
957 snap_count = le32_to_cpu(ondisk->snap_count);
958 size = SIZE_MAX - sizeof (struct ceph_snap_context);
966 size -= snap_count * sizeof (__le64);
967 if ((u64) size < le64_to_cpu(ondisk->snap_names_len))
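
These checks are the wrap-free rearrangement of a bound that would overflow if computed directly; a division check between the two matched lines (not shown in this listing) rejects snap_count values that would wrap the multiplication. In outline (editor's note):

    /* Naive form, in which both the product and the sum can wrap:
     *   sizeof(ctx) + snap_count * sizeof(__le64) + names_len <= SIZE_MAX
     * Wrap-free form used above:
     *   size = SIZE_MAX - sizeof(ctx);
     *   reject if snap_count > size / sizeof(__le64);
     *   size -= snap_count * sizeof(__le64);
     *   reject if names_len > size;
     */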
978 return 1U << header->obj_order;
983 if (rbd_dev->header.stripe_unit == 0 ||
984 rbd_dev->header.stripe_count == 0) {
985 rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header);
986 rbd_dev->header.stripe_count = 1;
989 rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit;
990 rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count;
991 rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header);
992 rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ?
993 rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id;
994 RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
999 kfree(header->object_prefix);
1000 ceph_put_snap_context(header->snapc);
1001 kfree(header->snap_sizes);
1002 kfree(header->snap_names);
1009 * on-disk header.
1020 int ret = -ENOMEM;
1026 object_prefix = kstrndup(ondisk->object_prefix,
1027 sizeof(ondisk->object_prefix),
1030 return -ENOMEM;
1035 snap_count = le32_to_cpu(ondisk->snap_count);
1039 snapc->seq = le64_to_cpu(ondisk->snap_seq);
1042 u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
1054 sizeof(*header->snap_sizes),
1068 memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len);
1069 snaps = ondisk->snaps;
1071 snapc->snaps[i] = le64_to_cpu(snaps[i].id);
1079 header->object_prefix = object_prefix;
1080 header->obj_order = ondisk->options.order;
1085 header->image_size = le64_to_cpu(ondisk->image_size);
1086 header->snapc = snapc;
1087 header->snap_names = snap_names;
1088 header->snap_sizes = snap_sizes;
1092 ret = -EIO;
1106 rbd_assert(which < rbd_dev->header.snapc->num_snaps);
1110 snap_name = rbd_dev->header.snap_names;
1111 while (which--)
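
Format 1 snapshot names are stored as consecutive NUL-terminated strings, so name number 'which' is reached by hopping strlen()+1 bytes at a time. A standalone sketch of the walk (editor's illustration, userspace C):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char names[] = "alpha\0beta\0gamma";
            const char *p = names;
            unsigned int which = 2;

            while (which--)
                    p += strlen(p) + 1;     /* hop over one name */
            printf("%s\n", p);              /* prints "gamma" */
            return 0;
    }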
1128 return snap_id1 == snap_id2 ? 0 : -1;
1143 struct ceph_snap_context *snapc = rbd_dev->header.snapc;
1146 found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps,
1149 return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX;
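
snapc->snaps is held in descending order (newest snapshot first), so the bsearch() comparator must be reversed, which is what the "? 0 : -1" fragment at 1128 belongs to. A runnable miniature (editor's illustration):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int cmp_desc(const void *a, const void *b)
    {
            uint64_t x = *(const uint64_t *)a;
            uint64_t y = *(const uint64_t *)b;

            if (x < y)
                    return 1;       /* reversed: array descends */
            return x == y ? 0 : -1;
    }

    int main(void)
    {
            uint64_t snaps[] = { 40, 30, 20, 10 };  /* newest first */
            uint64_t key = 20;
            uint64_t *found = bsearch(&key, snaps, 4, sizeof(*snaps),
                                      cmp_desc);

            printf("index %td\n", found - snaps);   /* prints "index 2" */
            return 0;
    }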
1160 return ERR_PTR(-ENOENT);
1163 return snap_name ? snap_name : ERR_PTR(-ENOMEM);
1171 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
1172 if (rbd_dev->image_format == 1)
1181 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
1183 *snap_size = rbd_dev->header.image_size;
1184 } else if (rbd_dev->image_format == 1) {
1189 return -ENOENT;
1191 *snap_size = rbd_dev->header.snap_sizes[which];
1207 u64 snap_id = rbd_dev->spec->snap_id;
1215 rbd_dev->mapping.size = size;
1221 rbd_dev->mapping.size = 0;
1255 switch (obj_req->img_request->data_type) {
1257 zero_bios(&obj_req->bio_pos, off, bytes);
1261 zero_bvecs(&obj_req->bvec_pos, off, bytes);
1273 kref_read(&obj_request->kref));
1274 kref_put(&obj_request->kref, rbd_obj_request_destroy);
1280 rbd_assert(obj_request->img_request == NULL);
1283 obj_request->img_request = img_request;
1291 list_del(&obj_request->ex.oe_item);
1292 rbd_assert(obj_request->img_request == img_request);
1298 struct rbd_obj_request *obj_req = osd_req->r_priv;
1301 __func__, osd_req, obj_req, obj_req->ex.oe_objno,
1302 obj_req->ex.oe_off, obj_req->ex.oe_len);
1303 ceph_osdc_start_request(osd_req->r_osdc, osd_req);
1313 set_bit(IMG_REQ_LAYERED, &img_request->flags);
1318 return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
1323 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
1325 return !obj_req->ex.oe_off &&
1326 obj_req->ex.oe_len == rbd_dev->layout.object_size;
1331 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
1333 return obj_req->ex.oe_off + obj_req->ex.oe_len ==
1334 rbd_dev->layout.object_size;
1342 rbd_assert(obj_req->img_request->snapc);
1344 if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
1346 obj_req->ex.oe_objno);
1350 if (!obj_req->num_img_extents) {
1352 obj_req->ex.oe_objno);
1357 !obj_req->img_request->snapc->num_snaps) {
1359 obj_req->ex.oe_objno);
1363 obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
1368 return ceph_file_extents_bytes(obj_req->img_extents,
1369 obj_req->num_img_extents);
1374 switch (img_req->op_type) {
1388 struct rbd_obj_request *obj_req = osd_req->r_priv;
1392 osd_req->r_result, obj_req);
1399 if (osd_req->r_result > 0 && rbd_img_is_write(obj_req->img_request))
1402 result = osd_req->r_result;
1409 struct rbd_obj_request *obj_request = osd_req->r_priv;
1410 struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
1411 struct ceph_options *opt = rbd_dev->rbd_client->client->options;
1413 osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica;
1414 osd_req->r_snapid = obj_request->img_request->snap_id;
1419 struct rbd_obj_request *obj_request = osd_req->r_priv;
1421 osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
1422 ktime_get_real_ts64(&osd_req->r_mtime);
1423 osd_req->r_data_offset = obj_request->ex.oe_off;
1430 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
1431 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
1433 const char *name_format = rbd_dev->image_format == 1 ?
1439 return ERR_PTR(-ENOMEM);
1441 list_add_tail(&req->r_private_item, &obj_req->osd_reqs);
1442 req->r_callback = rbd_osd_req_callback;
1443 req->r_priv = obj_req;
1449 ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
1450 req->r_base_oloc.pool = rbd_dev->layout.pool_id;
1452 ret = ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
1453 rbd_dev->header.object_prefix,
1454 obj_req->ex.oe_objno);
1464 rbd_assert(obj_req->img_request->snapc);
1465 return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
1477 ceph_object_extent_init(&obj_request->ex);
1478 INIT_LIST_HEAD(&obj_request->osd_reqs);
1479 mutex_init(&obj_request->state_mutex);
1480 kref_init(&obj_request->kref);
1496 while (!list_empty(&obj_request->osd_reqs)) {
1497 osd_req = list_first_entry(&obj_request->osd_reqs,
1499 list_del_init(&osd_req->r_private_item);
1503 switch (obj_request->img_request->data_type) {
1509 kfree(obj_request->bvec_pos.bvecs);
1515 kfree(obj_request->img_extents);
1516 if (obj_request->copyup_bvecs) {
1517 for (i = 0; i < obj_request->copyup_bvec_count; i++) {
1518 if (obj_request->copyup_bvecs[i].bv_page)
1519 __free_page(obj_request->copyup_bvecs[i].bv_page);
1521 kfree(obj_request->copyup_bvecs);
1533 rbd_spec_put(rbd_dev->parent_spec);
1534 rbd_dev->parent_spec = NULL;
1535 rbd_dev->parent_overlap = 0;
1540 * image's parent fields can be safely torn down--after there are no
1541 * more in-flight requests to the parent image. When the last
1548 if (!rbd_dev->parent_spec)
1551 counter = atomic_dec_return_safe(&rbd_dev->parent_ref);
1564 * If an image has a non-zero parent overlap, get a reference to its
1567 * Returns true if the rbd device has a parent with a non-zero
1575 if (!rbd_dev->parent_spec)
1578 if (rbd_dev->parent_overlap)
1579 counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
1593 img_request->rbd_dev = rbd_dev;
1594 img_request->op_type = op_type;
1596 INIT_LIST_HEAD(&img_request->lock_item);
1597 INIT_LIST_HEAD(&img_request->object_extents);
1598 mutex_init(&img_request->state_mutex);
1608 struct rbd_device *rbd_dev = img_req->rbd_dev;
1610 lockdep_assert_held(&rbd_dev->header_rwsem);
1613 img_req->snap_id = rbd_dev->spec->snap_id;
1626 WARN_ON(!list_empty(&img_request->lock_item));
1631 rbd_dev_parent_put(img_request->rbd_dev);
1634 ceph_put_snap_context(img_request->snapc);
1636 if (test_bit(IMG_REQ_CHILD, &img_request->flags))
1642 #define OBJ_MASK ((1 << BITS_PER_OBJ) - 1)
1649 rbd_assert(objno < rbd_dev->object_map_size);
1651 *shift = (OBJS_PER_BYTE - off - 1) * BITS_PER_OBJ;
1659 lockdep_assert_held(&rbd_dev->object_map_lock);
1661 return (rbd_dev->object_map[index] >> shift) & OBJ_MASK;
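
With BITS_PER_OBJ == 2 this packs four object states per byte, slot 0 in the high bits, which is what the (OBJS_PER_BYTE - off - 1) shift at 1651 encodes. A worked miniature (editor's illustration, userspace C):

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t object_map_get(const uint8_t *map, uint64_t objno)
    {
            uint64_t index = objno / 4;       /* byte holding the state */
            int off = objno % 4;              /* slot within that byte  */
            int shift = (4 - off - 1) * 2;    /* slot 0 -> bits 7..6    */

            return (map[index] >> shift) & 3; /* OBJ_MASK == 0b11       */
    }

    int main(void)
    {
            uint8_t map[2] = { 0x00, 0x30 };  /* object 5: slot 1 of map[1] */

            printf("%u\n", object_map_get(map, 5));   /* prints 3 */
            return 0;
    }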
1670 lockdep_assert_held(&rbd_dev->object_map_lock);
1674 p = &rbd_dev->object_map[index];
1682 spin_lock(&rbd_dev->object_map_lock);
1684 spin_unlock(&rbd_dev->object_map_lock);
1691 * An image mapped read-only can't use the object map -- it isn't
1701 return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) &&
1702 !(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID));
1722 rbd_dev->spec->image_id);
1725 rbd_dev->spec->image_id, snap_id);
1730 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
1742 ret = ceph_cls_lock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME,
1744 if (ret != -EBUSY || broke_lock) {
1745 if (ret == -EEXIST)
1752 ret = ceph_cls_lock_info(osdc, &oid, &rbd_dev->header_oloc,
1756 if (ret == -ENOENT)
1770 ret = ceph_cls_break_lock(osdc, &oid, &rbd_dev->header_oloc,
1775 if (ret == -ENOENT)
1788 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
1794 ret = ceph_cls_unlock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME,
1796 if (ret && ret != -ENOENT)
1822 return -EINVAL;
1827 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
1838 rbd_assert(!rbd_dev->object_map && !rbd_dev->object_map_size);
1840 num_objects = ceph_get_num_objects(&rbd_dev->layout,
1841 rbd_dev->mapping.size);
1850 rbd_object_map_name(rbd_dev, rbd_dev->spec->snap_id, &oid);
1851 ret = ceph_osdc_call(osdc, &oid, &rbd_dev->header_oloc,
1866 ret = -EINVAL;
1871 ret = -EINVAL;
1875 rbd_dev->object_map = kvmalloc(object_map_bytes, GFP_KERNEL);
1876 if (!rbd_dev->object_map) {
1877 ret = -ENOMEM;
1881 rbd_dev->object_map_size = object_map_size;
1882 ceph_copy_from_page_vector(pages, rbd_dev->object_map,
1892 kvfree(rbd_dev->object_map);
1893 rbd_dev->object_map = NULL;
1894 rbd_dev->object_map_size = 0;
1911 if (rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID)
1953 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
1960 if (osd_req->r_result)
1961 return osd_req->r_result;
1966 if (osd_req->r_num_ops == 1)
1970 * Update in-memory HEAD object map.
1972 rbd_assert(osd_req->r_num_ops == 2);
1974 rbd_assert(osd_data->type == CEPH_OSD_DATA_TYPE_PAGES);
1976 p = page_address(osd_data->pages[0]);
1978 rbd_assert(objno == obj_req->ex.oe_objno);
1985 spin_lock(&rbd_dev->object_map_lock);
1990 spin_unlock(&rbd_dev->object_map_lock);
1997 struct rbd_obj_request *obj_req = osd_req->r_priv;
2001 osd_req->r_result, obj_req);
2046 osd_req_op_cls_request_data_pages(req, which, pages, p - start, 0,
2053 * 0 - object map update sent
2054 * 1 - object map update isn't needed
2055 * <0 - error
2060 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
2061 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
2068 if (!update_needed(rbd_dev, obj_req->ex.oe_objno, new_state))
2076 return -ENOMEM;
2078 list_add_tail(&req->r_private_item, &obj_req->osd_reqs);
2079 req->r_callback = rbd_object_map_callback;
2080 req->r_priv = obj_req;
2082 rbd_object_map_name(rbd_dev, snap_id, &req->r_base_oid);
2083 ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
2084 req->r_flags = CEPH_OSD_FLAG_WRITE;
2085 ktime_get_real_ts64(&req->r_mtime);
2098 ret = rbd_cls_object_map_update(req, which, obj_req->ex.oe_objno,
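
Callers consume the 0/1/<0 convention documented at 2053-2055 by treating 1 as "advance the state machine inline" and 0 as "an update is in flight, wait for its callback". A paraphrase of the write path (editor's sketch, condensed from the state machine around line 3303):

    ret = rbd_object_map_update(obj_req, CEPH_NOSNAP, new_state, NULL);
    if (ret < 0) {
            *result = ret;          /* hard error                  */
            return true;
    }
    obj_req->write_state = RBD_OBJ_WRITE_PRE_OBJECT_MAP;
    if (ret > 0)
            goto again;             /* no update needed, advance   */
    return false;                   /* update sent, await callback */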
2117 while (cnt && img_extents[cnt - 1].fe_off >= overlap)
2118 cnt--;
2121 struct ceph_file_extent *ex = &img_extents[cnt - 1];
2124 if (ex->fe_off + ex->fe_len > overlap)
2125 ex->fe_len = overlap - ex->fe_off;
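
prune_extents() first drops extents lying wholly at or beyond the parent overlap, then clamps a straddling final extent to it. A standalone check of that logic (editor's illustration, userspace C):

    #include <stdint.h>
    #include <stdio.h>

    struct ext { uint64_t off, len; };

    static void prune(struct ext *e, uint32_t *cnt, uint64_t overlap)
    {
            while (*cnt && e[*cnt - 1].off >= overlap)
                    (*cnt)--;                       /* wholly past overlap */
            if (*cnt) {
                    struct ext *last = &e[*cnt - 1];

                    if (last->off + last->len > overlap)
                            last->len = overlap - last->off;  /* clamp */
            }
    }

    int main(void)
    {
            struct ext e[] = { { 0, 50 }, { 80, 40 }, { 120, 10 } };
            uint32_t cnt = 3;

            prune(e, &cnt, 100);
            printf("%u extents; last %llu~%llu\n", cnt,
                   (unsigned long long)e[1].off,
                   (unsigned long long)e[1].len);
            /* prints: 2 extents; last 80~20 */
            return 0;
    }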
2138 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
2141 if (!rbd_dev->parent_overlap)
2144 ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno,
2145 entire ? 0 : obj_req->ex.oe_off,
2146 entire ? rbd_dev->layout.object_size :
2147 obj_req->ex.oe_len,
2148 &obj_req->img_extents,
2149 &obj_req->num_img_extents);
2153 prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
2154 rbd_dev->parent_overlap);
2160 struct rbd_obj_request *obj_req = osd_req->r_priv;
2162 switch (obj_req->img_request->data_type) {
2165 &obj_req->bio_pos,
2166 obj_req->ex.oe_len);
2170 rbd_assert(obj_req->bvec_pos.iter.bi_size ==
2171 obj_req->ex.oe_len);
2172 rbd_assert(obj_req->bvec_idx == obj_req->bvec_count);
2174 &obj_req->bvec_pos);
2207 struct rbd_obj_request *obj_req = osd_req->r_priv;
2214 osd_req_op_cls_request_data_bvecs(osd_req, which, obj_req->copyup_bvecs,
2215 obj_req->copyup_bvec_count, bytes);
2221 obj_req->read_state = RBD_OBJ_READ_START;
2228 struct rbd_obj_request *obj_req = osd_req->r_priv;
2229 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
2233 !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) {
2235 rbd_dev->layout.object_size,
2236 rbd_dev->layout.object_size,
2237 rbd_dev->opts->alloc_hint_flags);
2246 obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
2259 obj_req->write_state = RBD_OBJ_WRITE_START;
2272 struct rbd_obj_request *obj_req = osd_req->r_priv;
2274 if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) {
2275 rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION);
2280 obj_req->ex.oe_off, obj_req->ex.oe_len,
2287 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
2299 if (rbd_dev->opts->alloc_size != rbd_dev->layout.object_size ||
2301 off = round_up(obj_req->ex.oe_off, rbd_dev->opts->alloc_size);
2302 next_off = round_down(obj_req->ex.oe_off + obj_req->ex.oe_len,
2303 rbd_dev->opts->alloc_size);
2307 dout("%s %p %llu~%llu -> %llu~%llu\n", __func__,
2308 obj_req, obj_req->ex.oe_off, obj_req->ex.oe_len,
2309 off, next_off - off);
2310 obj_req->ex.oe_off = off;
2311 obj_req->ex.oe_len = next_off - off;
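
The round_up()/round_down() pair trims a discard to whole alloc_size units so only fully covered chunks are deallocated. Worked numbers (editor's illustration; alloc_size of 64 KiB is an assumption):

    /* request:  4096~200704  (bytes 4096 .. 204799)
     * off      = round_up(4096, 65536)            = 65536
     * next_off = round_down(4096 + 200704, 65536) = 196608
     * trimmed:  65536~131072; the partial 64K units at either
     * end are simply not discarded. */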
2319 obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
2320 if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents)
2321 obj_req->flags |= RBD_OBJ_FLAG_DELETION;
2323 obj_req->write_state = RBD_OBJ_WRITE_START;
2330 struct rbd_obj_request *obj_req = osd_req->r_priv;
2334 if (obj_req->num_img_extents) {
2335 if (!(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED))
2340 rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION);
2351 obj_req->ex.oe_off, obj_req->ex.oe_len,
2364 if (!obj_req->num_img_extents) {
2365 obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
2367 obj_req->flags |= RBD_OBJ_FLAG_DELETION;
2370 obj_req->write_state = RBD_OBJ_WRITE_START;
2376 struct rbd_img_request *img_req = obj_req->img_request;
2378 switch (img_req->op_type) {
2380 if (!use_object_map(img_req->rbd_dev) ||
2381 !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST))
2388 if (rbd_obj_is_entire(obj_req) && obj_req->num_img_extents &&
2389 !(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED))
2401 struct rbd_obj_request *obj_req = osd_req->r_priv;
2403 switch (obj_req->img_request->op_type) {
2429 switch (img_req->op_type) {
2453 img_req->state = RBD_IMG_START;
2464 union rbd_img_fill_iter *pos;
2481 return &obj_req->ex;
2487 * because ->set_pos_fn() should be called only once per object.
2493 return l->stripe_unit != l->object_size;
2504 img_req->data_type = fctx->pos_type;
2510 fctx->iter = *fctx->pos;
2512 ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
2515 &img_req->object_extents,
2517 fctx->set_pos_fn, &fctx->iter);
2530 * @fctx->pos data buffer.
2534 * different chunks of @fctx->pos data buffer.
2536 * @fctx->pos data buffer is assumed to be large enough.
2543 struct rbd_device *rbd_dev = img_req->rbd_dev;
2548 if (fctx->pos_type == OBJ_REQUEST_NODATA ||
2549 !rbd_layout_is_fancy(&rbd_dev->layout))
2553 img_req->data_type = OBJ_REQUEST_OWN_BVECS;
2556 * Create object requests and determine ->bvec_count for each object
2557 * request. Note that ->bvec_count sum over all object requests may
2562 fctx->iter = *fctx->pos;
2564 ret = ceph_file_to_extents(&rbd_dev->layout,
2567 &img_req->object_extents,
2569 fctx->count_fn, &fctx->iter);
2575 obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
2576 sizeof(*obj_req->bvec_pos.bvecs),
2578 if (!obj_req->bvec_pos.bvecs)
2579 return -ENOMEM;
2586 fctx->iter = *fctx->pos;
2588 ret = ceph_iterate_extents(&rbd_dev->layout,
2591 &img_req->object_extents,
2592 fctx->copy_fn, &fctx->iter);
2607 .pos = &dummy,
2619 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
2620 obj_req->bio_pos = *it;
2630 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
2632 obj_req->bvec_count++;
2643 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
2645 obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
2646 obj_req->bvec_pos.iter.bi_size += bv.bv_len;
2657 .pos = (union rbd_img_fill_iter *)bio_pos,
2671 struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter };
2682 obj_req->bvec_pos = *it;
2683 ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes);
2694 obj_req->bvec_count++;
2705 obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
2706 obj_req->bvec_pos.iter.bi_size += bv.bv_len;
2717 .pos = (union rbd_img_fill_iter *)bvec_pos,
2747 rbd_img_handle_request(img_req, img_req->work_result);
2752 INIT_WORK(&img_req->work, rbd_img_handle_request_work);
2753 img_req->work_result = result;
2754 queue_work(rbd_wq, &img_req->work);
2759 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
2761 if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) {
2762 obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST;
2767 obj_req->ex.oe_objno);
2781 obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
2795 struct rbd_img_request *img_req = obj_req->img_request;
2796 struct rbd_device *parent = img_req->rbd_dev->parent;
2802 return -ENOMEM;
2805 __set_bit(IMG_REQ_CHILD, &child_img_req->flags);
2806 child_img_req->obj_request = obj_req;
2808 down_read(&parent->header_rwsem);
2810 up_read(&parent->header_rwsem);
2816 switch (img_req->data_type) {
2819 obj_req->img_extents,
2820 obj_req->num_img_extents,
2821 &obj_req->bio_pos);
2826 obj_req->img_extents,
2827 obj_req->num_img_extents,
2828 &obj_req->bvec_pos);
2835 obj_req->img_extents,
2836 obj_req->num_img_extents,
2837 obj_req->copyup_bvecs);
2851 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
2855 switch (obj_req->read_state) {
2860 *result = -ENOENT;
2861 obj_req->read_state = RBD_OBJ_READ_OBJECT;
2870 obj_req->read_state = RBD_OBJ_READ_OBJECT;
2873 if (*result == -ENOENT && rbd_dev->parent_overlap) {
2880 if (obj_req->num_img_extents) {
2886 obj_req->read_state = RBD_OBJ_READ_PARENT;
2892 * -ENOENT means a hole in the image -- zero-fill the entire
2893 * length of the request. A short read also implies zero-fill
2896 if (*result == -ENOENT) {
2897 rbd_obj_zero_range(obj_req, 0, obj_req->ex.oe_len);
2900 if (*result < obj_req->ex.oe_len)
2902 obj_req->ex.oe_len - *result);
2904 rbd_assert(*result == obj_req->ex.oe_len);
2910 * The parent image is read only up to the overlap -- zero-fill
2916 if (obj_overlap < obj_req->ex.oe_len)
2918 obj_req->ex.oe_len - obj_overlap);
2928 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
2930 if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno))
2931 obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST;
2933 if (!(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST) &&
2934 (obj_req->flags & RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT)) {
2944 * 0 - object map update sent
2945 * 1 - object map update isn't needed
2946 * <0 - error
2950 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
2953 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
2956 if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
2971 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)
2978 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
3079 rbd_assert(!obj_req->copyup_bvecs);
3080 obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap);
3081 obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count,
3082 sizeof(*obj_req->copyup_bvecs),
3084 if (!obj_req->copyup_bvecs)
3085 return -ENOMEM;
3087 for (i = 0; i < obj_req->copyup_bvec_count; i++) {
3092 return -ENOMEM;
3094 bvec_set_page(&obj_req->copyup_bvecs[i], page, len, 0);
3095 obj_overlap -= len;
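
calc_pages_for(0, obj_overlap) sizes the bvec array to the number of pages the copyup range spans; each pass then pins one page and takes up to PAGE_SIZE of the remaining overlap. Worked numbers (editor's illustration, assuming PAGE_SIZE 4096):

    /* obj_overlap = 10240  ->  copyup_bvec_count = 3
     *   bvecs[0] = { page0, len 4096, off 0 }
     *   bvecs[1] = { page1, len 4096, off 0 }
     *   bvecs[2] = { page2, len 2048, off 0 }   <- remainder */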
3109 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
3112 rbd_assert(obj_req->num_img_extents);
3113 prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
3114 rbd_dev->parent_overlap);
3115 if (!obj_req->num_img_extents) {
3118 * image has been flattened). Re-submit the original write
3119 * request -- pass MODS_ONLY since the copyup isn't needed
3134 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
3135 struct ceph_snap_context *snapc = obj_req->img_request->snapc;
3140 rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
3142 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
3145 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
3148 for (i = 0; i < snapc->num_snaps; i++) {
3149 if ((rbd_dev->header.features & RBD_FEATURE_FAST_DIFF) &&
3150 i + 1 < snapc->num_snaps)
3155 ret = rbd_object_map_update(obj_req, snapc->snaps[i],
3158 obj_req->pending.result = ret;
3163 obj_req->pending.num_pending++;
3172 rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
3175 * Only send non-zero copyup data to save some I/O and network
3176 * bandwidth -- zero copyup data is equivalent to the object not
3179 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
3182 if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
3185 * deep-copyup the object through all existing snapshots.
3191 obj_req->pending.result = ret;
3195 obj_req->pending.num_pending++;
3201 obj_req->pending.result = ret;
3205 obj_req->pending.num_pending++;
3210 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
3214 switch (obj_req->copyup_state) {
3223 if (obj_req->num_img_extents)
3224 obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT;
3226 obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
3232 if (is_zero_bvecs(obj_req->copyup_bvecs,
3235 obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS;
3239 if (!obj_req->pending.num_pending) {
3240 *result = obj_req->pending.result;
3241 obj_req->copyup_state = RBD_OBJ_COPYUP_OBJECT_MAPS;
3244 obj_req->copyup_state = __RBD_OBJ_COPYUP_OBJECT_MAPS;
3247 if (!pending_result_dec(&obj_req->pending, result))
3258 if (!obj_req->pending.num_pending) {
3259 *result = obj_req->pending.result;
3260 obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
3263 obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT;
3266 if (!pending_result_dec(&obj_req->pending, result))
3278 * 0 - object map update sent
3279 * 1 - object map update isn't needed
3280 * <0 - error
3284 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
3287 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
3290 if (!(obj_req->flags & RBD_OBJ_FLAG_DELETION))
3299 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
3303 switch (obj_req->write_state) {
3316 obj_req->write_state = RBD_OBJ_WRITE_PRE_OBJECT_MAP;
3331 obj_req->write_state = RBD_OBJ_WRITE_OBJECT;
3334 if (*result == -ENOENT) {
3335 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
3337 obj_req->copyup_state = RBD_OBJ_COPYUP_START;
3338 obj_req->write_state = __RBD_OBJ_WRITE_COPYUP;
3342 * On a non-existent object:
3343 * delete - -ENOENT, truncate/zero - 0
3345 if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
3351 obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
3367 obj_req->write_state = RBD_OBJ_WRITE_POST_OBJECT_MAP;
3387 struct rbd_img_request *img_req = obj_req->img_request;
3388 struct rbd_device *rbd_dev = img_req->rbd_dev;
3391 mutex_lock(&obj_req->state_mutex);
3396 mutex_unlock(&obj_req->state_mutex);
3401 obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
3402 obj_req->ex.oe_off, obj_req->ex.oe_len, *result);
3408 * This is open-coded in rbd_img_handle_request() to avoid parent chain
3414 rbd_img_handle_request(obj_req->img_request, result);
3419 struct rbd_device *rbd_dev = img_req->rbd_dev;
3421 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK))
3427 rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
3428 if (rbd_dev->opts->lock_on_read ||
3429 (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
3437 struct rbd_device *rbd_dev = img_req->rbd_dev;
3440 lockdep_assert_held(&rbd_dev->lock_rwsem);
3441 locked = rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED;
3442 spin_lock(&rbd_dev->lock_lists_lock);
3443 rbd_assert(list_empty(&img_req->lock_item));
3445 list_add_tail(&img_req->lock_item, &rbd_dev->acquiring_list);
3447 list_add_tail(&img_req->lock_item, &rbd_dev->running_list);
3448 spin_unlock(&rbd_dev->lock_lists_lock);
3454 struct rbd_device *rbd_dev = img_req->rbd_dev;
3457 lockdep_assert_held(&rbd_dev->lock_rwsem);
3458 spin_lock(&rbd_dev->lock_lists_lock);
3459 if (!list_empty(&img_req->lock_item)) {
3460 rbd_assert(!list_empty(&rbd_dev->running_list));
3461 list_del_init(&img_req->lock_item);
3462 need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
3463 list_empty(&rbd_dev->running_list));
3465 spin_unlock(&rbd_dev->lock_lists_lock);
3467 complete(&rbd_dev->quiescing_wait);
3472 struct rbd_device *rbd_dev = img_req->rbd_dev;
3485 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
3491 struct rbd_device *rbd_dev = img_req->rbd_dev;
3494 rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
3499 rbd_assert(!img_req->snapc);
3500 down_read(&rbd_dev->header_rwsem);
3501 img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
3502 up_read(&rbd_dev->header_rwsem);
3510 img_req->pending.result = result;
3514 img_req->pending.num_pending++;
3524 switch (img_req->state) {
3533 img_req->state = RBD_IMG_EXCLUSIVE_LOCK;
3542 if (!img_req->pending.num_pending) {
3543 *result = img_req->pending.result;
3544 img_req->state = RBD_IMG_OBJECT_REQUESTS;
3547 img_req->state = __RBD_IMG_OBJECT_REQUESTS;
3550 if (!pending_result_dec(&img_req->pending, result))
3566 struct rbd_device *rbd_dev = img_req->rbd_dev;
3570 down_read(&rbd_dev->lock_rwsem);
3571 mutex_lock(&img_req->state_mutex);
3575 mutex_unlock(&img_req->state_mutex);
3576 up_read(&rbd_dev->lock_rwsem);
3578 mutex_lock(&img_req->state_mutex);
3580 mutex_unlock(&img_req->state_mutex);
3586 test_bit(IMG_REQ_CHILD, &img_req->flags) ? "child " : "",
3587 obj_op_name(img_req->op_type), *result);
3598 if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
3599 struct rbd_obj_request *obj_req = img_req->obj_request;
3603 img_req = obj_req->img_request;
3619 return lhs->gid == rhs->gid && lhs->handle == rhs->handle;
3626 mutex_lock(&rbd_dev->watch_mutex);
3627 cid.gid = ceph_client_gid(rbd_dev->rbd_client->client);
3628 cid.handle = rbd_dev->watch_cookie;
3629 mutex_unlock(&rbd_dev->watch_mutex);
3639 dout("%s rbd_dev %p %llu-%llu -> %llu-%llu\n", __func__, rbd_dev,
3640 rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle,
3641 cid->gid, cid->handle);
3642 rbd_dev->owner_cid = *cid; /* struct */
3647 mutex_lock(&rbd_dev->watch_mutex);
3648 sprintf(buf, "%s %llu", RBD_LOCK_COOKIE_PREFIX, rbd_dev->watch_cookie);
3649 mutex_unlock(&rbd_dev->watch_mutex);
3656 rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
3657 strcpy(rbd_dev->lock_cookie, cookie);
3659 queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
3667 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
3672 rbd_dev->lock_cookie[0] != '\0');
3675 ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
3678 if (ret && ret != -EEXIST)
3690 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
3694 rbd_dev->lock_cookie[0] == '\0');
3696 ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
3697 RBD_LOCK_NAME, rbd_dev->lock_cookie);
3698 if (ret && ret != -ENOENT)
3702 rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
3703 rbd_dev->lock_cookie[0] = '\0';
3705 queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work);
3713 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
3722 ceph_start_encoding(&p, 2, 1, buf_size - CEPH_ENCODING_START_BLK_LEN);
3727 return ceph_osdc_notify(osdc, &rbd_dev->header_oid,
3728 &rbd_dev->header_oloc, buf, buf_size,
3765 if (ret && ret != -ETIMEDOUT) {
3776 while (n--) {
3790 ret = -EIO;
3810 ret = -ETIMEDOUT;
3818 ret = -EINVAL;
3831 lockdep_assert_held_write(&rbd_dev->lock_rwsem);
3833 cancel_delayed_work(&rbd_dev->lock_dwork);
3834 if (!completion_done(&rbd_dev->acquire_wait)) {
3835 rbd_assert(list_empty(&rbd_dev->acquiring_list) &&
3836 list_empty(&rbd_dev->running_list));
3837 rbd_dev->acquire_err = result;
3838 complete_all(&rbd_dev->acquire_wait);
3842 while (!list_empty(&rbd_dev->acquiring_list)) {
3843 img_req = list_first_entry(&rbd_dev->acquiring_list,
3845 mutex_lock(&img_req->state_mutex);
3846 rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
3848 list_move_tail(&img_req->lock_item,
3849 &rbd_dev->running_list);
3851 list_del_init(&img_req->lock_item);
3853 mutex_unlock(&img_req->state_mutex);
3860 return lhs->id.name.type == rhs->id.name.type &&
3861 lhs->id.name.num == rhs->id.name.num &&
3862 !strcmp(lhs->id.cookie, rhs->id.cookie) &&
3863 ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
3874 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
3882 ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
3883 &rbd_dev->header_oloc, RBD_LOCK_NAME,
3932 return ERR_PTR(-EBUSY);
3938 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
3945 ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
3946 &rbd_dev->header_oloc, &watchers,
3953 sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
3956 * Ignore addr->type while comparing. This mimics
3960 &locker->info.addr) &&
3967 dout("%s rbd_dev %p found cid %llu-%llu\n", __func__,
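
The cookie parsed by the sscanf() at 3953 is the same string formatted at 3648, i.e. the lock cookie round-trips through "<prefix> <watch cookie>" (in rbd.c the prefix is "auto"). A runnable round-trip (editor's illustration):

    #include <stdio.h>

    #define RBD_LOCK_COOKIE_PREFIX "auto"

    int main(void)
    {
            char buf[32];
            unsigned long long in = 123456789, out = 0;

            sprintf(buf, "%s %llu", RBD_LOCK_COOKIE_PREFIX, in);
            sscanf(buf, RBD_LOCK_COOKIE_PREFIX " %llu", &out);
            printf("%s -> %llu\n", buf, out);
            /* prints: auto 123456789 -> 123456789 */
            return 0;
    }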
3987 struct ceph_client *client = rbd_dev->rbd_client->client;
3997 if (ret != -EBUSY) {
4027 ENTITY_NAME(locker->id.name));
4029 ret = ceph_monc_blocklist_add(&client->monc,
4030 &locker->info.addr);
4033 ENTITY_NAME(locker->id.name), ret);
4037 ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
4038 &rbd_dev->header_oloc, RBD_LOCK_NAME,
4039 locker->id.cookie, &locker->id.name);
4040 if (ret && ret != -ENOENT) {
4065 if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
4076 * 0 - lock acquired
4077 * 1 - caller should call rbd_request_lock()
4078 * <0 - error
4084 down_read(&rbd_dev->lock_rwsem);
4086 rbd_dev->lock_state);
4088 up_read(&rbd_dev->lock_rwsem);
4092 up_read(&rbd_dev->lock_rwsem);
4093 down_write(&rbd_dev->lock_rwsem);
4095 rbd_dev->lock_state);
4097 up_write(&rbd_dev->lock_rwsem);
4107 up_write(&rbd_dev->lock_rwsem);
4111 rbd_assert(rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED);
4112 rbd_assert(list_empty(&rbd_dev->running_list));
4116 rbd_warn(rbd_dev, "post-acquire action failed: %d", ret);
4127 up_write(&rbd_dev->lock_rwsem);
4141 dout("%s rbd_dev %p ret %d - done\n", __func__, rbd_dev, ret);
4146 if (ret == -ETIMEDOUT) {
4148 } else if (ret == -EROFS) {
4150 down_write(&rbd_dev->lock_rwsem);
4152 up_write(&rbd_dev->lock_rwsem);
4155 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
4164 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
4172 lockdep_assert_held_write(&rbd_dev->lock_rwsem);
4174 if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
4178 * Ensure that all in-flight IO is flushed.
4180 rbd_dev->lock_state = RBD_LOCK_STATE_QUIESCING;
4181 rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
4182 if (list_empty(&rbd_dev->running_list))
4185 up_write(&rbd_dev->lock_rwsem);
4186 wait_for_completion(&rbd_dev->quiescing_wait);
4188 down_write(&rbd_dev->lock_rwsem);
4189 if (rbd_dev->lock_state != RBD_LOCK_STATE_QUIESCING)
4192 rbd_assert(list_empty(&rbd_dev->running_list));
4198 if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)
4204 rbd_assert(list_empty(&rbd_dev->running_list));
4221 * Give others a chance to grab the lock - we would re-acquire
4227 cancel_delayed_work(&rbd_dev->lock_dwork);
4235 down_write(&rbd_dev->lock_rwsem);
4237 up_write(&rbd_dev->lock_rwsem);
4248 spin_lock(&rbd_dev->lock_lists_lock);
4249 have_requests = !list_empty(&rbd_dev->acquiring_list);
4250 spin_unlock(&rbd_dev->lock_lists_lock);
4251 if (have_requests || delayed_work_pending(&rbd_dev->lock_dwork)) {
4253 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
4267 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
4270 down_write(&rbd_dev->lock_rwsem);
4271 if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
4272 dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n",
4277 downgrade_write(&rbd_dev->lock_rwsem);
4279 down_read(&rbd_dev->lock_rwsem);
4283 up_read(&rbd_dev->lock_rwsem);
4296 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
4299 down_write(&rbd_dev->lock_rwsem);
4300 if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
4301 dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n",
4303 rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
4307 downgrade_write(&rbd_dev->lock_rwsem);
4309 down_read(&rbd_dev->lock_rwsem);
4313 up_read(&rbd_dev->lock_rwsem);
4332 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
4337 down_read(&rbd_dev->lock_rwsem);
4339 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED &&
4340 rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid))
4349 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
4350 if (!rbd_dev->opts->exclusive) {
4353 queue_work(rbd_dev->task_wq,
4354 &rbd_dev->unlock_work);
4357 result = -EROFS;
4363 up_read(&rbd_dev->lock_rwsem);
4370 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
4380 buf_size - CEPH_ENCODING_START_BLK_LEN);
4386 ret = ceph_osdc_notify_ack(osdc, &rbd_dev->header_oid,
4387 &rbd_dev->header_oloc, notify_id, cookie,
4464 cookie, -EOPNOTSUPP);
4479 down_write(&rbd_dev->lock_rwsem);
4481 up_write(&rbd_dev->lock_rwsem);
4483 mutex_lock(&rbd_dev->watch_mutex);
4484 if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) {
4486 rbd_dev->watch_state = RBD_WATCH_STATE_ERROR;
4488 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, 0);
4490 mutex_unlock(&rbd_dev->watch_mutex);
4498 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
4501 rbd_assert(!rbd_dev->watch_handle);
4504 handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid,
4505 &rbd_dev->header_oloc, rbd_watch_cb,
4510 rbd_dev->watch_handle = handle;
4519 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
4522 rbd_assert(rbd_dev->watch_handle);
4525 ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle);
4529 rbd_dev->watch_handle = NULL;
4536 mutex_lock(&rbd_dev->watch_mutex);
4537 rbd_assert(rbd_dev->watch_state == RBD_WATCH_STATE_UNREGISTERED);
4542 rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
4543 rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
4546 mutex_unlock(&rbd_dev->watch_mutex);
4554 cancel_work_sync(&rbd_dev->acquired_lock_work);
4555 cancel_work_sync(&rbd_dev->released_lock_work);
4556 cancel_delayed_work_sync(&rbd_dev->lock_dwork);
4557 cancel_work_sync(&rbd_dev->unlock_work);
4568 mutex_lock(&rbd_dev->watch_mutex);
4569 if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED)
4571 rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
4572 mutex_unlock(&rbd_dev->watch_mutex);
4574 cancel_delayed_work_sync(&rbd_dev->watch_dwork);
4575 ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
4583 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
4591 ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid,
4592 &rbd_dev->header_oloc, RBD_LOCK_NAME,
4593 CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie,
4596 if (ret != -EOPNOTSUPP)
4600 if (rbd_dev->opts->exclusive)
4609 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
4624 mutex_lock(&rbd_dev->watch_mutex);
4625 if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
4626 mutex_unlock(&rbd_dev->watch_mutex);
4633 if (ret != -EBLOCKLISTED && ret != -ENOENT) {
4634 queue_delayed_work(rbd_dev->task_wq,
4635 &rbd_dev->watch_dwork,
4637 mutex_unlock(&rbd_dev->watch_mutex);
4641 mutex_unlock(&rbd_dev->watch_mutex);
4642 down_write(&rbd_dev->lock_rwsem);
4644 up_write(&rbd_dev->lock_rwsem);
4648 rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
4649 rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
4650 mutex_unlock(&rbd_dev->watch_mutex);
4652 down_write(&rbd_dev->lock_rwsem);
4653 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
4655 up_write(&rbd_dev->lock_rwsem);
4675 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
4683 * also supply outbound data--parameters for the object
4689 return -E2BIG;
4693 return -ENOMEM;
4702 return -ENOMEM;
4723 struct rbd_device *rbd_dev = img_request->rbd_dev;
4724 enum obj_operation_type op_type = img_request->op_type;
4731 /* Ignore/skip any zero-length requests */
4733 dout("%s: zero-length request\n", __func__);
4740 down_read(&rbd_dev->header_rwsem);
4741 mapping_size = rbd_dev->mapping.size;
4743 up_read(&rbd_dev->header_rwsem);
4748 result = -EIO;
4759 rq->bio);
4777 struct rbd_device *rbd_dev = hctx->queue->queuedata;
4778 struct rbd_img_request *img_req = blk_mq_rq_to_pdu(bd->rq);
4781 switch (req_op(bd->rq)) {
4795 rbd_warn(rbd_dev, "unknown req_op %d", req_op(bd->rq));
4803 rbd_warn(rbd_dev, "%s on read-only mapping",
4804 obj_op_name(img_req->op_type));
4810 INIT_WORK(&img_req->work, rbd_queue_workfn);
4811 queue_work(rbd_wq, &img_req->work);
4817 put_disk(rbd_dev->disk);
4818 blk_mq_free_tag_set(&rbd_dev->tag_set);
4819 rbd_dev->disk = NULL;
4828 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
4836 return -ENOMEM;
4838 ceph_oid_copy(&req->r_base_oid, oid);
4839 ceph_oloc_copy(&req->r_base_oloc, oloc);
4840 req->r_flags = CEPH_OSD_FLAG_READ;
4868 * return, the rbd_dev->header field will contain up-to-date
4882 * The complete header will include an array of its 64-bit
4884 * a contiguous block of NUL-terminated strings. Note that
4886 * it in, in which case we re-read it.
4898 return -ENOMEM;
4900 ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid,
4901 &rbd_dev->header_oloc, ondisk, size);
4905 ret = -ENXIO;
4911 ret = -ENXIO;
4916 names_size = le64_to_cpu(ondisk->snap_names_len);
4918 snap_count = le32_to_cpu(ondisk->snap_count);
4933 * If EXISTS is not set, rbd_dev->disk may be NULL, so don't
4937 if (test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags) &&
4938 !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) {
4939 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
4941 set_capacity_and_notify(rbd_dev->disk, size);
4953 rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
4957 .io_min = rbd_dev->opts->alloc_size,
4963 memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
4964 rbd_dev->tag_set.ops = &rbd_mq_ops;
4965 rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
4966 rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
4967 rbd_dev->tag_set.nr_hw_queues = num_present_cpus();
4968 rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request);
4970 err = blk_mq_alloc_tag_set(&rbd_dev->tag_set);
4974 if (rbd_dev->opts->trim) {
4975 lim.discard_granularity = rbd_dev->opts->alloc_size;
4980 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
4983 disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev);
4989 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
4990 rbd_dev->dev_id);
4991 disk->major = rbd_dev->major;
4992 disk->first_minor = rbd_dev->minor;
4994 disk->minors = (1 << RBD_SINGLE_MAJOR_PART_SHIFT);
4996 disk->minors = RBD_MINORS_PER_MAJOR;
4997 disk->fops = &rbd_bd_ops;
4998 disk->private_data = rbd_dev;
4999 rbd_dev->disk = disk;
5003 blk_mq_free_tag_set(&rbd_dev->tag_set);
5022 (unsigned long long)rbd_dev->mapping.size);
5030 return sprintf(buf, "0x%016llx\n", rbd_dev->header.features);
5038 if (rbd_dev->major)
5039 return sprintf(buf, "%d\n", rbd_dev->major);
5049 return sprintf(buf, "%d\n", rbd_dev->minor);
5057 ceph_client_addr(rbd_dev->rbd_client->client);
5059 return sprintf(buf, "%pISpc/%u\n", &client_addr->in_addr,
5060 le32_to_cpu(client_addr->nonce));
5069 ceph_client_gid(rbd_dev->rbd_client->client));
5077 return sprintf(buf, "%pU\n", &rbd_dev->rbd_client->client->fsid);
5086 return -EPERM;
5088 return sprintf(buf, "%s\n", rbd_dev->config_info);
5096 return sprintf(buf, "%s\n", rbd_dev->spec->pool_name);
5105 (unsigned long long) rbd_dev->spec->pool_id);
5113 return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: "");
5121 if (rbd_dev->spec->image_name)
5122 return sprintf(buf, "%s\n", rbd_dev->spec->image_name);
5132 return sprintf(buf, "%s\n", rbd_dev->spec->image_id);
5136 * Shows the name of the currently-mapped snapshot (or
5145 return sprintf(buf, "%s\n", rbd_dev->spec->snap_name);
5153 return sprintf(buf, "%llu\n", rbd_dev->spec->snap_id);
5168 if (!rbd_dev->parent)
5171 for ( ; rbd_dev->parent; rbd_dev = rbd_dev->parent) {
5172 struct rbd_spec *spec = rbd_dev->parent_spec;
5181 spec->pool_id, spec->pool_name,
5182 spec->pool_ns ?: "",
5183 spec->image_id, spec->image_name ?: "(unknown)",
5184 spec->snap_id, spec->snap_name,
5185 rbd_dev->parent_overlap);
5200 return -EPERM;
5267 kref_get(&spec->kref);
5276 kref_put(&spec->kref, rbd_spec_free);
5287 spec->pool_id = CEPH_NOPOOL;
5288 spec->snap_id = CEPH_NOSNAP;
5289 kref_init(&spec->kref);
5298 kfree(spec->pool_name);
5299 kfree(spec->pool_ns);
5300 kfree(spec->image_id);
5301 kfree(spec->image_name);
5302 kfree(spec->snap_name);
5308 WARN_ON(rbd_dev->watch_state != RBD_WATCH_STATE_UNREGISTERED);
5309 WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_UNLOCKED);
5311 ceph_oid_destroy(&rbd_dev->header_oid);
5312 ceph_oloc_destroy(&rbd_dev->header_oloc);
5313 kfree(rbd_dev->config_info);
5315 rbd_put_client(rbd_dev->rbd_client);
5316 rbd_spec_put(rbd_dev->spec);
5317 kfree(rbd_dev->opts);
5324 bool need_put = !!rbd_dev->opts;
5327 destroy_workqueue(rbd_dev->task_wq);
5328 ida_free(&rbd_dev_id_ida, rbd_dev->dev_id);
5336 * doing something similar to dm (dm-builtin.c) is overkill.
5350 spin_lock_init(&rbd_dev->lock);
5351 INIT_LIST_HEAD(&rbd_dev->node);
5352 init_rwsem(&rbd_dev->header_rwsem);
5354 rbd_dev->header.data_pool_id = CEPH_NOPOOL;
5355 ceph_oid_init(&rbd_dev->header_oid);
5356 rbd_dev->header_oloc.pool = spec->pool_id;
5357 if (spec->pool_ns) {
5358 WARN_ON(!*spec->pool_ns);
5359 rbd_dev->header_oloc.pool_ns =
5360 ceph_find_or_create_string(spec->pool_ns,
5361 strlen(spec->pool_ns));
5364 mutex_init(&rbd_dev->watch_mutex);
5365 rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
5366 INIT_DELAYED_WORK(&rbd_dev->watch_dwork, rbd_reregister_watch);
5368 init_rwsem(&rbd_dev->lock_rwsem);
5369 rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
5370 INIT_WORK(&rbd_dev->acquired_lock_work, rbd_notify_acquired_lock);
5371 INIT_WORK(&rbd_dev->released_lock_work, rbd_notify_released_lock);
5372 INIT_DELAYED_WORK(&rbd_dev->lock_dwork, rbd_acquire_lock);
5373 INIT_WORK(&rbd_dev->unlock_work, rbd_release_lock_work);
5374 spin_lock_init(&rbd_dev->lock_lists_lock);
5375 INIT_LIST_HEAD(&rbd_dev->acquiring_list);
5376 INIT_LIST_HEAD(&rbd_dev->running_list);
5377 init_completion(&rbd_dev->acquire_wait);
5378 init_completion(&rbd_dev->quiescing_wait);
5380 spin_lock_init(&rbd_dev->object_map_lock);
5382 rbd_dev->dev.bus = &rbd_bus_type;
5383 rbd_dev->dev.type = &rbd_device_type;
5384 rbd_dev->dev.parent = &rbd_root_dev;
5385 device_initialize(&rbd_dev->dev);
5404 rbd_dev->dev_id = ida_alloc_max(&rbd_dev_id_ida,
5405 minor_to_rbd_dev_id(1 << MINORBITS) - 1,
5407 if (rbd_dev->dev_id < 0)
5410 sprintf(rbd_dev->name, RBD_DRV_NAME "%d", rbd_dev->dev_id);
5411 rbd_dev->task_wq = alloc_ordered_workqueue("%s-tasks", WQ_MEM_RECLAIM,
5412 rbd_dev->name);
5413 if (!rbd_dev->task_wq)
5419 rbd_dev->rbd_client = rbdc;
5420 rbd_dev->spec = spec;
5421 rbd_dev->opts = opts;
5423 dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id);
5427 ida_free(&rbd_dev_id_ida, rbd_dev->dev_id);
5436 put_device(&rbd_dev->dev);
5454 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
5455 &rbd_dev->header_oloc, "get_size",
5462 return -ERANGE;
5490 return -ENOMEM;
5492 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
5493 &rbd_dev->header_oloc, "get_object_prefix",
5533 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
5534 &rbd_dev->header_oloc, "get_features",
5541 return -ERANGE;
5547 return -ENXIO;
5562 * object map, store them in rbd_dev->object_map_flags.
5569 __le64 snapid = cpu_to_le64(rbd_dev->spec->snap_id);
5573 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
5574 &rbd_dev->header_oloc, "get_flags",
5580 return -EBADMSG;
5582 rbd_dev->object_map_flags = le64_to_cpu(flags);
5598 kfree(pii->pool_ns);
5599 kfree(pii->image_id);
5619 ceph_decode_64_safe(p, end, pii->pool_id, e_inval);
5620 pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
5621 if (IS_ERR(pii->pool_ns)) {
5622 ret = PTR_ERR(pii->pool_ns);
5623 pii->pool_ns = NULL;
5626 pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
5627 if (IS_ERR(pii->image_id)) {
5628 ret = PTR_ERR(pii->image_id);
5629 pii->image_id = NULL;
5632 ceph_decode_64_safe(p, end, pii->snap_id, e_inval);
5636 return -EINVAL;
5644 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
5649 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
5653 return ret == -EOPNOTSUPP ? 1 : ret;
5661 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
5669 ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval);
5670 if (pii->has_overlap)
5671 ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
5674 __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
5675 pii->has_overlap, pii->overlap);
5679 return -EINVAL;
5690 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
5695 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
5703 ceph_decode_64_safe(&p, end, pii->pool_id, e_inval);
5704 pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
5705 if (IS_ERR(pii->image_id)) {
5706 ret = PTR_ERR(pii->image_id);
5707 pii->image_id = NULL;
5710 ceph_decode_64_safe(&p, end, pii->snap_id, e_inval);
5711 pii->has_overlap = true;
5712 ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
5715 __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
5716 pii->has_overlap, pii->overlap);
5720 return -EINVAL;
5732 return -ENOMEM;
5737 return -ENOMEM;
5741 ceph_encode_64(&p, rbd_dev->spec->snap_id);
5760 return -ENOMEM;
5771 ret = -EIO;
5782 parent_spec->pool_id = pii.pool_id;
5784 parent_spec->pool_ns = pii.pool_ns;
5787 parent_spec->image_id = pii.image_id;
5789 parent_spec->snap_id = pii.snap_id;
5791 rbd_assert(!rbd_dev->parent_spec);
5792 rbd_dev->parent_spec = parent_spec;
5801 rbd_dev->parent_overlap = pii.overlap;
5821 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
5822 &rbd_dev->header_oloc, "get_stripe_unit_count",
5828 return -ERANGE;
5843 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
5844 &rbd_dev->header_oloc, "get_data_pool",
5851 return -EBADMSG;
5873 rbd_assert(!rbd_dev->spec->image_name);
5875 len = strlen(rbd_dev->spec->image_id);
5883 ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len);
5891 ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
5913 struct ceph_snap_context *snapc = rbd_dev->header.snapc;
5919 snap_name = rbd_dev->header.snap_names;
5920 while (which < snapc->num_snaps) {
5922 return snapc->snaps[which];
5931 struct ceph_snap_context *snapc = rbd_dev->header.snapc;
5936 for (which = 0; !found && which < snapc->num_snaps; which++) {
5939 snap_id = snapc->snaps[which];
5942 /* ignore no-longer existing snapshots */
5943 if (PTR_ERR(snap_name) == -ENOENT)
5960 if (rbd_dev->image_format == 1)
5971 struct rbd_spec *spec = rbd_dev->spec;
5973 rbd_assert(spec->pool_id != CEPH_NOPOOL && spec->pool_name);
5974 rbd_assert(spec->image_id && spec->image_name);
5975 rbd_assert(spec->snap_name);
5977 if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
5980 snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
5982 return -ENOENT;
5984 spec->snap_id = snap_id;
5986 spec->snap_id = CEPH_NOSNAP;
6000 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
6001 struct rbd_spec *spec = rbd_dev->spec;
6007 rbd_assert(spec->pool_id != CEPH_NOPOOL);
6008 rbd_assert(spec->image_id);
6009 rbd_assert(spec->snap_id != CEPH_NOSNAP);
6013 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id);
6015 rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id);
6016 return -EIO;
6020 return -ENOMEM;
6030 snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
6036 spec->pool_name = pool_name;
6037 spec->image_name = image_name;
6038 spec->snap_name = snap_name;
6071 return -ENOMEM;
6073 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
6074 &rbd_dev->header_oloc, "get_snapcontext",
6082 ret = -ERANGE;
6092 if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context))
6094 ret = -EINVAL;
6103 ret = -ENOMEM;
6106 snapc->seq = seq;
6108 snapc->snaps[i] = ceph_decode_64(&p);
6133 return ERR_PTR(-ENOMEM);
6136 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
6137 &rbd_dev->header_oloc, "get_snapshot_name",
6166 first_time ? &header->obj_order : NULL,
6167 &header->image_size);
6177 ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
6188 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
6189 rbd_assert(!header->object_prefix && !header->snapc);
6191 if (rbd_dev->image_format == 1)
6199 * first found non-space character (if any). Returns the length of
6200 * the token (string of non-white space characters) found. Note
6209 static const char spaces[] = " \f\n\r\t\v";
6220 * that a duplicate buffer is created even for a zero-length token.
6222 * Returns a pointer to the newly-allocated duplicate, or a null
6224 * the lenp argument is a non-null pointer, the length of the token
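
next_token() is a strspn()/strcspn() pair: skip the locale-independent whitespace set at 6209, then return the token length with *buf left at the token's first byte. A standalone version (editor's illustration, userspace C):

    #include <stdio.h>
    #include <string.h>

    static size_t next_token(const char **buf)
    {
            static const char spaces[] = " \f\n\r\t\v";

            *buf += strspn(*buf, spaces);   /* find start of token */
            return strcspn(*buf, spaces);   /* return token length */
    }

    int main(void)
    {
            const char *p = "  rbd myimage  snap1";
            size_t len;

            while ((len = next_token(&p)) != 0) {
                    printf("token: %.*s\n", (int)len, p);
                    p += len;
            }
            return 0;
    }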
6253 struct rbd_options *opt = pctx->opts;
6258 ret = ceph_parse_param(param, pctx->copts, NULL);
6259 if (ret != -ENOPARAM)
6263 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
6265 if (token == -ENOPARAM)
6267 param->key);
6275 opt->queue_depth = result.uint_32;
6282 opt->alloc_size = result.uint_32;
6288 opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000);
6291 kfree(pctx->spec->pool_ns);
6292 pctx->spec->pool_ns = param->string;
6293 param->string = NULL;
6298 opt->alloc_hint_flags &=
6303 opt->alloc_hint_flags |=
6305 opt->alloc_hint_flags &=
6309 opt->alloc_hint_flags |=
6311 opt->alloc_hint_flags &=
6319 opt->read_only = true;
6322 opt->read_only = false;
6325 opt->lock_on_read = true;
6328 opt->exclusive = true;
6331 opt->trim = false;
6340 return inval_plog(&log, "%s out of range", param->key);
6370 return -ENOMEM;
6388 * and the data written is passed here via a NUL-terminated buffer.
6392 * the other parameters which return dynamically-allocated
6410 * A comma-separated list of one or more monitor addresses.
6415 * A comma-separated list of ceph and/or rbd options.
6424 * provided. Snapshot mappings are always read-only.
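
Pieced together from 6388-6424, the buffer written to /sys/bus/rbd/add is "<mon_addrs> <options> <pool> <image> [<snap>]" (see the sysfs-bus-rbd ABI document cited at the top of the file). For illustration (editor's example; the monitor address and key are placeholders):

    $ echo "192.168.0.1:6789 name=admin,secret=<key> rbd myimage" > /sys/bus/rbd/add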
6444 return -EINVAL;
6450 ret = -EINVAL;
6453 return -ENOMEM;
6463 pctx.spec->pool_name = dup_token(&buf, NULL);
6464 if (!pctx.spec->pool_name)
6466 if (!*pctx.spec->pool_name) {
6471 pctx.spec->image_name = dup_token(&buf, NULL);
6472 if (!pctx.spec->image_name)
6474 if (!*pctx.spec->image_name) {
6480 * Snapshot name is optional; default is to use "-"
6486 len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
6488 ret = -ENAMETOOLONG;
6495 pctx.spec->snap_name = snap_name;
6507 pctx.opts->read_only = RBD_READ_ONLY_DEFAULT;
6508 pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
6509 pctx.opts->alloc_size = RBD_ALLOC_SIZE_DEFAULT;
6510 pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
6511 pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
6512 pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
6513 pctx.opts->trim = RBD_TRIM_DEFAULT;
6531 ret = -ENOMEM;
6542 down_write(&rbd_dev->lock_rwsem);
6545 up_write(&rbd_dev->lock_rwsem);
6557 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
6558 if (!rbd_dev->opts->exclusive && !rbd_dev->opts->lock_on_read)
6561 rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
6562 return -EINVAL;
6569 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
6570 ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait,
6571 ceph_timeout_jiffies(rbd_dev->opts->lock_timeout));
6573 ret = rbd_dev->acquire_err;
6575 cancel_delayed_work_sync(&rbd_dev->lock_dwork);
6577 ret = -ETIMEDOUT;
6615 if (rbd_dev->spec->image_id) {
6616 rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1;
6626 rbd_dev->spec->image_name);
6636 ret = -ENOMEM;
6642 ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
6646 if (ret == -ENOENT) {
6648 ret = image_id ? 0 : -ENOMEM;
6650 rbd_dev->image_format = 1;
6658 rbd_dev->image_format = 2;
6662 rbd_dev->spec->image_id = image_id;
6683 rbd_image_header_cleanup(&rbd_dev->header);
6691 ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
6700 rbd_is_ro(rbd_dev), &header->features);
6706 if (header->features & RBD_FEATURE_STRIPINGV2) {
6707 ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
6708 &header->stripe_count);
6713 if (header->features & RBD_FEATURE_DATA_POOL) {
6714 ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
6723 * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
6732 if (!rbd_dev->parent_spec)
6737 ret = -EINVAL;
6741 parent = __rbd_dev_create(rbd_dev->parent_spec);
6743 ret = -ENOMEM;
6751 parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client);
6752 parent->spec = rbd_spec_get(rbd_dev->parent_spec);
6754 __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
6760 rbd_dev->parent = parent;
6761 atomic_set(&rbd_dev->parent_ref, 1);
6772 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
6775 unregister_blkdev(rbd_dev->major, rbd_dev->name);
6779 * rbd_dev->header_rwsem must be locked for write and will be unlocked
6789 ret = register_blkdev(0, rbd_dev->name);
6793 rbd_dev->major = ret;
6794 rbd_dev->minor = 0;
6796 rbd_dev->major = rbd_major;
6797 rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id);
6806 set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
6807 set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev));
6809 ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
6813 set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
6814 up_write(&rbd_dev->header_rwsem);
6821 unregister_blkdev(rbd_dev->major, rbd_dev->name);
6823 up_write(&rbd_dev->header_rwsem);
6829 struct rbd_spec *spec = rbd_dev->spec;
6834 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
6835 if (rbd_dev->image_format == 1)
6836 ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
6837 spec->image_name, RBD_SUFFIX);
6839 ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
6840 RBD_HEADER_PREFIX, spec->image_id);
6849 rbd_dev->spec->pool_name,
6850 rbd_dev->spec->pool_ns ?: "",
6851 rbd_dev->spec->pool_ns ? "/" : "",
6852 rbd_dev->spec->image_name);
6855 rbd_dev->spec->pool_name,
6856 rbd_dev->spec->pool_ns ?: "",
6857 rbd_dev->spec->pool_ns ? "/" : "",
6858 rbd_dev->spec->image_name,
6859 rbd_dev->spec->snap_name);
6869 rbd_dev->image_format = 0;
6870 kfree(rbd_dev->spec->image_id);
6871 rbd_dev->spec->image_id = NULL;
6890 * error, rbd_dev->spec->image_id will be filled in with
6891 * a dynamically-allocated string, and rbd_dev->image_format
6905 if (ret == -ENOENT)
6912 down_write(&rbd_dev->header_rwsem);
6914 ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
6916 if (ret == -ENOENT && !need_watch)
6925 * id, image name and id, and snap name - need to fill snap id.
6927 * and snap ids - need to fill in names for those ids.
6934 if (ret == -ENOENT)
6944 (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) {
6950 if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
6961 rbd_dev->image_format, rbd_dev->header_oid.name);
6966 up_write(&rbd_dev->header_rwsem);
6971 rbd_dev->image_format = 0;
6972 kfree(rbd_dev->spec->image_id);
6973 rbd_dev->spec->image_id = NULL;
6980 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
6981 rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
6983 if (rbd_dev->header.image_size != header->image_size) {
6984 rbd_dev->header.image_size = header->image_size;
6987 rbd_dev->mapping.size = header->image_size;
6992 ceph_put_snap_context(rbd_dev->header.snapc);
6993 rbd_dev->header.snapc = header->snapc;
6994 header->snapc = NULL;
6996 if (rbd_dev->image_format == 1) {
6997 kfree(rbd_dev->header.snap_names);
6998 rbd_dev->header.snap_names = header->snap_names;
6999 header->snap_names = NULL;
7001 kfree(rbd_dev->header.snap_sizes);
7002 rbd_dev->header.snap_sizes = header->snap_sizes;
7003 header->snap_sizes = NULL;
7010 if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
7024 if (rbd_dev->parent_overlap) {
7025 rbd_dev->parent_overlap = 0;
7028 rbd_dev->disk->disk_name);
7031 rbd_assert(rbd_dev->parent_spec);
7037 if (!pii->overlap && rbd_dev->parent_overlap)
7040 rbd_dev->parent_overlap = pii->overlap;
7060 if (rbd_dev->parent) {
7066 down_write(&rbd_dev->header_rwsem);
7068 if (rbd_dev->parent)
7070 up_write(&rbd_dev->header_rwsem);
7088 return -EPERM;
7091 return -ENODEV;
7105 rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name);
7107 if (rc == -ENOENT)
7108 pr_info("pool %s does not exist\n", spec->pool_name);
7111 spec->pool_id = (u64)rc;
7115 rc = -ENOMEM;
7122 /* if we are mapping a snapshot it will be a read-only mapping */
7123 if (rbd_dev->opts->read_only ||
7124 strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME))
7125 __set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
7127 rbd_dev->config_info = kstrdup(buf, GFP_KERNEL);
7128 if (!rbd_dev->config_info) {
7129 rc = -ENOMEM;
7137 if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
7139 rbd_dev->layout.object_size);
7140 rbd_dev->opts->alloc_size = rbd_dev->layout.object_size;
7153 rc = device_add(&rbd_dev->dev);
7157 rc = device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL);
7162 list_add_tail(&rbd_dev->node, &rbd_dev_list);
7165 pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
7166 (unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
7167 rbd_dev->header.features);
7193 return -EINVAL;
7206 while (rbd_dev->parent) {
7208 struct rbd_device *second = first->parent;
7215 while (second && (third = second->parent)) {
7222 first->parent = NULL;
7223 first->parent_overlap = 0;
7225 rbd_assert(first->parent_spec);
7226 rbd_spec_put(first->parent_spec);
7227 first->parent_spec = NULL;
7240 return -EPERM;
7242 dev_id = -1;
7247 return -EINVAL;
7254 return -EINVAL;
7258 ret = -ENOENT;
7261 if (rbd_dev->dev_id == dev_id) {
7267 spin_lock_irq(&rbd_dev->lock);
7268 if (rbd_dev->open_count && !force)
7269 ret = -EBUSY;
7271 &rbd_dev->flags))
7272 ret = -EINPROGRESS;
7273 spin_unlock_irq(&rbd_dev->lock);
7284 unsigned int memflags = blk_mq_freeze_queue(rbd_dev->disk->queue);
7286 blk_mark_disk_dead(rbd_dev->disk);
7287 blk_mq_unfreeze_queue(rbd_dev->disk->queue, memflags);
7290 del_gendisk(rbd_dev->disk);
7292 list_del_init(&rbd_dev->node);
7294 device_del(&rbd_dev->dev);
7306 return -EINVAL;
7349 return -ENOMEM;
7361 return -ENOMEM;
7381 return -EINVAL;
7394 rc = -ENOMEM;