1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* AFS volume management 3 * 4 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/slab.h> 10 #include "internal.h" 11 12 static unsigned __read_mostly afs_volume_record_life = 60 * 60; 13 static atomic_t afs_volume_debug_id; 14 15 static void afs_destroy_volume(struct work_struct *work); 16 17 /* 18 * Insert a volume into a cell. If there's an existing volume record, that is 19 * returned instead with a ref held. 20 */ 21 static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, 22 struct afs_volume *volume) 23 { 24 struct afs_volume *p; 25 struct rb_node *parent = NULL, **pp; 26 27 write_seqlock(&cell->volume_lock); 28 29 pp = &cell->volumes.rb_node; 30 while (*pp) { 31 parent = *pp; 32 p = rb_entry(parent, struct afs_volume, cell_node); 33 if (p->vid < volume->vid) { 34 pp = &(*pp)->rb_left; 35 } else if (p->vid > volume->vid) { 36 pp = &(*pp)->rb_right; 37 } else { 38 if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) { 39 volume = p; 40 goto found; 41 } 42 43 set_bit(AFS_VOLUME_RM_TREE, &volume->flags); 44 rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes); 45 } 46 } 47 48 rb_link_node_rcu(&volume->cell_node, parent, pp); 49 rb_insert_color(&volume->cell_node, &cell->volumes); 50 hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes); 51 52 found: 53 write_sequnlock(&cell->volume_lock); 54 return volume; 55 56 } 57 58 static void afs_remove_volume_from_cell(struct afs_volume *volume) 59 { 60 struct afs_cell *cell = volume->cell; 61 62 if (!hlist_unhashed(&volume->proc_link)) { 63 trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref), 64 afs_volume_trace_remove); 65 write_seqlock(&cell->volume_lock); 66 hlist_del_rcu(&volume->proc_link); 67 if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags)) 68 rb_erase(&volume->cell_node, &cell->volumes); 69 write_sequnlock(&cell->volume_lock); 70 } 71 } 72 73 /* 74 * Allocate a volume record and load it up from a vldb record. 75 */ 76 static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, 77 struct afs_vldb_entry *vldb, 78 struct afs_server_list **_slist) 79 { 80 struct afs_server_list *slist; 81 struct afs_volume *volume; 82 int ret = -ENOMEM, i; 83 84 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); 85 if (!volume) 86 goto error_0; 87 88 volume->debug_id = atomic_inc_return(&afs_volume_debug_id); 89 volume->vid = vldb->vid[params->type]; 90 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; 91 volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); 92 volume->type = params->type; 93 volume->type_force = params->force; 94 volume->name_len = vldb->name_len; 95 volume->creation_time = TIME64_MIN; 96 volume->update_time = TIME64_MIN; 97 98 refcount_set(&volume->ref, 1); 99 INIT_HLIST_NODE(&volume->proc_link); 100 INIT_WORK(&volume->destructor, afs_destroy_volume); 101 rwlock_init(&volume->servers_lock); 102 mutex_init(&volume->volsync_lock); 103 mutex_init(&volume->cb_check_lock); 104 rwlock_init(&volume->cb_v_break_lock); 105 INIT_LIST_HEAD(&volume->open_mmaps); 106 init_rwsem(&volume->open_mmaps_lock); 107 memcpy(volume->name, vldb->name, vldb->name_len + 1); 108 109 for (i = 0; i < AFS_MAXTYPES; i++) 110 volume->vids[i] = vldb->vid[i]; 111 112 slist = afs_alloc_server_list(volume, params->key, vldb); 113 if (IS_ERR(slist)) { 114 ret = PTR_ERR(slist); 115 goto error_1; 116 } 117 118 *_slist = slist; 119 rcu_assign_pointer(volume->servers, slist); 120 trace_afs_volume(volume->debug_id, volume->vid, 1, afs_volume_trace_alloc); 121 return volume; 122 123 error_1: 124 afs_put_cell(volume->cell, afs_cell_trace_put_vol); 125 kfree(volume); 126 error_0: 127 return ERR_PTR(ret); 128 } 129 130 /* 131 * Look up or allocate a volume record. 132 */ 133 static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, 134 struct afs_vldb_entry *vldb) 135 { 136 struct afs_server_list *slist; 137 struct afs_volume *candidate, *volume; 138 139 candidate = afs_alloc_volume(params, vldb, &slist); 140 if (IS_ERR(candidate)) 141 return candidate; 142 143 volume = afs_insert_volume_into_cell(params->cell, candidate); 144 if (volume == candidate) 145 afs_attach_volume_to_servers(volume, slist); 146 else 147 afs_put_volume(candidate, afs_volume_trace_put_cell_dup); 148 return volume; 149 } 150 151 /* 152 * Look up a VLDB record for a volume. 153 */ 154 static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, 155 struct key *key, 156 const char *volname, 157 size_t volnamesz) 158 { 159 struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ); 160 struct afs_vl_cursor vc; 161 int ret; 162 163 if (!afs_begin_vlserver_operation(&vc, cell, key)) 164 return ERR_PTR(-ERESTARTSYS); 165 166 while (afs_select_vlserver(&vc)) { 167 vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); 168 } 169 170 ret = afs_end_vlserver_operation(&vc); 171 return ret < 0 ? ERR_PTR(ret) : vldb; 172 } 173 174 /* 175 * Look up a volume in the VL server and create a candidate volume record for 176 * it. 177 * 178 * The volume name can be one of the following: 179 * "%[cell:]volume[.]" R/W volume 180 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), 181 * or R/W (rwparent=1) volume 182 * "%[cell:]volume.readonly" R/O volume 183 * "#[cell:]volume.readonly" R/O volume 184 * "%[cell:]volume.backup" Backup volume 185 * "#[cell:]volume.backup" Backup volume 186 * 187 * The cell name is optional, and defaults to the current cell. 188 * 189 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin 190 * Guide 191 * - Rule 1: Explicit type suffix forces access of that type or nothing 192 * (no suffix, then use Rule 2 & 3) 193 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W 194 * if not available 195 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless 196 * explicitly told otherwise 197 */ 198 struct afs_volume *afs_create_volume(struct afs_fs_context *params) 199 { 200 struct afs_vldb_entry *vldb; 201 struct afs_volume *volume; 202 unsigned long type_mask = 1UL << params->type; 203 204 vldb = afs_vl_lookup_vldb(params->cell, params->key, 205 params->volname, params->volnamesz); 206 if (IS_ERR(vldb)) 207 return ERR_CAST(vldb); 208 209 if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { 210 volume = ERR_PTR(vldb->error); 211 goto error; 212 } 213 214 /* Make the final decision on the type we want */ 215 volume = ERR_PTR(-ENOMEDIUM); 216 if (params->force) { 217 if (!(vldb->flags & type_mask)) 218 goto error; 219 } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { 220 params->type = AFSVL_ROVOL; 221 } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { 222 params->type = AFSVL_RWVOL; 223 } else { 224 goto error; 225 } 226 227 volume = afs_lookup_volume(params, vldb); 228 229 error: 230 kfree(vldb); 231 return volume; 232 } 233 234 /* 235 * Destroy a volume record 236 */ 237 static void afs_destroy_volume(struct work_struct *work) 238 { 239 struct afs_volume *volume = container_of(work, struct afs_volume, destructor); 240 struct afs_server_list *slist = rcu_access_pointer(volume->servers); 241 242 _enter("%p", volume); 243 244 #ifdef CONFIG_AFS_FSCACHE 245 ASSERTCMP(volume->cache, ==, NULL); 246 #endif 247 248 afs_detach_volume_from_servers(volume, slist); 249 afs_remove_volume_from_cell(volume); 250 afs_put_serverlist(volume->cell->net, slist); 251 afs_put_cell(volume->cell, afs_cell_trace_put_vol); 252 trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref), 253 afs_volume_trace_free); 254 kfree_rcu(volume, rcu); 255 256 _leave(" [destroyed]"); 257 } 258 259 /* 260 * Try to get a reference on a volume record. 261 */ 262 bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason) 263 { 264 int r; 265 266 if (__refcount_inc_not_zero(&volume->ref, &r)) { 267 trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason); 268 return true; 269 } 270 return false; 271 } 272 273 /* 274 * Get a reference on a volume record. 275 */ 276 struct afs_volume *afs_get_volume(struct afs_volume *volume, 277 enum afs_volume_trace reason) 278 { 279 if (volume) { 280 int r; 281 282 __refcount_inc(&volume->ref, &r); 283 trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason); 284 } 285 return volume; 286 } 287 288 289 /* 290 * Drop a reference on a volume record. 291 */ 292 void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason) 293 { 294 if (volume) { 295 unsigned int debug_id = volume->debug_id; 296 afs_volid_t vid = volume->vid; 297 bool zero; 298 int r; 299 300 zero = __refcount_dec_and_test(&volume->ref, &r); 301 trace_afs_volume(debug_id, vid, r - 1, reason); 302 if (zero) 303 schedule_work(&volume->destructor); 304 } 305 } 306 307 /* 308 * Activate a volume. 309 */ 310 int afs_activate_volume(struct afs_volume *volume) 311 { 312 #ifdef CONFIG_AFS_FSCACHE 313 struct fscache_volume *vcookie; 314 char *name; 315 316 name = kasprintf(GFP_KERNEL, "afs,%s,%llx", 317 volume->cell->name, volume->vid); 318 if (!name) 319 return -ENOMEM; 320 321 vcookie = fscache_acquire_volume(name, NULL, NULL, 0); 322 if (IS_ERR(vcookie)) { 323 if (vcookie != ERR_PTR(-EBUSY)) { 324 kfree(name); 325 return PTR_ERR(vcookie); 326 } 327 pr_err("AFS: Cache volume key already in use (%s)\n", name); 328 vcookie = NULL; 329 } 330 volume->cache = vcookie; 331 kfree(name); 332 #endif 333 return 0; 334 } 335 336 /* 337 * Deactivate a volume. 338 */ 339 void afs_deactivate_volume(struct afs_volume *volume) 340 { 341 _enter("%s", volume->name); 342 343 #ifdef CONFIG_AFS_FSCACHE 344 fscache_relinquish_volume(volume->cache, NULL, 345 test_bit(AFS_VOLUME_DELETED, &volume->flags)); 346 volume->cache = NULL; 347 #endif 348 349 _leave(""); 350 } 351 352 /* 353 * Query the VL service to update the volume status. 354 */ 355 static int afs_update_volume_status(struct afs_volume *volume, struct key *key) 356 { 357 struct afs_server_list *new, *old, *discard; 358 struct afs_vldb_entry *vldb; 359 char idbuf[24]; 360 int ret, idsz; 361 362 _enter(""); 363 364 /* We look up an ID by passing it as a decimal string in the 365 * operation's name parameter. 366 */ 367 idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); 368 369 vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); 370 if (IS_ERR(vldb)) { 371 ret = PTR_ERR(vldb); 372 goto error; 373 } 374 375 /* See if the volume got renamed. */ 376 if (vldb->name_len != volume->name_len || 377 memcmp(vldb->name, volume->name, vldb->name_len) != 0) { 378 /* TODO: Use RCU'd string. */ 379 memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); 380 volume->name_len = vldb->name_len; 381 } 382 383 /* See if the volume's server list got updated. */ 384 new = afs_alloc_server_list(volume, key, vldb); 385 if (IS_ERR(new)) { 386 ret = PTR_ERR(new); 387 goto error_vldb; 388 } 389 390 write_lock(&volume->servers_lock); 391 392 discard = new; 393 old = rcu_dereference_protected(volume->servers, 394 lockdep_is_held(&volume->servers_lock)); 395 if (afs_annotate_server_list(new, old)) { 396 new->seq = volume->servers_seq + 1; 397 rcu_assign_pointer(volume->servers, new); 398 smp_wmb(); 399 volume->servers_seq++; 400 discard = old; 401 } 402 403 /* Check more often if replication is ongoing. */ 404 if (new->ro_replicating) 405 volume->update_at = ktime_get_real_seconds() + 10 * 60; 406 else 407 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; 408 write_unlock(&volume->servers_lock); 409 410 if (discard == old) 411 afs_reattach_volume_to_servers(volume, new, old); 412 afs_put_serverlist(volume->cell->net, discard); 413 ret = 0; 414 error_vldb: 415 kfree(vldb); 416 error: 417 _leave(" = %d", ret); 418 return ret; 419 } 420 421 /* 422 * Make sure the volume record is up to date. 423 */ 424 int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op) 425 { 426 int ret, retries = 0; 427 428 _enter(""); 429 430 retry: 431 if (test_bit(AFS_VOLUME_WAIT, &volume->flags)) 432 goto wait; 433 if (volume->update_at <= ktime_get_real_seconds() || 434 test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags)) 435 goto update; 436 _leave(" = 0"); 437 return 0; 438 439 update: 440 if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { 441 clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); 442 ret = afs_update_volume_status(volume, op->key); 443 if (ret < 0) 444 set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); 445 clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); 446 clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); 447 wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); 448 _leave(" = %d", ret); 449 return ret; 450 } 451 452 wait: 453 if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { 454 _leave(" = 0 [no wait]"); 455 return 0; 456 } 457 458 ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, 459 (op->flags & AFS_OPERATION_UNINTR) ? 460 TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); 461 if (ret == -ERESTARTSYS) { 462 _leave(" = %d", ret); 463 return ret; 464 } 465 466 retries++; 467 if (retries == 4) { 468 _leave(" = -ESTALE"); 469 return -ESTALE; 470 } 471 goto retry; 472 } 473