1 #include "jemalloc/internal/jemalloc_preamble.h"
2 #include "jemalloc/internal/jemalloc_internal_includes.h"
3
4 #include "jemalloc/internal/assert.h"
5 #include "jemalloc/internal/san.h"
6 #include "jemalloc/internal/mutex.h"
7 #include "jemalloc/internal/rtree.h"
8
9 /******************************************************************************/
10 /* Data. */
11
/* TSD_INITIALIZER triggers "-Wmissing-field-initializers" */
13 JEMALLOC_DIAGNOSTIC_PUSH
14 JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
15
/* One of four TSD backends is selected at configure time. */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	/* Threads currently inside tsd initialization (see tsd_init_check_recursion()). */
	ql_head(tsd_init_block_t) blocks;
	/* Protects blocks. */
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif
52
53 JEMALLOC_DIAGNOSTIC_POP
54
55 /******************************************************************************/
56
/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
/* Protects tsd_nominal_tsds; initialized in malloc_tsd_boot0(). */
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);
64
65 static bool
tsd_in_nominal_list(tsd_t * tsd)66 tsd_in_nominal_list(tsd_t *tsd) {
67 tsd_t *tsd_list;
68 bool found = false;
69 /*
70 * We don't know that tsd is nominal; it might not be safe to get data
71 * out of it here.
72 */
73 malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
74 ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
75 if (tsd == tsd_list) {
76 found = true;
77 break;
78 }
79 }
80 malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
81 return found;
82 }
83
/* Append tsd to the global nominal list; it must not already be present. */
static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	/* Initialize the link field before publishing the element under the lock. */
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
93
/* Unlink tsd from the global nominal list; it must currently be present. */
static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
102
/*
 * Mark every tsd on the nominal list with tsd_state_nominal_recompute, so
 * each owning thread re-derives its state (see tsd_slow_update()).
 */
static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}
123
/* Turn on one global slow-path-enabling feature. */
void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive.  If we didn't, then it would be possible for us
	 * to return to the user, have the user synchronize externally with some
	 * other thread, and then have that other thread not have picked up the
	 * update yet (since the original incrementing thread might still be
	 * making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}
137
/* Turn off one global slow-path-enabling feature.  Pairs with ..._inc(). */
void
tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}
143
144 static bool
tsd_local_slow(tsd_t * tsd)145 tsd_local_slow(tsd_t *tsd) {
146 return !tsd_tcache_enabled_get(tsd)
147 || tsd_reentrancy_level_get(tsd) > 0;
148 }
149
150 bool
tsd_global_slow()151 tsd_global_slow() {
152 return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
153 }
154
155 /******************************************************************************/
156
157 static uint8_t
tsd_state_compute(tsd_t * tsd)158 tsd_state_compute(tsd_t *tsd) {
159 if (!tsd_nominal(tsd)) {
160 return tsd_state_get(tsd);
161 }
162 /* We're in *a* nominal state; but which one? */
163 if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
164 return tsd_state_nominal_slow;
165 } else {
166 return tsd_state_nominal;
167 }
168 }
169
/*
 * Recompute and publish this thread's state, retrying for as long as a
 * concurrent tsd_force_recompute() keeps resetting the state to
 * tsd_state_nominal_recompute.
 */
void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		/* The acquire synchronizes with the stores in tsd_force_recompute(). */
		old_state = tsd_atomic_exchange(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);

	te_recompute_fast_threshold(tsd);
}
181
/*
 * Transition tsd to new_state, maintaining the invariant that a tsd is on
 * the nominal list iff its state is <= tsd_state_nominal_max.
 */
void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal.  If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			tsd_atomic_store(&tsd->state, new_state,
			    ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case.  We're transitioning from
			 * one nominal state to another.  The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
	te_recompute_fast_threshold(tsd);
}
220
221 static void
tsd_prng_state_init(tsd_t * tsd)222 tsd_prng_state_init(tsd_t *tsd) {
223 /*
224 * A nondeterministic seed based on the address of tsd reduces
225 * the likelihood of lockstep non-uniform cache index
226 * utilization among identical concurrent processes, but at the
227 * cost of test repeatability. For debug builds, instead use a
228 * deterministic seed.
229 */
230 *tsd_prng_statep_get(tsd) = config_debug ? 0 :
231 (uint64_t)(uintptr_t)tsd;
232 }
233
/*
 * Fully initialize per-thread data.  Returns whatever
 * tsd_tcache_enabled_data_init() returns.
 */
static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}
246
/* Verify that tsd carries no state that would require cleanup. */
static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}
256
/* Initialize tsd so that it never needs a cleanup pass.  Returns false. */
static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	/* Nonzero reentrancy level keeps this thread slow (see tsd_local_slow()). */
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);

	return false;
}
276
/*
 * Slow-path counterpart of tsd_fetch(): handles every state other than fast
 * nominal, performing whatever transition and (re)initialization is needed.
 * If minimal is true, only a minimally initialized tsd is produced.
 */
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed.  Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		/*
		 * Allocation after this thread's destructor already ran;
		 * reincarnate with no-cleanup data, since the destructor is
		 * not guaranteed to run again.
		 */
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}
325
/* Allocate TSD support memory, rounded up to a cacheline multiple. */
void *
malloc_tsd_malloc(size_t size) {
	size_t alloc_size = CACHELINE_CEILING(size);
	return a0malloc(alloc_size);
}
330
/* Free memory obtained from malloc_tsd_malloc(). */
void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}
335
#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
/* Number of registered cleanup callbacks; reset in malloc_tsd_boot0(). */
static unsigned ncleanups;
/* Registered callbacks; see _malloc_tsd_cleanup_register(). */
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
339
340 #ifndef _WIN32
341 JEMALLOC_EXPORT
342 #endif
343 void
_malloc_thread_cleanup(void)344 _malloc_thread_cleanup(void) {
345 bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
346 unsigned i;
347
348 for (i = 0; i < ncleanups; i++) {
349 pending[i] = true;
350 }
351
352 do {
353 again = false;
354 for (i = 0; i < ncleanups; i++) {
355 if (pending[i]) {
356 pending[i] = cleanups[i]();
357 if (pending[i]) {
358 again = true;
359 }
360 }
361 }
362 } while (again);
363 }
364
365 #ifndef _WIN32
366 JEMALLOC_EXPORT
367 #endif
368 void
_malloc_tsd_cleanup_register(bool (* f)(void))369 _malloc_tsd_cleanup_register(bool (*f)(void)) {
370 assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
371 cleanups[ncleanups] = f;
372 ncleanups++;
373 }
374
375 #endif
376
/*
 * Release all per-thread data.  The reentrancy level is left at 1 so that
 * any allocator use after this point stays off the fast path
 * (see tsd_local_slow()).
 */
static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}
386
/*
 * TSD destructor.  May run more than once for a thread (other destructors
 * can reallocate and thereby recreate tsd after ours runs), which is why
 * the purgatory state exists.
 */
void
tsd_cleanup(void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	switch (tsd_state_get(tsd)) {
	case tsd_state_uninitialized:
		/* Do nothing. */
		break;
	case tsd_state_minimal_initialized:
		/* This implies the thread only did free() in its life time. */
		/* Fall through. */
	case tsd_state_reincarnated:
		/*
		 * Reincarnated means another destructor deallocated memory
		 * after the destructor was called.  Cleanup isn't required but
		 * is still called for testing and completeness.
		 */
		assert_tsd_data_cleanup_done(tsd);
		JEMALLOC_FALLTHROUGH;
	case tsd_state_nominal:
	case tsd_state_nominal_slow:
		tsd_do_data_cleanup(tsd);
		tsd_state_set(tsd, tsd_state_purgatory);
		/* Re-store tsd so a later destructor round observes purgatory. */
		tsd_set(tsd);
		break;
	case tsd_state_purgatory:
		/*
		 * The previous time this destructor was called, we set the
		 * state to tsd_state_purgatory so that other destructors
		 * wouldn't cause re-creation of the tsd.  This time, do
		 * nothing, and do not request another callback.
		 */
		break;
	default:
		not_reached();
	}
#ifdef JEMALLOC_JET
	/* Test hook: invoke the per-thread callback, if one was installed. */
	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
	int *data = tsd_test_datap_get_unsafe(tsd);
	if (test_callback != NULL) {
		test_callback(data);
	}
#endif
}
431
432 tsd_t *
malloc_tsd_boot0(void)433 malloc_tsd_boot0(void) {
434 tsd_t *tsd;
435
436 #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
437 ncleanups = 0;
438 #endif
439 if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
440 WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
441 return NULL;
442 }
443 if (tsd_boot0()) {
444 return NULL;
445 }
446 tsd = tsd_fetch();
447 return tsd;
448 }
449
/* Second-phase TSD bootstrap. */
void
malloc_tsd_boot1(void) {
	tsd_boot1();
	/* malloc_slow has been set properly.  Update tsd_slow. */
	tsd_slow_update(tsd_fetch());
}
457
458 #ifdef _WIN32
/* Loader-invoked TLS callback; see the .CRT$XLY registration below. */
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		/* A new thread attaching implies the process is multi-threaded. */
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		/* Run the registered cleanups when a thread exits. */
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}
475
/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read".  We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
/* Place a pointer to _tls_callback in the CRT's TLS callback section. */
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
499
500 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
501 !defined(_WIN32))
502 void *
tsd_init_check_recursion(tsd_init_head_t * head,tsd_init_block_t * block)503 tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
504 pthread_t self = pthread_self();
505 tsd_init_block_t *iter;
506
507 /* Check whether this thread has already inserted into the list. */
508 malloc_mutex_lock(TSDN_NULL, &head->lock);
509 ql_foreach(iter, &head->blocks, link) {
510 if (iter->thread == self) {
511 malloc_mutex_unlock(TSDN_NULL, &head->lock);
512 return iter->data;
513 }
514 }
515 /* Insert block into list. */
516 ql_elm_new(block, link);
517 block->thread = self;
518 ql_tail_insert(&head->blocks, block, link);
519 malloc_mutex_unlock(TSDN_NULL, &head->lock);
520 return NULL;
521 }
522
/* Remove this thread's init block once its tsd initialization is complete. */
void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
529 #endif
530
/* Acquire the nominal-list lock ahead of fork(). */
void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
535
/* Release the nominal-list lock in the parent after fork(). */
void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
540
/*
 * Reset the nominal-list lock and list in the child after fork(); only the
 * forking thread survives, so re-add just this tsd if it is nominal.
 */
void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_new(&tsd_nominal_tsds);

	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}
550