xref: /src/contrib/jemalloc/include/jemalloc/internal/hpdata.h (revision c43cad87172039ccf38172129c79755ea79e6102)
1 #ifndef JEMALLOC_INTERNAL_HPDATA_H
2 #define JEMALLOC_INTERNAL_HPDATA_H
3 
4 #include "jemalloc/internal/fb.h"
5 #include "jemalloc/internal/ph.h"
6 #include "jemalloc/internal/ql.h"
7 #include "jemalloc/internal/typed_list.h"
8 
9 /*
10  * The metadata representation we use for extents in hugepages.  While the PAC
11  * uses the edata_t to represent both active and inactive extents, the HP only
12  * uses the edata_t for active ones; instead, inactive extent state is tracked
13  * within hpdata associated with the enclosing hugepage-sized, hugepage-aligned
14  * region of virtual address space.
15  *
16  * An hpdata need not be "truly" backed by a hugepage (which is not necessarily
17  * an observable property of any given region of address space).  It's just
18  * hugepage-sized and hugepage-aligned; it's *potentially* huge.
19  */
20 typedef struct hpdata_s hpdata_t;
21 ph_structs(hpdata_age_heap, hpdata_t);
22 struct hpdata_s {
23 	/*
24 	 * We likewise follow the edata convention of mangling names and forcing
25 	 * the use of accessors -- this lets us add some consistency checks on
26 	 * access.
27 	 */
28 
29 	/*
30 	 * The address of the hugepage in question.  This can't be named h_addr,
31 	 * since that conflicts with a macro defined in Windows headers.
32 	 */
33 	void *h_address;
34 	/* Its age (measured in psset operations). */
35 	uint64_t h_age;
36 	/* Whether or not we think the hugepage is mapped that way by the OS. */
37 	bool h_huge;
38 
39 	/*
40 	 * For some properties, we keep parallel sets of bools; h_foo_allowed
41 	 * and h_in_psset_foo_container.  This is a decoupling mechanism to
42 	 * avoid bothering the hpa (which manages policies) from the psset
43 	 * (which is the mechanism used to enforce those policies).  This allows
44 	 * all the container management logic to live in one place, without the
45 	 * HPA needing to know or care how that happens.
46 	 */
47 
48 	/*
49 	 * Whether or not the hpdata is allowed to be used to serve allocations,
50 	 * and whether or not the psset is currently tracking it as such.
51 	 */
52 	bool h_alloc_allowed;
53 	bool h_in_psset_alloc_container;
54 
55 	/*
56 	 * The same, but with purging.  There's no corresponding
57 	 * h_in_psset_purge_container, because the psset (currently) always
58 	 * removes hpdatas from their containers during updates (to implement
59 	 * LRU for purging).
60 	 */
61 	bool h_purge_allowed;
62 
63 	/* And with hugifying. */
64 	bool h_hugify_allowed;
65 	/* When we became a hugification candidate. */
66 	nstime_t h_time_hugify_allowed;
67 	bool h_in_psset_hugify_container;
68 
69 	/* Whether or not a purge or hugify is currently happening. */
70 	bool h_mid_purge;
71 	bool h_mid_hugify;
72 
73 	/*
74 	 * Whether or not the hpdata is being updated in the psset (i.e. if
75 	 * there has been a psset_update_begin call issued without a matching
76 	 * psset_update_end call).  Eventually this will expand to other types
77 	 * of updates.
78 	 */
79 	bool h_updating;
80 
81 	/* Whether or not the hpdata is in a psset. */
82 	bool h_in_psset;
83 
84 	union {
85 		/* When nonempty (and also nonfull), used by the psset bins. */
86 		hpdata_age_heap_link_t age_link;
87 		/*
88 		 * When empty (or not corresponding to any hugepage), list
89 		 * linkage.
90 		 */
91 		ql_elm(hpdata_t) ql_link_empty;
92 	};
93 
94 	/*
95 	 * Linkage for the psset to track candidates for purging and hugifying.
96 	 */
97 	ql_elm(hpdata_t) ql_link_purge;
98 	ql_elm(hpdata_t) ql_link_hugify;
99 
100 	/* The length of the largest contiguous sequence of inactive pages. */
101 	size_t h_longest_free_range;
102 
103 	/* Number of active pages. */
104 	size_t h_nactive;
105 
106 	/* A bitmap with bits set in the active pages. */
107 	fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
108 
109 	/*
110 	 * Number of dirty or active pages, and a bitmap tracking them.  One
111 	 * way to think of this is as which pages are dirty from the OS's
112 	 * perspective.
113 	 */
114 	size_t h_ntouched;
115 
116 	/* The touched pages (using the same definition as above). */
117 	fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
118 };
119 
120 TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty)
121 TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge)
122 TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify)
123 
124 ph_proto(, hpdata_age_heap, hpdata_t);
125 
126 static inline void *
hpdata_addr_get(const hpdata_t * hpdata)127 hpdata_addr_get(const hpdata_t *hpdata) {
128 	return hpdata->h_address;
129 }
130 
131 static inline void
hpdata_addr_set(hpdata_t * hpdata,void * addr)132 hpdata_addr_set(hpdata_t *hpdata, void *addr) {
133 	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
134 	hpdata->h_address = addr;
135 }
136 
137 static inline uint64_t
hpdata_age_get(const hpdata_t * hpdata)138 hpdata_age_get(const hpdata_t *hpdata) {
139 	return hpdata->h_age;
140 }
141 
142 static inline void
hpdata_age_set(hpdata_t * hpdata,uint64_t age)143 hpdata_age_set(hpdata_t *hpdata, uint64_t age) {
144 	hpdata->h_age = age;
145 }
146 
147 static inline bool
hpdata_huge_get(const hpdata_t * hpdata)148 hpdata_huge_get(const hpdata_t *hpdata) {
149 	return hpdata->h_huge;
150 }
151 
152 static inline bool
hpdata_alloc_allowed_get(const hpdata_t * hpdata)153 hpdata_alloc_allowed_get(const hpdata_t *hpdata) {
154 	return hpdata->h_alloc_allowed;
155 }
156 
157 static inline void
hpdata_alloc_allowed_set(hpdata_t * hpdata,bool alloc_allowed)158 hpdata_alloc_allowed_set(hpdata_t *hpdata, bool alloc_allowed) {
159 	hpdata->h_alloc_allowed = alloc_allowed;
160 }
161 
162 static inline bool
hpdata_in_psset_alloc_container_get(const hpdata_t * hpdata)163 hpdata_in_psset_alloc_container_get(const hpdata_t *hpdata) {
164 	return hpdata->h_in_psset_alloc_container;
165 }
166 
167 static inline void
hpdata_in_psset_alloc_container_set(hpdata_t * hpdata,bool in_container)168 hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
169 	assert(in_container != hpdata->h_in_psset_alloc_container);
170 	hpdata->h_in_psset_alloc_container = in_container;
171 }
172 
173 static inline bool
hpdata_purge_allowed_get(const hpdata_t * hpdata)174 hpdata_purge_allowed_get(const hpdata_t *hpdata) {
175 	return hpdata->h_purge_allowed;
176 }
177 
178 static inline void
hpdata_purge_allowed_set(hpdata_t * hpdata,bool purge_allowed)179 hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
180        assert(purge_allowed == false || !hpdata->h_mid_purge);
181        hpdata->h_purge_allowed = purge_allowed;
182 }
183 
184 static inline bool
hpdata_hugify_allowed_get(const hpdata_t * hpdata)185 hpdata_hugify_allowed_get(const hpdata_t *hpdata) {
186 	return hpdata->h_hugify_allowed;
187 }
188 
189 static inline void
hpdata_allow_hugify(hpdata_t * hpdata,nstime_t now)190 hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) {
191 	assert(!hpdata->h_mid_hugify);
192 	hpdata->h_hugify_allowed = true;
193 	hpdata->h_time_hugify_allowed = now;
194 }
195 
196 static inline nstime_t
hpdata_time_hugify_allowed(hpdata_t * hpdata)197 hpdata_time_hugify_allowed(hpdata_t *hpdata) {
198 	return hpdata->h_time_hugify_allowed;
199 }
200 
201 static inline void
hpdata_disallow_hugify(hpdata_t * hpdata)202 hpdata_disallow_hugify(hpdata_t *hpdata) {
203 	hpdata->h_hugify_allowed = false;
204 }
205 
206 static inline bool
hpdata_in_psset_hugify_container_get(const hpdata_t * hpdata)207 hpdata_in_psset_hugify_container_get(const hpdata_t *hpdata) {
208 	return hpdata->h_in_psset_hugify_container;
209 }
210 
211 static inline void
hpdata_in_psset_hugify_container_set(hpdata_t * hpdata,bool in_container)212 hpdata_in_psset_hugify_container_set(hpdata_t *hpdata, bool in_container) {
213 	assert(in_container != hpdata->h_in_psset_hugify_container);
214 	hpdata->h_in_psset_hugify_container = in_container;
215 }
216 
217 static inline bool
hpdata_mid_purge_get(const hpdata_t * hpdata)218 hpdata_mid_purge_get(const hpdata_t *hpdata) {
219 	return hpdata->h_mid_purge;
220 }
221 
222 static inline void
hpdata_mid_purge_set(hpdata_t * hpdata,bool mid_purge)223 hpdata_mid_purge_set(hpdata_t *hpdata, bool mid_purge) {
224 	assert(mid_purge != hpdata->h_mid_purge);
225 	hpdata->h_mid_purge = mid_purge;
226 }
227 
228 static inline bool
hpdata_mid_hugify_get(const hpdata_t * hpdata)229 hpdata_mid_hugify_get(const hpdata_t *hpdata) {
230 	return hpdata->h_mid_hugify;
231 }
232 
233 static inline void
hpdata_mid_hugify_set(hpdata_t * hpdata,bool mid_hugify)234 hpdata_mid_hugify_set(hpdata_t *hpdata, bool mid_hugify) {
235 	assert(mid_hugify != hpdata->h_mid_hugify);
236 	hpdata->h_mid_hugify = mid_hugify;
237 }
238 
239 static inline bool
hpdata_changing_state_get(const hpdata_t * hpdata)240 hpdata_changing_state_get(const hpdata_t *hpdata) {
241 	return hpdata->h_mid_purge || hpdata->h_mid_hugify;
242 }
243 
244 
245 static inline bool
hpdata_updating_get(const hpdata_t * hpdata)246 hpdata_updating_get(const hpdata_t *hpdata) {
247 	return hpdata->h_updating;
248 }
249 
250 static inline void
hpdata_updating_set(hpdata_t * hpdata,bool updating)251 hpdata_updating_set(hpdata_t *hpdata, bool updating) {
252 	assert(updating != hpdata->h_updating);
253 	hpdata->h_updating = updating;
254 }
255 
256 static inline bool
hpdata_in_psset_get(const hpdata_t * hpdata)257 hpdata_in_psset_get(const hpdata_t *hpdata) {
258 	return hpdata->h_in_psset;
259 }
260 
261 static inline void
hpdata_in_psset_set(hpdata_t * hpdata,bool in_psset)262 hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) {
263 	assert(in_psset != hpdata->h_in_psset);
264 	hpdata->h_in_psset = in_psset;
265 }
266 
267 static inline size_t
hpdata_longest_free_range_get(const hpdata_t * hpdata)268 hpdata_longest_free_range_get(const hpdata_t *hpdata) {
269 	return hpdata->h_longest_free_range;
270 }
271 
272 static inline void
hpdata_longest_free_range_set(hpdata_t * hpdata,size_t longest_free_range)273 hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) {
274 	assert(longest_free_range <= HUGEPAGE_PAGES);
275 	hpdata->h_longest_free_range = longest_free_range;
276 }
277 
278 static inline size_t
hpdata_nactive_get(hpdata_t * hpdata)279 hpdata_nactive_get(hpdata_t *hpdata) {
280 	return hpdata->h_nactive;
281 }
282 
283 static inline size_t
hpdata_ntouched_get(hpdata_t * hpdata)284 hpdata_ntouched_get(hpdata_t *hpdata) {
285 	return hpdata->h_ntouched;
286 }
287 
288 static inline size_t
hpdata_ndirty_get(hpdata_t * hpdata)289 hpdata_ndirty_get(hpdata_t *hpdata) {
290 	return hpdata->h_ntouched - hpdata->h_nactive;
291 }
292 
293 static inline size_t
hpdata_nretained_get(hpdata_t * hpdata)294 hpdata_nretained_get(hpdata_t *hpdata) {
295 	return HUGEPAGE_PAGES - hpdata->h_ntouched;
296 }
297 
298 static inline void
hpdata_assert_empty(hpdata_t * hpdata)299 hpdata_assert_empty(hpdata_t *hpdata) {
300 	assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
301 	assert(hpdata->h_nactive == 0);
302 }
303 
304 /*
305  * Only used in tests, and in hpdata_assert_consistent, below.  Verifies some
306  * consistency properties of the hpdata (e.g. that cached counts of page stats
307  * match computed ones).
308  */
309 static inline bool
hpdata_consistent(hpdata_t * hpdata)310 hpdata_consistent(hpdata_t *hpdata) {
311 	if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES)
312 	    != hpdata_longest_free_range_get(hpdata)) {
313 		return false;
314 	}
315 	if (fb_scount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
316 	    != hpdata->h_nactive) {
317 		return false;
318 	}
319 	if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
320 	    != hpdata->h_ntouched) {
321 		return false;
322 	}
323 	if (hpdata->h_ntouched < hpdata->h_nactive) {
324 		return false;
325 	}
326 	if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) {
327 		return false;
328 	}
329 	if (hpdata_changing_state_get(hpdata)
330 	    && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) {
331 		return false;
332 	}
333 	if (hpdata_hugify_allowed_get(hpdata)
334 	    != hpdata_in_psset_hugify_container_get(hpdata)) {
335 		return false;
336 	}
337 	return true;
338 }
339 
340 static inline void
hpdata_assert_consistent(hpdata_t * hpdata)341 hpdata_assert_consistent(hpdata_t *hpdata) {
342 	assert(hpdata_consistent(hpdata));
343 }
344 
345 static inline bool
hpdata_empty(hpdata_t * hpdata)346 hpdata_empty(hpdata_t *hpdata) {
347 	return hpdata->h_nactive == 0;
348 }
349 
350 static inline bool
hpdata_full(hpdata_t * hpdata)351 hpdata_full(hpdata_t *hpdata) {
352 	return hpdata->h_nactive == HUGEPAGE_PAGES;
353 }
354 
355 void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age);
356 
357 /*
358  * Given an hpdata which can serve an allocation request, pick and reserve an
359  * offset within that allocation.
360  */
361 void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
362 void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
363 
364 /*
365  * The hpdata_purge_prepare_t allows grabbing the metadata required to purge
366  * subranges of a hugepage while holding a lock, drop the lock during the actual
367  * purging of them, and reacquire it to update the metadata again.
368  */
369 typedef struct hpdata_purge_state_s hpdata_purge_state_t;
370 struct hpdata_purge_state_s {
371 	size_t npurged;
372 	size_t ndirty_to_purge;
373 	fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
374 	size_t next_purge_search_begin;
375 };
376 
377 /*
378  * Initializes purge state.  The access to hpdata must be externally
379  * synchronized with other hpdata_* calls.
380  *
381  * You can tell whether or not a thread is purging or hugifying a given hpdata
382  * via hpdata_changing_state_get(hpdata).  Racing hugification or purging
383  * operations aren't allowed.
384  *
385  * Once you begin purging, you have to follow through and call hpdata_purge_next
386  * until you're done, and then end.  Allocating out of an hpdata undergoing
387  * purging is not allowed.
388  *
389  * Returns the number of dirty pages that will be purged.
390  */
391 size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
392 
393 /*
394  * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to
395  * true, and returns true.  Otherwise, returns false to indicate that we're
396  * done.
397  *
398  * This requires exclusive access to the purge state, but *not* to the hpdata.
399  * In particular, unreserve calls are allowed while purging (i.e. you can dalloc
400  * into one part of the hpdata while purging a different part).
401  */
402 bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
403     void **r_purge_addr, size_t *r_purge_size);
404 /*
405  * Updates the hpdata metadata after all purging is done.  Needs external
406  * synchronization.
407  */
408 void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
409 
410 void hpdata_hugify(hpdata_t *hpdata);
411 void hpdata_dehugify(hpdata_t *hpdata);
412 
413 #endif /* JEMALLOC_INTERNAL_HPDATA_H */
414