1 #ifndef JEMALLOC_INTERNAL_HPDATA_H
2 #define JEMALLOC_INTERNAL_HPDATA_H
3
4 #include "jemalloc/internal/fb.h"
5 #include "jemalloc/internal/ph.h"
6 #include "jemalloc/internal/ql.h"
7 #include "jemalloc/internal/typed_list.h"
8
9 /*
10 * The metadata representation we use for extents in hugepages. While the PAC
11 * uses the edata_t to represent both active and inactive extents, the HP only
12 * uses the edata_t for active ones; instead, inactive extent state is tracked
13 * within hpdata associated with the enclosing hugepage-sized, hugepage-aligned
14 * region of virtual address space.
15 *
16 * An hpdata need not be "truly" backed by a hugepage (which is not necessarily
17 * an observable property of any given region of address space). It's just
18 * hugepage-sized and hugepage-aligned; it's *potentially* huge.
19 */
typedef struct hpdata_s hpdata_t;
/*
 * Presumably declares the heap types for hpdata_age_heap, including the
 * hpdata_age_heap_link_t used as linkage below -- see ph.h to confirm.
 */
ph_structs(hpdata_age_heap, hpdata_t);
struct hpdata_s {
	/*
	 * We likewise follow the edata convention of mangling names and forcing
	 * the use of accessors -- this lets us add some consistency checks on
	 * access.
	 */

	/*
	 * The address of the hugepage in question.  This can't be named h_addr,
	 * since that conflicts with a macro defined in Windows headers.
	 */
	void *h_address;
	/* Its age (measured in psset operations). */
	uint64_t h_age;
	/* Whether or not we think the hugepage is mapped that way by the OS. */
	bool h_huge;

	/*
	 * For some properties, we keep parallel sets of bools; h_foo_allowed
	 * and h_in_psset_foo_container.  This is a decoupling mechanism that
	 * keeps the hpa (which manages policies) separate from the psset
	 * (which is the mechanism used to enforce those policies).  This allows
	 * all the container management logic to live in one place, without the
	 * HPA needing to know or care how that happens.
	 */

	/*
	 * Whether or not the hpdata is allowed to be used to serve allocations,
	 * and whether or not the psset is currently tracking it as such.
	 */
	bool h_alloc_allowed;
	bool h_in_psset_alloc_container;

	/*
	 * The same, but with purging.  There's no corresponding
	 * h_in_psset_purge_container, because the psset (currently) always
	 * removes hpdatas from their containers during updates (to implement
	 * LRU for purging).
	 */
	bool h_purge_allowed;

	/* And with hugifying. */
	bool h_hugify_allowed;
	/* When we became a hugification candidate. */
	nstime_t h_time_hugify_allowed;
	bool h_in_psset_hugify_container;

	/* Whether or not a purge or hugify is currently happening. */
	bool h_mid_purge;
	bool h_mid_hugify;

	/*
	 * Whether or not the hpdata is being updated in the psset (i.e. if
	 * there has been a psset_update_begin call issued without a matching
	 * psset_update_end call).  Eventually this will expand to other types
	 * of updates.
	 */
	bool h_updating;

	/* Whether or not the hpdata is in a psset. */
	bool h_in_psset;

	/*
	 * The two kinds of linkage below share storage, so only one of them
	 * is meaningful at any given time.
	 */
	union {
		/* When nonempty (and also nonfull), used by the psset bins. */
		hpdata_age_heap_link_t age_link;
		/*
		 * When empty (or not corresponding to any hugepage), list
		 * linkage.
		 */
		ql_elm(hpdata_t) ql_link_empty;
	};

	/*
	 * Linkage for the psset to track candidates for purging and hugifying.
	 */
	ql_elm(hpdata_t) ql_link_purge;
	ql_elm(hpdata_t) ql_link_hugify;

	/* The length of the largest contiguous sequence of inactive pages. */
	size_t h_longest_free_range;

	/* Number of active pages. */
	size_t h_nactive;

	/* A bitmap with bits set in the active pages. */
	fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)];

	/*
	 * Number of dirty or active pages, and a bitmap tracking them.  One
	 * way to think of this is as which pages are dirty from the OS's
	 * perspective.
	 */
	size_t h_ntouched;

	/* The touched pages (using the same definition as above). */
	fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
};
119
/*
 * One typed list per ql_elm linkage field in hpdata_t: empty hpdatas, purge
 * candidates, and hugify candidates.
 */
TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty)
TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge)
TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify)

/*
 * Presumably the prototypes for the hpdata_age_heap operations whose structs
 * are declared via ph_structs -- see ph.h to confirm.
 */
ph_proto(, hpdata_age_heap, hpdata_t);
125
126 static inline void *
hpdata_addr_get(const hpdata_t * hpdata)127 hpdata_addr_get(const hpdata_t *hpdata) {
128 return hpdata->h_address;
129 }
130
131 static inline void
hpdata_addr_set(hpdata_t * hpdata,void * addr)132 hpdata_addr_set(hpdata_t *hpdata, void *addr) {
133 assert(HUGEPAGE_ADDR2BASE(addr) == addr);
134 hpdata->h_address = addr;
135 }
136
137 static inline uint64_t
hpdata_age_get(const hpdata_t * hpdata)138 hpdata_age_get(const hpdata_t *hpdata) {
139 return hpdata->h_age;
140 }
141
142 static inline void
hpdata_age_set(hpdata_t * hpdata,uint64_t age)143 hpdata_age_set(hpdata_t *hpdata, uint64_t age) {
144 hpdata->h_age = age;
145 }
146
147 static inline bool
hpdata_huge_get(const hpdata_t * hpdata)148 hpdata_huge_get(const hpdata_t *hpdata) {
149 return hpdata->h_huge;
150 }
151
152 static inline bool
hpdata_alloc_allowed_get(const hpdata_t * hpdata)153 hpdata_alloc_allowed_get(const hpdata_t *hpdata) {
154 return hpdata->h_alloc_allowed;
155 }
156
157 static inline void
hpdata_alloc_allowed_set(hpdata_t * hpdata,bool alloc_allowed)158 hpdata_alloc_allowed_set(hpdata_t *hpdata, bool alloc_allowed) {
159 hpdata->h_alloc_allowed = alloc_allowed;
160 }
161
162 static inline bool
hpdata_in_psset_alloc_container_get(const hpdata_t * hpdata)163 hpdata_in_psset_alloc_container_get(const hpdata_t *hpdata) {
164 return hpdata->h_in_psset_alloc_container;
165 }
166
167 static inline void
hpdata_in_psset_alloc_container_set(hpdata_t * hpdata,bool in_container)168 hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
169 assert(in_container != hpdata->h_in_psset_alloc_container);
170 hpdata->h_in_psset_alloc_container = in_container;
171 }
172
173 static inline bool
hpdata_purge_allowed_get(const hpdata_t * hpdata)174 hpdata_purge_allowed_get(const hpdata_t *hpdata) {
175 return hpdata->h_purge_allowed;
176 }
177
178 static inline void
hpdata_purge_allowed_set(hpdata_t * hpdata,bool purge_allowed)179 hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
180 assert(purge_allowed == false || !hpdata->h_mid_purge);
181 hpdata->h_purge_allowed = purge_allowed;
182 }
183
184 static inline bool
hpdata_hugify_allowed_get(const hpdata_t * hpdata)185 hpdata_hugify_allowed_get(const hpdata_t *hpdata) {
186 return hpdata->h_hugify_allowed;
187 }
188
189 static inline void
hpdata_allow_hugify(hpdata_t * hpdata,nstime_t now)190 hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) {
191 assert(!hpdata->h_mid_hugify);
192 hpdata->h_hugify_allowed = true;
193 hpdata->h_time_hugify_allowed = now;
194 }
195
196 static inline nstime_t
hpdata_time_hugify_allowed(hpdata_t * hpdata)197 hpdata_time_hugify_allowed(hpdata_t *hpdata) {
198 return hpdata->h_time_hugify_allowed;
199 }
200
201 static inline void
hpdata_disallow_hugify(hpdata_t * hpdata)202 hpdata_disallow_hugify(hpdata_t *hpdata) {
203 hpdata->h_hugify_allowed = false;
204 }
205
206 static inline bool
hpdata_in_psset_hugify_container_get(const hpdata_t * hpdata)207 hpdata_in_psset_hugify_container_get(const hpdata_t *hpdata) {
208 return hpdata->h_in_psset_hugify_container;
209 }
210
211 static inline void
hpdata_in_psset_hugify_container_set(hpdata_t * hpdata,bool in_container)212 hpdata_in_psset_hugify_container_set(hpdata_t *hpdata, bool in_container) {
213 assert(in_container != hpdata->h_in_psset_hugify_container);
214 hpdata->h_in_psset_hugify_container = in_container;
215 }
216
217 static inline bool
hpdata_mid_purge_get(const hpdata_t * hpdata)218 hpdata_mid_purge_get(const hpdata_t *hpdata) {
219 return hpdata->h_mid_purge;
220 }
221
222 static inline void
hpdata_mid_purge_set(hpdata_t * hpdata,bool mid_purge)223 hpdata_mid_purge_set(hpdata_t *hpdata, bool mid_purge) {
224 assert(mid_purge != hpdata->h_mid_purge);
225 hpdata->h_mid_purge = mid_purge;
226 }
227
228 static inline bool
hpdata_mid_hugify_get(const hpdata_t * hpdata)229 hpdata_mid_hugify_get(const hpdata_t *hpdata) {
230 return hpdata->h_mid_hugify;
231 }
232
233 static inline void
hpdata_mid_hugify_set(hpdata_t * hpdata,bool mid_hugify)234 hpdata_mid_hugify_set(hpdata_t *hpdata, bool mid_hugify) {
235 assert(mid_hugify != hpdata->h_mid_hugify);
236 hpdata->h_mid_hugify = mid_hugify;
237 }
238
239 static inline bool
hpdata_changing_state_get(const hpdata_t * hpdata)240 hpdata_changing_state_get(const hpdata_t *hpdata) {
241 return hpdata->h_mid_purge || hpdata->h_mid_hugify;
242 }
243
244
245 static inline bool
hpdata_updating_get(const hpdata_t * hpdata)246 hpdata_updating_get(const hpdata_t *hpdata) {
247 return hpdata->h_updating;
248 }
249
250 static inline void
hpdata_updating_set(hpdata_t * hpdata,bool updating)251 hpdata_updating_set(hpdata_t *hpdata, bool updating) {
252 assert(updating != hpdata->h_updating);
253 hpdata->h_updating = updating;
254 }
255
256 static inline bool
hpdata_in_psset_get(const hpdata_t * hpdata)257 hpdata_in_psset_get(const hpdata_t *hpdata) {
258 return hpdata->h_in_psset;
259 }
260
261 static inline void
hpdata_in_psset_set(hpdata_t * hpdata,bool in_psset)262 hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) {
263 assert(in_psset != hpdata->h_in_psset);
264 hpdata->h_in_psset = in_psset;
265 }
266
267 static inline size_t
hpdata_longest_free_range_get(const hpdata_t * hpdata)268 hpdata_longest_free_range_get(const hpdata_t *hpdata) {
269 return hpdata->h_longest_free_range;
270 }
271
272 static inline void
hpdata_longest_free_range_set(hpdata_t * hpdata,size_t longest_free_range)273 hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) {
274 assert(longest_free_range <= HUGEPAGE_PAGES);
275 hpdata->h_longest_free_range = longest_free_range;
276 }
277
278 static inline size_t
hpdata_nactive_get(hpdata_t * hpdata)279 hpdata_nactive_get(hpdata_t *hpdata) {
280 return hpdata->h_nactive;
281 }
282
283 static inline size_t
hpdata_ntouched_get(hpdata_t * hpdata)284 hpdata_ntouched_get(hpdata_t *hpdata) {
285 return hpdata->h_ntouched;
286 }
287
288 static inline size_t
hpdata_ndirty_get(hpdata_t * hpdata)289 hpdata_ndirty_get(hpdata_t *hpdata) {
290 return hpdata->h_ntouched - hpdata->h_nactive;
291 }
292
293 static inline size_t
hpdata_nretained_get(hpdata_t * hpdata)294 hpdata_nretained_get(hpdata_t *hpdata) {
295 return HUGEPAGE_PAGES - hpdata->h_ntouched;
296 }
297
298 static inline void
hpdata_assert_empty(hpdata_t * hpdata)299 hpdata_assert_empty(hpdata_t *hpdata) {
300 assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
301 assert(hpdata->h_nactive == 0);
302 }
303
304 /*
305 * Only used in tests, and in hpdata_assert_consistent, below. Verifies some
306 * consistency properties of the hpdata (e.g. that cached counts of page stats
307 * match computed ones).
308 */
309 static inline bool
hpdata_consistent(hpdata_t * hpdata)310 hpdata_consistent(hpdata_t *hpdata) {
311 if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES)
312 != hpdata_longest_free_range_get(hpdata)) {
313 return false;
314 }
315 if (fb_scount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
316 != hpdata->h_nactive) {
317 return false;
318 }
319 if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
320 != hpdata->h_ntouched) {
321 return false;
322 }
323 if (hpdata->h_ntouched < hpdata->h_nactive) {
324 return false;
325 }
326 if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) {
327 return false;
328 }
329 if (hpdata_changing_state_get(hpdata)
330 && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) {
331 return false;
332 }
333 if (hpdata_hugify_allowed_get(hpdata)
334 != hpdata_in_psset_hugify_container_get(hpdata)) {
335 return false;
336 }
337 return true;
338 }
339
340 static inline void
hpdata_assert_consistent(hpdata_t * hpdata)341 hpdata_assert_consistent(hpdata_t *hpdata) {
342 assert(hpdata_consistent(hpdata));
343 }
344
345 static inline bool
hpdata_empty(hpdata_t * hpdata)346 hpdata_empty(hpdata_t *hpdata) {
347 return hpdata->h_nactive == 0;
348 }
349
350 static inline bool
hpdata_full(hpdata_t * hpdata)351 hpdata_full(hpdata_t *hpdata) {
352 return hpdata->h_nactive == HUGEPAGE_PAGES;
353 }
354
/*
 * Presumably initializes *hpdata to describe the hugepage at addr, with the
 * given initial age -- the definition is not in this header; confirm there.
 */
void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age);
356
/*
 * Given an hpdata which can serve an allocation request, pick and reserve an
 * offset within that allocation.
 *
 * hpdata_unreserve is presumably the inverse, releasing the sz bytes starting
 * at begin -- confirm against the definition.
 */
void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
363
/*
 * The hpdata_purge_state_t allows grabbing the metadata required to purge
 * subranges of a hugepage while holding a lock, dropping the lock during the
 * actual purging of them, and reacquiring it to update the metadata again.
 *
 * NOTE(review): the field roles are not documented here; judging by their
 * names, to_purge is a page bitmap of what will be purged and
 * next_purge_search_begin is the resume position for hpdata_purge_next --
 * confirm against the implementation.
 */
typedef struct hpdata_purge_state_s hpdata_purge_state_t;
struct hpdata_purge_state_s {
	size_t npurged;
	size_t ndirty_to_purge;
	fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
	size_t next_purge_search_begin;
};
376
/*
 * Initializes purge state.  The access to hpdata must be externally
 * synchronized with other hpdata_* calls.
 *
 * You can tell whether or not a thread is purging or hugifying a given hpdata
 * via hpdata_changing_state_get(hpdata).  Racing hugification or purging
 * operations aren't allowed.
 *
 * Once you begin purging, you have to follow through and call
 * hpdata_purge_next until it reports that you're done, and then call
 * hpdata_purge_end.  Allocating out of an hpdata undergoing purging is not
 * allowed.
 *
 * Returns the number of dirty pages that will be purged.
 */
size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
392
/*
 * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to
 * the address and size of the next one, and returns true.  Otherwise, returns
 * false to indicate that we're done.
 *
 * This requires exclusive access to the purge state, but *not* to the hpdata.
 * In particular, unreserve calls are allowed while purging (i.e. you can dalloc
 * into one part of the hpdata while purging a different part).
 */
bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
    void **r_purge_addr, size_t *r_purge_size);
/*
 * Updates the hpdata metadata after all purging is done (i.e. once
 * hpdata_purge_next has returned false).  Needs external synchronization.
 */
void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
409
/*
 * Presumably update the hpdata's metadata to reflect that the hugepage has
 * become (hugify) or stopped being (dehugify) huge -- the definitions are not
 * in this header; confirm there.
 */
void hpdata_hugify(hpdata_t *hpdata);
void hpdata_dehugify(hpdata_t *hpdata);
412
413 #endif /* JEMALLOC_INTERNAL_HPDATA_H */
414