xref: /src/contrib/xz/src/xz/hardware.c (revision ae12432049e7873ab3912643ae5d08297b8cbc49)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       hardware.c
6 /// \brief      Detection of available hardware resources
7 //
8 //  Author:     Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11 
12 #include "private.h"
13 
14 #ifdef HAVE_GETRLIMIT
15 #	include <sys/resource.h>
16 #endif
17 
18 
/// Upper bound for the number of worker threads.
/// The --threads=NUM command line option sets this.
static uint32_t threads_max = 0;

/// Set to true when the thread count was derived automatically from
/// the number of available hardware threads.
static bool threads_are_automatic = false;

/// When true, multi-threaded mode is attempted (memlimit permitting)
/// even though exactly one thread was explicitly requested (-T+1).
static bool use_mt_mode_with_one_thread = false;

/// Memory usage limit that applies to compression
static uint64_t memlimit_compress = 0;

/// Memory usage limit that applies to decompression
static uint64_t memlimit_decompress = 0;

/// Default memory usage for the multithreaded modes. It serves two roles:
///
///   - It is the default for --memlimit-compress when the number of
///     threads is automatic. If this limit wouldn't allow even a single
///     thread, coder.c ignores it and uses one thread anyway. This is
///     a compromise: -T0 shouldn't use too many threads, but xz also
///     shouldn't fail because of a memlimit that the user never set
///     explicitly.
///
///   - It is the default for --memlimit-mt-decompress.
///
/// hardware_init() calculates this value and it never changes afterwards.
static uint64_t memlimit_mt_default = 0;

/// Soft memory usage limit for multithreaded decompression. If dropping
/// down to one thread still doesn't bring memory usage under this limit,
/// one thread is used and this limit is ignored. memlimit_decompress is
/// obeyed regardless.
///
/// --memlimit-mt-decompress sets this. When unset, memlimit_mt_default
/// is used instead.
static uint64_t memlimit_mtdec = 0;

/// Total amount of physical RAM
static uint64_t total_ram = 0;
62 
63 
64 extern void
hardware_threads_set(uint32_t n)65 hardware_threads_set(uint32_t n)
66 {
67 	// Reset these to false first and set them to true when appropriate.
68 	threads_are_automatic = false;
69 	use_mt_mode_with_one_thread = false;
70 
71 	if (n == 0) {
72 		// Automatic number of threads was requested.
73 		// If there is only one hardware thread, multi-threaded
74 		// mode will still be used if memory limit allows.
75 		threads_are_automatic = true;
76 		use_mt_mode_with_one_thread = true;
77 
78 		// If threading support was enabled at build time,
79 		// use the number of available CPU cores. Otherwise
80 		// use one thread since disabling threading support
81 		// omits lzma_cputhreads() from liblzma.
82 #ifdef MYTHREAD_ENABLED
83 		threads_max = lzma_cputhreads();
84 		if (threads_max == 0)
85 			threads_max = 1;
86 #else
87 		threads_max = 1;
88 #endif
89 	} else if (n == UINT32_MAX) {
90 		use_mt_mode_with_one_thread = true;
91 		threads_max = 1;
92 	} else {
93 		threads_max = n;
94 	}
95 
96 	return;
97 }
98 
99 
100 extern uint32_t
hardware_threads_get(void)101 hardware_threads_get(void)
102 {
103 	return threads_max;
104 }
105 
106 
/// \brief      Tell whether multi-threaded mode should be used
///
/// \return     Always false when threading support is disabled at build
///             time. Otherwise true if more than one thread is allowed or
///             if multi-threaded mode was requested with a single thread.
extern bool
hardware_threads_is_mt(void)
{
#ifndef MYTHREAD_ENABLED
	return false;
#else
	if (use_mt_mode_with_one_thread)
		return true;

	return threads_max > 1;
#endif
}
116 
117 
118 extern void
hardware_memlimit_set(uint64_t new_memlimit,bool set_compress,bool set_decompress,bool set_mtdec,bool is_percentage)119 hardware_memlimit_set(uint64_t new_memlimit,
120 		bool set_compress, bool set_decompress, bool set_mtdec,
121 		bool is_percentage)
122 {
123 	if (is_percentage) {
124 		assert(new_memlimit > 0);
125 		assert(new_memlimit <= 100);
126 		new_memlimit = (uint32_t)new_memlimit * total_ram / 100;
127 	}
128 
129 	if (set_compress) {
130 		memlimit_compress = new_memlimit;
131 
132 #if SIZE_MAX == UINT32_MAX
133 		// FIXME?
134 		//
135 		// When running a 32-bit xz on a system with a lot of RAM and
136 		// using a percentage-based memory limit, the result can be
137 		// bigger than the 32-bit address space. Limiting the limit
138 		// below SIZE_MAX for compression (not decompression) makes
139 		// xz lower the compression settings (or number of threads)
140 		// to a level that *might* work. In practice it has worked
141 		// when using a 64-bit kernel that gives full 4 GiB address
142 		// space to 32-bit programs. In other situations this might
143 		// still be too high, like 32-bit kernels that may give much
144 		// less than 4 GiB to a single application.
145 		//
146 		// So this is an ugly hack but I will keep it here while
147 		// it does more good than bad.
148 		//
149 		// Use a value less than SIZE_MAX so that there's some room
150 		// for the xz program and so on. Don't use 4000 MiB because
151 		// it could look like someone mixed up base-2 and base-10.
152 #ifdef __mips__
153 		// For MIPS32, due to architectural peculiarities,
154 		// the limit is even lower.
155 		const uint64_t limit_max = UINT64_C(2000) << 20;
156 #else
157 		const uint64_t limit_max = UINT64_C(4020) << 20;
158 #endif
159 
160 		// UINT64_MAX is a special case for the string "max" so
161 		// that has to be handled specially.
162 		if (memlimit_compress != UINT64_MAX
163 				&& memlimit_compress > limit_max)
164 			memlimit_compress = limit_max;
165 #endif
166 	}
167 
168 	if (set_decompress)
169 		memlimit_decompress = new_memlimit;
170 
171 	if (set_mtdec)
172 		memlimit_mtdec = new_memlimit;
173 
174 	return;
175 }
176 
177 
178 extern uint64_t
hardware_memlimit_get(enum operation_mode mode)179 hardware_memlimit_get(enum operation_mode mode)
180 {
181 	// 0 is a special value that indicates the default.
182 	// It disables the limit in single-threaded mode.
183 	//
184 	// NOTE: For multithreaded decompression, this is the hard limit
185 	// (memlimit_stop). hardware_memlimit_mtdec_get() gives the
186 	// soft limit (memlimit_threaded).
187 	const uint64_t memlimit = mode == MODE_COMPRESS
188 			? memlimit_compress : memlimit_decompress;
189 	return memlimit != 0 ? memlimit : UINT64_MAX;
190 }
191 
192 
193 extern uint64_t
hardware_memlimit_mtenc_get(void)194 hardware_memlimit_mtenc_get(void)
195 {
196 	return hardware_memlimit_mtenc_is_default()
197 			? memlimit_mt_default
198 			: hardware_memlimit_get(MODE_COMPRESS);
199 }
200 
201 
202 extern bool
hardware_memlimit_mtenc_is_default(void)203 hardware_memlimit_mtenc_is_default(void)
204 {
205 	return memlimit_compress == 0 && threads_are_automatic;
206 }
207 
208 
209 extern uint64_t
hardware_memlimit_mtdec_get(void)210 hardware_memlimit_mtdec_get(void)
211 {
212 	uint64_t m = memlimit_mtdec != 0
213 			? memlimit_mtdec
214 			: memlimit_mt_default;
215 
216 	// Cap the value to memlimit_decompress if it has been specified.
217 	// This is nice for --info-memory. It wouldn't be needed for liblzma
218 	// since it does this anyway.
219 	if (memlimit_decompress != 0 && m > memlimit_decompress)
220 		m = memlimit_decompress;
221 
222 	return m;
223 }
224 
225 
/// \brief      Print one human-readable line for hardware_memlimit_show()
///
/// \param      str             Label to print at the start of the line
/// \param      str_columns     Number of terminal columns the label
///                             should be padded to
/// \param      value           Amount of memory in bytes; 0 and UINT64_MAX
///                             are shown as "Disabled"
static void
memlimit_show(const char *str, size_t str_columns, uint64_t value)
{
	// Field width that pads str to str_columns columns on the terminal.
	//
	// NOTE: This is -1 if the string is invalid. A field width of -1
	// is fine here, so that case needs no special handling.
	const int field_width = tuklib_mbstr_fw(str, (int)(str_columns));

	// Both 0 and UINT64_MAX mean that the limit is disabled.
	// See the comment in hardware_memlimit_get().
	const bool is_disabled = value == 0 || value == UINT64_MAX;

	if (is_disabled) {
		printf("  %-*s  %s\n", field_width, str, _("Disabled"));
		return;
	}

	printf("  %-*s  %s MiB (%s B)\n", field_width, str,
			uint64_to_str(round_up_to_mib(value), 0),
			uint64_to_str(value, 1));
}
249 
250 
251 extern void
hardware_memlimit_show(void)252 hardware_memlimit_show(void)
253 {
254 	uint32_t cputhreads = 1;
255 #ifdef MYTHREAD_ENABLED
256 	cputhreads = lzma_cputhreads();
257 	if (cputhreads == 0)
258 		cputhreads = 1;
259 #endif
260 
261 	if (opt_robot) {
262 		printf("%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
263 				"\t%" PRIu64 "\t%" PRIu32 "\n",
264 				total_ram,
265 				memlimit_compress,
266 				memlimit_decompress,
267 				hardware_memlimit_mtdec_get(),
268 				memlimit_mt_default,
269 				cputhreads);
270 	} else {
271 		const char *msgs[] = {
272 			_("Amount of physical memory (RAM):"),
273 			_("Number of processor threads:"),
274 			_("Compression:"),
275 			_("Decompression:"),
276 			_("Multi-threaded decompression:"),
277 			_("Default for -T0:"),
278 		};
279 
280 		size_t width_max = 1;
281 		for (unsigned i = 0; i < ARRAY_SIZE(msgs); ++i) {
282 			size_t w = tuklib_mbstr_width(msgs[i], NULL);
283 
284 			// When debugging, catch invalid strings with
285 			// an assertion. Otherwise fallback to 1 so
286 			// that the columns just won't be aligned.
287 			assert(w != (size_t)-1);
288 			if (w == (size_t)-1)
289 				w = 1;
290 
291 			if (width_max < w)
292 				width_max = w;
293 		}
294 
295 		puts(_("Hardware information:"));
296 		memlimit_show(msgs[0], width_max, total_ram);
297 		printf("  %-*s  %" PRIu32 "\n",
298 				tuklib_mbstr_fw(msgs[1], (int)(width_max)),
299 				msgs[1], cputhreads);
300 
301 		putchar('\n');
302 		puts(_("Memory usage limits:"));
303 		memlimit_show(msgs[2], width_max, memlimit_compress);
304 		memlimit_show(msgs[3], width_max, memlimit_decompress);
305 		memlimit_show(msgs[4], width_max,
306 				hardware_memlimit_mtdec_get());
307 		memlimit_show(msgs[5], width_max, memlimit_mt_default);
308 	}
309 
310 	tuklib_exit(E_SUCCESS, E_ERROR, message_verbosity_get() != V_SILENT);
311 }
312 
313 
314 extern void
hardware_init(void)315 hardware_init(void)
316 {
317 	// Get the amount of RAM. If we cannot determine it,
318 	// use the assumption defined by the configure script.
319 	total_ram = lzma_physmem();
320 	if (total_ram == 0)
321 		total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024;
322 
323 	// FIXME? There may be better methods to determine the default value.
324 	// One Linux-specific suggestion is to use MemAvailable from
325 	// /proc/meminfo as the starting point.
326 	memlimit_mt_default = total_ram / 4;
327 
328 #ifdef HAVE_GETRLIMIT
329 	// Try to set the default multithreaded memory usage limit so that
330 	// we won't exceed resource limits. Exceeding the limits would result
331 	// in allocation failures, which currently make liblzma and xz fail
332 	// (instead of continuing by reducing the number of threads).
333 	const int resources[] = {
334 		RLIMIT_DATA,
335 #	ifdef RLIMIT_AS
336 		RLIMIT_AS, // OpenBSD 7.8 doesn't have RLIMIT_AS.
337 #	endif
338 #	if defined(RLIMIT_VMEM) && RLIMIT_VMEM != RLIMIT_AS
339 		RLIMIT_VMEM, // For Solaris. On FreeBSD this is an alias.
340 #	endif
341 	};
342 
343 	// The resource limits cannot be passed to liblzma directly;
344 	// some margin is required:
345 	//   - The memory usage limit counts only liblzma's memory usage,
346 	//     but xz itself needs some memory (including gettext usage etc.).
347 	//   - Memory allocation has some overhead.
348 	//   - Address space limit counts code size too.
349 	//
350 	// The following value is a guess based on quick testing on Linux.
351 	const rlim_t margin = 64 << 20;
352 
353 	for (size_t i = 0; i < ARRAY_SIZE(resources); ++i) {
354 		// glibc: When GNU extensions are enabled, <sys/resource.h>
355 		// declares getrlimit() so that the first argument is an enum
356 		// instead of int as in POSIX. GCC and Clang use unsigned int
357 		// for enums when possible, so a sign conversion occurs when
358 		// resources[i] is convert to the enum type. Clang warns about
359 		// this with -Wsign-conversion but GCC doesn't.
360 #ifdef __clang__
361 #	pragma GCC diagnostic push
362 #	pragma GCC diagnostic ignored "-Wsign-conversion"
363 #endif
364 		// RLIM_SAVED_* might be used on some 32-bit OSes
365 		// (AIX at least) when the limit doesn't fit in a 32-bit
366 		// unsigned integer. Thus, for us these are the same thing
367 		// as no limit at all.
368 		struct rlimit rl;
369 		if (getrlimit(resources[i], &rl) == 0
370 				&& rl.rlim_cur != RLIM_INFINITY
371 				&& rl.rlim_cur != RLIM_SAVED_CUR
372 				&& rl.rlim_cur != RLIM_SAVED_MAX) {
373 #ifdef __clang__
374 #	pragma GCC diagnostic pop
375 #endif
376 			// Subtract the margin from the current resource
377 			// limit, but avoid negative results. Avoid also 0
378 			// because hardware_memlimit_show() (--info-memory)
379 			// treats it specially. In practice, 1 byte is
380 			// effectively 0 anyway.
381 			//
382 			// SUSv2 and POSIX.1-2024 require rlimit_t to be
383 			// unsigned. A cast is needed to silence a compiler
384 			// warning still because, for historical reasons,
385 			// rlim_t is intentionally signed on FreeBSD 14.
386 			const uint64_t rl_with_margin = rl.rlim_cur > margin
387 					? (uint64_t)(rl.rlim_cur - margin) : 1;
388 
389 			// Lower the memory usage limit if needed.
390 			if (memlimit_mt_default > rl_with_margin)
391 				memlimit_mt_default = rl_with_margin;
392 		}
393 	}
394 #endif
395 
396 #if SIZE_MAX == UINT32_MAX
397 	// A too high value may cause 32-bit xz to run out of address space.
398 	// Use a conservative maximum value here. A few typical address space
399 	// sizes with Linux:
400 	//   - x86-64 with 32-bit xz: 4 GiB
401 	//   - x86: 3 GiB
402 	//   - MIPS32: 2 GiB
403 	const size_t mem_ceiling = 1400U << 20;
404 	if (memlimit_mt_default > mem_ceiling)
405 		memlimit_mt_default = mem_ceiling;
406 #endif
407 
408 	// Enable threaded mode by default. xz 5.4.x and older
409 	// used single-threaded mode by default.
410 	hardware_threads_set(0);
411 
412 	return;
413 }
414