/* xref: /src/sys/contrib/openzfs/module/zstd/lib/common/cpu.h (revision 8a62a2a5659d1839d8799b4274c04469d7f17c78) */
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2 /*
3  * Copyright (c) Meta Platforms, Inc. and affiliates.
4  * All rights reserved.
5  *
6  * This source code is licensed under both the BSD-style license (found in the
7  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8  * in the COPYING file in the root directory of this source tree).
9  * You may select, at your option, one of the above-listed licenses.
10  */
11 
12 #ifndef ZSTD_COMMON_CPU_H
13 #define ZSTD_COMMON_CPU_H
14 
15 /**
16  * Implementation taken from folly/CpuId.h
17  * https://github.com/facebook/folly/blob/master/folly/CpuId.h
18  */
19 
20 #include "mem.h"
21 
22 #ifdef _MSC_VER
23 #include <intrin.h>
24 #endif
25 
26 typedef struct {
27     U32 f1c;
28     U32 f1d;
29     U32 f7b;
30     U32 f7c;
31 } ZSTD_cpuid_t;
32 
ZSTD_cpuid(void)33 MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
34     U32 f1c = 0;
35     U32 f1d = 0;
36     U32 f7b = 0;
37     U32 f7c = 0;
38 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
39 #if !defined(_M_X64) || !defined(__clang__) || __clang_major__ >= 16
40     int reg[4];
41     __cpuid((int*)reg, 0);
42     {
43         int const n = reg[0];
44         if (n >= 1) {
45             __cpuid((int*)reg, 1);
46             f1c = (U32)reg[2];
47             f1d = (U32)reg[3];
48         }
49         if (n >= 7) {
50             __cpuidex((int*)reg, 7, 0);
51             f7b = (U32)reg[1];
52             f7c = (U32)reg[2];
53         }
54     }
55 #else
56     /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
57      * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
58      * to due to being a reserved register. So in that case, do the `cpuid`
59      * ourselves. Clang supports inline assembly anyway.
60      */
61     U32 n;
62     __asm__(
63         "pushq %%rbx\n\t"
64         "cpuid\n\t"
65         "popq %%rbx\n\t"
66         : "=a"(n)
67         : "a"(0)
68         : "rcx", "rdx");
69     if (n >= 1) {
70       U32 f1a;
71       __asm__(
72           "pushq %%rbx\n\t"
73           "cpuid\n\t"
74           "popq %%rbx\n\t"
75           : "=a"(f1a), "=c"(f1c), "=d"(f1d)
76           : "a"(1)
77           :);
78     }
79     if (n >= 7) {
80       __asm__(
81           "pushq %%rbx\n\t"
82           "cpuid\n\t"
83           "movq %%rbx, %%rax\n\t"
84           "popq %%rbx"
85           : "=a"(f7b), "=c"(f7c)
86           : "a"(7), "c"(0)
87           : "rdx");
88     }
89 #endif
90 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
91     /* The following block like the normal cpuid branch below, but gcc
92      * reserves ebx for use of its pic register so we must specially
93      * handle the save and restore to avoid clobbering the register
94      */
95     U32 n;
96     __asm__(
97         "pushl %%ebx\n\t"
98         "cpuid\n\t"
99         "popl %%ebx\n\t"
100         : "=a"(n)
101         : "a"(0)
102         : "ecx", "edx");
103     if (n >= 1) {
104       U32 f1a;
105       __asm__(
106           "pushl %%ebx\n\t"
107           "cpuid\n\t"
108           "popl %%ebx\n\t"
109           : "=a"(f1a), "=c"(f1c), "=d"(f1d)
110           : "a"(1));
111     }
112     if (n >= 7) {
113       __asm__(
114           "pushl %%ebx\n\t"
115           "cpuid\n\t"
116           "movl %%ebx, %%eax\n\t"
117           "popl %%ebx"
118           : "=a"(f7b), "=c"(f7c)
119           : "a"(7), "c"(0)
120           : "edx");
121     }
122 #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
123     U32 n;
124     __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
125     if (n >= 1) {
126       U32 f1a;
127       __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
128     }
129     if (n >= 7) {
130       U32 f7a;
131       __asm__("cpuid"
132               : "=a"(f7a), "=b"(f7b), "=c"(f7c)
133               : "a"(7), "c"(0)
134               : "edx");
135     }
136 #endif
137     {
138         ZSTD_cpuid_t cpuid;
139         cpuid.f1c = f1c;
140         cpuid.f1d = f1d;
141         cpuid.f7b = f7b;
142         cpuid.f7c = f7c;
143         return cpuid;
144     }
145 }
146 
/*
 * X(name, r, bit) defines the predicate ZSTD_cpuid_<name>(cpuid), which
 * returns 1 when bit number `bit` of the feature word cpuid.<r> is set and
 * 0 otherwise. `bit` is parenthesised so that an expression argument cannot
 * change the meaning of the shift.
 */
#define X(name, r, bit)                                                        \
  MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
    return ((cpuid.r) & (1U << (bit))) != 0;                                   \
  }
151 
152 /* cpuid(1): Processor Info and Feature Bits. */
153 #define C(name, bit) X(name, f1c, bit)
154   C(sse3, 0)
155   C(pclmuldq, 1)
156   C(dtes64, 2)
157   C(monitor, 3)
158   C(dscpl, 4)
159   C(vmx, 5)
160   C(smx, 6)
161   C(eist, 7)
162   C(tm2, 8)
163   C(ssse3, 9)
164   C(cnxtid, 10)
165   C(fma, 12)
166   C(cx16, 13)
167   C(xtpr, 14)
168   C(pdcm, 15)
169   C(pcid, 17)
170   C(dca, 18)
171   C(sse41, 19)
172   C(sse42, 20)
173   C(x2apic, 21)
174   C(movbe, 22)
175   C(popcnt, 23)
176   C(tscdeadline, 24)
177   C(aes, 25)
178   C(xsave, 26)
179   C(osxsave, 27)
180   C(avx, 28)
181   C(f16c, 29)
182   C(rdrand, 30)
183 #undef C
184 #define D(name, bit) X(name, f1d, bit)
185   D(fpu, 0)
186   D(vme, 1)
187   D(de, 2)
188   D(pse, 3)
189   D(tsc, 4)
190   D(msr, 5)
191   D(pae, 6)
192   D(mce, 7)
193   D(cx8, 8)
194   D(apic, 9)
195   D(sep, 11)
196   D(mtrr, 12)
197   D(pge, 13)
198   D(mca, 14)
199   D(cmov, 15)
200   D(pat, 16)
201   D(pse36, 17)
202   D(psn, 18)
203   D(clfsh, 19)
204   D(ds, 21)
205   D(acpi, 22)
206   D(mmx, 23)
207   D(fxsr, 24)
208   D(sse, 25)
209   D(sse2, 26)
210   D(ss, 27)
211   D(htt, 28)
212   D(tm, 29)
213   D(pbe, 31)
214 #undef D
215 
216 /* cpuid(7): Extended Features. */
217 #define B(name, bit) X(name, f7b, bit)
218   B(bmi1, 3)
219   B(hle, 4)
220   B(avx2, 5)
221   B(smep, 7)
222   B(bmi2, 8)
223   B(erms, 9)
224   B(invpcid, 10)
225   B(rtm, 11)
226   B(mpx, 14)
227   B(avx512f, 16)
228   B(avx512dq, 17)
229   B(rdseed, 18)
230   B(adx, 19)
231   B(smap, 20)
232   B(avx512ifma, 21)
233   B(pcommit, 22)
234   B(clflushopt, 23)
235   B(clwb, 24)
236   B(avx512pf, 26)
237   B(avx512er, 27)
238   B(avx512cd, 28)
239   B(sha, 29)
240   B(avx512bw, 30)
241   B(avx512vl, 31)
242 #undef B
243 #define C(name, bit) X(name, f7c, bit)
244   C(prefetchwt1, 0)
245   C(avx512vbmi, 1)
246 #undef C
247 
248 #undef X
249 
250 #endif /* ZSTD_COMMON_CPU_H */
251