1{ lib }:
2
3rec {
# Maps each gcc.arch value to the CPU feature flags it guarantees, spelled as
# in /proc/cpuinfo. Only the flags queried by `predicates` are tracked; an
# empty list means "none of the tracked flags".
features =
  let
    # Shared building blocks. Every list keeps the canonical order
    # sse* -> sse4a -> aes -> avx* -> fma* so results stay stable.
    upToSse42 = [
      "sse3"
      "ssse3"
      "sse4_1"
      "sse4_2"
    ];
    upToAvx = upToSse42 ++ [ "avx" ];
    upToAvx2Fma = upToSse42 ++ [
      "avx"
      "avx2"
      "fma"
    ];
    upToAvx512Fma = upToSse42 ++ [
      "avx"
      "avx2"
      "avx512"
      "fma"
    ];
    # Intel from skylake on: GCC enables AES starting with this generation.
    aesAvx2Fma = upToSse42 ++ [
      "aes"
      "avx"
      "avx2"
      "fma"
    ];
    aesAvx512Fma = upToSse42 ++ [
      "aes"
      "avx"
      "avx2"
      "avx512"
      "fma"
    ];
    # AMD cores additionally provide SSE4a.
    amdSse = upToSse42 ++ [ "sse4a" ];
    amdFmaFma4 = amdSse ++ [
      "aes"
      "avx"
      "fma"
      "fma4"
    ];
    zen = amdSse ++ [
      "aes"
      "avx"
      "avx2"
      "fma"
    ];
    zenAvx512 = amdSse ++ [
      "aes"
      "avx"
      "avx2"
      "avx512"
      "fma"
    ];
  in
  {
    # x86_64 Generic
    # Spec: https://gitlab.com/x86-psABIs/x86-64-ABI/
    default = [ ];
    x86-64 = [ ];
    x86-64-v2 = upToSse42;
    x86-64-v3 = upToAvx2Fma;
    x86-64-v4 = upToAvx512Fma;

    # x86_64 Intel
    # https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
    nehalem = upToSse42;
    westmere = upToSse42;
    silvermont = upToSse42;
    sandybridge = upToAvx;
    ivybridge = upToAvx;
    haswell = upToAvx2Fma;
    broadwell = upToAvx2Fma;
    skylake = aesAvx2Fma;
    skylake-avx512 = aesAvx512Fma;
    cannonlake = aesAvx512Fma;
    icelake-client = aesAvx512Fma;
    icelake-server = aesAvx512Fma;
    cascadelake = aesAvx512Fma;
    cooperlake = aesAvx512Fma;
    tigerlake = aesAvx512Fma;
    alderlake = aesAvx2Fma;
    sapphirerapids = aesAvx512Fma;
    emeraldrapids = aesAvx512Fma;
    sierraforest = aesAvx2Fma;

    # x86_64 AMD
    # https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
    #
    # btver1 (Family 14h, Bobcat) stops at SSSE3 + SSE4a: it has neither
    # SSE4.1 nor SSE4.2, which is also why its only inferior is plain x86-64.
    btver1 = [
      "sse3"
      "ssse3"
      "sse4a"
    ];
    btver2 = amdSse ++ [
      "aes"
      "avx"
    ];
    # bdver1 (Bulldozer) only has FMA4; three-operand FMA arrives with bdver2.
    bdver1 = amdSse ++ [
      "aes"
      "avx"
      "fma4"
    ];
    bdver2 = amdFmaFma4;
    bdver3 = amdFmaFma4;
    bdver4 = amdSse ++ [
      "aes"
      "avx"
      "avx2"
      "fma"
      "fma4"
    ];
    znver1 = zen;
    znver2 = zen;
    znver3 = zen;
    znver4 = zenAvx512;
    znver5 = zenAvx512;

    # other
    armv5te = [ ];
    armv6 = [ ];
    armv7-a = [ ];
    armv8-a = [ ];
    mips32 = [ ];
    loongson2f = [ ];
  };
340
# A superior CPU has all the features of an inferior one and is able to build
# and test code for it. Each entry lists every inferior arch, including the
# transitive ones, so consumers never have to walk the graph themselves.
inferiors =
  let
    # Extends `archs` with the inferiors of each member and removes the
    # duplicates that appear when several parents share ancestors.
    withInferiors = archs: lib.unique (archs ++ lib.flatten (lib.attrVals archs inferiors));
  in
  {
    # x86_64 Generic
    default = [ ];
    x86-64 = [ ];
    x86-64-v2 = [ "x86-64" ];
    x86-64-v3 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
    x86-64-v4 = [ "x86-64-v3" ] ++ inferiors.x86-64-v3;

    # x86_64 Intel
    # https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
    nehalem = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
    westmere = [ "nehalem" ] ++ inferiors.nehalem;
    # Per the GCC docs, silvermont's instruction set is a superset of
    # westmere's (it adds MOVBE, PREFETCHW and RDRND); it has no AVX, so it
    # sits beside sandybridge rather than below it.
    silvermont = [ "westmere" ] ++ inferiors.westmere;
    sandybridge = [ "westmere" ] ++ inferiors.westmere;
    ivybridge = [ "sandybridge" ] ++ inferiors.sandybridge;

    haswell = withInferiors [
      "ivybridge"
      "x86-64-v3"
    ];
    broadwell = [ "haswell" ] ++ inferiors.haswell;
    skylake = [ "broadwell" ] ++ inferiors.broadwell;

    skylake-avx512 = withInferiors [
      "skylake"
      "x86-64-v4"
    ];
    cannonlake = [ "skylake-avx512" ] ++ inferiors.skylake-avx512;
    icelake-client = [ "cannonlake" ] ++ inferiors.cannonlake;
    icelake-server = [ "icelake-client" ] ++ inferiors.icelake-client;
    cascadelake = [ "cannonlake" ] ++ inferiors.cannonlake;
    cooperlake = [ "cascadelake" ] ++ inferiors.cascadelake;
    tigerlake = [ "icelake-server" ] ++ inferiors.icelake-server;
    sapphirerapids = [ "tigerlake" ] ++ inferiors.tigerlake;
    emeraldrapids = [ "sapphirerapids" ] ++ inferiors.sapphirerapids;

    alderlake = [ "skylake" ] ++ inferiors.skylake;
    sierraforest = [ "alderlake" ] ++ inferiors.alderlake;

    # x86_64 AMD
    # TODO: fill in specific CPU architecture inferiors
    btver1 = [ "x86-64" ];
    btver2 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
    bdver1 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
    bdver2 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
    bdver3 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
    bdver4 = [ "x86-64-v3" ] ++ inferiors.x86-64-v3;
    # Regarding `skylake` as inferior of `znver1`, there are reports of
    # successful usage by Gentoo users and Phoronix benchmarking of different
    # `-march` targets.
    #
    # The GCC documentation on extensions used and the wikichip documentation
    # regarding supported extensions on znver1 and skylake were used to create
    # this partial order.
    #
    # Note:
    #
    # - The successors of `skylake` (`cannonlake`, `icelake`, etc) use `avx512`,
    #   which `znver1` through `znver3` do not support. `znver4` and later do
    #   (see `features`), but are only ordered above `x86-64-v4` here, not
    #   above any Intel avx512 microarchitecture.
    # - `znver1` uses `ABM`, `CLZERO`, `CX16`, `MWAITX`, and `SSE4A`, a
    #   combination no current Intel microarchitecture supports.
    #
    # https://www.phoronix.com/scan.php?page=article&item=amd-znver3-gcc11&num=1
    # https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
    # https://en.wikichip.org/wiki/amd/microarchitectures/zen
    # https://en.wikichip.org/wiki/intel/microarchitectures/skylake
    znver1 = [ "skylake" ] ++ inferiors.skylake; # Includes haswell and x86-64-v3
    znver2 = [ "znver1" ] ++ inferiors.znver1;
    znver3 = [ "znver2" ] ++ inferiors.znver2;
    znver4 = withInferiors [
      "znver3"
      "x86-64-v4"
    ];
    znver5 = [ "znver4" ] ++ inferiors.znver4;

    # ARM64 (AArch64)
    armv8-a = [ ];
    "armv8.1-a" = [ "armv8-a" ];
    "armv8.2-a" = [ "armv8.1-a" ] ++ inferiors."armv8.1-a";
    "armv8.3-a" = [ "armv8.2-a" ] ++ inferiors."armv8.2-a";
    "armv8.4-a" = [ "armv8.3-a" ] ++ inferiors."armv8.3-a";
    "armv8.5-a" = [ "armv8.4-a" ] ++ inferiors."armv8.4-a";
    "armv8.6-a" = [ "armv8.5-a" ] ++ inferiors."armv8.5-a";
    "armv8.7-a" = [ "armv8.6-a" ] ++ inferiors."armv8.6-a";
    "armv8.8-a" = [ "armv8.7-a" ] ++ inferiors."armv8.7-a";
    "armv8.9-a" = [ "armv8.8-a" ] ++ inferiors."armv8.8-a";
    armv9-a = [ "armv8.5-a" ] ++ inferiors."armv8.5-a";
    # inferiors.armv9-a is already contained in inferiors."armv8.6-a", so it
    # does not need to be appended separately.
    "armv9.1-a" = [
      "armv9-a"
      "armv8.6-a"
    ] ++ inferiors."armv8.6-a";
    "armv9.2-a" = withInferiors [
      "armv9.1-a"
      "armv8.7-a"
    ];
    "armv9.3-a" = withInferiors [
      "armv9.2-a"
      "armv8.8-a"
    ];
    "armv9.4-a" = [ "armv9.3-a" ] ++ inferiors."armv9.3-a";

    # ARM
    cortex-a53 = [ "armv8-a" ];
    cortex-a72 = [ "armv8-a" ];
    cortex-a55 = [
      "armv8.2-a"
      "cortex-a53"
      "cortex-a72"
    ] ++ inferiors."armv8.2-a";
    cortex-a76 = [
      "armv8.2-a"
      "cortex-a53"
      "cortex-a72"
    ] ++ inferiors."armv8.2-a";

    # Ampere
    ampere1 = withInferiors [
      "armv8.6-a"
      "cortex-a55"
      "cortex-a76"
    ];
    ampere1a = [ "ampere1" ] ++ inferiors.ampere1;
    ampere1b = [ "ampere1a" ] ++ inferiors.ampere1a;

    # other
    armv5te = [ ];
    armv6 = [ ];
    armv7-a = [ ];
    mips32 = [ ];
    loongson2f = [ ];
  };
496
# One predicate per tracked CPU feature. Each takes a gcc.arch name and
# returns whether `features` lists the corresponding flag for it; archs that
# have no entry in `features` are treated as supporting nothing.
predicates =
  builtins.mapAttrs
    (_predicateName: flag: arch: builtins.elem flag (features.${arch} or [ ]))
    {
      sse3Support = "sse3";
      ssse3Support = "ssse3";
      sse4_1Support = "sse4_1";
      sse4_2Support = "sse4_2";
      # The predicate is spelled sse4_a, the flag itself is "sse4a".
      sse4_aSupport = "sse4a";
      avxSupport = "avx";
      avx2Support = "avx2";
      avx512Support = "avx512";
      aesSupport = "aes";
      fmaSupport = "fma";
      fma4Support = "fma4";
    };
514}