1{ lib }:
2
3rec {
4 # gcc.arch to its features (as in /proc/cpuinfo)
5 features = {
6 # x86_64 Generic
7 # Spec: https://gitlab.com/x86-psABIs/x86-64-ABI/
8 default = [ ];
9 x86-64 = [ ];
10 x86-64-v2 = [
11 "sse3"
12 "ssse3"
13 "sse4_1"
14 "sse4_2"
15 ];
16 x86-64-v3 = [
17 "sse3"
18 "ssse3"
19 "sse4_1"
20 "sse4_2"
21 "avx"
22 "avx2"
23 "fma"
24 ];
25 x86-64-v4 = [
26 "sse3"
27 "ssse3"
28 "sse4_1"
29 "sse4_2"
30 "avx"
31 "avx2"
32 "avx512"
33 "fma"
34 ];
35 # x86_64 Intel
36 nehalem = [
37 "sse3"
38 "ssse3"
39 "sse4_1"
40 "sse4_2"
41 ];
42 westmere = [
43 "sse3"
44 "ssse3"
45 "sse4_1"
46 "sse4_2"
47 ];
48 silvermont = [
49 "sse3"
50 "ssse3"
51 "sse4_1"
52 "sse4_2"
53 ];
54 sandybridge = [
55 "sse3"
56 "ssse3"
57 "sse4_1"
58 "sse4_2"
59 "avx"
60 ];
61 ivybridge = [
62 "sse3"
63 "ssse3"
64 "sse4_1"
65 "sse4_2"
66 "avx"
67 ];
68 haswell = [
69 "sse3"
70 "ssse3"
71 "sse4_1"
72 "sse4_2"
73 "avx"
74 "avx2"
75 "fma"
76 ];
77 broadwell = [
78 "sse3"
79 "ssse3"
80 "sse4_1"
81 "sse4_2"
82 "avx"
83 "avx2"
84 "fma"
85 ];
86 skylake = [
87 "sse3"
88 "ssse3"
89 "sse4_1"
90 "sse4_2"
91 "aes"
92 "avx"
93 "avx2"
94 "fma"
95 ];
96 skylake-avx512 = [
97 "sse3"
98 "ssse3"
99 "sse4_1"
100 "sse4_2"
101 "aes"
102 "avx"
103 "avx2"
104 "avx512"
105 "fma"
106 ];
107 cannonlake = [
108 "sse3"
109 "ssse3"
110 "sse4_1"
111 "sse4_2"
112 "aes"
113 "avx"
114 "avx2"
115 "avx512"
116 "fma"
117 ];
118 icelake-client = [
119 "sse3"
120 "ssse3"
121 "sse4_1"
122 "sse4_2"
123 "aes"
124 "avx"
125 "avx2"
126 "avx512"
127 "fma"
128 ];
129 icelake-server = [
130 "sse3"
131 "ssse3"
132 "sse4_1"
133 "sse4_2"
134 "aes"
135 "avx"
136 "avx2"
137 "avx512"
138 "fma"
139 ];
140 cascadelake = [
141 "sse3"
142 "ssse3"
143 "sse4_1"
144 "sse4_2"
145 "aes"
146 "avx"
147 "avx2"
148 "avx512"
149 "fma"
150 ];
151 cooperlake = [
152 "sse3"
153 "ssse3"
154 "sse4_1"
155 "sse4_2"
156 "aes"
157 "avx"
158 "avx2"
159 "avx512"
160 "fma"
161 ];
162 tigerlake = [
163 "sse3"
164 "ssse3"
165 "sse4_1"
166 "sse4_2"
167 "aes"
168 "avx"
169 "avx2"
170 "avx512"
171 "fma"
172 ];
173 alderlake = [
174 "sse3"
175 "ssse3"
176 "sse4_1"
177 "sse4_2"
178 "aes"
179 "avx"
180 "avx2"
181 "fma"
182 ];
183 sapphirerapids = [
184 "sse3"
185 "ssse3"
186 "sse4_1"
187 "sse4_2"
188 "aes"
189 "avx"
190 "avx2"
191 "avx512"
192 "fma"
193 ];
194 emeraldrapids = [
195 "sse3"
196 "ssse3"
197 "sse4_1"
198 "sse4_2"
199 "aes"
200 "avx"
201 "avx2"
202 "avx512"
203 "fma"
204 ];
205 sierraforest = [
206 "sse3"
207 "ssse3"
208 "sse4_1"
209 "sse4_2"
210 "aes"
211 "avx"
212 "avx2"
213 "fma"
214 ];
215 # x86_64 AMD
216 btver1 = [
217 "sse3"
218 "ssse3"
219 "sse4_1"
220 "sse4_2"
221 ];
222 btver2 = [
223 "sse3"
224 "ssse3"
225 "sse4_1"
226 "sse4_2"
227 "aes"
228 "avx"
229 ];
230 bdver1 = [
231 "sse3"
232 "ssse3"
233 "sse4_1"
234 "sse4_2"
235 "sse4a"
236 "aes"
237 "avx"
238 "fma"
239 "fma4"
240 ];
241 bdver2 = [
242 "sse3"
243 "ssse3"
244 "sse4_1"
245 "sse4_2"
246 "sse4a"
247 "aes"
248 "avx"
249 "fma"
250 "fma4"
251 ];
252 bdver3 = [
253 "sse3"
254 "ssse3"
255 "sse4_1"
256 "sse4_2"
257 "sse4a"
258 "aes"
259 "avx"
260 "fma"
261 "fma4"
262 ];
263 bdver4 = [
264 "sse3"
265 "ssse3"
266 "sse4_1"
267 "sse4_2"
268 "sse4a"
269 "aes"
270 "avx"
271 "avx2"
272 "fma"
273 "fma4"
274 ];
275 znver1 = [
276 "sse3"
277 "ssse3"
278 "sse4_1"
279 "sse4_2"
280 "sse4a"
281 "aes"
282 "avx"
283 "avx2"
284 "fma"
285 ];
286 znver2 = [
287 "sse3"
288 "ssse3"
289 "sse4_1"
290 "sse4_2"
291 "sse4a"
292 "aes"
293 "avx"
294 "avx2"
295 "fma"
296 ];
297 znver3 = [
298 "sse3"
299 "ssse3"
300 "sse4_1"
301 "sse4_2"
302 "sse4a"
303 "aes"
304 "avx"
305 "avx2"
306 "fma"
307 ];
308 znver4 = [
309 "sse3"
310 "ssse3"
311 "sse4_1"
312 "sse4_2"
313 "sse4a"
314 "aes"
315 "avx"
316 "avx2"
317 "avx512"
318 "fma"
319 ];
320 znver5 = [
321 "sse3"
322 "ssse3"
323 "sse4_1"
324 "sse4_2"
325 "sse4a"
326 "aes"
327 "avx"
328 "avx2"
329 "avx512"
330 "fma"
331 ];
332 # LoongArch64
333 # https://github.com/loongson/la-toolchain-conventions
334 loongarch64 = [
335 "fpu64"
336 ];
337 la464 = [
338 "fpu64"
339 "lsx"
340 "lasx"
341 ];
342 la664 = [
343 "fpu64"
344 "lsx"
345 "lasx"
346 "div32"
347 "frecipe"
348 "lam-bh"
349 "lamcas"
350 "ld-seq-sa"
351 ];
352 "la64v1.0" = [
353 "fpu64"
354 "lsx"
355 ];
356 "la64v1.1" = [
357 "fpu64"
358 "lsx"
359 "div32"
360 "frecipe"
361 "lam-bh"
362 "lamcas"
363 "ld-seq-sa"
364 ];
365 # other
366 armv5te = [ ];
367 armv6 = [ ];
368 armv7-a = [ ];
369 armv8-a = [ ];
370 mips32 = [ ];
371 loongson2f = [ ];
372 };
373
374 # a superior CPU has all the features of an inferior and is able to build and test code for it
375 inferiors =
376 let
377 withInferiors = archs: lib.unique (archs ++ lib.flatten (lib.attrVals archs inferiors));
378 in
379 {
380 # x86_64 Generic
381 default = [ ];
382 x86-64 = [ ];
383 x86-64-v2 = [ "x86-64" ];
384 x86-64-v3 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
385 x86-64-v4 = [ "x86-64-v3" ] ++ inferiors.x86-64-v3;
386
387 # x86_64 Intel
388 # https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
389 nehalem = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
390 westmere = [ "nehalem" ] ++ inferiors.nehalem;
391 sandybridge = [ "westmere" ] ++ inferiors.westmere;
392 ivybridge = [ "sandybridge" ] ++ inferiors.sandybridge;
393
394 haswell = lib.unique (
395 [
396 "ivybridge"
397 "x86-64-v3"
398 ]
399 ++ inferiors.ivybridge
400 ++ inferiors.x86-64-v3
401 );
402 broadwell = [ "haswell" ] ++ inferiors.haswell;
403 skylake = [ "broadwell" ] ++ inferiors.broadwell;
404
405 skylake-avx512 = lib.unique (
406 [
407 "skylake"
408 "x86-64-v4"
409 ]
410 ++ inferiors.skylake
411 ++ inferiors.x86-64-v4
412 );
413 cannonlake = [ "skylake-avx512" ] ++ inferiors.skylake-avx512;
414 icelake-client = [ "cannonlake" ] ++ inferiors.cannonlake;
415 icelake-server = [ "icelake-client" ] ++ inferiors.icelake-client;
416 cascadelake = [ "cannonlake" ] ++ inferiors.cannonlake;
417 cooperlake = [ "cascadelake" ] ++ inferiors.cascadelake;
418 tigerlake = [ "icelake-server" ] ++ inferiors.icelake-server;
419 sapphirerapids = [ "tigerlake" ] ++ inferiors.tigerlake;
420 emeraldrapids = [ "sapphirerapids" ] ++ inferiors.sapphirerapids;
421
422 alderlake = [ "skylake" ] ++ inferiors.skylake;
423 sierraforest = [ "alderlake" ] ++ inferiors.alderlake;
424
425 # x86_64 AMD
426 # TODO: fill in specific CPU architecture inferiors
427 btver1 = [ "x86-64" ];
428 btver2 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
429 bdver1 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
430 bdver2 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
431 bdver3 = [ "x86-64-v2" ] ++ inferiors.x86-64-v2;
432 bdver4 = [ "x86-64-v3" ] ++ inferiors.x86-64-v3;
433 # Regarding `skylake` as inferior of `znver1`, there are reports of
434 # successful usage by Gentoo users and Phoronix benchmarking of different
435 # `-march` targets.
436 #
437 # The GCC documentation on extensions used and wikichip documentation
438 # regarding supperted extensions on znver1 and skylake was used to create
439 # this partial order.
440 #
441 # Note:
442 #
443 # - The successors of `skylake` (`cannonlake`, `icelake`, etc) use `avx512`
444 # which no current AMD Zen michroarch support.
445 # - `znver1` uses `ABM`, `CLZERO`, `CX16`, `MWAITX`, and `SSE4A` which no
446 # current Intel microarch support.
447 #
448 # https://www.phoronix.com/scan.php?page=article&item=amd-znver3-gcc11&num=1
449 # https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
450 # https://en.wikichip.org/wiki/amd/microarchitectures/zen
451 # https://en.wikichip.org/wiki/intel/microarchitectures/skylake
452 znver1 = [ "skylake" ] ++ inferiors.skylake; # Includes haswell and x86-64-v3
453 znver2 = [ "znver1" ] ++ inferiors.znver1;
454 znver3 = [ "znver2" ] ++ inferiors.znver2;
455 znver4 = lib.unique (
456 [
457 "znver3"
458 "x86-64-v4"
459 ]
460 ++ inferiors.znver3
461 ++ inferiors.x86-64-v4
462 );
463 znver5 = [ "znver4" ] ++ inferiors.znver4;
464
465 # ARM64 (AArch64)
466 armv8-a = [ ];
467 "armv8.1-a" = [ "armv8-a" ];
468 "armv8.2-a" = [ "armv8.1-a" ] ++ inferiors."armv8.1-a";
469 "armv8.3-a" = [ "armv8.2-a" ] ++ inferiors."armv8.2-a";
470 "armv8.4-a" = [ "armv8.3-a" ] ++ inferiors."armv8.3-a";
471 "armv8.5-a" = [ "armv8.4-a" ] ++ inferiors."armv8.4-a";
472 "armv8.6-a" = [ "armv8.5-a" ] ++ inferiors."armv8.5-a";
473 "armv8.7-a" = [ "armv8.6-a" ] ++ inferiors."armv8.6-a";
474 "armv8.8-a" = [ "armv8.7-a" ] ++ inferiors."armv8.7-a";
475 "armv8.9-a" = [ "armv8.8-a" ] ++ inferiors."armv8.8-a";
476 armv9-a = [ "armv8.5-a" ] ++ inferiors."armv8.5-a";
477 "armv9.1-a" = [
478 "armv9-a"
479 "armv8.6-a"
480 ]
481 ++ inferiors."armv8.6-a";
482 "armv9.2-a" = lib.unique (
483 [
484 "armv9.1-a"
485 "armv8.7-a"
486 ]
487 ++ inferiors."armv9.1-a"
488 ++ inferiors."armv8.7-a"
489 );
490 "armv9.3-a" = lib.unique (
491 [
492 "armv9.2-a"
493 "armv8.8-a"
494 ]
495 ++ inferiors."armv9.2-a"
496 ++ inferiors."armv8.8-a"
497 );
498 "armv9.4-a" = [ "armv9.3-a" ] ++ inferiors."armv9.3-a";
499
500 # ARM
501 cortex-a53 = [ "armv8-a" ];
502 cortex-a72 = [ "armv8-a" ];
503 cortex-a55 = [
504 "armv8.2-a"
505 "cortex-a53"
506 "cortex-a72"
507 ]
508 ++ inferiors."armv8.2-a";
509 cortex-a76 = [
510 "armv8.2-a"
511 "cortex-a53"
512 "cortex-a72"
513 ]
514 ++ inferiors."armv8.2-a";
515
516 # Ampere
517 ampere1 = withInferiors [
518 "armv8.6-a"
519 "cortex-a55"
520 "cortex-a76"
521 ];
522 ampere1a = [ "ampere1" ] ++ inferiors.ampere1;
523 ampere1b = [ "ampere1a" ] ++ inferiors.ampere1a;
524
525 # LoongArch64
526 loongarch64 = [ ];
527 "la64v1.0" = [ "loongarch64" ];
528 la464 = [ "la64v1.0" ] ++ inferiors."la64v1.0";
529 "la64v1.1" = [ "la64v1.0" ] ++ inferiors."la64v1.0";
530 la664 = withInferiors [
531 "la464"
532 "la64v1.1"
533 ];
534
535 # other
536 armv5te = [ ];
537 armv6 = [ ];
538 armv7-a = [ ];
539 mips32 = [ ];
540 loongson2f = [ ];
541 };
542
543 /**
544 Check whether one GCC architecture has the the other inferior architecture.
545
546 # Inputs
547
548 `arch1`
549 : GCC architecture in string
550
551 `arch2`
552 : GCC architecture in string
553
554 # Type
555
556 ```
557 hasInferior :: string -> string -> bool
558 ```
559
560 # Examples
561 ::: {.example}
562 ## `lib.systems.architectures.hasInferior` usage example
563
564 ```nix
565 hasInferior "x86-64-v3" "x86-64"
566 => true
567 hasInferior "x86-64" "x86-64-v3"
568 => false
569 hasInferior "x86-64" "x86-64"
570 => false
571 ```
572 */
573 hasInferior = arch1: arch2: inferiors ? ${arch1} && lib.elem arch2 inferiors.${arch1};
574
575 /**
576 Check whether one GCC architecture can execute the other.
577
578 # Inputs
579
580 `arch1`
581 : GCC architecture in string
582
583 `arch2`
584 : GCC architecture in string
585
586 # Type
587
588 ```
589 canExecute :: string -> string -> bool
590 ```
591
592 # Examples
593 ::: {.example}
594 ## `lib.systems.architectures.canExecute` usage example
595
596 ```nix
597 canExecute "x86-64" "x86-64-v3"
598 => false
599 canExecute "x86-64-v3" "x86-64"
600 => true
601 canExecute "x86-64" "x86-64"
602 => true
603 ```
604 */
605 canExecute = arch1: arch2: arch1 == arch2 || hasInferior arch1 arch2;
606
607 predicates =
608 let
609 featureSupport = feature: x: builtins.elem feature features.${x} or [ ];
610 in
611 {
612 sse3Support = featureSupport "sse3";
613 ssse3Support = featureSupport "ssse3";
614 sse4_1Support = featureSupport "sse4_1";
615 sse4_2Support = featureSupport "sse4_2";
616 sse4_aSupport = featureSupport "sse4a";
617 avxSupport = featureSupport "avx";
618 avx2Support = featureSupport "avx2";
619 avx512Support = featureSupport "avx512";
620 aesSupport = featureSupport "aes";
621 fmaSupport = featureSupport "fma";
622 fma4Support = featureSupport "fma4";
623 lsxSupport = featureSupport "lsx";
624 lasxSupport = featureSupport "lasx";
625 };
626}