1{
2 config,
3 lib,
4 pkgs,
5 ...
6}:
7with lib;
8
9# See http://christophe.varoqui.free.fr/usage.html and
10# https://github.com/opensvc/multipath-tools/blob/master/multipath/multipath.conf.5
11
12let
  # Shorthand for the values the user assigned under `services.multipath`.
  cfg = config.services.multipath;
14
15 indentLines =
16 n: str:
17 concatStringsSep "\n" (
18 map (line: "${fixedWidthString n " " " "}${line}") (filter (x: x != "") (splitString "\n" str))
19 );
20
21 addCheckDesc =
22 desc: elemType: check:
23 types.addCheck elemType check // { description = "${elemType.description} (with check: ${desc})"; };
24 hexChars = stringToCharacters "0123456789abcdef";
25 isHexString = s: all (c: elem c hexChars) (stringToCharacters (toLower s));
26 hexStr = addCheckDesc "hexadecimal string" types.str isHexString;
27
28in
29{
30
31 options.services.multipath = with types; {
32
    # Master switch: everything under `config` below is guarded by this option.
    enable = mkEnableOption "the device mapper multipath (DM-MP) daemon";
34
    # Package providing the multipath tools (`multipath-tools` from `pkgs`
    # unless overridden); it is added to systemd.packages and
    # environment.systemPackages when the service is enabled.
    package = mkPackageOption pkgs "multipath-tools" { };
36
37 devices = mkOption {
38 default = [ ];
39 example = literalExpression ''
40 [
41 {
42 vendor = "\"COMPELNT\"";
43 product = "\"Compellent Vol\"";
44 path_checker = "tur";
45 no_path_retry = "queue";
46 max_sectors_kb = 256;
47 }, ...
48 ]
49 '';
50 description = ''
51 This option allows you to define arrays for use in multipath
52 groups.
53 '';
54 type = listOf (submodule {
55 options = {
56
57 vendor = mkOption {
58 type = str;
59 example = "COMPELNT";
60 description = "Regular expression to match the vendor name";
61 };
62
63 product = mkOption {
64 type = str;
65 example = "Compellent Vol";
66 description = "Regular expression to match the product name";
67 };
68
69 revision = mkOption {
70 type = nullOr str;
71 default = null;
72 description = "Regular expression to match the product revision";
73 };
74
75 product_blacklist = mkOption {
76 type = nullOr str;
77 default = null;
78 description = "Products with the given vendor matching this string are blacklisted";
79 };
80
81 alias_prefix = mkOption {
82 type = nullOr str;
83 default = null;
84 description = "The user_friendly_names prefix to use for this device type, instead of the default mpath";
85 };
86
87 vpd_vendor = mkOption {
88 type = nullOr str;
89 default = null;
90 description = "The vendor specific vpd page information, using the vpd page abbreviation";
91 };
92
93 hardware_handler = mkOption {
94 type = nullOr (enum [
95 "emc"
96 "rdac"
97 "hp_sw"
98 "alua"
99 "ana"
100 ]);
101 default = null;
102 description = "The hardware handler to use for this device type";
103 };
104
105 # Optional arguments
106 path_grouping_policy = mkOption {
107 type = nullOr (enum [
108 "failover"
109 "multibus"
110 "group_by_serial"
111 "group_by_prio"
112 "group_by_node_name"
113 ]);
114 default = null; # real default: "failover"
115 description = "The default path grouping policy to apply to unspecified multipaths";
116 };
117
118 uid_attribute = mkOption {
119 type = nullOr str;
120 default = null;
121 description = "The udev attribute providing a unique path identifier (WWID)";
122 };
123
124 getuid_callout = mkOption {
125 type = nullOr str;
126 default = null;
127 description = ''
128 (Superseded by uid_attribute) The default program and args to callout
129 to obtain a unique path identifier. Should be specified with an absolute path.
130 '';
131 };
132
133 path_selector = mkOption {
134 type = nullOr (enum [
135 ''"round-robin 0"''
136 ''"queue-length 0"''
137 ''"service-time 0"''
138 ''"historical-service-time 0"''
139 ]);
140 default = null; # real default: "service-time 0"
141 description = "The default path selector algorithm to use; they are offered by the kernel multipath target";
142 };
143
144 path_checker = mkOption {
145 type = enum [
146 "readsector0"
147 "tur"
148 "emc_clariion"
149 "hp_sw"
150 "rdac"
151 "directio"
152 "cciss_tur"
153 "none"
154 ];
155 default = "tur";
156 description = "The default method used to determine the paths state";
157 };
158
159 prio = mkOption {
160 type = nullOr (enum [
161 "none"
162 "const"
163 "sysfs"
164 "emc"
165 "alua"
166 "ontap"
167 "rdac"
168 "hp_sw"
169 "hds"
170 "random"
171 "weightedpath"
172 "path_latency"
173 "ana"
174 "datacore"
175 "iet"
176 ]);
177 default = null; # real default: "const"
178 description = "The name of the path priority routine";
179 };
180
181 prio_args = mkOption {
182 type = nullOr str;
183 default = null;
184 description = "Arguments to pass to to the prio function";
185 };
186
187 features = mkOption {
188 type = nullOr str;
189 default = null;
190 description = "Specify any device-mapper features to be used";
191 };
192
193 failback = mkOption {
194 type = nullOr str;
195 default = null; # real default: "manual"
196 description = "Tell multipathd how to manage path group failback. Quote integers as strings";
197 };
198
199 rr_weight = mkOption {
200 type = nullOr (enum [
201 "priorities"
202 "uniform"
203 ]);
204 default = null; # real default: "uniform"
205 description = ''
206 If set to priorities the multipath configurator will assign path weights
207 as "path prio * rr_min_io".
208 '';
209 };
210
211 no_path_retry = mkOption {
212 type = nullOr str;
213 default = null; # real default: "fail"
214 description = "Specify what to do when all paths are down. Quote integers as strings";
215 };
216
217 rr_min_io = mkOption {
218 type = nullOr int;
219 default = null; # real default: 1000
220 description = ''
221 Number of I/O requests to route to a path before switching to the next in the
222 same path group. This is only for Block I/O (BIO) based multipath and
223 only apply to round-robin path_selector.
224 '';
225 };
226
227 rr_min_io_rq = mkOption {
228 type = nullOr int;
229 default = null; # real default: 1
230 description = ''
231 Number of I/O requests to route to a path before switching to the next in the
232 same path group. This is only for Request based multipath and
233 only apply to round-robin path_selector.
234 '';
235 };
236
237 fast_io_fail_tmo = mkOption {
238 type = nullOr str;
239 default = null; # real default: 5
240 description = ''
241 Specify the number of seconds the SCSI layer will wait after a problem has been
242 detected on a FC remote port before failing I/O to devices on that remote port.
243 This should be smaller than dev_loss_tmo. Setting this to "off" will disable
244 the timeout. Quote integers as strings.
245 '';
246 };
247
248 dev_loss_tmo = mkOption {
249 type = nullOr str;
250 default = null; # real default: 600
251 description = ''
252 Specify the number of seconds the SCSI layer will wait after a problem has
253 been detected on a FC remote port before removing it from the system. This
254 can be set to "infinity" which sets it to the max value of 2147483647
255 seconds, or 68 years. It will be automatically adjusted to the overall
256 retry interval no_path_retry * polling_interval
257 if a number of retries is given with no_path_retry and the
258 overall retry interval is longer than the specified dev_loss_tmo value.
259 The Linux kernel will cap this value to 600 if fast_io_fail_tmo
260 is not set.
261 '';
262 };
263
264 flush_on_last_del = mkOption {
265 type = nullOr (enum [
266 "yes"
267 "no"
268 ]);
269 default = null; # real default: "no"
270 description = ''
271 If set to "yes" multipathd will disable queueing when the last path to a
272 device has been deleted.
273 '';
274 };
275
276 user_friendly_names = mkOption {
277 type = nullOr (enum [
278 "yes"
279 "no"
280 ]);
281 default = null; # real default: "no"
282 description = ''
283 If set to "yes", using the bindings file /etc/multipath/bindings
284 to assign a persistent and unique alias to the multipath, in the
285 form of mpath. If set to "no" use the WWID as the alias. In either
286 case this be will be overridden by any specific aliases in the
287 multipaths section.
288 '';
289 };
290
291 detect_prio = mkOption {
292 type = nullOr (enum [
293 "yes"
294 "no"
295 ]);
296 default = null; # real default: "yes"
297 description = ''
298 If set to "yes", multipath will try to detect if the device supports
299 SCSI-3 ALUA. If so, the device will automatically use the sysfs
300 prioritizer if the required sysf attributes access_state and
301 preferred_path are supported, or the alua prioritizer if not. If set
302 to "no", the prioritizer will be selected as usual.
303 '';
304 };
305
306 detect_checker = mkOption {
307 type = nullOr (enum [
308 "yes"
309 "no"
310 ]);
311 default = null; # real default: "yes"
312 description = ''
313 If set to "yes", multipath will try to detect if the device supports
314 SCSI-3 ALUA. If so, the device will automatically use the tur checker.
315 If set to "no", the checker will be selected as usual.
316 '';
317 };
318
319 deferred_remove = mkOption {
320 type = nullOr (enum [
321 "yes"
322 "no"
323 ]);
324 default = null; # real default: "no"
325 description = ''
326 If set to "yes", multipathd will do a deferred remove instead of a
327 regular remove when the last path device has been deleted. This means
328 that if the multipath device is still in use, it will be freed when
329 the last user closes it. If path is added to the multipath device
330 before the last user closes it, the deferred remove will be canceled.
331 '';
332 };
333
334 san_path_err_threshold = mkOption {
335 type = nullOr str;
336 default = null;
337 description = ''
338 If set to a value greater than 0, multipathd will watch paths and check
339 how many times a path has been failed due to errors.If the number of
340 failures on a particular path is greater then the san_path_err_threshold,
341 then the path will not reinstate till san_path_err_recovery_time. These
342 path failures should occur within a san_path_err_forget_rate checks, if
343 not we will consider the path is good enough to reinstantate.
344 '';
345 };
346
347 san_path_err_forget_rate = mkOption {
348 type = nullOr str;
349 default = null;
350 description = ''
351 If set to a value greater than 0, multipathd will check whether the path
352 failures has exceeded the san_path_err_threshold within this many checks
353 i.e san_path_err_forget_rate. If so we will not reinstante the path till
354 san_path_err_recovery_time.
355 '';
356 };
357
358 san_path_err_recovery_time = mkOption {
359 type = nullOr str;
360 default = null;
361 description = ''
362 If set to a value greater than 0, multipathd will make sure that when
363 path failures has exceeded the san_path_err_threshold within
364 san_path_err_forget_rate then the path will be placed in failed state
365 for san_path_err_recovery_time duration. Once san_path_err_recovery_time
366 has timeout we will reinstante the failed path. san_path_err_recovery_time
367 value should be in secs.
368 '';
369 };
370
371 marginal_path_err_sample_time = mkOption {
372 type = nullOr int;
373 default = null;
374 description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
375 };
376
377 marginal_path_err_rate_threshold = mkOption {
378 type = nullOr int;
379 default = null;
380 description = "The error rate threshold as a permillage (1/1000)";
381 };
382
383 marginal_path_err_recheck_gap_time = mkOption {
384 type = nullOr str;
385 default = null;
386 description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
387 };
388
389 marginal_path_double_failed_time = mkOption {
390 type = nullOr str;
391 default = null;
392 description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
393 };
394
395 delay_watch_checks = mkOption {
396 type = nullOr str;
397 default = null;
398 description = "This option is deprecated, and mapped to san_path_err_forget_rate";
399 };
400
401 delay_wait_checks = mkOption {
402 type = nullOr str;
403 default = null;
404 description = "This option is deprecated, and mapped to san_path_err_recovery_time";
405 };
406
407 skip_kpartx = mkOption {
408 type = nullOr (enum [
409 "yes"
410 "no"
411 ]);
412 default = null; # real default: "no"
413 description = "If set to yes, kpartx will not automatically create partitions on the device";
414 };
415
416 max_sectors_kb = mkOption {
417 type = nullOr int;
418 default = null;
419 description = "Sets the max_sectors_kb device parameter on all path devices and the multipath device to the specified value";
420 };
421
422 ghost_delay = mkOption {
423 type = nullOr int;
424 default = null;
425 description = "Sets the number of seconds that multipath will wait after creating a device with only ghost paths before marking it ready for use in systemd";
426 };
427
428 all_tg_pt = mkOption {
429 type = nullOr str;
430 default = null;
431 description = "Set the 'all targets ports' flag when registering keys with mpathpersist";
432 };
433
434 };
435 });
436 };
437
438 defaults = mkOption {
439 type = nullOr str;
440 default = null;
441 description = ''
442 This section defines default values for attributes which are used
443 whenever no values are given in the appropriate device or multipath
444 sections.
445 '';
446 };
447
448 blacklist = mkOption {
449 type = nullOr str;
450 default = null;
451 description = ''
452 This section defines which devices should be excluded from the
453 multipath topology discovery.
454 '';
455 };
456
457 blacklist_exceptions = mkOption {
458 type = nullOr str;
459 default = null;
460 description = ''
461 This section defines which devices should be included in the
462 multipath topology discovery, despite being listed in the
463 blacklist section.
464 '';
465 };
466
467 overrides = mkOption {
468 type = nullOr str;
469 default = null;
470 description = ''
471 This section defines values for attributes that should override the
472 device-specific settings for all devices.
473 '';
474 };
475
476 extraConfig = mkOption {
477 type = nullOr str;
478 default = null;
479 description = "Lines to append to default multipath.conf";
480 };
481
482 extraConfigFile = mkOption {
483 type = nullOr str;
484 default = null;
485 description = "Append an additional file's contents to /etc/multipath.conf";
486 };
487
488 pathGroups = mkOption {
489 example = literalExpression ''
490 [
491 {
492 wwid = "360080e500043b35c0123456789abcdef";
493 alias = 10001234;
494 array = "bigarray.example.com";
495 fsType = "zfs"; # optional
496 options = "ro"; # optional
497 }, ...
498 ]
499 '';
500 description = ''
501 This option allows you to define multipath groups as described
502 in http://christophe.varoqui.free.fr/usage.html.
503 '';
504 type = listOf (submodule {
505 options = {
506
507 alias = mkOption {
508 type = int;
509 example = 1001234;
510 description = "The name of the multipath device";
511 };
512
513 wwid = mkOption {
514 type = hexStr;
515 example = "360080e500043b35c0123456789abcdef";
516 description = "The identifier for the multipath device";
517 };
518
519 array = mkOption {
520 type = str;
521 default = null;
522 example = "bigarray.example.com";
523 description = "The DNS name of the storage array";
524 };
525
526 fsType = mkOption {
527 type = nullOr str;
528 default = null;
529 example = "zfs";
530 description = "Type of the filesystem";
531 };
532
533 options = mkOption {
534 type = nullOr str;
535 default = null;
536 example = "ro";
537 description = "Options used to mount the file system";
538 };
539
540 };
541 });
542 };
543
544 };
545
546 config = mkIf cfg.enable {
547 environment.etc."multipath.conf".text =
548 let
549 inherit (cfg)
550 defaults
551 blacklist
552 blacklist_exceptions
553 overrides
554 ;
555
556 mkDeviceBlock =
557 cfg:
558 let
559 nonNullCfg = lib.filterAttrs (k: v: v != null) cfg;
560 attrs = lib.mapAttrsToList (name: value: " ${name} ${toString value}") nonNullCfg;
561 in
562 ''
563 device {
564 ${lib.concatStringsSep "\n" attrs}
565 }
566 '';
567 devices = lib.concatMapStringsSep "\n" mkDeviceBlock cfg.devices;
568
569 mkMultipathBlock = m: ''
570 multipath {
571 wwid ${m.wwid}
572 alias ${toString m.alias}
573 }
574 '';
575 multipaths = lib.concatMapStringsSep "\n" mkMultipathBlock cfg.pathGroups;
576
577 in
578 ''
579 devices {
580 ${indentLines 2 devices}
581 }
582
583 ${optionalString (defaults != null) ''
584 defaults {
585 ${indentLines 2 defaults}
586 }
587 ''}
588 ${optionalString (blacklist != null) ''
589 blacklist {
590 ${indentLines 2 blacklist}
591 }
592 ''}
593 ${optionalString (blacklist_exceptions != null) ''
594 blacklist_exceptions {
595 ${indentLines 2 blacklist_exceptions}
596 }
597 ''}
598 ${optionalString (overrides != null) ''
599 overrides {
600 ${indentLines 2 overrides}
601 }
602 ''}
603 multipaths {
604 ${indentLines 2 multipaths}
605 }
606 '';
607
608 systemd.packages = [ cfg.package ];
609
610 environment.systemPackages = [ cfg.package ];
611 boot.kernelModules = [
612 "dm-multipath"
613 "dm-service-time"
614 ];
615
616 # We do not have systemd in stage-1 boot so must invoke `multipathd`
617 # with the `-1` argument which disables systemd calls. Invoke `multipath`
618 # to display the multipath mappings in the output of `journalctl -b`.
619 # TODO: Implement for systemd stage 1
620 boot.initrd.kernelModules = [
621 "dm-multipath"
622 "dm-service-time"
623 ];
624 boot.initrd.postDeviceCommands = mkIf (!config.boot.initrd.systemd.enable) ''
625 modprobe -a dm-multipath dm-service-time
626 multipathd -s
627 (set -x && sleep 1 && multipath -ll)
628 '';
629 };
630}