nixos/hadoop: add hadoop module (hdfs, yarn)

Changed files: +352
+8
nixos/modules/misc/ids.nix
···
restic = 291;
openvpn = 292;
meguca = 293;
+ yarn = 294;
+ hdfs = 295;
+ mapred = 296;
+ hadoop = 297;
# When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399!
···
restic = 291;
openvpn = 292;
meguca = 293;
+ yarn = 294;
+ hdfs = 295;
+ mapred = 296;
+ hadoop = 297;
# When adding a gid, make sure it doesn't match an existing
# uid. Users and groups with the same name should have equal
+1
nixos/modules/module-list.nix
···
./services/backup/rsnapshot.nix
./services/backup/tarsnap.nix
./services/backup/znapzend.nix
+ ./services/cluster/hadoop/default.nix
./services/cluster/kubernetes/default.nix
./services/cluster/kubernetes/dns.nix
./services/cluster/kubernetes/dashboard.nix
+31
nixos/modules/services/cluster/hadoop/conf.nix
···
+ { hadoop, pkgs }:
+ let
+   propertyXml = name: value: ''
+     <property>
+       <name>${name}</name>
+       <value>${builtins.toString value}</value>
+     </property>
+   '';
+   siteXml = fileName: properties: pkgs.writeTextDir fileName ''
+     <?xml version="1.0" encoding="UTF-8" standalone="no"?>
+     <!-- generated by NixOS -->
+     <configuration>
+       ${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
+     </configuration>
+   '';
+   userFunctions = ''
+     hadoop_verify_logdir() {
+       echo Skipping verification of log directory
+     }
+   '';
+ in
+ pkgs.buildEnv {
+   name = "hadoop-conf";
+   paths = [
+     (siteXml "core-site.xml" hadoop.coreSite)
+     (siteXml "hdfs-site.xml" hadoop.hdfsSite)
+     (siteXml "mapred-site.xml" hadoop.mapredSite)
+     (siteXml "yarn-site.xml" hadoop.yarnSite)
+     (pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions)
+   ];
+ }
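
Inspection sketch (not part of the diff): since conf.nix only reads the four *Site attributes from its `hadoop` argument, the generated configuration directory can be built stand-alone from a nixpkgs checkout, e.g.:

  nix-build -E 'import ./nixos/modules/services/cluster/hadoop/conf.nix {
    pkgs = import <nixpkgs> {};
    hadoop = {
      coreSite = { "fs.defaultFS" = "hdfs://localhost"; };
      hdfsSite = {}; mapredSite = {}; yarnSite = {};
    };
  }'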
+63
nixos/modules/services/cluster/hadoop/default.nix
···
+ { config, lib, pkgs, ...}:
+ let
+   cfg = config.services.hadoop;
+   hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+ in
+ with lib;
+ {
+   imports = [ ./yarn.nix ./hdfs.nix ];
+
+   options.services.hadoop = {
+     coreSite = mkOption {
+       default = {};
+       example = {
+         "fs.defaultFS" = "hdfs://localhost";
+       };
+       description = "Hadoop core-site.xml definition";
+     };
+
+     hdfsSite = mkOption {
+       default = {};
+       example = {
+         "dfs.nameservices" = "namenode1";
+       };
+       description = "Hadoop hdfs-site.xml definition";
+     };
+
+     mapredSite = mkOption {
+       default = {};
+       example = {
+         "mapreduce.map.cpu.vcores" = "1";
+       };
+       description = "Hadoop mapred-site.xml definition";
+     };
+
+     yarnSite = mkOption {
+       default = {};
+       example = {
+         "yarn.resourcemanager.ha.id" = "resourcemanager1";
+       };
+       description = "Hadoop yarn-site.xml definition";
+     };
+
+     package = mkOption {
+       type = types.package;
+       default = pkgs.hadoop;
+       defaultText = "pkgs.hadoop";
+       example = literalExample "pkgs.hadoop";
+       description = "Hadoop package to use.";
+     };
+   };
+
+   config = mkMerge [
+     (mkIf (builtins.hasAttr "yarn" config.users.extraUsers ||
+            builtins.hasAttr "hdfs" config.users.extraUsers) {
+       users.extraGroups.hadoop = {
+         gid = config.ids.gids.hadoop;
+       };
+     })
+   ];
+ }
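
Usage sketch (illustrative values, not part of the diff): a machine sets the shared site files through these options, while the daemons themselves are enabled via the hdfs.nix/yarn.nix submodules below.

  services.hadoop = {
    package = pkgs.hadoop;
    coreSite = { "fs.defaultFS" = "hdfs://localhost"; };
    hdfsSite = { "dfs.replication" = 1; };
  };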
+73
nixos/modules/services/cluster/hadoop/hdfs.nix
···
+ { config, lib, pkgs, ...}:
+ let
+   cfg = config.services.hadoop;
+   hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+ in
+ with lib;
+ {
+   options.services.hadoop.hdfs = {
+     namenode.enabled = mkOption {
+       type = types.bool;
+       default = false;
+       description = ''
+         Whether to run the Hadoop HDFS NameNode
+       '';
+     };
+     datanode.enabled = mkOption {
+       type = types.bool;
+       default = false;
+       description = ''
+         Whether to run the Hadoop HDFS DataNode
+       '';
+     };
+   };
+
+   config = mkMerge [
+     (mkIf cfg.hdfs.namenode.enabled {
+       systemd.services."hdfs-namenode" = {
+         description = "Hadoop HDFS NameNode";
+         wantedBy = [ "multi-user.target" ];
+
+         environment = {
+           HADOOP_HOME = "${cfg.package}";
+         };
+
+         preStart = ''
+           ${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
+         '';
+
+         serviceConfig = {
+           User = "hdfs";
+           SyslogIdentifier = "hdfs-namenode";
+           ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
+         };
+       };
+     })
+     (mkIf cfg.hdfs.datanode.enabled {
+       systemd.services."hdfs-datanode" = {
+         description = "Hadoop HDFS DataNode";
+         wantedBy = [ "multi-user.target" ];
+
+         environment = {
+           HADOOP_HOME = "${cfg.package}";
+         };
+
+         serviceConfig = {
+           User = "hdfs";
+           SyslogIdentifier = "hdfs-datanode";
+           ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
+         };
+       };
+     })
+     (mkIf (cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled) {
+       users.extraUsers.hdfs = {
+         description = "Hadoop HDFS user";
+         group = "hadoop";
+         uid = config.ids.uids.hdfs;
+       };
+     })
+   ];
+ }
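
Usage sketch (single machine, illustrative values): enabling both daemons creates the hdfs user, pulls in the hadoop group from default.nix, and starts two systemd units.

  services.hadoop = {
    hdfs.namenode.enabled = true;
    hdfs.datanode.enabled = true;
    coreSite = { "fs.defaultFS" = "hdfs://localhost:8020"; };
  };

Note that preStart re-runs `namenode -format -nonInteractive` on every start; `-nonInteractive` makes the format abort when the metadata directory already exists, and the `|| true` swallows that failure, so existing data is left intact.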
+74
nixos/modules/services/cluster/hadoop/yarn.nix
···
+ { config, lib, pkgs, ...}:
+ let
+   cfg = config.services.hadoop;
+   hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+ in
+ with lib;
+ {
+   options.services.hadoop.yarn = {
+     resourcemanager.enabled = mkOption {
+       type = types.bool;
+       default = false;
+       description = ''
+         Whether to run the Hadoop YARN ResourceManager
+       '';
+     };
+     nodemanager.enabled = mkOption {
+       type = types.bool;
+       default = false;
+       description = ''
+         Whether to run the Hadoop YARN NodeManager
+       '';
+     };
+   };
+
+   config = mkMerge [
+     (mkIf (cfg.yarn.resourcemanager.enabled || cfg.yarn.nodemanager.enabled) {
+       users.extraUsers.yarn = {
+         description = "Hadoop YARN user";
+         group = "hadoop";
+         uid = config.ids.uids.yarn;
+       };
+     })
+
+     (mkIf cfg.yarn.resourcemanager.enabled {
+       systemd.services."yarn-resourcemanager" = {
+         description = "Hadoop YARN ResourceManager";
+         wantedBy = [ "multi-user.target" ];
+
+         environment = {
+           HADOOP_HOME = "${cfg.package}";
+         };
+
+         serviceConfig = {
+           User = "yarn";
+           SyslogIdentifier = "yarn-resourcemanager";
+           ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} resourcemanager";
+         };
+       };
+     })
+
+     (mkIf cfg.yarn.nodemanager.enabled {
+       systemd.services."yarn-nodemanager" = {
+         description = "Hadoop YARN NodeManager";
+         wantedBy = [ "multi-user.target" ];
+
+         environment = {
+           HADOOP_HOME = "${cfg.package}";
+         };
+
+         serviceConfig = {
+           User = "yarn";
+           SyslogIdentifier = "yarn-nodemanager";
+           ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} nodemanager";
+         };
+       };
+     })
+   ];
+ }
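
Usage sketch (illustrative values): a NodeManager pointing at a remote ResourceManager, mirroring the yarn test below.

  services.hadoop = {
    yarn.nodemanager.enabled = true;
    yarnSite = { "yarn.resourcemanager.hostname" = "resourcemanager"; };
  };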
+2
nixos/release.nix
···
tests.gnome3-gdm = callTest tests/gnome3-gdm.nix {};
tests.grafana = callTest tests/grafana.nix {};
tests.graphite = callTest tests/graphite.nix {};
+ tests.hadoop.hdfs = callTestOnMatchingSystems [ "x86_64-linux" ] tests/hadoop/hdfs.nix {};
+ tests.hadoop.yarn = callTestOnMatchingSystems [ "x86_64-linux" ] tests/hadoop/yarn.nix {};
tests.hardened = callTest tests/hardened.nix { };
tests.haproxy = callTest tests/haproxy.nix {};
tests.hibernate = callTest tests/hibernate.nix {};
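
With these entries, the new tests should be runnable from a nixpkgs checkout in the usual way (attribute names follow from callTestOnMatchingSystems):

  nix-build nixos/release.nix -A tests.hadoop.hdfs.x86_64-linux
  nix-build nixos/release.nix -A tests.hadoop.yarn.x86_64-linux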
+54
nixos/tests/hadoop/hdfs.nix
···
+ import ../make-test.nix ({pkgs, ...}: {
+   nodes = {
+     namenode = {pkgs, config, ...}: {
+       services.hadoop = {
+         package = pkgs.hadoop_3_1;
+         hdfs.namenode.enabled = true;
+         coreSite = {
+           "fs.defaultFS" = "hdfs://namenode:8020";
+         };
+         hdfsSite = {
+           "dfs.replication" = 1;
+           "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+           "dfs.namenode.http-bind-host" = "0.0.0.0";
+         };
+       };
+       networking.firewall.allowedTCPPorts = [
+         9870 # namenode.http-address
+         8020 # namenode.rpc-address
+       ];
+     };
+     datanode = {pkgs, config, ...}: {
+       services.hadoop = {
+         package = pkgs.hadoop_3_1;
+         hdfs.datanode.enabled = true;
+         coreSite = {
+           "fs.defaultFS" = "hdfs://namenode:8020";
+         };
+       };
+       networking.firewall.allowedTCPPorts = [
+         9864 # datanode.http.address
+         9866 # datanode.address
+         9867 # datanode.ipc.address
+       ];
+     };
+   };
+
+   testScript = ''
+     startAll
+
+     $namenode->waitForUnit("hdfs-namenode");
+     $namenode->waitForUnit("network.target");
+     $namenode->waitForOpenPort(8020);
+     $namenode->waitForOpenPort(9870);
+
+     $datanode->waitForUnit("hdfs-datanode");
+     $datanode->waitForUnit("network.target");
+     $datanode->waitForOpenPort(9864);
+     $datanode->waitForOpenPort(9866);
+     $datanode->waitForOpenPort(9867);
+
+     $namenode->succeed("curl http://namenode:9870");
+     $datanode->succeed("curl http://datanode:9864");
+   '';
+ })
+46
nixos/tests/hadoop/yarn.nix
···
+ import ../make-test.nix ({pkgs, ...}: {
+   nodes = {
+     resourcemanager = {pkgs, config, ...}: {
+       services.hadoop.package = pkgs.hadoop_3_1;
+       services.hadoop.yarn.resourcemanager.enabled = true;
+       services.hadoop.yarnSite = {
+         "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
+       };
+       networking.firewall.allowedTCPPorts = [
+         8088 # resourcemanager.webapp.address
+         8031 # resourcemanager.resource-tracker.address
+       ];
+     };
+     nodemanager = {pkgs, config, ...}: {
+       services.hadoop.package = pkgs.hadoop_3_1;
+       services.hadoop.yarn.nodemanager.enabled = true;
+       services.hadoop.yarnSite = {
+         "yarn.resourcemanager.hostname" = "resourcemanager";
+         "yarn.nodemanager.log-dirs" = "/tmp/userlogs";
+         "yarn.nodemanager.address" = "0.0.0.0:8041";
+       };
+       networking.firewall.allowedTCPPorts = [
+         8042 # nodemanager.webapp.address
+         8041 # nodemanager.address
+       ];
+     };
+   };
+
+   testScript = ''
+     startAll;
+
+     $resourcemanager->waitForUnit("yarn-resourcemanager");
+     $resourcemanager->waitForUnit("network.target");
+     $resourcemanager->waitForOpenPort(8031);
+     $resourcemanager->waitForOpenPort(8088);
+
+     $nodemanager->waitForUnit("yarn-nodemanager");
+     $nodemanager->waitForUnit("network.target");
+     $nodemanager->waitForOpenPort(8042);
+     $nodemanager->waitForOpenPort(8041);
+
+     $resourcemanager->succeed("curl http://localhost:8088");
+     $nodemanager->succeed("curl http://localhost:8042");
+   '';
+ })