1{ config, lib, options, pkgs, ...}:
let
  # Evaluated values of the options declared under `services.hadoop` below.
  cfg = config.services.hadoop;
  # Option declarations (not their values) for `services.hadoop`; only used
  # inside `defaultText` strings via `${opt.package}`.
  # NOTE(review): presumably this renders the option's path — confirm against
  # the module system.
  opt = options.services.hadoop;
in
6with lib;
7{
8 imports = [ ./yarn.nix ./hdfs.nix ./hbase.nix ];
9
10 options.services.hadoop = {
11 coreSite = mkOption {
12 default = {};
13 type = types.attrsOf types.anything;
14 example = literalExpression ''
15 {
16 "fs.defaultFS" = "hdfs://localhost";
17 }
18 '';
19 description = lib.mdDoc ''
20 Hadoop core-site.xml definition
21 <https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml>
22 '';
23 };
24 coreSiteInternal = mkOption {
25 default = {};
26 type = types.attrsOf types.anything;
27 internal = true;
28 description = lib.mdDoc ''
29 Internal option to add configs to core-site.xml based on module options
30 '';
31 };
32
33 hdfsSiteDefault = mkOption {
34 default = {
35 "dfs.namenode.rpc-bind-host" = "0.0.0.0";
36 "dfs.namenode.http-address" = "0.0.0.0:9870";
37 "dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
38 "dfs.namenode.http-bind-host" = "0.0.0.0";
39 };
40 type = types.attrsOf types.anything;
41 description = lib.mdDoc ''
42 Default options for hdfs-site.xml
43 '';
44 };
45 hdfsSite = mkOption {
46 default = {};
47 type = types.attrsOf types.anything;
48 example = literalExpression ''
49 {
50 "dfs.nameservices" = "namenode1";
51 }
52 '';
53 description = lib.mdDoc ''
54 Additional options and overrides for hdfs-site.xml
55 <https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml>
56 '';
57 };
58 hdfsSiteInternal = mkOption {
59 default = {};
60 type = types.attrsOf types.anything;
61 internal = true;
62 description = lib.mdDoc ''
63 Internal option to add configs to hdfs-site.xml based on module options
64 '';
65 };
66
67 mapredSiteDefault = mkOption {
68 default = {
69 "mapreduce.framework.name" = "yarn";
70 "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
71 "mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
72 "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
73 };
74 defaultText = literalExpression ''
75 {
76 "mapreduce.framework.name" = "yarn";
77 "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
78 "mapreduce.map.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
79 "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
80 }
81 '';
82 type = types.attrsOf types.anything;
83 description = lib.mdDoc ''
84 Default options for mapred-site.xml
85 '';
86 };
87 mapredSite = mkOption {
88 default = {};
89 type = types.attrsOf types.anything;
90 example = literalExpression ''
91 {
92 "mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
93 }
94 '';
95 description = lib.mdDoc ''
96 Additional options and overrides for mapred-site.xml
97 <https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml>
98 '';
99 };
100
101 yarnSiteDefault = mkOption {
102 default = {
103 "yarn.nodemanager.admin-env" = "PATH=$PATH";
104 "yarn.nodemanager.aux-services" = "mapreduce_shuffle";
105 "yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler";
106 "yarn.nodemanager.bind-host" = "0.0.0.0";
107 "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
108 "yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ";
109 "yarn.nodemanager.linux-container-executor.group" = "hadoop";
110 "yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
111 "yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
112 "yarn.resourcemanager.bind-host" = "0.0.0.0";
113 "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler";
114 };
115 type = types.attrsOf types.anything;
116 description = lib.mdDoc ''
117 Default options for yarn-site.xml
118 '';
119 };
120 yarnSite = mkOption {
121 default = {};
122 type = types.attrsOf types.anything;
123 example = literalExpression ''
124 {
125 "yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
126 }
127 '';
128 description = lib.mdDoc ''
129 Additional options and overrides for yarn-site.xml
130 <https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml>
131 '';
132 };
133 yarnSiteInternal = mkOption {
134 default = {};
135 type = types.attrsOf types.anything;
136 internal = true;
137 description = lib.mdDoc ''
138 Internal option to add configs to yarn-site.xml based on module options
139 '';
140 };
141
142 httpfsSite = mkOption {
143 default = { };
144 type = types.attrsOf types.anything;
145 example = literalExpression ''
146 {
147 "hadoop.http.max.threads" = 500;
148 }
149 '';
150 description = lib.mdDoc ''
151 Hadoop httpfs-site.xml definition
152 <https://hadoop.apache.org/docs/current/hadoop-hdfs-httpfs/httpfs-default.html>
153 '';
154 };
155
156 log4jProperties = mkOption {
157 default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties";
158 defaultText = literalExpression ''
159 "''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}/etc/hadoop/log4j.properties"
160 '';
161 type = types.path;
162 example = literalExpression ''
163 "''${pkgs.hadoop}/lib/''${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties";
164 '';
165 description = lib.mdDoc "log4j.properties file added to HADOOP_CONF_DIR";
166 };
167
168 containerExecutorCfg = mkOption {
169 default = {
170 # must be the same as yarn.nodemanager.linux-container-executor.group in yarnSite
171 "yarn.nodemanager.linux-container-executor.group"="hadoop";
172 "min.user.id"=1000;
173 "feature.terminal.enabled"=1;
174 "feature.mount-cgroup.enabled" = 1;
175 };
176 type = types.attrsOf types.anything;
177 example = literalExpression ''
178 options.services.hadoop.containerExecutorCfg.default // {
179 "feature.terminal.enabled" = 0;
180 }
181 '';
182 description = lib.mdDoc ''
183 Yarn container-executor.cfg definition
184 <https://hadoop.apache.org/docs/r2.7.2/hadoop-yarn/hadoop-yarn-site/SecureContainer.html>
185 '';
186 };
187
188 extraConfDirs = mkOption {
189 default = [];
190 type = types.listOf types.path;
191 example = literalExpression ''
192 [
193 ./extraHDFSConfs
194 ./extraYARNConfs
195 ]
196 '';
197 description = lib.mdDoc "Directories containing additional config files to be added to HADOOP_CONF_DIR";
198 };
199
200 gatewayRole.enable = mkEnableOption (lib.mdDoc "gateway role for deploying hadoop configs");
201
202 package = mkOption {
203 type = types.package;
204 default = pkgs.hadoop;
205 defaultText = literalExpression "pkgs.hadoop";
206 description = lib.mdDoc "";
207 };
208 };
209
210
211 config = mkIf cfg.gatewayRole.enable {
212 users.groups.hadoop = {
213 gid = config.ids.gids.hadoop;
214 };
215 environment = {
216 systemPackages = [ cfg.package ];
217 etc."hadoop-conf".source = let
218 hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
219 in "${hadoopConf}";
220 variables.HADOOP_CONF_DIR = "/etc/hadoop-conf/";
221 };
222 };
223}