nixos/hadoop: add HBase submodule

Changed files
+297 -16
nixos
modules
services
cluster
databases
tests
+1
nixos/modules/services/cluster/hadoop/conf.nix
···
mkdir -p $out/
cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
···
mkdir -p $out/
cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
+
cp ${siteXml "hbase-site.xml" (hbaseSiteDefault // hbaseSite // hbaseSiteInternal)}/* $out/
cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
+1 -1
nixos/modules/services/cluster/hadoop/default.nix
···
in
with lib;
{
-
imports = [ ./yarn.nix ./hdfs.nix ];
options.services.hadoop = {
coreSite = mkOption {
···
in
with lib;
{
+
imports = [ ./yarn.nix ./hdfs.nix ./hbase.nix ];
options.services.hadoop = {
coreSite = mkOption {
+200
nixos/modules/services/cluster/hadoop/hbase.nix
···
···
+
{ config, lib, pkgs, ...}:
+
+
with lib;
+
let
+
cfg = config.services.hadoop;
+
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+
mkIfNotNull = x: mkIf (x != null) x;
+
in
+
{
+
options.services.hadoop = {
+
+
gatewayRole.enableHbaseCli = mkOption {
+
description = "Whether to enable HBase CLI tools";
+
default = false;
+
type = types.bool;
+
};
+
+
hbaseSiteDefault = mkOption {
+
default = {
+
"hbase.regionserver.ipc.address" = "0.0.0.0";
+
"hbase.master.ipc.address" = "0.0.0.0";
+
"hbase.master.info.bindAddress" = "0.0.0.0";
+
"hbase.regionserver.info.bindAddress" = "0.0.0.0";
+
+
"hbase.cluster.distributed" = "true";
+
};
+
type = types.attrsOf types.anything;
+
description = ''
+
Default options for hbase-site.xml
+
'';
+
};
+
hbaseSite = mkOption {
+
default = {};
+
type = with types; attrsOf anything;
+
example = literalExpression ''
+
'';
+
description = ''
+
Additional options and overrides for hbase-site.xml
+
<link xlink:href="https://github.com/apache/hbase/blob/rel/2.4.11/hbase-common/src/main/resources/hbase-default.xml"/>
+
'';
+
};
+
hbaseSiteInternal = mkOption {
+
default = {};
+
type = with types; attrsOf anything;
+
internal = true;
+
description = ''
+
Internal option to add configs to hbase-site.xml based on module options
+
'';
+
};
+
+
hbase = {
+
+
package = mkOption {
+
type = types.package;
+
default = pkgs.hbase;
+
defaultText = literalExpression "pkgs.hbase";
+
description = "HBase package";
+
};
+
+
rootdir = mkOption {
+
description = ''
+
This option will set "hbase.rootdir" in hbase-site.xml and determine
+
the directory shared by region servers and into which HBase persists.
+
The URL should be 'fully-qualified' to include the filesystem scheme.
+
If a core-site.xml is provided, the FS scheme defaults to the value
+
of "fs.defaultFS".
+
+
Filesystems other than HDFS (like S3, QFS, Swift) are also supported.
+
'';
+
type = types.str;
+
example = "hdfs://nameservice1/hbase";
+
default = "/hbase";
+
};
+
zookeeperQuorum = mkOption {
+
description = ''
+
This option will set "hbase.zookeeper.quorum" in hbase-site.xml.
+
Comma separated list of servers in the ZooKeeper ensemble.
+
'';
+
type = with types; nullOr commas;
+
example = "zk1.internal,zk2.internal,zk3.internal";
+
default = null;
+
};
+
master = {
+
enable = mkEnableOption "HBase Master";
+
initHDFS = mkEnableOption "initialization of the hbase directory on HDFS";
+
+
openFirewall = mkOption {
+
type = types.bool;
+
default = false;
+
description = ''
+
Open firewall ports for HBase master.
+
'';
+
};
+
};
+
regionServer = {
+
enable = mkEnableOption "HBase RegionServer";
+
+
overrideHosts = mkOption {
+
type = types.bool;
+
default = true;
+
description = ''
+
Remove /etc/hosts entries for "127.0.0.2" and "::1" defined in nixos/modules/config/networking.nix
+
Regionservers must be able to resolve their hostnames to their IP addresses, through PTR records
+
or /etc/hosts entries.
+
+
'';
+
};
+
+
openFirewall = mkOption {
+
type = types.bool;
+
default = false;
+
description = ''
+
Open firewall ports for HBase RegionServer.
+
'';
+
};
+
};
+
};
+
};
+
+
config = mkMerge [
+
(mkIf cfg.hbase.master.enable {
+
services.hadoop.gatewayRole = {
+
enable = true;
+
enableHbaseCli = mkDefault true;
+
};
+
+
systemd.services.hbase-master = {
+
description = "HBase master";
+
wantedBy = [ "multi-user.target" ];
+
+
preStart = mkIf cfg.hbase.master.initHDFS ''
+
HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfsadmin -safemode wait
+
HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfs -mkdir -p ${cfg.hbase.rootdir}
+
HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfs -chown hbase ${cfg.hbase.rootdir}
+
'';
+
+
serviceConfig = {
+
User = "hbase";
+
SyslogIdentifier = "hbase-master";
+
ExecStart = "${cfg.hbase.package}/bin/hbase --config ${hadoopConf} " +
+
"master start";
+
Restart = "always";
+
};
+
};
+
+
services.hadoop.hbaseSiteInternal."hbase.rootdir" = cfg.hbase.rootdir;
+
+
networking.firewall.allowedTCPPorts = (mkIf cfg.hbase.master.openFirewall [
+
16000 16010
+
]);
+
+
})
+
+
(mkIf cfg.hbase.regionServer.enable {
+
services.hadoop.gatewayRole = {
+
enable = true;
+
enableHbaseCli = mkDefault true;
+
};
+
+
systemd.services.hbase-regionserver = {
+
description = "HBase RegionServer";
+
wantedBy = [ "multi-user.target" ];
+
serviceConfig = {
+
User = "hbase";
+
SyslogIdentifier = "hbase-regionserver";
+
ExecStart = "${cfg.hbase.package}/bin/hbase --config /etc/hadoop-conf/ " +
+
"regionserver start";
+
Restart = "always";
+
};
+
};
+
+
services.hadoop.hbaseSiteInternal."hbase.rootdir" = cfg.hbase.rootdir;
+
+
networking = {
+
firewall.allowedTCPPorts = (mkIf cfg.hbase.regionServer.openFirewall [
+
16020 16030
+
]);
+
hosts = mkIf cfg.hbase.regionServer.overrideHosts {
+
"127.0.0.2" = mkForce [ ];
+
"::1" = mkForce [ ];
+
};
+
};
+
})
+
+
(mkIf cfg.gatewayRole.enable {
+
+
environment.systemPackages = mkIf cfg.gatewayRole.enableHbaseCli [ cfg.hbase.package ];
+
+
services.hadoop.hbaseSiteInternal = with cfg.hbase; {
+
"hbase.zookeeper.quorum" = mkIfNotNull zookeeperQuorum;
+
};
+
+
users.users.hbase = {
+
description = "Hadoop HBase user";
+
group = "hadoop";
+
isSystemUser = true;
+
};
+
})
+
];
+
}
+8 -13
nixos/modules/services/databases/hbase.nix
···
with lib;
let
-
cfg = config.services.hbase;
-
opt = options.services.hbase;
buildProperty = configAttr:
(builtins.concatStringsSep "\n"
···
###### interface
options = {
-
-
services.hbase = {
-
enable = mkOption {
-
type = types.bool;
-
default = false;
-
description = lib.mdDoc ''
-
Whether to run HBase.
-
'';
-
};
package = mkOption {
type = types.package;
···
};
};
-
};
###### implementation
-
config = mkIf config.services.hbase.enable {
systemd.tmpfiles.rules = [
"d '${cfg.dataDir}' - ${cfg.user} ${cfg.group} - -"
···
with lib;
let
+
cfg = config.services.hbase-standalone;
+
opt = options.services.hbase-standalone;
buildProperty = configAttr:
(builtins.concatStringsSep "\n"
···
###### interface
options = {
+
services.hbase-standalone = {
+
enable = mkEnableOption ''
+
HBase master in standalone mode with embedded regionserver and ZooKeeper.
+
Do not use this configuration in production or for evaluating HBase performance.
+
'';
package = mkOption {
type = types.package;
···
};
};
};
###### implementation
+
config = mkIf cfg.enable {
systemd.tmpfiles.rules = [
"d '${cfg.dataDir}' - ${cfg.user} ${cfg.group} - -"
+1
nixos/tests/hadoop/default.nix
···
all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop.nix { inherit package; };
hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hdfs.nix { inherit package; };
yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./yarn.nix { inherit package; };
}
···
all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop.nix { inherit package; };
hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hdfs.nix { inherit package; };
yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./yarn.nix { inherit package; };
+
hbase = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hbase.nix { inherit package; };
}
+84
nixos/tests/hadoop/hbase.nix
···
···
+
# Test a minimal hbase cluster
+
{ pkgs, ... }:
+
import ../make-test-python.nix ({ hadoop ? pkgs.hadoop, hbase ? pkgs.hbase, ... }:
+
with pkgs.lib;
+
{
+
name = "hadoop-hbase";
+
+
nodes = let
+
coreSite = {
+
"fs.defaultFS" = "hdfs://namenode:8020";
+
};
+
defOpts = {
+
enable = true;
+
openFirewall = true;
+
};
+
zookeeperQuorum = "zookeeper";
+
in {
+
zookeeper = { ... }: {
+
services.zookeeper.enable = true;
+
networking.firewall.allowedTCPPorts = [ 2181 ];
+
};
+
namenode = { ... }: {
+
services.hadoop = {
+
hdfs = {
+
namenode = defOpts // { formatOnInit = true; };
+
};
+
inherit coreSite;
+
};
+
};
+
datanode = { ... }: {
+
virtualisation.diskSize = 8192;
+
services.hadoop = {
+
hdfs.datanode = defOpts;
+
inherit coreSite;
+
};
+
};
+
+
master = { ... }:{
+
services.hadoop = {
+
inherit coreSite;
+
hbase = {
+
inherit zookeeperQuorum;
+
master = defOpts // { initHDFS = true; };
+
};
+
};
+
};
+
regionserver = { ... }:{
+
services.hadoop = {
+
inherit coreSite;
+
hbase = {
+
inherit zookeeperQuorum;
+
regionServer = defOpts;
+
};
+
};
+
};
+
};
+
+
testScript = ''
+
start_all()
+
+
# wait for HDFS cluster
+
namenode.wait_for_unit("hdfs-namenode")
+
namenode.wait_for_unit("network.target")
+
namenode.wait_for_open_port(8020)
+
namenode.wait_for_open_port(9870)
+
datanode.wait_for_unit("hdfs-datanode")
+
datanode.wait_for_unit("network.target")
+
datanode.wait_for_open_port(9864)
+
datanode.wait_for_open_port(9866)
+
datanode.wait_for_open_port(9867)
+
+
# wait for ZK
+
zookeeper.wait_for_unit("zookeeper")
+
zookeeper.wait_for_open_port(2181)
+
+
# wait for HBase to start up
+
master.wait_for_unit("hbase-master")
+
regionserver.wait_for_unit("hbase-regionserver")
+
+
assert "1 active master, 0 backup masters, 1 servers" in master.succeed("echo status | HADOOP_USER_NAME=hbase hbase shell -n")
+
regionserver.wait_until_succeeds("echo \"create 't1','f1'\" | HADOOP_USER_NAME=hbase hbase shell -n")
+
assert "NAME => 'f1'" in regionserver.succeed("echo \"describe 't1'\" | HADOOP_USER_NAME=hbase hbase shell -n")
+
'';
+
})
+2 -2
nixos/tests/hbase.nix
···
import ./make-test-python.nix ({ pkgs, lib, package ? pkgs.hbase, ... }:
{
-
name = "hbase";
meta = with lib.maintainers; {
maintainers = [ illustris ];
···
nodes = {
hbase = { pkgs, ... }: {
-
services.hbase = {
enable = true;
inherit package;
# Needed for standalone mode in hbase 2+
···
import ./make-test-python.nix ({ pkgs, lib, package ? pkgs.hbase, ... }:
{
+
name = "hbase-standalone";
meta = with lib.maintainers; {
maintainers = [ illustris ];
···
nodes = {
hbase = { pkgs, ... }: {
+
services.hbase-standalone = {
enable = true;
inherit package;
# Needed for standalone mode in hbase 2+