# This test is comprehensive: it checks that all Hadoop services work with each other.
# Run it when updating the Hadoop package or making significant changes to the hadoop module.
# For more basic tests, see hdfs.nix and yarn.nix.
import ../make-test-python.nix ({ package, ... }: {
  name = "hadoop-combined";

  nodes =
    let
      coreSite = {
        "fs.defaultFS" = "hdfs://ns1";
      };
      hdfsSite = {
        # HA Quorum Journal Manager configuration
        "dfs.nameservices" = "ns1";
        "dfs.ha.namenodes.ns1" = "nn1,nn2";
        "dfs.namenode.shared.edits.dir.ns1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
        "dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
        "dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
        "dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
        "dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
        "dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
        "dfs.namenode.http-address.ns1.nn2" = "nn2:9870";

        # Automatic failover configuration
        "dfs.client.failover.proxy.provider.ns1" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
        "dfs.ha.automatic-failover.enabled.ns1" = "true";
        "dfs.ha.fencing.methods" = "shell(true)";
        "ha.zookeeper.quorum" = "zk1:2181";
      };
      yarnSite = {
        "yarn.resourcemanager.zk-address" = "zk1:2181";
        "yarn.resourcemanager.ha.enabled" = "true";
        "yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
        "yarn.resourcemanager.hostname.rm1" = "rm1";
        "yarn.resourcemanager.hostname.rm2" = "rm2";
        "yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
        "yarn.resourcemanager.cluster-id" = "cluster1";
        # yarn.resourcemanager.webapp.address needs to be defined even though
        # yarn.resourcemanager.hostname is set. This shouldn't be necessary, but a bug in
        # hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
        # causes AM containers to fail otherwise.
        "yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
        "yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
      };
    in
    {
      zk1 = { ... }: {
        services.zookeeper.enable = true;
        networking.firewall.allowedTCPPorts = [ 2181 ];
      };

      # HDFS cluster
      nn1 = { ... }: {
        services.hadoop = {
          inherit package coreSite hdfsSite;
          hdfs.namenode = {
            enable = true;
            openFirewall = true;
          };
          hdfs.zkfc.enable = true;
        };
      };
      nn2 = { ... }: {
        services.hadoop = {
          inherit package coreSite hdfsSite;
          hdfs.namenode = {
            enable = true;
            openFirewall = true;
          };
          hdfs.zkfc.enable = true;
        };
      };

      jn1 = { ... }: {
        services.hadoop = {
          inherit package coreSite hdfsSite;
          hdfs.journalnode = {
            enable = true;
            openFirewall = true;
          };
        };
      };
      jn2 = { ... }: {
        services.hadoop = {
          inherit package coreSite hdfsSite;
          hdfs.journalnode = {
            enable = true;
            openFirewall = true;
          };
        };
      };
      jn3 = { ... }: {
        services.hadoop = {
          inherit package coreSite hdfsSite;
          hdfs.journalnode = {
            enable = true;
            openFirewall = true;
          };
        };
      };

      dn1 = { ... }: {
        services.hadoop = {
          inherit package coreSite hdfsSite;
          hdfs.datanode = {
            enable = true;
            openFirewall = true;
          };
        };
      };
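
      # The HDFS machines above form a standard HA topology: two namenodes
      # (nn1/nn2) share their edit log through a three-node JournalNode quorum
      # (jn1/jn2/jn3), and each namenode runs a zkfc daemon that uses ZooKeeper
      # (zk1) to elect the active node. As a rough sketch of what the module
      # does with these attrsets (the config file location is module-defined
      # and assumed here), a pair like "dfs.nameservices" = "ns1" is rendered
      # into hdfs-site.xml in the standard Hadoop property format:
      #   <property>
      #     <name>dfs.nameservices</name>
      #     <value>ns1</value>
      #   </property>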

      # YARN cluster
      rm1 = { options, ... }: {
        services.hadoop = {
          inherit package coreSite hdfsSite yarnSite;
          yarn.resourcemanager = {
            enable = true;
            openFirewall = true;
          };
        };
      };
      rm2 = { options, ... }: {
        services.hadoop = {
          inherit package coreSite hdfsSite yarnSite;
          yarn.resourcemanager = {
            enable = true;
            openFirewall = true;
          };
        };
      };
      nm1 = { options, ... }: {
        virtualisation.memorySize = 2048;
        services.hadoop = {
          inherit package coreSite hdfsSite yarnSite;
          yarn.nodemanager = {
            enable = true;
            openFirewall = true;
          };
        };
      };
      client = { options, ... }: {
        services.hadoop = {
          gatewayRole.enable = true;
          inherit package coreSite hdfsSite yarnSite;
        };
      };
    };

  testScript = ''
    start_all()

    #### HDFS tests ####

    zk1.wait_for_unit("network.target")
    jn1.wait_for_unit("network.target")
    jn2.wait_for_unit("network.target")
    jn3.wait_for_unit("network.target")
    nn1.wait_for_unit("network.target")
    nn2.wait_for_unit("network.target")
    dn1.wait_for_unit("network.target")

    zk1.wait_for_unit("zookeeper")
    jn1.wait_for_unit("hdfs-journalnode")
    jn2.wait_for_unit("hdfs-journalnode")
    jn3.wait_for_unit("hdfs-journalnode")

    zk1.wait_for_open_port(2181)
    jn1.wait_for_open_port(8480)
    jn1.wait_for_open_port(8485)
    jn2.wait_for_open_port(8480)
    jn2.wait_for_open_port(8485)

    # Namenodes must be stopped before initializing the cluster
    nn1.succeed("systemctl stop hdfs-namenode")
    nn2.succeed("systemctl stop hdfs-namenode")
    nn1.succeed("systemctl stop hdfs-zkfc")
    nn2.succeed("systemctl stop hdfs-zkfc")

    # Initialize zookeeper for failover controller
    nn1.succeed("sudo -u hdfs hdfs zkfc -formatZK 2>&1 | systemd-cat")

    # Format NN1 and start it
    nn1.succeed("sudo -u hdfs hadoop namenode -format 2>&1 | systemd-cat")
    nn1.succeed("systemctl start hdfs-namenode")
    nn1.wait_for_open_port(9870)
    nn1.wait_for_open_port(8022)
    nn1.wait_for_open_port(8020)

    # Bootstrap NN2 from NN1 and start it
    nn2.succeed("sudo -u hdfs hdfs namenode -bootstrapStandby 2>&1 | systemd-cat")
    nn2.succeed("systemctl start hdfs-namenode")
    nn2.wait_for_open_port(9870)
    nn2.wait_for_open_port(8022)
    nn2.wait_for_open_port(8020)
    nn1.succeed("netstat -tulpne | systemd-cat")

    # Start failover controllers
    nn1.succeed("systemctl start hdfs-zkfc")
    nn2.succeed("systemctl start hdfs-zkfc")

    # DN should have started by now, but confirm anyway
    dn1.wait_for_unit("hdfs-datanode")
    # Print states of namenodes
    client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
    # Wait for cluster to exit safemode
    client.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
    client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
    # Test R/W
    client.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
    assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")

    # Test NN failover
    nn1.succeed("systemctl stop hdfs-namenode")
    assert "active" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
    client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
    assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")

    nn1.succeed("systemctl start hdfs-namenode")
    nn1.wait_for_open_port(9870)
    nn1.wait_for_open_port(8022)
    nn1.wait_for_open_port(8020)
    assert "standby" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
    client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
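
    # For reference, `hdfs haadmin -getAllServiceState` prints one line per
    # namenode with its service RPC address and HA state, roughly like this
    # (illustrative output only; the assertions above match the
    # "active"/"standby" substrings rather than the exact layout):
    #   nn1:8022    standby
    #   nn2:8022    active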

    #### YARN tests ####

    rm1.wait_for_unit("network.target")
    rm2.wait_for_unit("network.target")
    nm1.wait_for_unit("network.target")

    rm1.wait_for_unit("yarn-resourcemanager")
    rm1.wait_for_open_port(8088)
    rm2.wait_for_unit("yarn-resourcemanager")
    rm2.wait_for_open_port(8088)

    nm1.wait_for_unit("yarn-nodemanager")
    nm1.wait_for_open_port(8042)
    nm1.wait_for_open_port(8040)
    client.wait_until_succeeds("yarn node -list | grep Nodes:1")
    client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
    client.succeed("sudo -u yarn yarn node -list | systemd-cat")

    # Test RM failover
    rm1.succeed("systemctl stop yarn-resourcemanager")
    assert "standby" not in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
    client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
    rm1.succeed("systemctl start yarn-resourcemanager")
    rm1.wait_for_unit("yarn-resourcemanager")
    rm1.wait_for_open_port(8088)
    assert "standby" in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
    client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")

    # Run the bundled MapReduce pi example end-to-end; the readlink/sed
    # pipeline locates hadoop-mapreduce-examples-*.jar relative to the yarn
    # binary in the Nix store
    assert "Estimated value of Pi is" in client.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
    assert "SUCCEEDED" in client.succeed("yarn application -list -appStates FINISHED")
  '';
})
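# To run this test on its own, build it through the nixosTests attribute set
# from the nixpkgs root. The exact attribute path is an assumption here, since
# it depends on how default.nix in this directory wires the test up:
#   nix-build -A nixosTests.hadoop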