# This test is very comprehensive. It tests whether all hadoop services work well with each other.
# Run this when updating the Hadoop package or making significant changes to the hadoop module.
# For a more basic test, see hdfs.nix and yarn.nix
import ../make-test-python.nix (
  { package, ... }:
  {
    name = "hadoop-combined";

    nodes =
      let
        coreSite = {
          "fs.defaultFS" = "hdfs://ns1";
        };
        hdfsSite = {
          # HA Quorum Journal Manager configuration
          "dfs.nameservices" = "ns1";
          "dfs.ha.namenodes.ns1" = "nn1,nn2";
          "dfs.namenode.shared.edits.dir.ns1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
          "dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
          "dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
          "dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
          "dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
          "dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
          "dfs.namenode.http-address.ns1.nn2" = "nn2:9870";

          # Automatic failover configuration
          "dfs.client.failover.proxy.provider.ns1" =
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
          "dfs.ha.automatic-failover.enabled.ns1" = "true";
          "dfs.ha.fencing.methods" = "shell(true)";
          "ha.zookeeper.quorum" = "zk1:2181";
        };
        yarnSite = {
          "yarn.resourcemanager.zk-address" = "zk1:2181";
          "yarn.resourcemanager.ha.enabled" = "true";
          "yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
          "yarn.resourcemanager.hostname.rm1" = "rm1";
          "yarn.resourcemanager.hostname.rm2" = "rm2";
          "yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
          "yarn.resourcemanager.cluster-id" = "cluster1";
          # yarn.resourcemanager.webapp.address needs to be defined even though yarn.resourcemanager.hostname is set.
          # This shouldn't be necessary, but there's a bug in
          # hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
          # that causes AM containers to fail otherwise.
          "yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
          "yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
        };
      in
      {
        zk1 =
          { ... }:
          {
            services.zookeeper.enable = true;
            networking.firewall.allowedTCPPorts = [ 2181 ];
          };

        # HDFS cluster
        nn1 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.namenode = {
                enable = true;
                openFirewall = true;
              };
              hdfs.zkfc.enable = true;
            };
          };
        nn2 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.namenode = {
                enable = true;
                openFirewall = true;
              };
              hdfs.zkfc.enable = true;
            };
          };

        jn1 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.journalnode = {
                enable = true;
                openFirewall = true;
              };
            };
          };
        jn2 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.journalnode = {
                enable = true;
                openFirewall = true;
              };
            };
          };
        jn3 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.journalnode = {
                enable = true;
                openFirewall = true;
              };
            };
          };

        dn1 =
          { ... }:
          {
            virtualisation.diskSize = 4096;
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.datanode = {
                enable = true;
                openFirewall = true;
              };
            };
          };

        # YARN cluster
        rm1 =
          { options, ... }:
          {
            services.hadoop = {
              inherit
                package
                coreSite
                hdfsSite
                yarnSite
                ;
              yarn.resourcemanager = {
                enable = true;
                openFirewall = true;
              };
            };
          };
        rm2 =
          { options, ... }:
          {
            services.hadoop = {
              inherit
                package
                coreSite
                hdfsSite
                yarnSite
                ;
              yarn.resourcemanager = {
                enable = true;
                openFirewall = true;
              };
            };
          };
        nm1 =
          { options, ... }:
          {
            virtualisation.memorySize = 2048;
            services.hadoop = {
              inherit
                package
                coreSite
                hdfsSite
                yarnSite
                ;
              yarn.nodemanager = {
                enable = true;
                openFirewall = true;
                useCGroups = false;
              };
            };
          };
        client =
          { options, ... }:
          {
            services.hadoop = {
              gatewayRole.enable = true;
              inherit
                package
                coreSite
                hdfsSite
                yarnSite
                ;
            };
          };
      };

    testScript = ''
      start_all()

      #### HDFS tests ####

      zk1.wait_for_unit("network.target")
      jn1.wait_for_unit("network.target")
      jn2.wait_for_unit("network.target")
      jn3.wait_for_unit("network.target")
      nn1.wait_for_unit("network.target")
      nn2.wait_for_unit("network.target")
      dn1.wait_for_unit("network.target")

      zk1.wait_for_unit("zookeeper")
      jn1.wait_for_unit("hdfs-journalnode")
      jn2.wait_for_unit("hdfs-journalnode")
      jn3.wait_for_unit("hdfs-journalnode")

      zk1.wait_for_open_port(2181)
      jn1.wait_for_open_port(8480)
      jn1.wait_for_open_port(8485)
      jn2.wait_for_open_port(8480)
      jn2.wait_for_open_port(8485)

      # Namenodes must be stopped before initializing the cluster
      nn1.succeed("systemctl stop hdfs-namenode")
      nn2.succeed("systemctl stop hdfs-namenode")
      nn1.succeed("systemctl stop hdfs-zkfc")
      nn2.succeed("systemctl stop hdfs-zkfc")

      # Initialize zookeeper for failover controller
      nn1.succeed("sudo -u hdfs systemd-cat hdfs zkfc -formatZK")

      # Format NN1 and start it
      nn1.succeed("sudo -u hdfs systemd-cat hadoop namenode -format")
      nn1.succeed("systemctl start hdfs-namenode")
      nn1.wait_for_open_port(9870)
      nn1.wait_for_open_port(8022)
      nn1.wait_for_open_port(8020)

      # Bootstrap NN2 from NN1 and start it
      nn2.succeed("sudo -u hdfs systemd-cat hdfs namenode -bootstrapStandby")
      nn2.succeed("systemctl start hdfs-namenode")
      nn2.wait_for_open_port(9870)
      nn2.wait_for_open_port(8022)
      nn2.wait_for_open_port(8020)
      nn1.succeed("systemd-cat netstat -tulpne")

      # Start failover controllers
      nn1.succeed("systemctl start hdfs-zkfc")
      nn2.succeed("systemctl start hdfs-zkfc")

      # DN should have started by now, but confirm anyway
      dn1.wait_for_unit("hdfs-datanode")
      # Print states of namenodes
      client.succeed("sudo -u hdfs systemd-cat hdfs haadmin -getAllServiceState")
      # Wait for cluster to exit safemode
      client.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
      client.succeed("sudo -u hdfs systemd-cat hdfs haadmin -getAllServiceState")
      # test R/W
      client.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
      assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")

      # Test NN failover
      nn1.succeed("systemctl stop hdfs-namenode")
      assert "active" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
      client.succeed("sudo -u hdfs systemd-cat hdfs haadmin -getAllServiceState")
      assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")

      nn1.succeed("systemctl start hdfs-namenode")
      nn1.wait_for_open_port(9870)
      nn1.wait_for_open_port(8022)
      nn1.wait_for_open_port(8020)
      assert "standby" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
      client.succeed("sudo -u hdfs systemd-cat hdfs haadmin -getAllServiceState")

      #### YARN tests ####

      rm1.wait_for_unit("network.target")
      rm2.wait_for_unit("network.target")
      nm1.wait_for_unit("network.target")

      rm1.wait_for_unit("yarn-resourcemanager")
      rm1.wait_for_open_port(8088)
      rm2.wait_for_unit("yarn-resourcemanager")
      rm2.wait_for_open_port(8088)

      nm1.wait_for_unit("yarn-nodemanager")
      nm1.wait_for_open_port(8042)
      nm1.wait_for_open_port(8040)
      client.wait_until_succeeds("yarn node -list | grep Nodes:1")
      client.succeed("sudo -u yarn systemd-cat yarn rmadmin -getAllServiceState")
      client.succeed("sudo -u yarn systemd-cat yarn node -list")

      # Test RM failover
      rm1.succeed("systemctl stop yarn-resourcemanager")
      assert "standby" not in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
      client.succeed("sudo -u yarn systemd-cat yarn rmadmin -getAllServiceState")
      rm1.succeed("systemctl start yarn-resourcemanager")
      rm1.wait_for_unit("yarn-resourcemanager")
      rm1.wait_for_open_port(8088)
      assert "standby" in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
      client.succeed("sudo -u yarn systemd-cat yarn rmadmin -getAllServiceState")

      assert "Estimated value of Pi is" in client.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
      assert "SUCCEEDED" in client.succeed("yarn application -list -appStates FINISHED")
    '';
  }
)