# Web file-viewer header: at 21.11-pre, 7.5 kB ("view raw")
# NixOS VM test exercising Prometheus, the Pushgateway and the Thanos
# sidecar/query/store/compact services, with a MinIO instance (node `s3`)
# acting as the S3-compatible object store.
let
  # TCP ports shared between the node definitions and the test script.
  grpcPort = 19090;
  queryPort = 9090;
  minioPort = 9000;
  pushgwPort = 9091;

  # Name of the bucket Thanos uses. The test script creates it on MinIO,
  # so both places must agree; keep it in one binding.
  bucketName = "thanos-bucket";

  # Throw-away credentials, used both to configure MinIO and by the
  # Thanos object-store client below.
  s3 = {
    accessKey = "BKIKJAA5BMMU2RHO6IBB";
    secretKey = "V7f1CwQqAcwo80UEIJEjc5gVQUSSx5ohQ9GSrr12";
  };

  # Object-store configuration inherited by every Thanos component that
  # talks to the bucket (sidecar, store, compact).
  objstore.config = {
    type = "S3";
    config = {
      bucket = bucketName;
      # "s3" is the host name of the MinIO node defined under `nodes`.
      endpoint = "s3:${toString minioPort}";
      region = "us-east-1";
      access_key = s3.accessKey;
      secret_key = s3.secretKey;
      insecure = true;
      signature_version2 = false;
      put_user_metadata = {};
      http_config = {
        idle_conn_timeout = "0s";
        insecure_skip_verify = false;
      };
      trace = {
        enable = false;
      };
    };
  };

in import ./make-test-python.nix {
  name = "prometheus";

  nodes = {
    # Runs Prometheus, the Pushgateway and the Thanos sidecar.
    prometheus = { pkgs, ... }: {
      virtualisation.diskSize = 2 * 1024;
      virtualisation.memorySize = 2048;
      environment.systemPackages = [ pkgs.jq ];
      # The `query` node reaches the sidecar over gRPC.
      networking.firewall.allowedTCPPorts = [ grpcPort ];
      services.prometheus = {
        enable = true;
        scrapeConfigs = [
          {
            job_name = "prometheus";
            static_configs = [
              {
                targets = [ "127.0.0.1:${toString queryPort}" ];
                labels = { instance = "localhost"; };
              }
            ];
          }
          {
            job_name = "pushgateway";
            scrape_interval = "1s";
            static_configs = [
              {
                targets = [ "127.0.0.1:${toString pushgwPort}" ];
              }
            ];
          }
        ];
        rules = [
          ''
            groups:
              - name: test
                rules:
                  - record: testrule
                    expr: count(up{job="prometheus"})
          ''
        ];
        globalConfig = {
          external_labels = {
            # The test script later greps the uploaded blocks for this value.
            some_label = "required by thanos";
          };
        };
        extraFlags = [
          # Required by thanos
          "--storage.tsdb.min-block-duration=5s"
          "--storage.tsdb.max-block-duration=5s"
        ];
      };
      services.prometheus.pushgateway = {
        enable = true;
        web.listen-address = ":${toString pushgwPort}";
        persistMetrics = true;
        persistence.interval = "1s";
        stateDir = "prometheus-pushgateway";
      };
      services.thanos = {
        sidecar = {
          enable = true;
          grpc-address = "0.0.0.0:${toString grpcPort}";
          inherit objstore;
        };

        # TODO: Add some tests for these services:
        #rule = {
        #  enable = true;
        #  http-address = "0.0.0.0:19194";
        #  grpc-address = "0.0.0.0:19193";
        #  query.addresses = [
        #    "localhost:19191"
        #  ];
        #  labels = {
        #    just = "some";
        #    nice = "labels";
        #  };
        #};
        #
        #receive = {
        #  http-address = "0.0.0.0:19195";
        #  enable = true;
        #  labels = {
        #    just = "some";
        #    nice = "labels";
        #  };
        #};
      };
    };

    # Thanos query front-end that reads from the sidecar on `prometheus`.
    query = { pkgs, ... }: {
      environment.systemPackages = [ pkgs.jq ];
      services.thanos.query = {
        enable = true;
        http-address = "0.0.0.0:${toString queryPort}";
        store.addresses = [
          "prometheus:${toString grpcPort}"
        ];
      };
    };

    # Thanos store gateway and compactor working on the bucket, plus a local
    # query service that reads from the store gateway only.
    store = { pkgs, ... }: {
      virtualisation.diskSize = 2 * 1024;
      virtualisation.memorySize = 2048;
      environment.systemPackages = with pkgs; [ jq thanos ];
      services.thanos.store = {
        enable = true;
        http-address = "0.0.0.0:10902";
        grpc-address = "0.0.0.0:${toString grpcPort}";
        inherit objstore;
        sync-block-duration = "1s";
      };
      services.thanos.compact = {
        enable = true;
        http-address = "0.0.0.0:10903";
        inherit objstore;
        consistency-delay = "5s";
      };
      services.thanos.query = {
        enable = true;
        http-address = "0.0.0.0:${toString queryPort}";
        store.addresses = [
          "localhost:${toString grpcPort}"
        ];
      };
    };

    # MinIO server providing the S3 API used by the Thanos components.
    s3 = { pkgs, ... } : {
      # Minio requires at least 1GiB of free disk space to run.
      virtualisation = {
        diskSize = 2 * 1024;
        memorySize = 1024;
      };
      networking.firewall.allowedTCPPorts = [ minioPort ];

      services.minio = {
        enable = true;
        inherit (s3) accessKey secretKey;
      };

      environment.systemPackages = [ pkgs.minio-client ];
    };
  };

  testScript = { nodes, ... } : ''
    # Before starting the other machines we first make sure that our S3 service is online
    # and has a bucket added for thanos:
    s3.start()
    s3.wait_for_unit("minio.service")
    s3.wait_for_open_port(${toString minioPort})
    s3.succeed(
        "mc config host add minio "
        + "http://localhost:${toString minioPort} "
        + "${s3.accessKey} ${s3.secretKey} --api s3v4",
        "mc mb minio/${bucketName}",
    )

    # Now that s3 has started we can start the other machines:
    for machine in prometheus, query, store:
        machine.start()

    # Check if prometheus responds to requests:
    prometheus.wait_for_unit("prometheus.service")
    prometheus.wait_for_open_port(${toString queryPort})
    prometheus.succeed("curl -sf http://127.0.0.1:${toString queryPort}/metrics")

    # Let's test if pushing a metric to the pushgateway succeeds:
    prometheus.wait_for_unit("pushgateway.service")
    prometheus.succeed(
        "echo 'some_metric 3.14' | "
        + "curl -f --data-binary @- "
        + "http://127.0.0.1:${toString pushgwPort}/metrics/job/some_job"
    )

    # Now check whether that metric gets ingested by prometheus.
    # Since we'll check for the metric several times on different machines
    # we abstract the test using the following function:

    # Function to check if the metric "some_metric" has been received and returns the correct value.
    def wait_for_metric(machine):
        return machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:${toString queryPort}/api/v1/query?query=some_metric' | "
            + "jq '.data.result[0].value[1]' | grep '\"3.14\"'"
        )


    wait_for_metric(prometheus)

    # Let's test if the pushgateway persists metrics to the configured location.
    prometheus.wait_until_succeeds("test -e /var/lib/prometheus-pushgateway/metrics")

    # Test thanos
    prometheus.wait_for_unit("thanos-sidecar.service")

    # Test if the Thanos query service can correctly retrieve the metric that was sent above.
    query.wait_for_unit("thanos-query.service")
    wait_for_metric(query)

    # Test if the Thanos sidecar has correctly uploaded its TSDB to S3, if the
    # Thanos storage service has correctly downloaded it from S3 and if the Thanos
    # query service running on $store can correctly retrieve the metric:
    store.wait_for_unit("thanos-store.service")
    wait_for_metric(store)

    store.wait_for_unit("thanos-compact.service")

    # Test if the Thanos bucket command is able to retrieve blocks from the S3 bucket
    # and check if the blocks have the correct labels:
    store.succeed(
        "thanos tools bucket ls "
        + "--objstore.config-file=${nodes.store.config.services.thanos.store.objstore.config-file} "
        + "--output=json | "
        + "jq .thanos.labels.some_label | "
        + "grep 'required by thanos'"
    )
  '';
}