1let
2 grpcPort = 19090;
3 queryPort = 9090;
4 minioPort = 9000;
5 pushgwPort = 9091;
6
7 s3 = {
8 accessKey = "BKIKJAA5BMMU2RHO6IBB";
9 secretKey = "V7f1CwQqAcwo80UEIJEjc5gVQUSSx5ohQ9GSrr12";
10 };
11
12 objstore.config = {
13 type = "S3";
14 config = {
15 bucket = "thanos-bucket";
16 endpoint = "s3:${toString minioPort}";
17 region = "us-east-1";
18 access_key = s3.accessKey;
19 secret_key = s3.secretKey;
20 insecure = true;
21 signature_version2 = false;
22 put_user_metadata = {};
23 http_config = {
24 idle_conn_timeout = "0s";
25 insecure_skip_verify = false;
26 };
27 trace = {
28 enable = false;
29 };
30 };
31 };
32
33in import ./make-test-python.nix {
34 name = "prometheus";
35
36 nodes = {
37 prometheus = { pkgs, ... }: {
38 virtualisation.diskSize = 2 * 1024;
39 virtualisation.memorySize = 2048;
40 environment.systemPackages = [ pkgs.jq ];
41 networking.firewall.allowedTCPPorts = [ grpcPort ];
42 services.prometheus = {
43 enable = true;
44 scrapeConfigs = [
45 {
46 job_name = "prometheus";
47 static_configs = [
48 {
49 targets = [ "127.0.0.1:${toString queryPort}" ];
50 labels = { instance = "localhost"; };
51 }
52 ];
53 }
54 {
55 job_name = "pushgateway";
56 scrape_interval = "1s";
57 static_configs = [
58 {
59 targets = [ "127.0.0.1:${toString pushgwPort}" ];
60 }
61 ];
62 }
63 ];
64 rules = [
65 ''
66 groups:
67 - name: test
68 rules:
69 - record: testrule
70 expr: count(up{job="prometheus"})
71 ''
72 ];
73 globalConfig = {
74 external_labels = {
75 some_label = "required by thanos";
76 };
77 };
78 extraFlags = [
79 # Required by thanos
80 "--storage.tsdb.min-block-duration=5s"
81 "--storage.tsdb.max-block-duration=5s"
82 ];
83 };
84 services.prometheus.pushgateway = {
85 enable = true;
86 web.listen-address = ":${toString pushgwPort}";
87 persistMetrics = true;
88 persistence.interval = "1s";
89 stateDir = "prometheus-pushgateway";
90 };
91 services.thanos = {
92 sidecar = {
93 enable = true;
94 grpc-address = "0.0.0.0:${toString grpcPort}";
95 inherit objstore;
96 };
97
98 # TODO: Add some tests for these services:
99 #rule = {
100 # enable = true;
101 # http-address = "0.0.0.0:19194";
102 # grpc-address = "0.0.0.0:19193";
103 # query.addresses = [
104 # "localhost:19191"
105 # ];
106 # labels = {
107 # just = "some";
108 # nice = "labels";
109 # };
110 #};
111 #
112 #receive = {
113 # http-address = "0.0.0.0:19195";
114 # enable = true;
115 # labels = {
116 # just = "some";
117 # nice = "labels";
118 # };
119 #};
120 };
121 };
122
123 query = { pkgs, ... }: {
124 environment.systemPackages = [ pkgs.jq ];
125 services.thanos.query = {
126 enable = true;
127 http-address = "0.0.0.0:${toString queryPort}";
128 store.addresses = [
129 "prometheus:${toString grpcPort}"
130 ];
131 };
132 };
133
134 store = { pkgs, ... }: {
135 virtualisation.diskSize = 2 * 1024;
136 virtualisation.memorySize = 2048;
137 environment.systemPackages = with pkgs; [ jq thanos ];
138 services.thanos.store = {
139 enable = true;
140 http-address = "0.0.0.0:10902";
141 grpc-address = "0.0.0.0:${toString grpcPort}";
142 inherit objstore;
143 sync-block-duration = "1s";
144 };
145 services.thanos.compact = {
146 enable = true;
147 http-address = "0.0.0.0:10903";
148 inherit objstore;
149 consistency-delay = "5s";
150 };
151 services.thanos.query = {
152 enable = true;
153 http-address = "0.0.0.0:${toString queryPort}";
154 store.addresses = [
155 "localhost:${toString grpcPort}"
156 ];
157 };
158 };
159
160 s3 = { pkgs, ... } : {
161 # Minio requires at least 1GiB of free disk space to run.
162 virtualisation = {
163 diskSize = 2 * 1024;
164 memorySize = 1024;
165 };
166 networking.firewall.allowedTCPPorts = [ minioPort ];
167
168 services.minio = {
169 enable = true;
170 inherit (s3) accessKey secretKey;
171 };
172
173 environment.systemPackages = [ pkgs.minio-client ];
174 };
175 };
176
177 testScript = { nodes, ... } : ''
178 # Before starting the other machines we first make sure that our S3 service is online
179 # and has a bucket added for thanos:
180 s3.start()
181 s3.wait_for_unit("minio.service")
182 s3.wait_for_open_port(${toString minioPort})
183 s3.succeed(
184 "mc config host add minio "
185 + "http://localhost:${toString minioPort} "
186 + "${s3.accessKey} ${s3.secretKey} --api s3v4",
187 "mc mb minio/thanos-bucket",
188 )
189
190 # Now that s3 has started we can start the other machines:
191 for machine in prometheus, query, store:
192 machine.start()
193
194 # Check if prometheus responds to requests:
195 prometheus.wait_for_unit("prometheus.service")
196 prometheus.wait_for_open_port(${toString queryPort})
197 prometheus.succeed("curl -sf http://127.0.0.1:${toString queryPort}/metrics")
198
199 # Let's test if pushing a metric to the pushgateway succeeds:
200 prometheus.wait_for_unit("pushgateway.service")
201 prometheus.succeed(
202 "echo 'some_metric 3.14' | "
203 + "curl -f --data-binary \@- "
204 + "http://127.0.0.1:${toString pushgwPort}/metrics/job/some_job"
205 )
206
207 # Now check whether that metric gets ingested by prometheus.
208 # Since we'll check for the metric several times on different machines
209 # we abstract the test using the following function:
210
211 # Function to check if the metric "some_metric" has been received and returns the correct value.
212 def wait_for_metric(machine):
213 return machine.wait_until_succeeds(
214 "curl -sf 'http://127.0.0.1:${toString queryPort}/api/v1/query?query=some_metric' | "
215 + "jq '.data.result[0].value[1]' | grep '\"3.14\"'"
216 )
217
218
219 wait_for_metric(prometheus)
220
221 # Let's test if the pushgateway persists metrics to the configured location.
222 prometheus.wait_until_succeeds("test -e /var/lib/prometheus-pushgateway/metrics")
223
224 # Test thanos
225 prometheus.wait_for_unit("thanos-sidecar.service")
226
227 # Test if the Thanos query service can correctly retrieve the metric that was send above.
228 query.wait_for_unit("thanos-query.service")
229 wait_for_metric(query)
230
231 # Test if the Thanos sidecar has correctly uploaded its TSDB to S3, if the
232 # Thanos storage service has correctly downloaded it from S3 and if the Thanos
233 # query service running on $store can correctly retrieve the metric:
234 store.wait_for_unit("thanos-store.service")
235 wait_for_metric(store)
236
237 store.wait_for_unit("thanos-compact.service")
238
239 # Test if the Thanos bucket command is able to retrieve blocks from the S3 bucket
240 # and check if the blocks have the correct labels:
241 store.succeed(
242 "thanos tools bucket ls "
243 + "--objstore.config-file=${nodes.store.config.services.thanos.store.objstore.config-file} "
244 + "--output=json | "
245 + "jq .thanos.labels.some_label | "
246 + "grep 'required by thanos'"
247 )
248 '';
249}