1{
2 pkgs,
3 makeTest,
4 genTests,
5}:
6
7let
8 inherit (pkgs) lib;
9
makeTestFor =
  # Build one NixOS VM test for the given PostgreSQL `package`.
  #
  # The VM runs PostgreSQL with the `anonymizer` extension preloaded and has
  # `pg-dump-anon` installed. The test script then checks three things:
  #   1. a plain `pg_dump` still contains the original data,
  #   2. `pg_dump_anon` emits masked data,
  #   3. `anon.anonymize_database()` masks the data in place.
  package:
  makeTest {
    name = "postgresql_anonymizer-${package.name}";
    meta.maintainers = lib.teams.flyingcircus.members;

    nodes.machine =
      { pkgs, ... }:
      {
        environment.systemPackages = [ pkgs.pg-dump-anon ];
        services.postgresql = {
          inherit package;
          enable = true;
          extensions = ps: [ ps.anonymizer ];
          # The extension is listed in shared_preload_libraries under the name "anon".
          settings.shared_preload_libraries = [ "anon" ];
        };
      };

    # NOTE: this is a Nix indented string. Backslashes are passed through to
    # Python verbatim; two consecutive single quotes would terminate the string,
    # so the Python code below must never contain them.
    testScript = ''
      start_all()
      machine.wait_for_unit("multi-user.target")
      machine.wait_for_unit("postgresql.service")

      with subtest("Setup"):
          machine.succeed("sudo -u postgres psql --command 'create database demo'")
          machine.succeed(
              "sudo -u postgres psql -d demo -f ${pkgs.writeText "init.sql" ''
                create extension anon cascade;
                select anon.init();
                create table player(id serial, name text, points int);
                insert into player(id,name,points) values (1,'Foo', 23);
                insert into player(id,name,points) values (2,'Bar',42);
                security label for anon on column player.name is 'MASKED WITH FUNCTION anon.fake_last_name();';
                security label for anon on column player.points is 'MASKED WITH VALUE NULL';
              ''}"
          )

      def get_player_table_contents():
          """Return the rows of the player table as lists of strings (CSV header dropped)."""
          return [
              x.split(',') for x in machine.succeed("sudo -u postgres psql -d demo --csv --command 'select * from player'").splitlines()[1:]
          ]

      def check_anonymized_row(row, row_id, original_name):
          """Assert that `row` keeps its ID but has a masked name and NULL points."""
          assert row[0] == row_id, f"Expected row to have ID {row_id}, but got {row[0]}"
          assert row[1] != original_name, f"Expected row {row_id} to have a name other than {original_name}"
          # NULL is rendered as an empty field in the CSV output.
          assert not bool(row[2]), f"Expected points to be NULL in row {row_id}"

      def find_xsv_in_dump(dump, sep=','):
          r"""
          Expecting to find a CSV (for pg_dump_anon) or TSV (for pg_dump) structure, looking like

              COPY public.player ...
              1,Shields,
              2,Salazar,
              \.

          in the given dump (the commas are tabs in case of pg_dump).
          Extract the CSV lines and split by `sep`.

          Raises AssertionError (after printing the dump) if no such section exists.
          """
          from itertools import dropwhile, takewhile

          try:
              # Everything from the COPY statement for the player table onwards.
              section = list(dropwhile(
                  lambda x: not x.startswith("COPY public.player"),
                  dump.splitlines()
              ))
              assert section, "No COPY public.player section found in dump"
              # Skip the COPY line itself, then read rows up to the terminator line.
              return [x.split(sep) for x in takewhile(
                  lambda x: x != "\\.",
                  section[1:]
              )]
          except Exception:
              print(f"Dump to process: {dump}")
              raise

      def check_original_data(output):
          """Assert that the first two rows are the unmasked rows inserted during setup."""
          assert len(output) >= 2, f"Expected at least two rows from player table; got {output}"
          assert output[0] == ['1','Foo','23'], f"Expected first row from player table to be 1,Foo,23; got {output[0]}"
          assert output[1] == ['2','Bar','42'], f"Expected second row from player table to be 2,Bar,42; got {output[1]}"

      def check_anonymized_rows(output):
          """Assert that the first two rows are masked versions of the inserted rows."""
          assert len(output) >= 2, f"Expected at least two rows from player table; got {output}"
          check_anonymized_row(output[0], '1', 'Foo')
          check_anonymized_row(output[1], '2', 'Bar')

      with subtest("Check initial state"):
          check_original_data(get_player_table_contents())

      with subtest("Anonymous dumps"):
          # A regular dump must still contain the real data (tab-separated COPY rows).
          check_original_data(find_xsv_in_dump(
              machine.succeed("sudo -u postgres pg_dump demo"),
              sep='\t'
          ))
          # The anonymizing dump must contain masked data (comma-separated COPY rows).
          check_anonymized_rows(find_xsv_in_dump(
              machine.succeed("sudo -u postgres pg_dump_anon -U postgres -h /run/postgresql -d demo"),
              sep=','
          ))

      with subtest("Anonymize"):
          machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'")
          check_anonymized_rows(get_player_table_contents())
    '';
  };
109in
genTests {
  makeTestFor = makeTestFor;
  # Two-argument predicate; the first argument is ignored. A candidate is kept
  # only when its `pkgs.anonymizer` extension is not marked as broken.
  filter = _name: candidate: !(candidate.pkgs.anonymizer.meta.broken);
}