(* My agentic slop goes here. Not intended for anyone else! *)
(** Public Suffix List implementation for OCaml

    This library provides functions to query the Mozilla Public Suffix List (PSL)
    to determine public suffixes and registrable domains. It implements the
    algorithm specified at {{:https://publicsuffix.org/list/} publicsuffix.org}.

    {1 Overview}

    The Public Suffix List is a cross-vendor initiative to provide an accurate
    list of domain name suffixes under which Internet users can directly register
    names. A "public suffix" is one under which Internet users can register names.
    Some examples of public suffixes are [.com], [.co.uk] and [.pvt.k12.ma.us].

    The "registrable domain" is the public suffix plus one additional label.
    For example, for the domain [www.example.com], the public suffix is [.com]
    and the registrable domain is [example.com].

    {1 Sections}

    The PSL is divided into two sections:

    - {b ICANN}: Domains delegated by ICANN or in the IANA root zone database.
      These are official TLDs and their subdivisions.

    - {b Private}: Domains submitted by private parties (e.g., [blogspot.com],
      [github.io]). Some applications may want to treat these differently.

    {1 Rule Types}

    The PSL supports three types of rules:

    - {b Normal}: A standard domain suffix (e.g., [com], [co.uk])
    - {b Wildcard}: Matches any single label in that position (e.g.,
      [*.kobe.jp] matches any one label directly under [kobe.jp])
    - {b Exception}: Overrides a wildcard rule (e.g., [!city.kobe.jp] allows
      [city.kobe.jp] to be a registrable domain despite [*.kobe.jp])

    {1 Example Usage}

    {[
      let psl = Publicsuffix.create () in

      (* Get the public suffix of a domain *)
      Publicsuffix.public_suffix psl "www.example.com"
      (* Returns: Ok "com" *)

      Publicsuffix.public_suffix psl "www.example.co.uk"
      (* Returns: Ok "co.uk" *)

      (* Get the registrable domain *)
      Publicsuffix.registrable_domain psl "www.example.com"
      (* Returns: Ok "example.com" *)

      (* Check if a domain is a public suffix *)
      Publicsuffix.is_public_suffix psl "com"
      (* Returns: Ok true *)

      Publicsuffix.is_public_suffix psl "example.com"
      (* Returns: Ok false *)
    ]}

    {1 Internationalized Domain Names}

    The library handles internationalized domain names (IDN) by converting them
    to Punycode (ASCII-compatible encoding) before lookup. Both Unicode and
    Punycode input are accepted:

    {[
      Publicsuffix.registrable_domain psl "www.食狮.com.cn"
      (* Returns: Ok "食狮.com.cn" *)

      Publicsuffix.registrable_domain psl "www.xn--85x722f.com.cn"
      (* Returns: Ok "xn--85x722f.com.cn" *)
    ]}

    {1 Trailing Dots}

    Per the PSL specification, trailing dots (indicating fully-qualified domain
    names) are preserved in the output:

    {[
      Publicsuffix.public_suffix psl "example.com"
      (* Returns: Ok "com" *)

      Publicsuffix.public_suffix psl "example.com."
      (* Returns: Ok "com." *)
    ]}
*)

(** {1 Types} *)

(** Section of the Public Suffix List in which a rule is declared. *)
type section =
  | ICANN  (** Domains delegated by ICANN or in the IANA root zone *)
  | Private  (** Domains submitted by private parties *)

(** A handle to the parsed Public Suffix List. See {!create}. *)
type t

(** {1 Errors} *)

(** Errors that can occur during PSL operations. *)
type error =
  | Empty_domain
      (** The input domain was empty *)
  | Invalid_domain of string
      (** The domain could not be parsed as a valid domain name *)
  | Leading_dot
      (** The domain has a leading dot (e.g., [.example.com]) *)
  | Punycode_error of string
      (** Failed to convert internationalized domain to Punycode *)
  | No_public_suffix
      (** The domain has no public suffix (should not happen with valid
          domains) *)
  | Domain_is_public_suffix
      (** The domain is itself a public suffix and has no registrable domain *)

(** [pp_error ppf e] pretty-prints the error [e] on the formatter [ppf]. *)
val pp_error : Format.formatter -> error -> unit

(** [error_to_string e] converts the error [e] to a human-readable string. *)
val error_to_string : error -> string

(** {1 Creation} *)

(** [create ()] creates a PSL instance using the embedded Public Suffix List
    data. The data is compiled into the library at build time. *)
val create : unit -> t

(** {1 Core Operations} *)

(** [public_suffix t domain] returns the public suffix portion of [domain].

    The public suffix is determined by the PSL algorithm:
    - Match against all rules, taking the longest match
    - Exception rules ([!]) take priority over all other rules; the public
      suffix is then the exception rule with its leftmost label removed
    - If no rules match, the implicit [*] rule applies (returns the TLD)

    @param t The PSL instance
    @param domain The domain name to query (Unicode or Punycode)
    @return [Ok suffix] with the public suffix, or [Error e] on failure

    Examples:
    - [public_suffix t "www.example.com"] returns [Ok "com"]
    - [public_suffix t "www.example.co.uk"] returns [Ok "co.uk"]
    - [public_suffix t "test.k12.ak.us"] returns [Ok "k12.ak.us"]
    - [public_suffix t "city.kobe.jp"] returns [Ok "kobe.jp"] (the exception
      rule [!city.kobe.jp] overrides the wildcard [*.kobe.jp])
*)
val public_suffix : t -> string -> (string, error) result

(** [public_suffix_with_section t domain] is like {!public_suffix} but also
    returns the section (ICANN or Private) where the matching rule was found.

    If the implicit [*] rule was used (no explicit rule matched), the section
    is [ICANN].

    @return [Ok (suffix, section)] or [Error e] on failure
*)
val public_suffix_with_section : t -> string -> (string * section, error) result

(** [registrable_domain t domain] returns the registrable domain portion.

    The registrable domain is the public suffix plus one additional label.
    This is the highest-level domain that can be registered by a user.

    @param t The PSL instance
    @param domain The domain name to query
    @return [Ok domain] with the registrable domain, or [Error e] on failure

    Returns [Error Domain_is_public_suffix] if the domain is itself a public
    suffix (e.g., [com] or [co.uk]).

    Examples:
    - [registrable_domain t "www.example.com"] returns [Ok "example.com"]
    - [registrable_domain t "example.com"] returns [Ok "example.com"]
    - [registrable_domain t "com"] returns [Error Domain_is_public_suffix]
*)
val registrable_domain : t -> string -> (string, error) result

(** [registrable_domain_with_section t domain] is like {!registrable_domain}
    but also returns the section where the matching rule was found.

    @return [Ok (domain, section)] or [Error e] on failure
*)
val registrable_domain_with_section : t -> string -> (string * section, error) result

(** {1 Predicates} *)

(** [is_public_suffix t domain] returns [true] if [domain] is exactly a
    public suffix according to the PSL.

    Note: The test is on [domain] as a whole; a name that merely ends in a
    public suffix is not itself one. Wildcard and exception rules are taken
    into account. For example:
    - [is_public_suffix t "com"] returns [Ok true]
    - [is_public_suffix t "example.com"] returns [Ok false]
    - [is_public_suffix t "foo.ck"] returns [Ok true] (due to [*.ck] rule)
    - [is_public_suffix t "www.ck"] returns [Ok false] (due to [!www.ck] exception)
*)
val is_public_suffix : t -> string -> (bool, error) result

(** [is_registrable_domain t domain] returns [true] if [domain] is exactly
    a registrable domain (public suffix plus one label, no more).

    Examples:
    - [is_registrable_domain t "example.com"] returns [Ok true]
    - [is_registrable_domain t "www.example.com"] returns [Ok false]
    - [is_registrable_domain t "com"] returns [Ok false]
*)
val is_registrable_domain : t -> string -> (bool, error) result

(** {1 Statistics} *)

(** [rule_count t] is the total number of rules in the embedded PSL. *)
val rule_count : t -> int

(** [icann_rule_count t] is the number of ICANN section rules. *)
val icann_rule_count : t -> int

(** [private_rule_count t] is the number of private section rules. *)
val private_rule_count : t -> int