(* My agentic slop goes here. Not intended for anyone else! *)
(*
 * Copyright (c) 2014, OCaml.org project
 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *)

(** Feed quality analysis. *)

type t
(** Quality metrics for a feed or user's aggregated feed.

    The representation is abstract: values are obtained from {!make} or
    {!analyze} and read back through the accessors below. *)

val make :
  total_entries:int ->
  entries_with_summary:int ->
  entries_with_author:int ->
  entries_with_date:int ->
  entries_with_content:int ->
  entries_with_tags:int ->
  avg_content_length:float ->
  min_content_length:int ->
  max_content_length:int ->
  posting_frequency_days:float option ->
  quality_score:float ->
  t
(** [make ~total_entries ~entries_with_summary ... ~quality_score] creates
    quality metrics from explicitly supplied values. Every labelled argument
    has an accessor of the same name below.

    NOTE(review): whether [make] validates its arguments (for example that
    the per-field counts do not exceed [total_entries]) is not visible from
    this interface -- confirm against the implementation. *)

(** {1 Accessors}

    Each accessor reads back one metric from a {!t}. The descriptions follow
    the field names; exact units and edge-case conventions are defined by the
    implementation, which is not visible from this interface. *)

val total_entries : t -> int
(** Total number of entries covered by the metrics. *)

val entries_with_summary : t -> int
(** Number of entries that have a summary. *)

val entries_with_author : t -> int
(** Number of entries that have an author. *)

val entries_with_date : t -> int
(** Number of entries that have a date. *)

val entries_with_content : t -> int
(** Number of entries that have content. *)

val entries_with_tags : t -> int
(** Number of entries that have tags. *)

val avg_content_length : t -> float
(** Average content length.

    NOTE(review): the unit (bytes vs. characters) is not stated here. *)

val min_content_length : t -> int
(** Minimum content length, in the same unit as {!avg_content_length}. *)

val max_content_length : t -> int
(** Maximum content length, in the same unit as {!avg_content_length}. *)

val posting_frequency_days : t -> float option
(** Posting frequency expressed in days, if available.

    NOTE(review): presumably the average interval between posts, with [None]
    when it cannot be derived -- confirm against the implementation. *)

val quality_score : t -> float
(** Overall quality score. See {!analyze} for how the score is composed when
    the metrics are computed from entries. *)

val analyze : Syndic.Atom.entry list -> t
(** [analyze entries] computes quality metrics from Atom entries.

    The quality score is a weighted average of:
    - Content completeness (40%)
    - Metadata completeness (30%)
    - Content richness (30%)

    NOTE(review): the result for an empty [entries] list is not specified by
    this interface -- confirm against the implementation. *)