# Nix derivation for cleanlab, built from the upstream GitHub tag with a
# setuptools-based pyproject build.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pythonAtLeast,

  # build-system
  setuptools,

  # dependencies
  numpy,
  pandas,
  scikit-learn,
  termcolor,
  tqdm,

  # tests
  cleanvision,
  datasets,
  fasttext,
  hypothesis,
  keras,
  matplotlib,
  pytest-lazy-fixture,
  pytestCheckHook,
  skorch,
  tensorflow,
  torch,
  torchvision,
  wget,
}:

buildPythonPackage rec {
  pname = "cleanlab";
  version = "2.7.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "cleanlab";
    repo = "cleanlab";
    tag = "v${version}";
    hash = "sha256-KzVqBOLTxxkgvoGPYMeYb7zMuG8VwQwX6SYR/FUhfBw=";
  };

  build-system = [ setuptools ];

  pythonRelaxDeps = [ "numpy" ];

  dependencies = [
    numpy
    scikit-learn
    termcolor
    tqdm
    pandas
  ];

  doCheck = true;

  # Conflict detection is tied to the test switch: it is disabled exactly when
  # the check phase is enabled, and stays on otherwise.
  # The packages listed in nativeCheckInputs bring duplicate packages into the
  # closure, which the conflict check would reject.
  # Affected packages: grpcio protobuf tensorboard tensorboard-plugin-profile
  catchConflicts = !doCheck;

  nativeCheckInputs = [
    cleanvision
    datasets
    fasttext
    hypothesis
    keras
    matplotlib
    pytestCheckHook
    pytest-lazy-fixture
    skorch
    tensorflow
    torch
    torchvision
    wget
  ];

  disabledTests = [
    # Snapshot mismatch (AssertionError)
    "test_color_sentence"

    # Depends on datasets that we do not allow to be downloaded
    "test_create_imagelab"

    # Non-trivial incompatibilities with numpy 2:
    #   assert np.float64(0.492) == 0.491
    "test_duplicate_points_have_similar_scores"
    #   AssertionError: assert 'Annotators [1] did not label any examples.'
    "test_label_quality_scores_multiannotator"
  ]
  ++ lib.optionals (pythonAtLeast "3.12") [
    # Skipped only on Python >= 3.12, where the test fails with:
    #   AttributeError: 'called_once_with' is not a valid assertion.
    #   Use a spec for the mock if 'called_once_with' is meant to be an attribute..
    #   Did you mean: 'assert_called_once_with'?
    "test_custom_issue_manager_not_registered"
  ];

  disabledTestPaths = [
    # Needs internet access
    "tests/test_dataset.py"
    # Depends on datasets that we do not allow to be downloaded
    "tests/datalab/test_cleanvision_integration.py"
    # Fails due to problems with the keras derivation
    "tests/test_frameworks.py"
  ];

  meta = {
    description = "Standard data-centric AI package for data quality and machine learning with messy, real-world data and labels";
    homepage = "https://github.com/cleanlab/cleanlab";
    changelog = "https://github.com/cleanlab/cleanlab/releases/tag/v${version}";
    license = lib.licenses.agpl3Only;
    # Marked broken whenever the packaged datasets is 4.0.0 or newer, because
    # cleanlab is incompatible with datasets>=4.0.0:
    #   cleanlab/datalab/internal/data.py:313: AssertionError
    #   https://github.com/cleanlab/cleanlab/issues/1244
    broken = lib.versionAtLeast datasets.version "4.0.0";
    maintainers = [ lib.maintainers.happysalada ];
  };
}