at master 2.9 kB view raw
# Package definition for cleanlab, built from the upstream GitHub tag with
# setuptools and checked with pytest.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  numpy,
  scikit-learn,
  termcolor,
  tqdm,
  pandas,

  # tests
  cleanvision,
  datasets,
  fasttext,
  hypothesis,
  keras,
  matplotlib,
  pytestCheckHook,
  pytest-lazy-fixture,
  skorch,
  tensorflow,
  torch,
  torchvision,
  wget,
  pythonAtLeast,
}:

buildPythonPackage rec {
  pname = "cleanlab";
  version = "2.7.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "cleanlab";
    repo = "cleanlab";
    tag = "v${version}";
    hash = "sha256-KzVqBOLTxxkgvoGPYMeYb7zMuG8VwQwX6SYR/FUhfBw=";
  };

  build-system = [ setuptools ];

  pythonRelaxDeps = [ "numpy" ];

  dependencies = [
    numpy
    scikit-learn
    termcolor
    tqdm
    pandas
  ];

  doCheck = true;

  # Conflict detection is tied to doCheck: it is disabled exactly when the
  # tests are enabled, because the packages pulled in through
  # nativeCheckInputs put duplicate packages into the closure.
  # Affected packages: grpcio protobuf tensorboard tensorboard-plugin-profile
  catchConflicts = !doCheck;

  nativeCheckInputs = [
    cleanvision
    datasets
    fasttext
    hypothesis
    keras
    matplotlib
    pytestCheckHook
    pytest-lazy-fixture
    skorch
    tensorflow
    torch
    torchvision
    wget
  ];

  disabledTests = [
    # Snapshot mismatch (AssertionError)
    "test_color_sentence"

    # Needs the datasets whose download is prevented
    "test_create_imagelab"

    # Non-trivial numpy2 incompatibilities:
    #   assert np.float64(0.492) == 0.491
    "test_duplicate_points_have_similar_scores"
    #   AssertionError: assert 'Annotators [1] did not label any examples.'
    "test_label_quality_scores_multiannotator"
  ]
  # On Python >= 3.12 the test fails with:
  #   AttributeError: 'called_once_with' is not a valid assertion.
  #   Use a spec for the mock if 'called_once_with' is meant to be an attribute..
  #   Did you mean: 'assert_called_once_with'?
  ++ lib.optional (pythonAtLeast "3.12") "test_custom_issue_manager_not_registered";

  disabledTestPaths = [
    # Needs internet access
    "tests/test_dataset.py"
    # Needs the datasets whose download is prevented
    "tests/datalab/test_cleanvision_integration.py"
    # Fails because of issues with the keras derivation
    "tests/test_frameworks.py"
  ];

  meta = {
    description = "Standard data-centric AI package for data quality and machine learning with messy, real-world data and labels";
    homepage = "https://github.com/cleanlab/cleanlab";
    changelog = "https://github.com/cleanlab/cleanlab/releases/tag/v${version}";
    license = lib.licenses.agpl3Only;
    maintainers = [ lib.maintainers.happysalada ];
    # Marked broken with datasets >= 4.0.0, which cleanlab is incompatible with:
    # cleanlab/datalab/internal/data.py:313: AssertionError
    # https://github.com/cleanlab/cleanlab/issues/1244
    broken = lib.versionAtLeast datasets.version "4.0.0";
  };
}