# Nixpkgs recipe for the `tensorflow-datasets` Python package, built from the
# tensorflow/datasets GitHub repository (setuptools/pyproject build).
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build system
  setuptools,

  # dependencies
  absl-py,
  array-record,
  dm-tree,
  etils,
  immutabledict,
  numpy,
  promise,
  protobuf,
  psutil,
  pyarrow,
  requests,
  simple-parsing,
  tensorflow-metadata,
  termcolor,
  toml,
  tqdm,
  wrapt,
  pythonOlder,
  importlib-resources,

  # tests
  apache-beam,
  beautifulsoup4,
  click,
  cloudpickle,
  datasets,
  dill,
  ffmpeg,
  imagemagick,
  jax,
  jaxlib,
  jinja2,
  langdetect,
  lxml,
  matplotlib,
  mlcroissant,
  mwparserfromhell,
  mwxml,
  networkx,
  nltk,
  opencv4,
  pandas,
  pillow,
  pycocotools,
  pydub,
  pytest-xdist,
  pytestCheckHook,
  scikit-image,
  scipy,
  sortedcontainers,
  tensorflow,
  tifffile,
  zarr,
}:

buildPythonPackage rec {
  pname = "tensorflow-datasets";
  version = "4.9.9";
  pyproject = true;

  # Fetched from the release tag, not a PyPI sdist.
  src = fetchFromGitHub {
    owner = "tensorflow";
    repo = "datasets";
    tag = "v${version}";
    hash = "sha256-ZXaPYmj8aozfe6ygzKybId8RZ1TqPuIOSpd8XxnRHus=";
  };

  build-system = [ setuptools ];

  dependencies = [
    absl-py
    array-record
    dm-tree
    etils
    immutabledict
    numpy
    promise
    protobuf
    psutil
    pyarrow
    requests
    simple-parsing
    tensorflow-metadata
    termcolor
    toml
    tqdm
    wrapt
  ]
  # Upstream depends on the etils "epath" and "etree" extras, so pull in
  # their transitive dependencies as well.
  ++ etils.optional-dependencies.epath
  ++ etils.optional-dependencies.etree
  # importlib.resources backport; only required on Python < 3.9.
  ++ lib.optionals (pythonOlder "3.9") [
    importlib-resources
  ];

  # Smoke test: make sure the top-level module is importable.
  pythonImportsCheck = [ "tensorflow_datasets" ];

  nativeCheckInputs = [
    apache-beam
    beautifulsoup4
    click
    cloudpickle
    datasets
    dill
    ffmpeg
    imagemagick
    jax
    jaxlib
    jinja2
    langdetect
    lxml
    matplotlib
    mlcroissant
    mwparserfromhell
    mwxml
    networkx
    nltk
    opencv4
    pandas
    pillow
    pycocotools
    pydub
    pytest-xdist
    pytestCheckHook
    scikit-image
    scipy
    sortedcontainers
    tensorflow
    tifffile
    zarr
  ];

  disabledTests = [
    # Since updating apache-beam to 2.65.0
    # RuntimeError: Unable to pickle fn CallableWrapperDoFn...: maximum recursion depth exceeded
    # https://github.com/tensorflow/datasets/issues/11055
    "test_download_and_prepare_as_dataset"
  ];

  disabledTestPaths = [
    # Sandbox violations: network access, filesystem write attempts outside of build dir, ...
    "tensorflow_datasets/core/dataset_builder_test.py"
    "tensorflow_datasets/core/dataset_info_test.py"
    "tensorflow_datasets/core/features/features_test.py"
    "tensorflow_datasets/core/github_api/github_path_test.py"
    "tensorflow_datasets/core/registered_test.py"
    "tensorflow_datasets/core/utils/gcs_utils_test.py"
    "tensorflow_datasets/import_without_tf_test.py"
    "tensorflow_datasets/proto/build_tf_proto_test.py"
    "tensorflow_datasets/scripts/cli/build_test.py"
    "tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py"

    # Requires `pretty_midi` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/groove.py"
    "tensorflow_datasets/datasets/groove/groove_dataset_builder_test.py"

    # Requires `crepe` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/nsynth.py"
    "tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py"

    # Requires `conllu` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder_test.py"
    "tensorflow_datasets/datasets/universal_dependencies/universal_dependencies_dataset_builder_test.py"
    "tensorflow_datasets/datasets/xtreme_pos/xtreme_pos_dataset_builder_test.py"

    # Requires `gcld3` and `pretty_midi` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/core/lazy_imports_lib_test.py"

    # AttributeError: 'NoneType' object has no attribute 'Table'
    "tensorflow_datasets/core/dataset_builder_beam_test.py"
    "tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py"
    "tensorflow_datasets/core/split_builder_test.py"
    "tensorflow_datasets/core/writer_test.py"

    # Requires `tensorflow_io` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/features/audio_feature_test.py"
    "tensorflow_datasets/image/lsun_test.py"

    # Fails with `TypeError: Constant constructor takes either 0 or 2 positional arguments`
    # deep in TF AutoGraph. Doesn't reproduce in Docker with Ubuntu 22.04 => might be related
    # to the differences in some of the dependencies?
    "tensorflow_datasets/rl_unplugged/rlu_atari/rlu_atari_test.py"

    # Fails with `ValueError: setting an array element with a sequence`
    "tensorflow_datasets/core/dataset_utils_test.py"
    "tensorflow_datasets/core/features/sequence_feature_test.py"

    # Requires `tensorflow_docs` which is not packaged in `nixpkgs` and the test is for documentation anyway.
    "tensorflow_datasets/scripts/documentation/build_api_docs_test.py"

    # Not a test, should not be executed.
    "tensorflow_datasets/testing/test_utils.py"

    # Require `gcld3` and `nltk.punkt` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/text/c4_test.py"
    "tensorflow_datasets/text/c4_utils_test.py"

    # AttributeError: 'NoneType' object has no attribute 'Table'
    "tensorflow_datasets/core/file_adapters_test.py::test_read_write"
    "tensorflow_datasets/text/c4_wsrs/c4_wsrs_test.py::C4WSRSTest"
  ];

  meta = {
    description = "Library of datasets ready to use with TensorFlow";
    homepage = "https://www.tensorflow.org/datasets/overview";
    changelog = "https://github.com/tensorflow/datasets/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}