1{
2 lib,
3 buildPythonPackage,
4 fetchFromGitHub,
5
6 # build system
7 setuptools,
8
9 # dependencies
10 absl-py,
11 array-record,
12 dm-tree,
13 etils,
14 immutabledict,
15 numpy,
16 promise,
17 protobuf,
18 psutil,
19 pyarrow,
20 requests,
21 simple-parsing,
22 tensorflow-metadata,
23 termcolor,
24 toml,
25 tqdm,
26 wrapt,
27 pythonOlder,
28 importlib-resources,
29
30 # tests
31 apache-beam,
32 beautifulsoup4,
33 click,
34 cloudpickle,
35 datasets,
36 dill,
37 ffmpeg,
38 imagemagick,
39 jax,
40 jaxlib,
41 jinja2,
42 langdetect,
43 lxml,
44 matplotlib,
45 mlcroissant,
46 mwparserfromhell,
47 mwxml,
48 networkx,
49 nltk,
50 opencv4,
51 pandas,
52 pillow,
53 pycocotools,
54 pydub,
55 pytest-xdist,
56 pytestCheckHook,
57 scikit-image,
58 scipy,
59 sortedcontainers,
60 tensorflow,
61 tifffile,
62 zarr,
63}:
64
# Derivation for tensorflow-datasets. `rec` lets `version` be referenced from
# `src.tag` and `meta.changelog` below.
buildPythonPackage rec {
  pname = "tensorflow-datasets";
  version = "4.9.9";
  # Build via PEP 517 (pyproject) rather than the legacy setup.py path.
  pyproject = true;

  src = fetchFromGitHub {
    owner = "tensorflow";
    repo = "datasets";
    # Upstream tags releases as v<version>.
    tag = "v${version}";
    # Fixed-output hash of the fetched source; must be updated together with
    # `version`.
    hash = "sha256-ZXaPYmj8aozfe6ygzKybId8RZ1TqPuIOSpd8XxnRHus=";
  };

  build-system = [ setuptools ];

  dependencies = [
    absl-py
    array-record
    dm-tree
    etils
    immutabledict
    numpy
    promise
    protobuf
    psutil
    pyarrow
    requests
    simple-parsing
    tensorflow-metadata
    termcolor
    toml
    tqdm
    wrapt
  ]
  # Pull in the transitive deps of etils' "epath" and "etree" extras, which
  # tensorflow-datasets requires (etils[epath,etree] upstream).
  ++ etils.optional-dependencies.epath
  ++ etils.optional-dependencies.etree
  # NOTE(review): tensorflow-datasets 4.9.x declares python_requires >= 3.10
  # upstream, so this pythonOlder "3.9" branch is likely always empty with
  # current nixpkgs interpreters — confirm before removing it (the
  # importlib-resources argument above would need to go at the same time).
  ++ lib.optionals (pythonOlder "3.9") [
    importlib-resources
  ];

  # Smoke test: the top-level module must be importable from the built output.
  pythonImportsCheck = [ "tensorflow_datasets" ];

  nativeCheckInputs = [
    apache-beam
    beautifulsoup4
    click
    cloudpickle
    datasets
    dill
    ffmpeg
    imagemagick
    jax
    jaxlib
    jinja2
    langdetect
    lxml
    matplotlib
    mlcroissant
    mwparserfromhell
    mwxml
    networkx
    nltk
    opencv4
    pandas
    pillow
    pycocotools
    pydub
    pytest-xdist
    pytestCheckHook
    scikit-image
    scipy
    sortedcontainers
    tensorflow
    tifffile
    zarr
  ];

  # Individual tests disabled by name (pytest -k / --deselect semantics via
  # pytestCheckHook).
  disabledTests = [
    # Since updating apache-beam to 2.65.0
    # RuntimeError: Unable to pickle fn CallableWrapperDoFn...: maximum recursion depth exceeded
    # https://github.com/tensorflow/datasets/issues/11055
    "test_download_and_prepare_as_dataset"
  ];

  # Whole files (and two specific node IDs at the end) excluded from the test
  # run; each group's comment states why.
  disabledTestPaths = [
    # Sandbox violations: network access, filesystem write attempts outside of build dir, ...
    "tensorflow_datasets/core/dataset_builder_test.py"
    "tensorflow_datasets/core/dataset_info_test.py"
    "tensorflow_datasets/core/features/features_test.py"
    "tensorflow_datasets/core/github_api/github_path_test.py"
    "tensorflow_datasets/core/registered_test.py"
    "tensorflow_datasets/core/utils/gcs_utils_test.py"
    "tensorflow_datasets/import_without_tf_test.py"
    "tensorflow_datasets/proto/build_tf_proto_test.py"
    "tensorflow_datasets/scripts/cli/build_test.py"
    "tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py"

    # Requires `pretty_midi` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/groove.py"
    "tensorflow_datasets/datasets/groove/groove_dataset_builder_test.py"

    # Requires `crepe` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/nsynth.py"
    "tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py"

    # Requires `conllu` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder_test.py"
    "tensorflow_datasets/datasets/universal_dependencies/universal_dependencies_dataset_builder_test.py"
    "tensorflow_datasets/datasets/xtreme_pos/xtreme_pos_dataset_builder_test.py"

    # Requires `gcld3` and `pretty_midi` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/core/lazy_imports_lib_test.py"

    # AttributeError: 'NoneType' object has no attribute 'Table'
    "tensorflow_datasets/core/dataset_builder_beam_test.py"
    "tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py"
    "tensorflow_datasets/core/split_builder_test.py"
    "tensorflow_datasets/core/writer_test.py"

    # Requires `tensorflow_io` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/features/audio_feature_test.py"
    "tensorflow_datasets/image/lsun_test.py"

    # Fails with `TypeError: Constant constructor takes either 0 or 2 positional arguments`
    # deep in TF AutoGraph. Doesn't reproduce in Docker with Ubuntu 22.04 => might be related
    # to the differences in some of the dependencies?
    "tensorflow_datasets/rl_unplugged/rlu_atari/rlu_atari_test.py"

    # Fails with `ValueError: setting an array element with a sequence`
    "tensorflow_datasets/core/dataset_utils_test.py"
    "tensorflow_datasets/core/features/sequence_feature_test.py"

    # Requires `tensorflow_docs` which is not packaged in `nixpkgs` and the test is for documentation anyway.
    "tensorflow_datasets/scripts/documentation/build_api_docs_test.py"

    # Not a test, should not be executed.
    "tensorflow_datasets/testing/test_utils.py"

    # Require `gcld3` and `nltk.punkt` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/text/c4_test.py"
    "tensorflow_datasets/text/c4_utils_test.py"

    # AttributeError: 'NoneType' object has no attribute 'Table'
    "tensorflow_datasets/core/file_adapters_test.py::test_read_write"
    "tensorflow_datasets/text/c4_wsrs/c4_wsrs_test.py::C4WSRSTest"
  ];

  meta = {
    description = "Library of datasets ready to use with TensorFlow";
    homepage = "https://www.tensorflow.org/datasets/overview";
    changelog = "https://github.com/tensorflow/datasets/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}