# Package expression for the `webdataset` Python library, built from a pinned
# GitHub commit with the pyproject/setuptools build flow.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  curl,

  # build-system
  setuptools,

  # dependencies
  braceexpand,
  numpy,
  pyyaml,

  # tests
  imageio,
  lmdb,
  msgpack,
  pytestCheckHook,
  torch,
  torchvision,
}:

# `rec` is needed so that `meta.changelog` can interpolate `version`.
buildPythonPackage rec {
  pname = "webdataset";
  version = "1.0.2";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "webdataset";
    repo = "webdataset";
    # recent versions are not tagged on GitHub, so the source is pinned to a
    # commit hash rather than a tag
    rev = "0773837ecd298587fc89c4f944ef346ef1a6b619";
    hash = "sha256-jFFRp5W9yP1mKi9x43EdOakFAd9ArnDqH3dnvFOeCmc=";
  };

  # Replace the bare "curl" string literal in gopen.py with the absolute path
  # of the curl executable taken from this expression's `curl` argument.
  # `--replace-fail` aborts the build if the literal is no longer present.
  postPatch = ''
    substituteInPlace src/webdataset/gopen.py \
      --replace-fail \
        '"curl"' \
        '"${lib.getExe curl}"'
  '';

  build-system = [ setuptools ];

  dependencies = [
    braceexpand
    numpy
    pyyaml
  ];

  # Packages made available to the test phase only.
  nativeCheckInputs = [
    imageio
    lmdb
    msgpack
    pytestCheckHook
    torch
    torchvision
  ];

  pythonImportsCheck = [ "webdataset" ];

  disabledTests = [
    # Require network
    "test_batched"
    "test_cache_dir"
    "test_dataloader"
    "test_decode_handlers"
    "test_decoder"
    "test_handlers"
    "test_pipe"
    "test_remote_file"
    "test_shard_syntax"
    "test_torchvision"
    "test_unbatched"
  ];

  meta = {
    description = "High-performance Python-based I/O system for large (and small) deep learning problems, with strong support for PyTorch";
    mainProgram = "widsindex";
    homepage = "https://github.com/webdataset/webdataset";
    # NOTE(review): `src` is pinned by commit because recent versions are not
    # tagged upstream, so this tag-based URL may not resolve -- confirm.
    changelog = "https://github.com/webdataset/webdataset/releases/tag/${version}";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.iynaix ];
  };
}