# Package definition for torchsnapshot, built from the upstream GitHub
# repository with the setuptools backend and checked with pytest.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build backend
  setuptools,

  # runtime requirements
  aiofiles,
  aiohttp,
  importlib-metadata,
  nest-asyncio,
  psutil,
  pyyaml,
  torch,
  typing-extensions,

  # test tooling
  pytest-asyncio,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "torchsnapshot";
  version = "0.1.0";
  pyproject = true;

  # Sources are fetched from the git tag named after the version.
  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "torchsnapshot";
    tag = version;
    hash = "sha256-F8OaxLH8BL6MPNLFv1hBuVmeEdnEQ5w2Qny6by1wP6k=";
  };

  # torchsnapshot has not yet adapted to the torch.load change that occurred
  # in 2.6.0 and fails with:
  #   _pickle.UnpicklingError: Weights only load failed.
  # Pass weights_only=False explicitly at the affected call site.
  # https://pytorch.org/docs/stable/generated/torch.load.html
  postPatch = ''
    substituteInPlace torchsnapshot/io_preparers/object.py \
    --replace-fail \
    "torch.load(io.BytesIO(buf))" \
    "torch.load(io.BytesIO(buf), weights_only=False)"
  '';

  build-system = [ setuptools ];

  dependencies = [
    aiofiles
    aiohttp
    importlib-metadata
    nest-asyncio
    psutil
    pyyaml
    torch
    typing-extensions
  ];

  nativeCheckInputs = [
    pytest-asyncio
    pytestCheckHook
  ];

  pythonImportsCheck = [ "torchsnapshot" ];

  disabledTests = [
    # Fails with
    #   torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
    #   AssertionError: "Socket Timeout" does not match "wait timeout after 5000ms
    "test_linear_barrier_timeout"
  ];

  meta = {
    description = "Performant, memory-efficient checkpointing library for PyTorch applications, designed with large, complex distributed workloads in mind";
    homepage = "https://github.com/pytorch/torchsnapshot/";
    changelog = "https://github.com/pytorch/torchsnapshot/releases/tag/${version}";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.GaetanLepage ];
    badPlatforms = [
      # On Darwin the test suite hangs and eventually times out with:
      # "torch.distributed.DistNetworkError: The client socket has timed out after"
      lib.systems.inspect.patterns.isDarwin
    ];
  };
}