# Package recipe for docling-jobkit, built from the tagged GitHub source of
# docling-project/docling-jobkit with buildPythonPackage.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build backends
  hatchling,
  poetry-core,

  # runtime requirements
  docling,
  pydantic-settings,
  typer,
  boto3,
  pandas,
  fastparquet,
  pyarrow,
  httpx,

  # extras
  ray,
  rq,
  msgpack,

  # test tooling
  pytestCheckHook,
  pytest-asyncio,
  writableTmpDirAsHomeHook,
}:

let
  # Skipped on every platform: these tests require network access.
  networkBoundTests = [
    "test_chunk_file"
    "test_convert_file"
    "test_convert_warmup"
    "test_convert_url"
    "test_replicated_convert"
  ];

  # Skipped on Darwin only.
  # Flaky due to comparison with magic object
  # https://github.com/docling-project/docling-jobkit/issues/45
  darwinFlakyTests = [
    "test_options_validator"
  ];
in
buildPythonPackage rec {
  pname = "docling-jobkit";
  version = "1.5.0";
  pyproject = true;

  # The upstream release tag is the version prefixed with "v".
  src = fetchFromGitHub {
    owner = "docling-project";
    repo = "docling-jobkit";
    tag = "v${version}";
    hash = "sha256-/rFMP5KiWHBsGaA2LVOWP2TkJLVeguIlrmO+JL610hQ=";
  };

  build-system = [
    hatchling
    poetry-core
  ];

  dependencies = [
    docling
    pydantic-settings
    typer
    boto3
    pandas
    fastparquet
    pyarrow
    httpx
  ];

  # Extras are keyed by name; the "rq" extra pulls in msgpack as well.
  optional-dependencies = {
    ray = [ ray ];
    rq = [
      rq
      msgpack
    ];
  };

  pythonRelaxDeps = [
    "boto3"
    "pandas"
    "pyarrow"
  ];

  pythonImportsCheck = [
    "docling"
    "docling_jobkit"
  ];

  # The test environment is the pytest tooling plus the packages of the "rq"
  # extra defined above.
  nativeCheckInputs = [
    pytestCheckHook
    pytest-asyncio
    writableTmpDirAsHomeHook
  ]
  ++ optional-dependencies.rq;

  # Network-bound tests are always skipped; the Darwin list is appended only
  # when the host platform is Darwin.
  disabledTests = networkBoundTests ++ lib.optionals stdenv.hostPlatform.isDarwin darwinFlakyTests;

  meta = {
    description = "Running a distributed job processing documents with Docling";
    homepage = "https://github.com/docling-project/docling-jobkit";
    changelog = "https://github.com/docling-project/docling-jobkit/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.codgician ];
  };
}