# Nix package definition for the `unstructured-inference` Python library.
#
# The expression is a function whose arguments are supplied by the Python
# package set and which returns the result of `buildPythonPackage`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  # runtime dependencies
  layoutparser,
  python-multipart,
  huggingface-hub,
  opencv-python,
  onnxruntime,
  transformers,
  # NOTE: `detectron2` and `paddleocr` are accepted but currently unused (see
  # the commented-out entries in `propagatedBuildInputs`); they stay in the
  # argument list so existing callers/overrides keep working.
  detectron2,
  paddleocr,
  # check inputs
  pytestCheckHook,
  coverage,
  click,
  httpx,
  mypy,
  pytest-cov-stub,
  pdf2image,
}:

buildPythonPackage rec {
  pname = "unstructured-inference";
  version = "1.0.5";
  format = "setuptools";

  # Source is fetched from the GitHub tag that equals `version`.
  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured-inference";
    tag = version;
    hash = "sha256-3eyavjGUc3qbKuTorAiefisz4TjiG5v/88lsXYmcFmo=";
  };

  # Runtime dependencies, extended with layoutparser's `layoutmodels` and
  # `tesseract` optional dependency groups.
  propagatedBuildInputs = [
    layoutparser
    python-multipart
    huggingface-hub
    opencv-python
    onnxruntime
    transformers
    # detectron2 # fails to build
    # paddleocr # 3.12 not yet supported
    # yolox
  ]
  ++ layoutparser.optional-dependencies.layoutmodels
  ++ layoutparser.optional-dependencies.tesseract;

  # Test-only inputs. `huggingface-hub` is not repeated here because it is
  # already listed in `propagatedBuildInputs`.
  nativeCheckInputs = [
    pytestCheckHook
    coverage
    click
    httpx
    mypy
    pytest-cov-stub
    pdf2image
  ];

  # The test suite is disabled for now; the original note only says that a
  # dependency needs to be updated properly. The check configuration below is
  # kept so it is ready once `doCheck` is turned back on.
  doCheck = false;

  # Point HOME at a fresh temporary directory before the tests run.
  preCheck = ''
    export HOME=$(mktemp -d)
  '';

  disabledTests = [
    # not sure why this fails
    "test_get_path_oob_move_deeply_nested"
    "test_get_path_oob_move_nested[False]"
    # requires yolox
    "test_yolox"
  ];

  disabledTestPaths = [
    # network access
    "test_unstructured_inference/inference/test_layout.py"
    "test_unstructured_inference/models/test_chippermodel.py"
    "test_unstructured_inference/models/test_detectron2onnx.py"
    # unclear failure
    "test_unstructured_inference/models/test_donut.py"
    "test_unstructured_inference/models/test_model.py"
    "test_unstructured_inference/models/test_tables.py"
  ];

  pythonImportsCheck = [ "unstructured_inference" ];

  # `lib` is referenced explicitly instead of via `with lib;` so that every
  # name used in `meta` has a visible origin.
  meta = {
    description = "Hosted model inference code for layout parsing models";
    homepage = "https://github.com/Unstructured-IO/unstructured-inference";
    changelog = "https://github.com/Unstructured-IO/unstructured-inference/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}