# Nix expression for the `docling-parse` Python package.
#
# The package is built with `buildPythonPackage` (pyproject mode) from the
# GitHub tag matching `version`. The C/C++ libraries listed in `buildInputs`
# are taken from nixpkgs.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  cmake,
  pkg-config,
  cxxopts,
  setuptools,
  pybind11,
  zlib,
  nlohmann_json,
  utf8cpp,
  libjpeg,
  qpdf,
  loguru-cpp,
  # python dependencies
  tabulate,
  pillow,
  pydantic,
  docling-core,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "docling-parse";
  version = "4.5.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "docling-project";
    repo = "docling-parse";
    tag = "v${version}";
    hash = "sha256-8eHYMvfjPuGgrgrlqEh061ug+yer+1nQLbeDR1dQu68=";
  };

  # cmake is listed in nativeBuildInputs, but its setup hook must not run the
  # configure phase itself.
  # NOTE(review): presumably the Python build backend invokes CMake on its
  # own during the wheel build -- confirm against upstream's build script.
  dontUseCmakeConfigure = true;

  nativeBuildInputs = [
    cmake
    pkg-config
  ];

  build-system = [
    setuptools
  ];

  # Put utf8cpp's `include/utf8cpp` directory on the compiler include path.
  # NOTE(review): presumably upstream includes the utf8cpp headers without
  # the `utf8cpp/` directory prefix -- confirm.
  env.NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";

  buildInputs = [
    pybind11
    cxxopts
    libjpeg
    loguru-cpp
    nlohmann_json
    qpdf
    utf8cpp
    zlib
  ];

  # Exported as an environment variable for the build.
  # NOTE(review): presumably read by upstream's build script to link against
  # the system libraries above instead of vendored copies -- confirm.
  env.USE_SYSTEM_DEPS = true;

  # NOTE(review): with dontUseCmakeConfigure set, the cmake hook's configure
  # phase (the usual consumer of cmakeFlags) does not run; verify whether
  # these flags are still picked up anywhere.
  cmakeFlags = [
    "-DUSE_SYSTEM_DEPS=True"
  ];

  dependencies = [
    tabulate
    pillow
    pydantic
    docling-core
  ];

  pythonRelaxDeps = [
    "pydantic"
    "pillow"
  ];

  # Listed as runtime dependencies but only used in CI to build wheels
  preBuild = ''
    sed -i '/cibuildwheel/d' pyproject.toml
    sed -i '/delocate/d' pyproject.toml
  '';

  pythonImportsCheck = [
    "docling_parse"
  ];

  nativeCheckInputs = [
    pytestCheckHook
  ];

  meta = {
    # URLs use the same GitHub owner as `src` above.
    changelog = "https://github.com/docling-project/docling-parse/blob/${src.tag}/CHANGELOG.md";
    description = "Simple package to extract text with coordinates from programmatic PDFs";
    homepage = "https://github.com/docling-project/docling-parse";
    license = lib.licenses.mit;
    maintainers = [ ];
  };
}