1{
2 lib,
3 buildPythonPackage,
4 fetchFromGitHub,
5 cmake,
6 cxxopts,
7 deepsearch-toolkit,
8 docling-core,
9 fasttext,
10 fmt,
11 loguru,
12 matplotlib,
13 nlohmann_json,
14 pandas,
15 pcre2,
16 pkg-config,
17 poetry-core,
18 pybind11,
19 python-dotenv,
20 requests,
21 rich,
22 sentencepiece,
23 tabulate,
24 tqdm,
25 utf8cpp,
26 zlib,
27}:
28
# Python bindings for deepsearch-glm, built from the upstream GitHub tag.
# The C++ libraries listed in buildInputs are supplied by nixpkgs.
buildPythonPackage rec {
  pname = "deepsearch-glm";
  version = "1.0.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "deepsearch-glm";
    tag = "v${version}";
    hash = "sha256-3sJNkrx0tTm6RMYAwV8Aha7x8dZjf4tGdds8OScRff8=";
  };

  # PEP 517 build requirements.
  build-system = [
    poetry-core
    pybind11
  ];

  # Tools that run on the build platform.
  nativeBuildInputs = [
    cmake
    pkg-config
  ];

  # cmake is on PATH as a tool only; its setup hook must not run the
  # configure phase itself.
  # NOTE(review): presumably the Python build backend drives cmake - confirm
  # against upstream's build script.
  dontUseCmakeConfigure = true;

  # Native libraries the extension is compiled and linked against.
  buildInputs = [
    cxxopts
    fasttext
    fmt
    loguru
    nlohmann_json
    pcre2
    sentencepiece
    utf8cpp
    zlib
  ];

  # Add utf8cpp's include/utf8cpp subdirectory to the compiler search path.
  env.NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";
  # NOTE(review): presumably makes upstream's build use the libraries from
  # buildInputs rather than fetching its own copies - verify upstream.
  env.USE_SYSTEM_DEPS = true;

  # Extras, keyed by extra name.
  optional-dependencies = {
    docling = [
      docling-core
      pandas
    ];
    pyplot = [ matplotlib ];
    toolkit = [
      deepsearch-toolkit
      python-dotenv
    ];
    utils = [
      pandas
      python-dotenv
      requests
      rich
      tabulate
      tqdm
    ];
  };

  # The test suite downloads models and data from an S3 bucket, so it is
  # disabled; only the import smoke check below is run.
  doCheck = false;

  pythonImportsCheck = [ "deepsearch_glm" ];

  meta = {
    description = "Create fast graph language models from converted PDF documents for knowledge extraction and Q&A";
    homepage = "https://github.com/DS4SD/deepsearch-glm";
    changelog = "https://github.com/DS4SD/deepsearch-glm/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.booxter ];
  };
}