# Package definition for deepsearch-glm, built from the DS4SD/deepsearch-glm
# GitHub repository as a pyproject-based Python package.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # nativeBuildInputs
  cmake,
  pkg-config,

  # build-system
  poetry-core,
  pybind11,

  # buildInputs
  cxxopts,
  fasttext,
  fmt,
  loguru,
  nlohmann_json,
  pcre2,
  sentencepiece,
  utf8cpp,
  zlib,

  # optional-dependencies
  deepsearch-toolkit,
  docling-core,
  matplotlib,
  pandas,
  python-dotenv,
  requests,
  rich,
  tabulate,
  tqdm,
}:

buildPythonPackage rec {
  pname = "deepsearch-glm";
  version = "1.0.0";
  pyproject = true;

  # Sources are pinned to the upstream release tag derived from `version`.
  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "deepsearch-glm";
    tag = "v${version}";
    hash = "sha256-3sJNkrx0tTm6RMYAwV8Aha7x8dZjf4tGdds8OScRff8=";
  };

  build-system = [
    poetry-core
    pybind11
  ];

  nativeBuildInputs = [
    cmake
    pkg-config
  ];

  # cmake is provided as a build tool above, but its configure hook is
  # switched off here.
  # NOTE(review): presumably upstream's Python build invokes cmake on its
  # own - confirm against the upstream build script.
  dontUseCmakeConfigure = true;

  buildInputs = [
    cxxopts
    fasttext
    fmt
    loguru
    nlohmann_json
    pcre2
    sentencepiece
    utf8cpp
    zlib
  ];

  env = {
    # Put utf8cpp's `include/utf8cpp` directory on the compiler include path.
    NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";
    # NOTE(review): presumably makes the upstream build use the libraries from
    # buildInputs instead of fetching/vendoring its own - confirm upstream.
    USE_SYSTEM_DEPS = true;
  };

  # Extras exposed by this package; each group lists the Python packages it
  # pulls in.
  optional-dependencies = {
    docling = [
      docling-core
      pandas
    ];
    pyplot = [ matplotlib ];
    toolkit = [
      deepsearch-toolkit
      python-dotenv
    ];
    utils = [
      pandas
      python-dotenv
      requests
      rich
      tabulate
      tqdm
    ];
  };

  # The test suite insists on downloading models, data, etc. from an S3
  # bucket, so it is disabled.
  doCheck = false;

  # With the tests disabled, only an import of the top-level module is checked.
  pythonImportsCheck = [ "deepsearch_glm" ];

  meta = {
    description = "Create fast graph language models from converted PDF documents for knowledge extraction and Q&A";
    homepage = "https://github.com/DS4SD/deepsearch-glm";
    changelog = "https://github.com/DS4SD/deepsearch-glm/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.booxter ];
  };
}