# Package definition for haystack-ai, built from the deepset-ai/haystack
# GitHub repository with the hatchling build backend.
#
# Arguments are the build helpers plus every Python package referenced in
# `dependencies` or `optional-dependencies` below. Names left commented out in
# the argument list are not taken as inputs by this file; the matching entries
# in `optional-dependencies` are commented out as well.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  hatchling,
  boilerpy3,
  events,
  httpx,
  jsonschema,
  lazy-imports,
  more-itertools,
  networkx,
  pandas,
  pillow,
  platformdirs,
  posthog,
  prompthub-py,
  pydantic,
  quantulum3,
  rank-bm25,
  requests,
  requests-cache,
  scikit-learn,
  sseclient-py,
  tenacity,
  tiktoken,
  tqdm,
  transformers,
  openai-whisper,
  boto3,
  botocore,
  # , beir
  selenium,
  coverage,
  dulwich,
  # , jupytercontrib
  mkdocs,
  mypy,
  pre-commit,
  psutil,
  # , pydoc-markdown
  pylint,
  pytest,
  pytest-asyncio,
  pytest-cov,
  # , pytest-custom-exit-code
  python-multipart,
  reno,
  responses,
  toml,
  tox,
  watchdog,
  elastic-transport,
  elasticsearch,
  # , azure-ai-formrecognizer
  beautifulsoup4,
  markdown,
  python-docx,
  python-frontmatter,
  python-magic,
  tika,
  black,
  huggingface-hub,
  sentence-transformers,
  mlflow,
  rapidfuzz,
  scipy,
  seqeval,
  pdf2image,
  pytesseract,
  faiss,
  # , faiss-gpu
  pinecone-client,
  onnxruntime,
  onnxruntime-tools,
  # , onnxruntime-gpu
  opensearch-py,
  pymupdf,
  langdetect,
  nltk,
  canals,
  jinja2,
  openai,
  aiorwlock,
  ray,
  psycopg2,
  sqlalchemy,
  sqlalchemy-utils,
  weaviate-client,
}:

buildPythonPackage rec {
  pname = "haystack-ai";
  version = "2.16.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepset-ai";
    repo = "haystack";
    tag = "v${version}";
    hash = "sha256-Z5T5X92Hig7nW1fUc8b+LuegJlIZbMfyjJ0PnVudPew=";
  };

  # PEP 517 build backend.
  build-system = [
    hatchling
  ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  # NOTE(review): confirm this list still matches the runtime requirements
  # declared upstream for ${version}.
  dependencies = [
    boilerpy3
    events
    httpx
    jsonschema
    lazy-imports
    more-itertools
    networkx
    pandas
    pillow
    platformdirs
    posthog
    prompthub-py
    pydantic
    quantulum3
    rank-bm25
    requests
    requests-cache
    scikit-learn
    sseclient-py
    tenacity
    tiktoken
    tqdm
    transformers
  ];

  # Point HOME at a fresh temporary directory. This has to happen in a shell
  # hook: values under `env` are handed to the builder as literal strings, so
  # `env.HOME = "$(mktemp -d)"` would never be expanded. `preBuild` is used
  # rather than `preCheck` because tests are disabled below, while the
  # `pythonImportsCheck` import still runs later in the same build shell.
  # Presumably importing haystack needs a writable HOME — TODO confirm.
  preBuild = ''
    export HOME="$(mktemp -d)"
  '';

  # Extras. Commented-out entries correspond to packages that are not inputs
  # of this expression.
  optional-dependencies = {
    # all = [
    #   farm-haystack
    # ];
    # all-gpu = [
    #   farm-haystack
    # ];
    audio = [ openai-whisper ];
    aws = [
      boto3
      botocore
    ];
    # beir = [
    #   beir
    # ];
    colab = [ pillow ];
    crawler = [ selenium ];
    dev = [
      coverage
      dulwich
      # jupytercontrib
      mkdocs
      mypy
      pre-commit
      psutil
      # pydoc-markdown
      pylint
      pytest
      pytest-asyncio
      pytest-cov
      # pytest-custom-exit-code
      python-multipart
      reno
      responses
      toml
      tox
      watchdog
    ];
    elasticsearch7 = [
      elastic-transport
      elasticsearch
    ];
    elasticsearch8 = [
      elastic-transport
      elasticsearch
    ];
    file-conversion = [
      # azure-ai-formrecognizer
      beautifulsoup4
      markdown
      python-docx
      python-frontmatter
      python-magic
      # python-magic-bin
      tika
    ];
    formatting = [ black ];
    inference = [
      huggingface-hub
      sentence-transformers
      transformers
    ];
    metrics = [
      mlflow
      rapidfuzz
      scipy
      seqeval
    ];
    ocr = [
      pdf2image
      pytesseract
    ];
    only-faiss = [ faiss ];
    # only-faiss-gpu = [
    #   faiss-gpu
    # ];
    only-pinecone = [ pinecone-client ];
    onnx = [
      onnxruntime
      onnxruntime-tools
    ];
    # onnx-gpu = [
    #   onnxruntime-gpu
    #   onnxruntime-tools
    # ];
    opensearch = [ opensearch-py ];
    pdf = [ pymupdf ];
    preprocessing = [
      langdetect
      nltk
    ];
    preview = [
      canals
      jinja2
      lazy-imports
      openai
      pandas
      rank-bm25
      requests
      tenacity
      tqdm
    ];
    ray = [
      aiorwlock
      ray
    ];
    sql = [
      psycopg2
      sqlalchemy
      sqlalchemy-utils
    ];
    weaviate = [ weaviate-client ];
  };

  # the setup for test is intensive, hopefully can be done at some point
  doCheck = false;

  # With the test suite disabled, this import is the only smoke test.
  pythonImportsCheck = [ "haystack" ];

  meta = {
    description = "LLM orchestration framework to build customizable, production-ready LLM applications";
    longDescription = ''
      LLM orchestration framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or conversational agent chatbots
    '';
    changelog = "https://github.com/deepset-ai/haystack/releases/tag/${src.tag}";
    homepage = "https://github.com/deepset-ai/haystack";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    # https://github.com/deepset-ai/haystack/issues/5304
    # NOTE(review): marks the package broken whenever pydantic is >= 2;
    # confirm this is still required for the packaged version.
    broken = lib.versionAtLeast pydantic.version "2";
  };
}