# Nix expression packaging the Python library `haystack-ai` from the
# deepset-ai/haystack GitHub repository.
#
# Names that appear commented out below (in the argument list and in
# `optional-dependencies`) are deliberately not wired in; they are kept as
# markers of which upstream extras are only partially covered here.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  hatchling,

  # dependencies
  boilerpy3,
  events,
  httpx,
  jsonschema,
  lazy-imports,
  more-itertools,
  networkx,
  pandas,
  pillow,
  platformdirs,
  posthog,
  prompthub-py,
  pydantic,
  quantulum3,
  rank-bm25,
  requests,
  requests-cache,
  scikit-learn,
  sseclient-py,
  tenacity,
  tiktoken,
  tqdm,
  transformers,

  # optional-dependencies
  openai-whisper,
  boto3,
  botocore,
  # beir,
  selenium,
  coverage,
  dulwich,
  # jupytercontrib,
  mkdocs,
  mypy,
  pre-commit,
  psutil,
  # pydoc-markdown,
  pylint,
  pytest,
  pytest-asyncio,
  pytest-cov,
  # pytest-custom-exit-code,
  python-multipart,
  reno,
  responses,
  toml,
  tox,
  watchdog,
  elastic-transport,
  elasticsearch,
  # azure-ai-formrecognizer,
  beautifulsoup4,
  markdown,
  python-docx,
  python-frontmatter,
  python-magic,
  tika,
  black,
  huggingface-hub,
  sentence-transformers,
  mlflow,
  rapidfuzz,
  scipy,
  seqeval,
  pdf2image,
  pytesseract,
  faiss,
  # faiss-gpu,
  pinecone-client,
  onnxruntime,
  onnxruntime-tools,
  # onnxruntime-gpu,
  opensearch-py,
  pymupdf,
  langdetect,
  nltk,
  canals,
  jinja2,
  openai,
  aiorwlock,
  ray,
  psycopg2,
  sqlalchemy,
  sqlalchemy-utils,
  weaviate-client,
}:

buildPythonPackage rec {
  pname = "haystack-ai";
  version = "2.16.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepset-ai";
    repo = "haystack";
    tag = "v${version}";
    hash = "sha256-Z5T5X92Hig7nW1fUc8b+LuegJlIZbMfyjJ0PnVudPew=";
  };

  # PEP 517 build backend.
  build-system = [
    hatchling
  ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  # Runtime requirements, propagated to dependents.
  dependencies = [
    boilerpy3
    events
    httpx
    jsonschema
    lazy-imports
    more-itertools
    networkx
    pandas
    pillow
    platformdirs
    posthog
    prompthub-py
    pydantic
    quantulum3
    rank-bm25
    requests
    requests-cache
    scikit-learn
    sseclient-py
    tenacity
    tiktoken
    tqdm
    transformers
  ];

  # Point HOME at a fresh writable directory. This has to happen in a shell
  # hook: a derivation attribute such as `env.HOME = "$(mktemp -d)"` is passed
  # to the builder verbatim, so the command substitution would never run and
  # HOME would be the literal string. The export persists for later phases
  # (including the import check) because all phases share one shell.
  preBuild = ''
    export HOME="$(mktemp -d)"
  '';

  optional-dependencies = {
    # all = [
    #   farm-haystack
    # ];
    # all-gpu = [
    #   farm-haystack
    # ];
    audio = [ openai-whisper ];
    aws = [
      boto3
      botocore
    ];
    # beir = [
    #   beir
    # ];
    colab = [ pillow ];
    crawler = [ selenium ];
    dev = [
      coverage
      dulwich
      # jupytercontrib
      mkdocs
      mypy
      pre-commit
      psutil
      # pydoc-markdown
      pylint
      pytest
      pytest-asyncio
      pytest-cov
      # pytest-custom-exit-code
      python-multipart
      reno
      responses
      toml
      tox
      watchdog
    ];
    # NOTE(review): elasticsearch7 and elasticsearch8 resolve to the same
    # package set here; confirm whether distinct client versions are intended.
    elasticsearch7 = [
      elastic-transport
      elasticsearch
    ];
    elasticsearch8 = [
      elastic-transport
      elasticsearch
    ];
    file-conversion = [
      # azure-ai-formrecognizer
      beautifulsoup4
      markdown
      python-docx
      python-frontmatter
      python-magic
      # python-magic-bin
      tika
    ];
    formatting = [ black ];
    inference = [
      huggingface-hub
      sentence-transformers
      transformers
    ];
    metrics = [
      mlflow
      rapidfuzz
      scipy
      seqeval
    ];
    ocr = [
      pdf2image
      pytesseract
    ];
    only-faiss = [ faiss ];
    # only-faiss-gpu = [
    #   faiss-gpu
    # ];
    only-pinecone = [ pinecone-client ];
    onnx = [
      onnxruntime
      onnxruntime-tools
    ];
    # onnx-gpu = [
    #   onnxruntime-gpu
    #   onnxruntime-tools
    # ];
    opensearch = [ opensearch-py ];
    pdf = [ pymupdf ];
    preprocessing = [
      langdetect
      nltk
    ];
    preview = [
      canals
      jinja2
      lazy-imports
      openai
      pandas
      rank-bm25
      requests
      tenacity
      tqdm
    ];
    ray = [
      aiorwlock
      ray
    ];
    sql = [
      psycopg2
      sqlalchemy
      sqlalchemy-utils
    ];
    weaviate = [ weaviate-client ];
  };

  # the setup for test is intensive, hopefully can be done at some point
  doCheck = false;

  # With the test suite disabled, importing the top-level module is the only
  # post-install sanity check.
  pythonImportsCheck = [ "haystack" ];

  meta = {
    description = "LLM orchestration framework to build customizable, production-ready LLM applications";
    longDescription = ''
      LLM orchestration framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or conversational agent chatbots
    '';
    changelog = "https://github.com/deepset-ai/haystack/releases/tag/${src.tag}";
    homepage = "https://github.com/deepset-ai/haystack";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    # https://github.com/deepset-ai/haystack/issues/5304
    # NOTE(review): this marks the package broken whenever pydantic >= 2 is
    # in use; confirm the linked issue still applies to this version.
    broken = lib.versionAtLeast pydantic.version "2";
  };
}