{
  lib,
  aiohttp,
  aiosqlite,
  banks,
  buildPythonPackage,
  dataclasses-json,
  deprecated,
  dirtyjson,
  fetchFromGitHub,
  filetype,
  fsspec,
  hatchling,
  jsonpath-ng,
  llama-index-workflows,
  llamaindex-py-client,
  nest-asyncio,
  networkx,
  nltk-data,
  nltk,
  numpy,
  openai,
  pandas,
  pillow,
  pytest-asyncio,
  pytest-mock,
  pytestCheckHook,
  pythonOlder,
  pyvis,
  pyyaml,
  requests,
  spacy,
  sqlalchemy,
  tenacity,
  tiktoken,
  tree-sitter,
  typing-inspect,
}:

buildPythonPackage rec {
  pname = "llama-index-core";
  version = "0.13.0.post1";
  pyproject = true;

  disabled = pythonOlder "3.9";

  src = fetchFromGitHub {
    owner = "run-llama";
    repo = "llama_index";
    tag = "v${version}";
    hash = "sha256-X4PDvxynQkHOdhDC5Aqwnr3jSF/83VgbFiDD1M9LOoM=";
  };

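  # The upstream repository is a monorepo; only the core package in the
  # llama-index-core subdirectory is built here.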
  sourceRoot = "${src.name}/${pname}";

  # When `llama-index` is imported, it uses `nltk` to look for the stopwords and punkt data below
  # and tries to download them if they aren't present.
  # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
  # Setting `NLTK_DATA` to a writable path would also work, but it would have to be set in every
  # package that depends on `llama-index-core` to keep `pythonImportsCheck` from failing, so
  # bundling the data here is more elegant.
  postPatch = ''
    mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
    cp -r ${nltk-data.stopwords}/corpora/stopwords/* llama_index/core/_static/nltk_cache/corpora/stopwords/

    mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
    cp -r ${nltk-data.punkt}/tokenizers/punkt/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
  '';

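  # Relax upstream's version pins so the package builds against the versions shipped in nixpkgs.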
  pythonRelaxDeps = [
    "setuptools"
    "tenacity"
  ];

  build-system = [ hatchling ];

  dependencies = [
    aiohttp
    aiosqlite
    banks
    dataclasses-json
    deprecated
    dirtyjson
    filetype
    fsspec
    jsonpath-ng
    llama-index-workflows
    llamaindex-py-client
    nest-asyncio
    networkx
    nltk
    numpy
    openai
    pandas
    pillow
    pyvis
    pyyaml
    requests
    spacy
    sqlalchemy
    tenacity
    tiktoken
    typing-inspect
  ];

  nativeCheckInputs = [
    pytest-asyncio
    pytest-mock
    pytestCheckHook
    tree-sitter
  ];

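  # Importing llama_index exercises the nltk data bundled in postPatch above.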
  pythonImportsCheck = [ "llama_index" ];

  disabledTestPaths = [
    # Tests require network access
    "tests/agent/"
    "tests/callbacks/"
    "tests/chat_engine/"
    "tests/evaluation/"
    "tests/indices/"
    "tests/ingestion/"
    "tests/memory/"
    "tests/multi_modal_llms/"
    "tests/node_parser/"
    "tests/objects/"
    "tests/playground/"
    "tests/postprocessor/"
    "tests/query_engine/"
    "tests/question_gen/"
    "tests/response_synthesizers/"
    "tests/retrievers/"
    "tests/schema/"
    "tests/selectors/"
    "tests/test_utils.py"
    "tests/text_splitter/"
    "tests/token_predictor/"
    "tests/tools/"
  ];

  disabledTests = [
    # Tests require network access
    "test_context_extraction_basic"
    "test_context_extraction_custom_prompt"
    "test_context_extraction_oversized_document"
    "test_document_block_from_b64"
    "test_document_block_from_bytes"
    "test_document_block_from_path"
    "test_document_block_from_url"
    "test_from_namespaced_persist_dir"
    "test_from_persist_dir"
    "test_mimetype_raw_data"
    "test_multiple_documents_context"
    "test_resource"
    # asyncio.exceptions.InvalidStateError: invalid state
    "test_workflow_context_to_dict_mid_run"
    "test_SimpleDirectoryReader"
    # RuntimeError
    "test_str"
  ];

  meta = with lib; {
    description = "Data framework for your LLM applications";
    homepage = "https://github.com/run-llama/llama_index/";
    changelog = "https://github.com/run-llama/llama_index/blob/${src.tag}/CHANGELOG.md";
    license = licenses.mit;
    maintainers = with maintainers; [ fab ];
  };
}