# Package expression for colbert-ai (ColBERT), built from the PyPI archive.
{
  lib,
  buildPythonPackage,
  fetchPypi,

  # build-system
  setuptools,

  # dependencies
  bitarray,
  datasets,
  faiss,
  flask,
  gitpython,
  ninja,
  python-dotenv,
  scipy,
  torch,
  tqdm,
  transformers,
  ujson,
}:

buildPythonPackage rec {
  pname = "colbert-ai";
  version = "0.2.21";
  pyproject = true;

  # The archive is fetched under the underscore spelling of the project name,
  # which differs from `pname` above.
  src = fetchPypi {
    pname = "colbert_ai";
    inherit version;
    hash = "sha256-qNb9tOInLysI7Tf45QlgchYNhBXR5AWFdRiYt35iW6s=";
  };

  build-system = [ setuptools ];

  dependencies = [
    bitarray
    datasets
    faiss
    flask
    gitpython
    python-dotenv
    ninja
    scipy
    torch
    tqdm
    transformers
    ujson
  ];

  # Strip the declared "git-python" requirement from the package metadata.
  # NOTE(review): presumably the module it needs is the one provided by
  # `gitpython` in `dependencies` -- confirm against upstream's metadata.
  pythonRemoveDeps = [ "git-python" ];

  # Importing AdamW from transformers fails with
  #   ImportError: cannot import name 'AdamW' from 'transformers'
  # so the training module is rewritten to take it from torch.optim instead.
  # Upstream fix: https://github.com/stanford-futuredata/ColBERT/pull/390
  postPatch = ''
    substituteInPlace colbert/training/training.py \
      --replace-fail \
        "from transformers import AdamW, get_linear_schedule_with_warmup" \
        "from transformers import get_linear_schedule_with_warmup; from torch.optim import AdamW"
  '';

  # There are no tests to run, so the check phase is disabled and only the
  # top-level import is verified.
  doCheck = false;

  pythonImportsCheck = [ "colbert" ];

  meta = {
    description = "Fast and accurate retrieval model, enabling scalable BERT-based search over large text collections in tens of milliseconds";
    homepage = "https://github.com/stanford-futuredata/ColBERT";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.bachp ];
  };
}