# Package expression for sentence-transformers: fetches the tagged upstream
# sources from GitHub and builds them with buildPythonPackage (pyproject mode,
# setuptools backend). The upstream test suite is run through pytestCheckHook
# with the tests listed below disabled.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  accelerate,
  datasets,
  huggingface-hub,
  optimum,
  pillow,
  scikit-learn,
  scipy,
  torch,
  tqdm,
  transformers,
  typing-extensions,

  # tests
  pytestCheckHook,
  pytest-cov-stub,
}:

let
  inherit (stdenv.hostPlatform) isAarch64 isDarwin;

  pname = "sentence-transformers";
  version = "5.1.1";

  # Upstream sources, pinned to the release tag derived from `version`.
  src = fetchFromGitHub {
    owner = "UKPLab";
    repo = "sentence-transformers";
    tag = "v${version}";
    hash = "sha256-n0ZP01BU/s9iJ+RP7rNlBjD11jNDj8A8Q/seekh56nA=";
  };

  # Extras exposed to consumers as `optional-dependencies`; every extra listed
  # here is also pulled into the check inputs below.
  extras = {
    train = [
      accelerate
      datasets
    ];
    onnx = [ optimum ] ++ optimum.optional-dependencies.onnxruntime;
    # onnx-gpu = [ optimum ] ++ optimum.optional-dependencies.onnxruntime-gpu;
    # openvino = [ optimum-intel ] ++ optimum-intel.optional-dependencies.openvino;
  };

  # Tests require network access
  networkTests = [
    "test_LabelAccuracyEvaluator"
    "test_ParaphraseMiningEvaluator"
    "test_TripletEvaluator"
    "test_cmnrl_same_grad"
    "test_forward"
    "test_initialization_with_embedding_dim"
    "test_initialization_with_embedding_weights"
    "test_loading_model2vec"
    "test_mine_hard_negatives_with_prompt"
    "test_model_card_base"
    "test_model_card_reuse"
    "test_nanobeir_evaluator"
    "test_paraphrase_mining"
    "test_pretrained_model"
    "test_router_as_middle_module"
    "test_router_backwards_compatibility"
    "test_router_encode"
    "test_router_load_with_config"
    "test_router_save_load"
    "test_router_save_load_with_custom_default_route"
    "test_router_save_load_with_multiple_modules_per_route"
    "test_router_save_load_without_default_route"
    "test_router_with_trainer"
    "test_router_with_trainer_without_router_mapping"
    "test_save_and_load"
    "test_simple_encode"
    "test_tokenize"
    "test_train_stsb"
    "test_trainer"
    "test_trainer_invalid_column_names"
    "test_trainer_multi_dataset_errors"
  ];

  # Assertion error: Sparse operations take too long
  # (i.e. the test is sensitive to machine load)
  loadSensitiveTests = [
    "test_performance_with_large_vectors"
  ];

  # NameError: name 'ParallelismConfig' is not defined
  parallelismConfigTests = [
    "test_hf_argument_parser"
    "test_hf_argument_parser_incorrect_string_arguments"
  ];

  # These sparse tests also time out, on x86_64-darwin.
  x86DarwinTimeoutTests = [
    "sim_sparse"
  ];

  # Test modules skipped wholesale: tests require network access
  networkTestPaths = [
    "tests/cross_encoder/test_cross_encoder.py"
    "tests/cross_encoder/test_train_stsb.py"
    "tests/evaluation/test_information_retrieval_evaluator.py"
    "tests/sparse_encoder/models/test_csr.py"
    "tests/sparse_encoder/models/test_sparse_static_embedding.py"
    "tests/sparse_encoder/test_opensearch_models.py"
    "tests/sparse_encoder/test_pretrained.py"
    "tests/sparse_encoder/test_sparse_encoder.py"
    "tests/test_compute_embeddings.py"
    "tests/test_model_card_data.py"
    "tests/test_multi_process.py"
    "tests/test_pretrained_stsb.py"
    "tests/test_sentence_transformer.py"
    "tests/test_train_stsb.py"
    "tests/util/test_hard_negatives.py"
  ];
in
buildPythonPackage {
  inherit pname version src;
  pyproject = true;

  build-system = [ setuptools ];

  dependencies = [
    huggingface-hub
    pillow
    scikit-learn
    scipy
    torch
    tqdm
    transformers
    typing-extensions
  ];

  optional-dependencies = extras;

  # Test tooling plus the packages of every extra defined above.
  nativeCheckInputs = [
    pytest-cov-stub
    pytestCheckHook
  ]
  ++ lib.flatten (lib.attrValues extras);

  pythonImportsCheck = [ "sentence_transformers" ];

  # The last group is only added on Darwin hosts that are not aarch64.
  disabledTests =
    networkTests
    ++ loadSensitiveTests
    ++ parallelismConfigTests
    ++ lib.optionals (isDarwin && !isAarch64) x86DarwinTimeoutTests;

  disabledTestPaths = networkTestPaths;

  # Sentence-transformer needs a writable hf_home cache, so HF_HOME is pointed
  # at a fresh temporary directory.
  # NOTE(review): presumably exported in postInstall (rather than preCheck) so
  # the variable is already set for the import check as well — confirm.
  postInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  meta = {
    description = "Multilingual Sentence & Image Embeddings with BERT";
    homepage = "https://github.com/UKPLab/sentence-transformers";
    changelog = "https://github.com/UKPLab/sentence-transformers/releases/tag/${src.tag}";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.dit7ya ];
  };
}