1{
2 lib,
3 buildPythonPackage,
4 fetchurl,
5 protobuf,
6 pymorphy3,
7 pymorphy3-dicts-uk,
8 sentencepiece,
9 setuptools,
10 spacy,
11 spacy-pkuseg,
12 spacy-curated-transformers,
13 sudachipy,
14 sudachidict-core,
15 transformers,
16 writeScript,
17 stdenv,
18 jq,
19 nix,
20 moreutils,
21}:
22let
23 buildModelPackage =
24 {
25 pname,
26 version,
27 sha256,
28 license,
29 }:
30
31 let
32 lang = builtins.substring 0 2 pname;
33 requires-protobuf =
34 pname == "fr_dep_news_trf" || pname == "sl_core_news_trf" || pname == "uk_core_news_trf";
35 requires-sentencepiece = pname == "fr_dep_news_trf" || pname == "sl_core_news_trf";
36 requires-transformers = pname == "uk_core_news_trf";
37 in
38 buildPythonPackage {
39 inherit pname version;
40 pyproject = true;
41
42 src = fetchurl {
43 url = "https://github.com/explosion/spacy-models/releases/download/${pname}-${version}/${pname}-${version}.tar.gz";
44 inherit sha256;
45 };
46
47 propagatedBuildInputs = [
48 spacy
49 ]
50 ++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-curated-transformers ]
51 ++ lib.optionals requires-transformers [ transformers ]
52 ++ lib.optionals (lang == "ja") [
53 sudachidict-core
54 sudachipy
55 ]
56 ++ lib.optionals (lang == "ru") [ pymorphy3 ]
57 ++ lib.optionals (lang == "uk") [
58 pymorphy3
59 pymorphy3-dicts-uk
60 ]
61 ++ lib.optionals (lang == "zh") [ spacy-pkuseg ]
62 ++ lib.optionals requires-sentencepiece [ sentencepiece ];
63
64 postPatch =
65 lib.optionalString requires-protobuf ''
66 substituteInPlace meta.json \
67 --replace-fail "protobuf<3.21.0" "protobuf"
68 ''
69 + lib.optionalString (lang == "zh") ''
70 # Uses numpy 2.x, while the rest of the dependencies still uses
71 # numpy 1.x. Remove once all spaCy packages are updated for
72 # numpy 2.x.
73 substituteInPlace meta.json \
74 --replace-fail "spacy-pkuseg>=1.0.0,<2.0.0" "spacy-pkuseg"
75 '';
76
77 nativeBuildInputs = [ setuptools ] ++ lib.optionals requires-protobuf [ protobuf ];
78
79 pythonImportsCheck = [ pname ];
80
81 passthru.updateScript = writeScript "update-spacy-models" ''
82 #!${stdenv.shell}
83 set -eou pipefail
84 PATH=${
85 lib.makeBinPath [
86 jq
87 nix
88 moreutils
89 ]
90 }
91
92 IFS=. read -r major minor patch <<<"${spacy.version}"
93 spacyVersion="$(echo "$major.$minor.0")"
94
95 pushd pkgs/development/python-modules/spacy/ || exit
96
97 jq -r '.[] | .pname' models.json | while IFS= read -r pname; do
98 if [ "$(jq --arg pname "$pname" -r '.[] | select(.pname == $pname) | .version' models.json)" == "$spacyVersion" ]; then
99 continue
100 fi
101
102 newHash="$(nix-prefetch-url "https://github.com/explosion/spacy-models/releases/download/$pname-$spacyVersion/$pname-$spacyVersion.tar.gz")"
103 jq --arg newHash "$newHash" --arg pname "$pname" --arg spacyVersion "$spacyVersion" \
104 '[(.[] | select(.pname != $pname)), (.[] | select(.pname == $pname) | .sha256 = $newHash | .version = $spacyVersion)] | sort_by(.pname)' \
105 models.json | sponge models.json
106 done
107
108 popd || exit
109 '';
110
111 meta = {
112 description = "Models for the spaCy NLP library";
113 homepage = "https://github.com/explosion/spacy-models";
114 license = lib.licenses.${license};
115 };
116 };
117
118 makeModelSet =
119 models: lib.listToAttrs (map (m: lib.nameValuePair m.pname (buildModelPackage m)) models);
120in
# Read the model list from models.json and build one package per entry.
lib.pipe ./models.json [
  lib.importJSON
  makeModelSet
]