# NOTE: non-code page header captured along with this file: "at master 3.7 kB view raw"
# Builds one Python package per entry of ./models.json and returns them as an
# attribute set keyed by each entry's `pname`.
{
  lib,
  buildPythonPackage,
  fetchurl,
  protobuf,
  pymorphy3,
  pymorphy3-dicts-uk,
  sentencepiece,
  setuptools,
  spacy,
  spacy-pkuseg,
  spacy-curated-transformers,
  sudachipy,
  sudachidict-core,
  transformers,
  writeScript,
  stdenv,
  jq,
  nix,
  moreutils,
}:
let
  # Models for which the protobuf requirement in meta.json is relaxed and
  # protobuf is added to the build inputs.
  protobufModels = [
    "fr_dep_news_trf"
    "sl_core_news_trf"
    "uk_core_news_trf"
  ];

  # Models that additionally propagate sentencepiece.
  sentencepieceModels = [
    "fr_dep_news_trf"
    "sl_core_news_trf"
  ];

  # Models that additionally propagate transformers.
  transformersModels = [ "uk_core_news_trf" ];

  # postPatch fragment: drop the upper bound on protobuf in meta.json.
  relaxProtobufPin = ''
    substituteInPlace meta.json \
      --replace-fail "protobuf<3.21.0" "protobuf"
  '';

  # postPatch fragment: drop the version bounds on spacy-pkuseg in meta.json.
  relaxPkusegPin = ''
    # Uses numpy 2.x, while the rest of the dependencies still uses
    # numpy 1.x. Remove once all spaCy packages are updated for
    # numpy 2.x.
    substituteInPlace meta.json \
      --replace-fail "spacy-pkuseg>=1.0.0,<2.0.0" "spacy-pkuseg"
  '';

  # Update script shared by every model package. It does not depend on the
  # individual model: it walks all entries of models.json, skips those already
  # at "<major>.<minor>.0" of the packaged spaCy version, and otherwise
  # prefetches the matching release tarball and rewrites the entry's version
  # and sha256 in place (entries end up sorted by pname).
  modelsUpdateScript = writeScript "update-spacy-models" ''
    #!${stdenv.shell}
    set -eou pipefail
    PATH=${lib.makeBinPath [ jq nix moreutils ]}

    IFS=. read -r major minor patch <<<"${spacy.version}"
    spacyVersion="$(echo "$major.$minor.0")"

    pushd pkgs/development/python-modules/spacy/ || exit

    jq -r '.[] | .pname' models.json | while IFS= read -r pname; do
      if [ "$(jq --arg pname "$pname" -r '.[] | select(.pname == $pname) | .version' models.json)" == "$spacyVersion" ]; then
        continue
      fi

      newHash="$(nix-prefetch-url "https://github.com/explosion/spacy-models/releases/download/$pname-$spacyVersion/$pname-$spacyVersion.tar.gz")"
      jq --arg newHash "$newHash" --arg pname "$pname" --arg spacyVersion "$spacyVersion" \
        '[(.[] | select(.pname != $pname)), (.[] | select(.pname == $pname) | .sha256 = $newHash | .version = $spacyVersion)] | sort_by(.pname)' \
        models.json | sponge models.json
    done

    popd || exit
  '';

  # Package a single model.
  #
  # Arguments (one models.json entry):
  #   pname   - model name; also used as the Python import name to check
  #   version - release version of the model
  #   sha256  - hash of the release tarball
  #   license - attribute name looked up in lib.licenses
  buildModelPackage =
    {
      pname,
      version,
      sha256,
      license,
    }:

    let
      # The first two characters of the model name select the
      # language-specific dependencies below.
      langCode = builtins.substring 0 2 pname;

      isTransformerModel = lib.hasSuffix "_trf" pname;
      needsProtobuf = builtins.elem pname protobufModels;
      needsSentencepiece = builtins.elem pname sentencepieceModels;
      needsTransformers = builtins.elem pname transformersModels;

      # Release tag and tarball base name are the same string upstream.
      releaseTag = "${pname}-${version}";
    in
    buildPythonPackage {
      inherit pname version;
      pyproject = true;

      src = fetchurl {
        url = "https://github.com/explosion/spacy-models/releases/download/${releaseTag}/${releaseTag}.tar.gz";
        inherit sha256;
      };

      nativeBuildInputs = [ setuptools ] ++ lib.optional needsProtobuf protobuf;

      # The order of the groups is kept stable so the resulting list does not
      # change.
      propagatedBuildInputs = builtins.concatLists [
        [ spacy ]
        (lib.optional isTransformerModel spacy-curated-transformers)
        (lib.optional needsTransformers transformers)
        (lib.optionals (langCode == "ja") [
          sudachidict-core
          sudachipy
        ])
        (lib.optional (langCode == "ru") pymorphy3)
        (lib.optionals (langCode == "uk") [
          pymorphy3
          pymorphy3-dicts-uk
        ])
        (lib.optional (langCode == "zh") spacy-pkuseg)
        (lib.optional needsSentencepiece sentencepiece)
      ];

      postPatch =
        lib.optionalString needsProtobuf relaxProtobufPin
        + lib.optionalString (langCode == "zh") relaxPkusegPin;

      pythonImportsCheck = [ pname ];

      passthru.updateScript = modelsUpdateScript;

      meta = {
        description = "Models for the spaCy NLP library";
        homepage = "https://github.com/explosion/spacy-models";
        license = lib.licenses.${license};
      };
    };

  # Turn a list of model descriptions into { <pname> = <package>; ... }.
  makeModelSet =
    models:
    builtins.listToAttrs (
      map (model: {
        name = model.pname;
        value = buildModelPackage model;
      }) models
    );
in
makeModelSet (lib.importJSON ./models.json)