# Package definition for wikipedia2vec, built from the upstream GitHub
# repository via buildPythonPackage with a pyproject-based build.
{
  lib,
  buildPythonPackage,
  click,
  cython,
  fetchFromGitHub,
  jieba,
  joblib,
  lmdb,
  marisa-trie,
  mwparserfromhell,
  numpy,
  pythonOlder,
  scipy,
  setuptools,
  tqdm,
}:

buildPythonPackage rec {
  pname = "wikipedia2vec";
  version = "2.0.0";
  pyproject = true;

  # Refuse to build on Python interpreters older than 3.8.
  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "wikipedia2vec";
    repo = "wikipedia2vec";
    # Upstream release tags carry a "v" prefix in front of the version.
    tag = "v${version}";
    hash = "sha256-vrBLlNm0bVIStSBWDHRCtuRpazu8JMCtBl4qJPtHGvU=";
  };

  # Python build-time tooling (used together with `pyproject = true`).
  build-system = [
    cython
    setuptools
  ];

  # Python packages propagated to consumers of this package.
  # NOTE(review): cython is listed here as well as in build-system, as in the
  # previous revision of this file; confirm whether it is really needed at
  # runtime before dropping it.
  dependencies = [
    click
    cython
    jieba
    joblib
    lmdb
    marisa-trie
    mwparserfromhell
    numpy
    scipy
    tqdm
  ];

  # Run the cythonize.sh script shipped in the source tree before the build
  # phase (presumably it generates the C sources from the Cython modules;
  # verify against upstream).
  preBuild = ''
    bash cythonize.sh
  '';

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "wikipedia2vec" ];

  meta = {
    description = "Tool for learning vector representations of words and entities from Wikipedia";
    mainProgram = "wikipedia2vec";
    homepage = "https://wikipedia2vec.github.io/wikipedia2vec/";
    changelog = "https://github.com/wikipedia2vec/wikipedia2vec/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ derdennisop ];
  };
}