# Package definition for curated-tokenizers, built with buildPythonPackage
# from the upstream GitHub repository.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  cython,
  setuptools,
  regex,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "curated-tokenizers";
  version = "2.0.0";
  pyproject = true;

  # The source is fetched at the release tag together with its git
  # submodules.
  src = fetchFromGitHub {
    owner = "explosion";
    repo = "curated-tokenizers";
    tag = "v${version}";
    hash = "sha256-VkDV/9c5b8TzYlthCZ38ufbrne4rihtkmkZ/gyAQXLE=";
    fetchSubmodules = true;
  };

  build-system = [
    cython
    setuptools
  ];

  dependencies = [
    regex
  ];

  nativeCheckInputs = [
    pytestCheckHook
  ];

  # Explicitly set the path to avoid running vendored
  # sentencepiece tests.
  enabledTestPaths = [ "tests" ];

  # Before the check phase, relocate the test suite to a top-level `tests`
  # directory (the path listed in enabledTestPaths) and delete the in-tree
  # package directory; the note inside the script gives the reason.
  preCheck = ''
    # avoid local paths, relative imports wont resolve correctly
    mv curated_tokenizers/tests tests
    rm -r curated_tokenizers
  '';

  pythonImportsCheck = [ "curated_tokenizers" ];

  # Attributes are referenced through `lib.` explicitly instead of wrapping
  # the whole set in `with lib;`, so no unrelated `lib` names leak into scope.
  meta = {
    description = "Lightweight piece tokenization library";
    homepage = "https://github.com/explosion/curated-tokenizers";
    changelog = "https://github.com/explosion/curated-tokenizers/releases/tag/${src.tag}";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ danieldk ];
  };
}