at master 1.7 kB view raw
# Package expression for Hugging Face `datasets`, built from the GitHub
# source tag with setuptools as the PEP 517 build backend.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  aiohttp,
  dill,
  fsspec,
  huggingface-hub,
  multiprocess,
  numpy,
  packaging,
  pandas,
  pyarrow,
  requests,
  responses,
  tqdm,
  xxhash,
}:

let
  pname = "datasets";
  version = "4.0.0";

  # Upstream tags releases with the bare version number.
  src = fetchFromGitHub {
    owner = "huggingface";
    repo = "datasets";
    tag = version;
    hash = "sha256-Cr25PgLNGX/KcFZE5h1oiaDW9J50ccMqA5z3q4sITus=";
  };
in
buildPythonPackage {
  inherit pname version src;
  pyproject = true;

  build-system = [ setuptools ];

  dependencies = [
    aiohttp
    dill
    fsspec
    huggingface-hub
    multiprocess
    numpy
    packaging
    pandas
    pyarrow
    requests
    responses
    tqdm
    xxhash
  ];

  # Version constraints from upstream's setup.py that are relaxed here.
  pythonRelaxDeps = [
    # dill and multiprocess: upstream's stated reason is
    # "pin until dill has official support for determinism"
    # https://github.com/huggingface/datasets/blob/a256b85cbc67aa3f0e75d32d6586afc507cf535b/setup.py#L117
    "dill"
    "multiprocess"
    # fsspec: upstream's stated reason is
    # "to support protocol=kwargs in fsspec's `open`, `get_fs_token_paths`"
    # https://github.com/huggingface/datasets/blob/a256b85cbc67aa3f0e75d32d6586afc507cf535b/setup.py#L129
    "fsspec"
  ];

  # The test suite needs internet access throughout, so it is not run.
  doCheck = false;

  # Importing the module tries to create a cache directory; point
  # HF_MODULES_CACHE at $TMPDIR so that directory is created there.
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";

  pythonImportsCheck = [ "datasets" ];

  meta = {
    description = "Open-access datasets and evaluation metrics for natural language processing";
    homepage = "https://github.com/huggingface/datasets";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${src.tag}";
    license = lib.licenses.asl20;
    mainProgram = "datasets-cli";
    maintainers = [ lib.maintainers.osbm ];
  };
}