# Package definition for dataprep-ml, built from the PyPI sdist with
# buildPythonPackage in pyproject mode.
{
  lib,
  buildPythonPackage,
  colorlog,
  dataclasses-json,
  fetchPypi,
  nltk-data,
  numpy,
  pandas,
  poetry-core,
  pydantic,
  pydateinfer,
  python-dateutil,
  pythonOlder,
  scipy,
  symlinkJoin,
  type-infer,
}:
let
  # Single store path combining the punkt and stopwords NLTK data sets;
  # exported below as NLTK_DATA so the import check can find them.
  testNltkData = symlinkJoin {
    name = "nltk-test-data";
    paths = [
      nltk-data.punkt
      nltk-data.stopwords
    ];
  };
in
buildPythonPackage rec {
  pname = "dataprep-ml";
  version = "25.2.3.0";
  pyproject = true;

  disabled = pythonOlder "3.8";

  # Using PyPI because the GitHub repo does not contain tags or release branches.
  # Note that the sdist is published under the underscore name.
  src = fetchPypi {
    pname = "dataprep_ml";
    inherit version;
    hash = "sha256-pULqrPTxGtBLRsKCpSsP3a/QA0O5eXOP6BSI5TbCQWY=";
  };

  # Version constraints on these requirements are relaxed for this package.
  pythonRelaxDeps = [
    "pydantic"
    "numpy"
  ];

  # Build-time only: the backend used for the pyproject build.
  build-system = [
    poetry-core
  ];

  # Runtime dependencies, propagated to anything that depends on this package.
  dependencies = [
    colorlog
    dataclasses-json
    numpy
    pandas
    pydantic
    pydateinfer
    python-dateutil
    scipy
    type-infer
  ];

  # PyPI tarball has no tests
  doCheck = false;

  # Importing the package requires NLTK data to be present.
  # Setting NLTK_DATA through `env` is the only way to make the variable
  # available while pythonImportsCheck runs.
  env.NLTK_DATA = testNltkData;

  pythonImportsCheck = [
    "dataprep_ml"
    "dataprep_ml.cleaners"
    "dataprep_ml.helpers"
    "dataprep_ml.imputers"
    "dataprep_ml.insights"
    "dataprep_ml.recommenders"
    "dataprep_ml.splitters"
  ];

  # `lib` is referenced explicitly rather than via a blanket `with lib;`
  # so that every name in this attrset has an obvious origin.
  meta = {
    description = "Data utilities for Machine Learning pipelines";
    homepage = "https://github.com/mindsdb/dataprep_ml";
    license = lib.licenses.gpl3Only;
    maintainers = with lib.maintainers; [ mbalatsko ];
  };
}