# Package definition for dataprep-ml, built from the PyPI source distribution.
{
  lib,
  buildPythonPackage,
  colorlog,
  dataclasses-json,
  fetchPypi,
  nltk-data,
  numpy,
  pandas,
  poetry-core,
  pydantic,
  pydateinfer,
  python-dateutil,
  pythonOlder,
  scipy,
  symlinkJoin,
  type-infer,
}:
let
  # Merge the NLTK datasets needed at import time into a single tree, so that
  # one NLTK_DATA path (set below) covers all of them.
  testNltkData = symlinkJoin {
    name = "nltk-test-data";
    paths = [
      nltk-data.punkt
      nltk-data.stopwords
    ];
  };
in
# `rec` is required so that `inherit version` inside `src` can see `version`.
buildPythonPackage rec {
  pname = "dataprep-ml";
  version = "25.2.3.0";
  pyproject = true;

  disabled = pythonOlder "3.8";

  # Fetched from PyPI because the GitHub repository has no tags or release
  # branches to pin against.
  src = fetchPypi {
    # The sdist on PyPI uses the underscore form of the name.
    pname = "dataprep_ml";
    inherit version;
    hash = "sha256-pULqrPTxGtBLRsKCpSsP3a/QA0O5eXOP6BSI5TbCQWY=";
  };

  # Lift the version constraints declared for these requirements.
  # NOTE(review): presumably upstream's bounds exclude the versions shipped in
  # nixpkgs — confirm before removing an entry.
  pythonRelaxDeps = [
    "pydantic"
    "numpy"
  ];

  build-system = [
    poetry-core
  ];

  dependencies = [
    colorlog
    dataclasses-json
    numpy
    pandas
    pydantic
    pydateinfer
    python-dateutil
    scipy
    type-infer
  ];

  # PyPI tarball has no tests
  doCheck = false;

  # Importing the package requires NLTK data to be present. Setting NLTK_DATA
  # through `env` exports it to the build environment, which makes it visible
  # to pythonImportsCheck.
  env.NLTK_DATA = testNltkData;
  pythonImportsCheck = [
    "dataprep_ml"
    "dataprep_ml.cleaners"
    "dataprep_ml.helpers"
    "dataprep_ml.imputers"
    "dataprep_ml.insights"
    "dataprep_ml.recommenders"
    "dataprep_ml.splitters"
  ];

  meta = {
    description = "Data utilities for Machine Learning pipelines";
    homepage = "https://github.com/mindsdb/dataprep_ml";
    license = lib.licenses.gpl3Only;
    maintainers = with lib.maintainers; [ mbalatsko ];
  };
}