# Package expression for pandera, built with buildPythonPackage from the
# upstream GitHub release tag. All arguments below are supplied by the caller
# (the Python package set); they are grouped by the role they play here.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,
  setuptools-scm,

  # dependencies
  numpy,
  packaging,
  pandas,
  pydantic,
  typeguard,
  typing-extensions,
  typing-inspect,

  # optional-dependencies
  black,
  dask,
  duckdb,
  fastapi,
  geopandas,
  hypothesis,
  ibis-framework,
  pandas-stubs,
  polars,
  pyyaml,
  scipy,
  shapely,

  # tests
  joblib,
  pyarrow-hotfix,
  pyarrow,
  pytest-asyncio,
  pytestCheckHook,
  pythonAtLeast,
  rich,
}:

buildPythonPackage rec {
  pname = "pandera";
  version = "0.26.1";
  pyproject = true;

  # The source is fetched at the git tag "v<version>".
  src = fetchFromGitHub {
    owner = "unionai-oss";
    repo = "pandera";
    tag = "v${version}";
    hash = "sha256-kjKsujDxX2+X6omP9qDWc2JI8bxQlOSVOcEnfACoL2I=";
  };

  build-system = [
    setuptools
    setuptools-scm
  ];

  # Export the package version through setuptools-scm's pretend-version
  # variable (presumably because the fetched source carries no git metadata
  # for setuptools-scm to inspect -- confirm if this is ever removed).
  env = {
    SETUPTOOLS_SCM_PRETEND_VERSION = version;
  };

  # Unconditional runtime dependencies.
  dependencies = [
    packaging
    pydantic
    typeguard
    typing-extensions
    typing-inspect
  ];

  # One attribute per extra, plus a synthetic `all` extra that concatenates
  # every list defined in `perExtra`.
  optional-dependencies =
    let
      # dask together with everything dask's own `dataframe` extra lists.
      daskWithDataframe = [ dask ] ++ dask.optional-dependencies.dataframe;

      # NOTE: this set is deliberately not `rec`; names on the right-hand
      # side (dask, fastapi, pandas, ...) are the function arguments above,
      # not the attributes being defined.
      perExtra = {
        strategies = [ hypothesis ];
        hypotheses = [ scipy ];
        io = [
          pyyaml
          black
          # frictionless # not in nixpkgs
        ];

        # The pyspark expression does not define
        # optional-dependencies.connect, so this extra stays disabled:
        # pyspark = [ pyspark ] ++ pyspark.optional-dependencies.connect;

        # modin is not in nixpkgs, so the modin extras stay disabled:
        # modin = [
        #   modin
        #   ray
        # ] ++ daskWithDataframe;
        # modin-ray = [
        #   modin
        #   ray
        # ];
        # modin-dask = [
        #   modin
        # ] ++ daskWithDataframe;

        dask = daskWithDataframe;
        mypy = [ pandas-stubs ];
        fastapi = [ fastapi ];
        geopandas = [
          geopandas
          shapely
        ];
        ibis = [
          ibis-framework
          duckdb
        ];
        pandas = [
          numpy
          pandas
        ];
        polars = [ polars ];
      };

      # Flat list of every package mentioned by any enabled extra.
      everyExtra = lib.concatLists (lib.attrValues perExtra);
    in
    perExtra // { all = everyExtra; };

  # Test-only inputs first, followed by every optional dependency, so the
  # test suite runs with all enabled extras available.
  nativeCheckInputs = [
    joblib
    pyarrow
    pyarrow-hotfix
    pytest-asyncio
    pytestCheckHook
    rich
  ]
  ++ optional-dependencies.all;

  # Test files, directories and individual test ids that are skipped.
  disabledTestPaths = [
    # Tries to access the network.
    "tests/fastapi/test_app.py"
    # Tests doc generation, which requires sphinx.
    "tests/pandas/test_docs_setting_column_widths.py"
    # Requires modin, which is not in nixpkgs.
    "tests/modin"
    # Some typing failures.
    "tests/mypy/test_pandas_static_type_checking.py"
    # Requires spark.
    "tests/pyspark"

    # Both fail with: KeyError: 'dask'
    "tests/dask/test_dask.py::test_series_schema"
    "tests/dask/test_dask_accessor.py::test_dataframe_series_add_schema"
  ];

  # Individual tests that are skipped; the platform- and Python-version-
  # specific groups are only added when their condition holds.
  disabledTests =
    let
      # TypeError: __class__ assignment: 'GeoDataFrame' object...
      classAssignmentFailures = [
        "test_schema_model"
        "test_schema_from_dataframe"
        "test_schema_no_geometry"
      ];

      darwinFailures = lib.optionals stdenv.hostPlatform.isDarwin [
        # OOM error on ofborg:
        "test_engine_geometry_coerce_crs"
        # pandera.errors.SchemaError: Error while coercing 'geometry' to type geometry
        "test_schema_dtype_crs_with_coerce"
      ];

      python313Failures = lib.optionals (pythonAtLeast "3.13") [
        # AssertionError: assert DataType(Sparse[float64, nan]) == DataType(Sparse[float64, nan])
        "test_legacy_default_pandas_extension_dtype"
      ];
    in
    classAssignmentFailures ++ darwinFailures ++ python313Failures;

  # Modules that must be importable from the built package.
  pythonImportsCheck = [
    "pandera"
    "pandera.api"
    "pandera.config"
    "pandera.dtypes"
    "pandera.engines"
  ];

  meta = {
    description = "Light-weight, flexible, and expressive statistical data testing library";
    homepage = "https://pandera.readthedocs.io";
    # Points at the GitHub release page for the tag the source was fetched at.
    changelog = "https://github.com/unionai-oss/pandera/releases/tag/${src.tag}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.bcdarwin ];
  };
}