1{
2 lib,
3 stdenv,
4 buildPythonPackage,
5 fetchFromGitHub,
6 pythonOlder,
7
8 # build-system
9 cython,
10 meson-python,
11 meson,
12 pkg-config,
13 versioneer,
14 wheel,
15
16 # propagates
17 numpy,
18 python-dateutil,
19 pytz,
20 tzdata,
21
22 # optionals
23 beautifulsoup4,
24 bottleneck,
25 blosc2,
26 fsspec,
27 gcsfs,
28 html5lib,
29 jinja2,
30 lxml,
31 matplotlib,
32 numba,
33 numexpr,
34 odfpy,
35 openpyxl,
36 psycopg2,
37 pyarrow,
38 pymysql,
39 pyqt5,
40 pyreadstat,
41 qtpy,
42 s3fs,
43 scipy,
44 sqlalchemy,
45 tables,
46 tabulate,
47 xarray,
48 xlrd,
49 xlsxwriter,
50 zstandard,
51
52 # tests
53 adv_cmds,
54 glibc,
55 hypothesis,
56 pytestCheckHook,
57 pytest-xdist,
58 pytest-asyncio,
59 python,
60 runtimeShell,
61}:
62
let
  # The package is bound to a name so that `passthru.tests.pytest` further down
  # can refer back to the finished derivation and rebuild it with the test
  # suite enabled.
  pandas = buildPythonPackage rec {
    pname = "pandas";
    version = "2.3.1";
    pyproject = true;

    disabled = pythonOlder "3.9";

    src = fetchFromGitHub {
      owner = "pandas-dev";
      repo = "pandas";
      tag = "v${version}";
      hash = "sha256-xvdiWjJ5uHfrzXB7c4cYjFjZ6ue5i7qzb4tAEPJMAV0=";
    };

    # A NOTE regarding the Numpy version relaxing: both Numpy 1.x and 2.x are
    # supported at runtime. Upstream, however, always wants to build against
    # Numpy 2 while still being able to run with either Numpy 1 or 2. We insist
    # on performing this substitution even though python3.pkgs.numpy is
    # version 2 nowadays, because our ecosystem unfortunately doesn't allow
    # easily separating runtime and build-system dependencies. See also:
    #
    # https://discourse.nixos.org/t/several-comments-about-priorities-and-new-policies-in-the-python-ecosystem/51790
    #
    # Being able to build (& run) with Numpy 1 helps Python environments that
    # globally override the `numpy` attribute to point to `numpy_1`.
    postPatch = ''
      substituteInPlace pyproject.toml \
        --replace-fail "numpy>=2.0" numpy
    '';

    build-system = [
      cython
      meson-python
      meson
      numpy
      pkg-config
      versioneer
      wheel
    ]
    ++ versioneer.optional-dependencies.toml;

    enableParallelBuilding = true;

    dependencies = [
      numpy
      python-dateutil
      pytz
      tzdata
    ];

    # One attribute per extra, plus a synthetic `all` attribute that is the
    # concatenation of every individual extra.
    optional-dependencies =
      let
        extras = {
          aws = [ s3fs ];
          clipboard = [
            pyqt5
            qtpy
          ];
          compression = [ zstandard ];
          computation = [
            scipy
            xarray
          ];
          excel = [
            odfpy
            openpyxl
            # TODO: pyxlsb
            xlrd
            xlsxwriter
          ];
          feather = [ pyarrow ];
          fss = [ fsspec ];
          gcp = [
            gcsfs
            # TODO: pandas-gbq
          ];
          hdf5 = [
            blosc2
            tables
          ];
          html = [
            beautifulsoup4
            html5lib
            lxml
          ];
          mysql = [
            sqlalchemy
            pymysql
          ];
          output_formatting = [
            jinja2
            tabulate
          ];
          parquet = [ pyarrow ];
          performance = [
            bottleneck
            numba
            numexpr
          ];
          plot = [ matplotlib ];
          postgresql = [
            sqlalchemy
            psycopg2
          ];
          spss = [ pyreadstat ];
          sql-other = [ sqlalchemy ];
          xml = [ lxml ];
        };
      in
      extras // { all = lib.concatLists (lib.attrValues extras); };

    # Tests are off for the regular build and only run through
    # `passthru.tests.pytest`, which re-enables them on an overridden copy.
    doCheck = false; # various infinite recursions

    passthru.tests.pytest = pandas.overridePythonAttrs (_: {
      doCheck = true;
    });

    nativeCheckInputs = [
      hypothesis
      pytest-asyncio
      pytest-xdist
      pytestCheckHook
    ]
    ++ lib.flatten (lib.attrValues optional-dependencies)
    ++ lib.optionals stdenv.hostPlatform.isLinux [
      # for locale executable
      glibc
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      # for locale executable
      adv_cmds
    ];

    # don't max out build cores, it breaks tests; a fixed worker count is
    # passed through `pytestFlags` below instead
    dontUsePytestXdist = true;

    __darwinAllowLocalNetworking = true;

    pytestFlags = [
      # https://github.com/pandas-dev/pandas/issues/54907
      "--no-strict-data-files"
      "--numprocesses=4"
    ];

    disabledTestMarks = [
      # https://github.com/pandas-dev/pandas/blob/main/test_fast.sh
      "single_cpu"
      "slow"
      "network"
      "db"
      "slow_arm"
    ];

    disabledTests = [
      # AssertionError: Did not see expected warning of class 'FutureWarning'
      "test_parsing_tzlocal_deprecated"
    ]
    ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64) [
      # tests/generic/test_finalize.py::test_binops[and_-args4-right] - AssertionError: assert {} == {'a': 1}
      "test_binops"
      # These tests are unreliable on aarch64-darwin. See https://github.com/pandas-dev/pandas/issues/38921.
      "test_rolling"
    ]
    # `lib.optionals` (plural) is required here: `lib.optional` would wrap the
    # list in another list and leave a nested list inside `disabledTests`.
    ++ lib.optionals stdenv.hostPlatform.is32bit [
      # https://github.com/pandas-dev/pandas/issues/37398
      "test_rolling_var_numerical_issues"
    ];

    # Tests have relative paths, and need to reference compiled C extensions
    # so change directory where `import .test` is able to be resolved
    preCheck = ''
      export HOME=$TMPDIR
      cd $out/${python.sitePackages}/pandas
    ''
    # TODO: Get locale and clipboard support working on darwin.
    # Until then we disable the tests.
    #
    # NOTE(review): the stub scripts below are written relative to the current
    # directory, which at this point is the installed package directory under
    # $out (see the `cd` above). Consider moving them to a scratch directory.
    + lib.optionalString stdenv.hostPlatform.isDarwin ''
      # Fake the impure dependencies pbpaste and pbcopy
      echo "#!${runtimeShell}" > pbcopy
      echo "#!${runtimeShell}" > pbpaste
      chmod a+x pbcopy pbpaste
      export PATH=$(pwd):$PATH
    '';

    pythonImportsCheck = [ "pandas" ];

    # `lib` is referenced explicitly rather than via `with lib;` so that no
    # unrelated `lib` names are pulled into scope for the whole attribute set.
    meta = {
      # pandas devs no longer test i686, it's commonly broken
      # broken = stdenv.hostPlatform.isi686;
      changelog = "https://pandas.pydata.org/docs/whatsnew/index.html";
      description = "Powerful data structures for data analysis, time series, and statistics";
      downloadPage = "https://github.com/pandas-dev/pandas";
      homepage = "https://pandas.pydata.org";
      license = lib.licenses.bsd3;
      longDescription = ''
        Flexible and powerful data analysis / manipulation library for
        Python, providing labeled data structures similar to R data.frame
        objects, statistical functions, and much more.
      '';
      maintainers = with lib.maintainers; [
        raskin
      ];
    };
  };
in
pandas