1{
2 lib,
3 stdenv,
4 buildPythonPackage,
5 cargo,
6 cmake,
7 fetchFromGitHub,
8 pkg-config,
9 pkgs, # zstd hidden by python3Packages.zstd
10 pytestCheckHook,
11 pytest-codspeed ? null, # Not in Nixpkgs
12 pytest-cov-stub,
13 pytest-xdist,
14 pytest-benchmark,
15 rustc,
16 rustPlatform,
17 runCommand,
18
19 mimalloc,
20 jemalloc,
21 rust-jemalloc-sys,
22 # Another alternative is to try `mimalloc`
23 polarsMemoryAllocator ? mimalloc, # polarsJemalloc,
24 polarsJemalloc ?
25 let
26 jemalloc' = rust-jemalloc-sys.override {
27 jemalloc = jemalloc.override {
28 # "libjemalloc.so.2: cannot allocate memory in static TLS block"
29
30 # https://github.com/pola-rs/polars/issues/5401#issuecomment-1300998316
31 disableInitExecTls = true;
32 };
33 };
34 in
35 assert builtins.elem "--disable-initial-exec-tls" jemalloc'.configureFlags;
36 jemalloc',
37
38 polars,
39 python,
40}:
41
42let
43 version = "1.31.0";
44
45 # Hide symbols to prevent accidental use
46 rust-jemalloc-sys = throw "polars: use polarsMemoryAllocator over rust-jemalloc-sys";
47 jemalloc = throw "polars: use polarsMemoryAllocator over jemalloc";
48in
49
50buildPythonPackage rec {
51 pname = "polars";
52 inherit version;
53 format = "setuptools";
54
55 src = fetchFromGitHub {
56 owner = "pola-rs";
57 repo = "polars";
58 tag = "py-${version}";
59 hash = "sha256-OZ7guV/uxa3jGesAh+ubrFjQSNVp5ImfXfPAQxagTj0=";
60 };
61
62 patches = [
63 ./avx512.patch
64 ];
65
66 # Do not type-check assertions because some of them use unstable features (`is_none_or`)
67 postPatch = ''
68 while IFS= read -r -d "" path ; do
69 sed -i 's \(\s*\)debug_assert! \1#[cfg(debug_assertions)]\n\1debug_assert! ' "$path"
70 done < <( find -iname '*.rs' -print0 )
71 '';
72
73 cargoDeps = rustPlatform.fetchCargoVendor {
74 inherit pname version src;
75 hash = "sha256-yGTXUW6IVa+nRpmnkEl20/RJ/mxTSAaokETT8QLE+Ns=";
76 };
77
78 requiredSystemFeatures = [ "big-parallel" ];
79
80 build-system = [ rustPlatform.maturinBuildHook ];
81
82 nativeBuildInputs = [
83 cargo
84 pkg-config
85 cmake # libz-ng-sys
86 rustPlatform.cargoSetupHook
87 rustPlatform.cargoBuildHook
88 rustPlatform.cargoInstallHook
89 rustc
90 ];
91
92 buildInputs = [
93 polarsMemoryAllocator
94 (pkgs.__splicedPackages.zstd or pkgs.zstd)
95 ];
96
97 env = {
98 ZSTD_SYS_USE_PKG_CONFIG = true;
99
100 # https://github.com/NixOS/nixpkgs/blob/5c38beb516f8da3a823d94b746dd3bf3c6b9bbd7/doc/languages-frameworks/rust.section.md#using-community-maintained-rust-toolchains-using-community-maintained-rust-toolchains
101 # https://discourse.nixos.org/t/nixpkgs-rustplatform-and-nightly/22870
102 RUSTC_BOOTSTRAP = true;
103
104 # Several `debug_assert!` statements use the unstable `Option::is_none_or` method
105 RUSTFLAGS = lib.concatStringsSep " " (
106 [
107 "-Cdebug_assertions=n"
108 ]
109 ++ lib.optionals (polarsMemoryAllocator.pname == "mimalloc") [
110 "--cfg use_mimalloc"
111 ]
112 );
113 RUST_BACKTRACE = true;
114 };
115
116 dontUseCmakeConfigure = true;
117
118 maturinBuildFlags = [
119 "-m"
120 "py-polars/Cargo.toml"
121 ];
122
123 postInstall = ''
124 # Move polars.abi3.so -> polars.so
125 local polarsSo=""
126 local soName=""
127 while IFS= read -r -d "" p ; do
128 polarsSo=$p
129 soName="$(basename "$polarsSo")"
130 [[ "$soName" == polars.so ]] && break
131 done < <( find "$out" -iname "polars*.so" -print0 )
132 [[ -z "''${polarsSo:-}" ]] && echo "polars.so not found" >&2 && exit 1
133 if [[ "$soName" != polars.so ]] ; then
134 mv "$polarsSo" "$(dirname "$polarsSo")/polars.so"
135 fi
136 '';
137
138 pythonImportsCheck = [
139 "polars"
140 ];
141
142 passthru.tests.dynloading-1 =
143 runCommand "polars-dynloading-1"
144 {
145 nativeBuildInputs = [
146 (python.withPackages (ps: [
147 ps.pyarrow
148 polars
149 ]))
150 ];
151 }
152 ''
153 ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
154 import pyarrow
155 import polars
156 EOF
157 touch $out
158 '';
159 passthru.tests.dynloading-2 =
160 runCommand "polars-dynloading-2"
161 {
162 nativeBuildInputs = [
163 (python.withPackages (ps: [
164 ps.pyarrow
165 polars
166 ]))
167 ];
168 failureHook = ''
169 sed "s/^/ /" $out >&2
170 '';
171 }
172 ''
173 ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
174 import polars
175 import pyarrow
176 EOF
177 '';
178 passthru.tests.pytest = stdenv.mkDerivation {
179 pname = "${polars.pname}-pytest";
180
181 inherit (polars) version src;
182
183 requiredSystemFeatures = [ "big-parallel" ];
184
185 sourceRoot = "${src.name}/py-polars";
186 postPatch = ''
187 for f in * ; do
188 [[ "$f" == "tests" ]] || \
189 [[ "$f" == "pyproject.toml" ]] || \
190 rm -rf "$f"
191 done
192 for pat in "__pycache__" "*.pyc" ; do
193 find -iname "$pat" -exec rm "{}" ";"
194 done
195 '';
196 dontConfigure = true;
197 dontBuild = true;
198
199 doCheck = true;
200 checkPhase = "pytestCheckPhase";
201 nativeBuildInputs = [
202 (python.withPackages (ps: [
203 polars
204 ps.aiosqlite
205 ps.altair
206 ps.boto3
207 ps.deltalake
208 ps.fastexcel
209 ps.flask
210 ps.flask-cors
211 ps.fsspec
212 ps.gevent
213 ps.hypothesis
214 ps.jax
215 ps.jaxlib
216 (ps.kuzu or null)
217 ps.matplotlib
218 ps.moto
219 ps.nest-asyncio
220 ps.numpy
221 ps.openpyxl
222 ps.pandas
223 ps.pyarrow
224 ps.pydantic
225 ps.pyiceberg
226 ps.sqlalchemy
227 ps.torch
228 ps.xlsx2csv
229 ps.xlsxwriter
230 ps.zstandard
231 ps.cloudpickle
232 ]))
233 ];
234 nativeCheckInputs = [
235 pytestCheckHook
236 pytest-codspeed
237 pytest-cov-stub
238 pytest-xdist
239 pytest-benchmark
240 ];
241
242 pytestFlags = [
243 "--benchmark-disable"
244 "-nauto"
245 "--dist=loadgroup"
246 ];
247 disabledTests = [
248 "test_read_kuzu_graph_database" # kuzu
249 "test_read_database_cx_credentials" # connectorx
250
251 # adbc_driver_.*
252 "test_write_database_append_replace"
253 "test_write_database_create"
254 "test_write_database_create_quoted_tablename"
255 "test_write_database_adbc_temporary_table"
256 "test_write_database_create"
257 "test_write_database_append_replace"
258 "test_write_database_errors"
259 "test_write_database_errors"
260 "test_write_database_create_quoted_tablename"
261
262 # Internet access:
263 "test_read_web_file"
264 "test_run_python_snippets"
265
266 # AssertionError: Series are different (exact value mismatch)
267 "test_reproducible_hash_with_seeds"
268
269 # AssertionError: assert 'PARTITIONED FORCE SPILLED' in 'OOC sort forced\nOOC sort started\nRUN STREAMING PIPELINE\n[df -> sort -> ordered_sink]\nfinished sinking into OOC so... sort took: 365.662µs\nstarted sort source phase\nsort source phase took: 2.169915ms\nfull ooc sort took: 4.502947ms\n'
270 "test_streaming_sort"
271
272 # AssertionError assert sys.getrefcount(foos[0]) == base_count (3 == 2)
273 # tests/unit/dataframe/test_df.py::test_extension
274 "test_extension"
275
276 # Internet access (https://bucket.s3.amazonaws.com/)
277 "test_scan_credential_provider"
278 "test_scan_credential_provider_serialization"
279
280 # ModuleNotFoundError: ADBC 'adbc_driver_sqlite.dbapi' driver not detected.
281 "test_read_database"
282 "test_read_database_parameterised_uri"
283
284 # Untriaged
285 "test_pickle_lazyframe_nested_function_udf"
286 "test_serde_udf"
287 "test_hash_struct"
288 ];
289 disabledTestPaths = [
290 "tests/benchmark"
291 "tests/docs"
292
293 # Internet access
294 "tests/unit/io/cloud/test_credential_provider.py"
295
296 # Wrong altair version
297 "tests/unit/operations/namespaces/test_plot.py"
298
299 # adbc
300 "tests/unit/io/database/test_read.py"
301
302 # Untriaged
303 "tests/unit/cloud/test_prepare_cloud_plan.py"
304 "tests/unit/io/cloud/test_cloud.py"
305 ];
306
307 installPhase = "touch $out";
308 };
309
310 meta = {
311 description = "Dataframes powered by a multithreaded, vectorized query engine, written in Rust";
312 homepage = "https://github.com/pola-rs/polars";
313 changelog = "https://github.com/pola-rs/polars/releases/tag/py-${version}";
314 license = lib.licenses.mit;
315 maintainers = with lib.maintainers; [
316 happysalada
317 SomeoneSerge
318 ];
319 mainProgram = "polars";
320 platforms = lib.platforms.all;
321 };
322}