# at master - 8.3 kB - view raw (web code-viewer header; not part of the Nix expression)
# Python package `polars`, built from the pola-rs/polars sources with maturin
# (manifest: py-polars/Cargo.toml).
#
# Notable arguments:
#   polarsMemoryAllocator  allocator package added to buildInputs; when its
#                          pname is "mimalloc", `--cfg use_mimalloc` is added
#                          to RUSTFLAGS. Defaults to `mimalloc`.
#   polarsJemalloc         ready-made jemalloc alternative that can be passed as
#                          `polarsMemoryAllocator`.
#   pytest-codspeed        optional (defaults to null); only referenced by
#                          passthru.tests.pytest.
#   polars, python         the finished package and its interpreter, used by
#                          the passthru tests.
{
  lib,
  stdenv,
  buildPythonPackage,
  cargo,
  cmake,
  fetchFromGitHub,
  pkg-config,
  pkgs, # zstd hidden by python3Packages.zstd
  pytestCheckHook,
  pytest-codspeed ? null, # Not in Nixpkgs
  pytest-cov-stub,
  pytest-xdist,
  pytest-benchmark,
  rustc,
  rustPlatform,
  runCommand,

  mimalloc,
  jemalloc,
  rust-jemalloc-sys,
  # Allocator used for the build. The default is `mimalloc`; the alternative
  # is to pass `polarsJemalloc` (defined below) instead.
  polarsMemoryAllocator ? mimalloc, # polarsJemalloc,
  polarsJemalloc ?
    let
      jemalloc' = rust-jemalloc-sys.override {
        jemalloc = jemalloc.override {
          # "libjemalloc.so.2: cannot allocate memory in static TLS block"

          # https://github.com/pola-rs/polars/issues/5401#issuecomment-1300998316
          disableInitExecTls = true;
        };
      };
    in
    # Fail evaluation early if the override did not reach the configure flags.
    assert builtins.elem "--disable-initial-exec-tls" jemalloc'.configureFlags;
    jemalloc',

  polars,
  python,
}:

let
  version = "1.31.0";

  # Hide symbols to prevent accidental use.
  # These bindings shadow the function arguments of the same name inside the
  # derivation body; the `polarsJemalloc` default above still sees the real ones.
  rust-jemalloc-sys = throw "polars: use polarsMemoryAllocator over rust-jemalloc-sys";
  jemalloc = throw "polars: use polarsMemoryAllocator over jemalloc";
in

buildPythonPackage rec {
  pname = "polars";
  inherit version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "pola-rs";
    repo = "polars";
    tag = "py-${version}";
    hash = "sha256-OZ7guV/uxa3jGesAh+ubrFjQSNVp5ImfXfPAQxagTj0=";
  };

  patches = [
    ./avx512.patch
  ];

  # Do not type-check assertions because some of them use unstable features (`is_none_or`)
  # The sed expression uses a space as its delimiter and prefixes every
  # `debug_assert!` with `#[cfg(debug_assertions)]` at the same indentation.
  postPatch = ''
    while IFS= read -r -d "" path ; do
      sed -i 's \(\s*\)debug_assert! \1#[cfg(debug_assertions)]\n\1debug_assert! ' "$path"
    done < <( find -iname '*.rs' -print0 )
  '';

  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit pname version src;
    hash = "sha256-yGTXUW6IVa+nRpmnkEl20/RJ/mxTSAaokETT8QLE+Ns=";
  };

  requiredSystemFeatures = [ "big-parallel" ];

  build-system = [ rustPlatform.maturinBuildHook ];

  nativeBuildInputs = [
    cargo
    pkg-config
    cmake # libz-ng-sys
    rustPlatform.cargoSetupHook
    rustPlatform.cargoBuildHook
    rustPlatform.cargoInstallHook
    rustc
  ];

  buildInputs = [
    polarsMemoryAllocator
    # `pkgs.zstd` is the C library; see the note on the `pkgs` argument.
    (pkgs.__splicedPackages.zstd or pkgs.zstd)
  ];

  env = {
    ZSTD_SYS_USE_PKG_CONFIG = true;

    # https://github.com/NixOS/nixpkgs/blob/5c38beb516f8da3a823d94b746dd3bf3c6b9bbd7/doc/languages-frameworks/rust.section.md#using-community-maintained-rust-toolchains-using-community-maintained-rust-toolchains
    # https://discourse.nixos.org/t/nixpkgs-rustplatform-and-nightly/22870
    RUSTC_BOOTSTRAP = true;

    # Several `debug_assert!` statements use the unstable `Option::is_none_or` method
    RUSTFLAGS = lib.concatStringsSep " " (
      [
        "-Cdebug_assertions=n"
      ]
      ++ lib.optionals (polarsMemoryAllocator.pname == "mimalloc") [
        "--cfg use_mimalloc"
      ]
    );
    RUST_BACKTRACE = true;
  };

  # cmake is only needed as a tool (see nativeBuildInputs); skip its configure hook.
  dontUseCmakeConfigure = true;

  maturinBuildFlags = [
    "-m"
    "py-polars/Cargo.toml"
  ];

  postInstall = ''
    # Move polars.abi3.so -> polars.so
    local polarsSo=""
    local soName=""
    while IFS= read -r -d "" p ; do
      polarsSo=$p
      soName="$(basename "$polarsSo")"
      [[ "$soName" == polars.so ]] && break
    done < <( find "$out" -iname "polars*.so" -print0 )
    [[ -z "''${polarsSo:-}" ]] && echo "polars.so not found" >&2 && exit 1
    if [[ "$soName" != polars.so ]] ; then
      mv "$polarsSo" "$(dirname "$polarsSo")/polars.so"
    fi
  '';

  pythonImportsCheck = [
    "polars"
  ];

  # Imports pyarrow first, then polars, with LD_DEBUG=libs; the loader log is
  # written to $out.
  passthru.tests.dynloading-1 =
    runCommand "polars-dynloading-1"
      {
        nativeBuildInputs = [
          (python.withPackages (ps: [
            ps.pyarrow
            polars
          ]))
        ];
      }
      ''
        ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
        import pyarrow
        import polars
        EOF
        touch $out
      '';
  # Same as dynloading-1 but with the import order reversed; on failure the
  # captured log is echoed to stderr.
  passthru.tests.dynloading-2 =
    runCommand "polars-dynloading-2"
      {
        nativeBuildInputs = [
          (python.withPackages (ps: [
            ps.pyarrow
            polars
          ]))
        ];
        failureHook = ''
          sed "s/^/ /" $out >&2
        '';
      }
      ''
        ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
        import polars
        import pyarrow
        EOF
      '';
  # Runs the test suite under py-polars/tests with the built `polars` supplied
  # through `python.withPackages`.
  passthru.tests.pytest = stdenv.mkDerivation {
    pname = "${polars.pname}-pytest";

    inherit (polars) version src;

    requiredSystemFeatures = [ "big-parallel" ];

    sourceRoot = "${src.name}/py-polars";
    postPatch = ''
      # Keep only the tests and pyproject.toml; everything else in py-polars is removed.
      for f in * ; do
        [[ "$f" == "tests" ]] || \
        [[ "$f" == "pyproject.toml" ]] || \
        rm -rf "$f"
      done
      # "__pycache__" matches directories, which plain rm cannot remove; use
      # rm -rf, and -depth so find does not descend into a directory it just deleted.
      for pat in "__pycache__" "*.pyc" ; do
        find -depth -iname "$pat" -exec rm -rf "{}" ";"
      done
    '';
    dontConfigure = true;
    dontBuild = true;

    doCheck = true;
    checkPhase = "pytestCheckPhase";
    nativeBuildInputs = [
      (python.withPackages (ps: [
        polars
        ps.aiosqlite
        ps.altair
        ps.boto3
        ps.deltalake
        ps.fastexcel
        ps.flask
        ps.flask-cors
        ps.fsspec
        ps.gevent
        ps.hypothesis
        ps.jax
        ps.jaxlib
        (ps.kuzu or null)
        ps.matplotlib
        ps.moto
        ps.nest-asyncio
        ps.numpy
        ps.openpyxl
        ps.pandas
        ps.pyarrow
        ps.pydantic
        ps.pyiceberg
        ps.sqlalchemy
        ps.torch
        ps.xlsx2csv
        ps.xlsxwriter
        ps.zstandard
        ps.cloudpickle
      ]))
    ];
    nativeCheckInputs = [
      pytestCheckHook
      pytest-codspeed
      pytest-cov-stub
      pytest-xdist
      pytest-benchmark
    ];

    pytestFlags = [
      "--benchmark-disable"
      "-nauto"
      "--dist=loadgroup"
    ];
    disabledTests = [
      "test_read_kuzu_graph_database" # kuzu
      "test_read_database_cx_credentials" # connectorx

      # adbc_driver_.*
      "test_write_database_append_replace"
      "test_write_database_create"
      "test_write_database_create_quoted_tablename"
      "test_write_database_adbc_temporary_table"
      "test_write_database_errors"

      # Internet access:
      "test_read_web_file"
      "test_run_python_snippets"

      # AssertionError: Series are different (exact value mismatch)
      "test_reproducible_hash_with_seeds"

      # AssertionError: assert 'PARTITIONED FORCE SPILLED' in 'OOC sort forced\nOOC sort started\nRUN STREAMING PIPELINE\n[df -> sort -> ordered_sink]\nfinished sinking into OOC so... sort took: 365.662µs\nstarted sort source phase\nsort source phase took: 2.169915ms\nfull ooc sort took: 4.502947ms\n'
      "test_streaming_sort"

      # AssertionError assert sys.getrefcount(foos[0]) == base_count (3 == 2)
      # tests/unit/dataframe/test_df.py::test_extension
      "test_extension"

      # Internet access (https://bucket.s3.amazonaws.com/)
      "test_scan_credential_provider"
      "test_scan_credential_provider_serialization"

      # ModuleNotFoundError: ADBC 'adbc_driver_sqlite.dbapi' driver not detected.
      "test_read_database"
      "test_read_database_parameterised_uri"

      # Untriaged
      "test_pickle_lazyframe_nested_function_udf"
      "test_serde_udf"
      "test_hash_struct"
    ];
    disabledTestPaths = [
      "tests/benchmark"
      "tests/docs"

      # Internet access
      "tests/unit/io/cloud/test_credential_provider.py"

      # Wrong altair version
      "tests/unit/operations/namespaces/test_plot.py"

      # adbc
      "tests/unit/io/database/test_read.py"

      # Untriaged
      "tests/unit/cloud/test_prepare_cloud_plan.py"
      "tests/unit/io/cloud/test_cloud.py"
    ];

    # The derivation only exists to run the checks; produce an empty output.
    installPhase = "touch $out";
  };

  meta = {
    description = "Dataframes powered by a multithreaded, vectorized query engine, written in Rust";
    homepage = "https://github.com/pola-rs/polars";
    changelog = "https://github.com/pola-rs/polars/releases/tag/py-${version}";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      happysalada
      SomeoneSerge
    ];
    mainProgram = "polars";
    platforms = lib.platforms.all;
  };
}