at master 1.8 kB view raw
# Package expression for the `datafusion` Python module.
#
# The derivation is produced by `buildPythonPackage` from the GitHub source
# (including its git submodules). Cargo dependencies are vendored from that
# same source, and the build is driven by the cargo setup and maturin hooks
# taken from `rustPlatform`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  rustPlatform,

  # nativeBuildInputs
  protoc,

  # buildInputs
  protobuf,

  # dependencies
  pyarrow,
  typing-extensions,

  # tests
  numpy,
  pytest-asyncio,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "datafusion";
  version = "49.0.0";
  pyproject = true;

  src = fetchFromGitHub {
    name = "datafusion-source";
    owner = "apache";
    repo = "arrow-datafusion-python";
    tag = version;
    # Fetch arrow-testing and parquet-testing (tests assets)
    fetchSubmodules = true;
    hash = "sha256-U3LRZQMjL8sNa5yQmwfhw9NRGC0299TRODylzZkvFh4=";
  };

  # Vendored cargo dependencies, fetched for the same pname/src/version as
  # the package itself and pinned by their own hash.
  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit pname src version;
    hash = "sha256-lCbqy6kZK+LSLvr+Odxt167ACnDap2enH/J4ILcPtOc=";
  };

  # The two hooks come from `rustPlatform`; `protoc` is the argument of this
  # function. Explicit attribute paths keep that distinction visible.
  nativeBuildInputs = [
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
    protoc
  ];

  buildInputs = [
    protobuf
  ];

  dependencies = [
    pyarrow
    typing-extensions
  ];

  nativeCheckInputs = [
    numpy
    pytest-asyncio
    pytestCheckHook
  ];

  pythonImportsCheck = [
    "datafusion"
    "datafusion._internal"
  ];

  # Removes the in-tree `python/datafusion` directory before the check phase.
  # NOTE(review): presumably so the tests import the installed package rather
  # than the source checkout -- confirm before changing.
  preCheck = ''
    rm -rf python/datafusion
  '';

  disabledTests = [
    # Exception: DataFusion error (requires internet access)
    "test_register_http_csv"
  ];

  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    changelog = "https://github.com/apache/arrow-datafusion-python/blob/${version}/CHANGELOG.md";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.cpcloud ];
  };
}