# at master 5.0 kB view raw
# pyarrow: Python bindings for Apache Arrow.
#
# The bindings are built from the `python/` subdirectory of the arrow-cpp
# source tree (version and src are inherited from arrow-cpp) and are linked
# against that arrow-cpp package. Optional pyarrow components are switched on
# or off to mirror the feature flags exposed by arrow-cpp.
{
  lib,
  stdenv,
  buildPythonPackage,
  python,
  pythonAtLeast,
  pythonOlder,
  arrow-cpp,
  cffi,
  cloudpickle,
  cmake,
  cython,
  fsspec,
  hypothesis,
  numpy,
  pandas,
  pytestCheckHook,
  pytest-lazy-fixture,
  pkg-config,
  setuptools,
  setuptools-scm,
  oldest-supported-numpy,
}:

let
  # Map a boolean onto the 1/0 integers used for the PYARROW_* variables below.
  zero_or_one = cond: if cond then 1 else 0;
in

buildPythonPackage rec {
  pname = "pyarrow";
  inherit (arrow-cpp) version src;
  pyproject = true;

  disabled = pythonOlder "3.7";

  # Only the Python bindings inside the Arrow source tree are built here.
  sourceRoot = "${src.name}/python";

  nativeBuildInputs = [
    cmake
    cython
    pkg-config
    setuptools
    setuptools-scm
    oldest-supported-numpy
  ];

  buildInputs = [ arrow-cpp ];

  propagatedBuildInputs = [
    cffi
    numpy
  ];

  checkInputs = [
    cloudpickle
    fsspec
  ];

  nativeCheckInputs = [
    hypothesis
    pandas
    pytestCheckHook
    pytest-lazy-fixture
  ];

  PYARROW_BUILD_TYPE = "release";

  # Component toggles; the conditional ones follow the arrow-cpp feature flags.
  PYARROW_WITH_DATASET = zero_or_one true;
  PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
  PYARROW_WITH_HDFS = zero_or_one true;
  PYARROW_WITH_PARQUET = zero_or_one true;
  PYARROW_WITH_PARQUET_ENCRYPTION = zero_or_one true;
  PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;
  PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;
  PYARROW_BUNDLE_ARROW_CPP_HEADERS = zero_or_one false;

  PYARROW_CMAKE_OPTIONS = [ "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib" ];

  ARROW_HOME = arrow-cpp;
  PARQUET_HOME = arrow-cpp;

  # The test data location is only set when the test suite is run.
  ARROW_TEST_DATA = lib.optionalString doCheck arrow-cpp.ARROW_TEST_DATA;
  doCheck = true;

  # cmake is in nativeBuildInputs, but its configure phase is disabled here.
  # NOTE(review): presumably the Python build invokes cmake itself - confirm.
  dontUseCmakeConfigure = true;

  __darwinAllowLocalNetworking = true;

  preBuild = ''
    export PYARROW_PARALLEL=$NIX_BUILD_CORES
  '';

  postInstall = ''
    # copy the pyarrow C++ header files to the appropriate location
    pyarrow_include="$out/${python.sitePackages}/pyarrow/include"
    mkdir -p "$pyarrow_include/arrow/python"
    find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "$pyarrow_include/arrow/python" \;
  '';

  pytestFlagsArray = [
    # A couple of tests are missing fixture imports, luckily pytest offers a
    # clean solution.
    "--fixtures pyarrow/tests/conftest.py"
    # Deselect a single test because pyarrow prints a 2-line error message where
    # only a single line is expected. The additional line of output comes from
    # the glog library which is an optional dependency of arrow-cpp that is
    # enabled in nixpkgs.
    # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
    "--deselect=pyarrow/tests/test_memory.py::test_env_var"
    # these tests require access to s3 via the internet
    "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
    # Flaky test
    "--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
    "--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
    # Flaky test, works locally but not on Hydra
    "--deselect=pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"
    # expects arrow-cpp headers to be bundled
    "--deselect=pyarrow/tests/test_cpp_internals.py::test_pyarrow_include"
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    # Requires loopback networking
    "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
    "--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
    "--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
    "--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
    "--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
    # fails to compile
    "--deselect=pyarrow/tests/test_cython.py::test_cython_api"
  ]
  ++ lib.optionals (pythonAtLeast "3.11") [
    # Repr output is printing number instead of enum name so these tests fail
    "--deselect=pyarrow/tests/test_fs.py::test_get_file_info"
  ]
  ++ lib.optionals stdenv.hostPlatform.isLinux [
    # this test requires local networking
    "--deselect=pyarrow/tests/test_fs.py::test_filesystem_from_uri_gcs"
  ];

  disabledTests = [ "GcsFileSystem" ];

  # Delete everything under pyarrow/ except conftest.py and tests/, then move
  # the top-level conftest.py into tests/ as parent_conftest.py and rewrite the
  # relative import in tests/conftest.py to match.
  # NOTE(review): presumably this makes the tests import the installed pyarrow
  # rather than the source tree - confirm.
  preCheck = ''
    shopt -s extglob
    rm -r pyarrow/!(conftest.py|tests)
    mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
    substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
  ''
  + lib.optionalString stdenv.hostPlatform.isDarwin ''
    # OSError: [Errno 24] Too many open files
    ulimit -n 1024
  '';

  pythonImportsCheck = [
    "pyarrow"
  ]
  ++ map (module: "pyarrow.${module}") (
    [
      "compute"
      "csv"
      "dataset"
      "feather"
      "fs"
      "json"
      "orc"
      "parquet"
    ]
    # Flight support is only compiled in when arrow-cpp provides it (see
    # PYARROW_WITH_FLIGHT above), so only check the import in that case.
    ++ lib.optionals arrow-cpp.enableFlight [ "flight" ]
  );

  meta = {
    description = "Cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [
      veprbl
      cpcloud
    ];
  };
}