{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  rustPlatform,

  # nativeBuildInputs
  pkg-config,

  # buildInputs
  openssl,
  protobuf,

  # dependencies
  numpy,
  pyarrow,

  # optional-dependencies
  torch,

  # tests
  datafusion,
  duckdb,
  ml-dtypes,
  pandas,
  pillow,
  polars,
  pytestCheckHook,
  tqdm,
}:

# Builds the `pylance` Python distribution (the Python wrapper for the Lance
# columnar format) from the `python` subdirectory of the lancedb/lance
# repository. The Rust extension is compiled through the maturin build hook.
buildPythonPackage rec {
  pname = "pylance";
  version = "0.38.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "lancedb";
    repo = "lance";
    tag = "v${version}";
    hash = "sha256-2Rz6OUEs3628dIbG5wT5oIy2yDdJ4bsUQW/0eWxZKoA=";
  };

  # The Python project is not at the repository root; build from `python/`.
  sourceRoot = "${src.name}/python";

  # Vendor the Cargo dependencies using the same source root as the build, so
  # the lock file that is vendored is the one belonging to the Python crate.
  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit
      pname
      version
      src
      sourceRoot
      ;
    hash = "sha256-7me5r65TWQhulF08ZwXK1GWiHnOSVBSu0YgB37JUUDk=";
  };

  nativeBuildInputs = [
    pkg-config
    protobuf # for protoc
    rustPlatform.cargoSetupHook
  ];

  # cargoSetupHook is already provided through nativeBuildInputs above, so it
  # is listed only once; only the maturin hook is needed here.
  build-system = [
    rustPlatform.maturinBuildHook
  ];

  buildInputs = [
    openssl
    protobuf
  ];

  # Do not enforce the upstream version constraint on pyarrow.
  pythonRelaxDeps = [ "pyarrow" ];

  dependencies = [
    numpy
    pyarrow
  ];

  optional-dependencies = {
    torch = [ torch ];
  };

  # The distribution is named `pylance`, but the importable module is `lance`.
  pythonImportsCheck = [ "lance" ];

  # The torch extra is pulled in so that the torch-dependent tests can run.
  nativeCheckInputs = [
    datafusion
    duckdb
    ml-dtypes
    pandas
    pillow
    polars
    pytestCheckHook
    tqdm
  ]
  ++ optional-dependencies.torch;

  # Run pytest from the test directory. The path is relative to sourceRoot,
  # i.e. this resolves to `python/python/tests` inside the repository.
  preCheck = ''
    cd python/tests
  '';

  disabledTests = [
    # Hangs indefinitely
    "test_all_permutations"

    # Writes to read-only build directory
    "test_add_data_storage_version"
    "test_fix_data_storage_version"
    "test_fts_backward_v0_27_0"

    # AttributeError: 'SessionContext' object has no attribute 'register_table_provider'
    "test_table_loading"

    # subprocess.CalledProcessError: Command ... returned non-zero exit status 1.
    # ModuleNotFoundError: No module named 'lance'
    "test_lance_log_file"
    "test_lance_log_file_invalid_path"
    "test_lance_log_file_with_directory_creation"
    "test_timestamp_precision"
    "test_tracing"

    # Flaky (AssertionError)
    "test_index_cache_size"

    # OSError: LanceError(IO): Failed to initialize default tokenizer:
    # An invalid argument was passed:
    # 'LinderaError { kind: Parse, source: failed to build tokenizer: LinderaError(kind=Io, source=No such file or directory (os error 2)) }', /build/source/rust/lance-index/src/scalar/inverted/tokenizer/lindera.rs:63:21
    "test_lindera_load_config_fallback"

    # OSError: LanceError(IO): Failed to load tokenizer config
    "test_indexed_filter_with_fts_index_with_lindera_ipadic_jp_tokenizer"
    "test_lindera_ipadic_jp_tokenizer_bin_user_dict"
    "test_lindera_ipadic_jp_tokenizer_csv_user_dict"
    "test_lindera_load_config_priority"
  ]
  ++ lib.optionals (stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isAarch64) [
    # OSError: LanceError(IO): Resources exhausted: Failed to allocate additional 1245184 bytes for ExternalSorter[0]...
    "test_merge_insert_large"
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    # Build hangs after all the tests are run due to a torch subprocess not exiting
    "test_multiprocess_loading"
  ];

  meta = {
    description = "Python wrapper for Lance columnar format";
    homepage = "https://github.com/lancedb/lance";
    changelog = "https://github.com/lancedb/lance/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ natsukium ];
  };
}