{
  lib,
  stdenv,
  gcc13Stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,

  # nativeBuildInputs
  cmake,
  ninja,

  # build-system
  pathspec,
  pyproject-metadata,
  scikit-build-core,

  # dependencies
  diskcache,
  jinja2,
  numpy,
  typing-extensions,

  # tests
  pytestCheckHook,
  scipy,
  huggingface-hub,

  # passthru
  gitUpdater,
  llama-cpp-python,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },
}:
let
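  # nvcc supports only a limited range of host GCC versions; presumably that
  # is why CUDA builds use gcc13 instead of the default stdenv.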
  stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
in
buildPythonPackage rec {
  pname = "llama-cpp-python";
  version = "0.3.16";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "abetlen";
    repo = "llama-cpp-python";
    tag = "v${version}";
    hash = "sha256-EUDtCv86J4bznsTqNsdgj1IYkAu83cf+RydFTUb2NEE=";
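    # llama.cpp is vendored as a git submodule under vendor/llama.cpp.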
    fetchSubmodules = true;
  };

  patches = [
    # Fix a test failure on machines with no Metal devices (e.g. the
    # nix-community Darwin builder).
    # https://github.com/ggml-org/llama.cpp/pull/15531
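    # The fix is against upstream llama.cpp, so rebase the patch paths onto
    # the vendored submodule directory.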
    (fetchpatch {
      url = "https://github.com/ggml-org/llama.cpp/pull/15531/commits/63a83ffefe4d478ebadff89300a0a3c5d660f56a.patch";
      stripLen = 1;
      extraPrefix = "vendor/llama.cpp/";
      hash = "sha256-9LGnzviBgYYOOww8lhiLXf7xgd/EtxRXGQMredOO4qM=";
    })
  ];

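  # scikit-build-core drives the CMake configure and build itself; disable
  # the Nix cmake configure hook so the two do not conflict.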
  dontUseCmakeConfigure = true;
  cmakeFlags = [
    # Set GGML_NATIVE=off. Otherwise, cmake attempts to build with
    # -march=native, which is either a no-op (if cc-wrapper is able to
    # ignore it) or an attempt to build a non-reproducible binary.
    #
    # This issue was spotted when cmake rules appended feature modifiers to
    # -mcpu, breaking the Linux build as follows:
    #
    #   cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
    "-DGGML_NATIVE=off"
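    # The fetched source has no .git directory, so pin the build number
    # rather than letting cmake try to derive it from git.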
    "-DGGML_BUILD_NUMBER=1"
  ]
  ++ lib.optionals cudaSupport [
    "-DGGML_CUDA=on"
    "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
    "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
  ];

  enableParallelBuilding = true;

  nativeBuildInputs = [
    cmake
    ninja
  ];

  build-system = [
    pathspec
    pyproject-metadata
    scikit-build-core
  ];

  buildInputs = lib.optionals cudaSupport (
    with cudaPackages;
    [
      cuda_cudart # cuda_runtime.h
      cuda_cccl # <thrust/*>
      libcublas # cublas_v2.h
    ]
  );

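  # Build with the stdenv selected above (gcc13 when CUDA is enabled).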
  stdenv = stdenvTarget;

  dependencies = [
    diskcache
    jinja2
    numpy
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    scipy
    huggingface-hub
  ];

  disabledTests = [
    # These tests try to download models from huggingface-hub.
    "test_real_model"
    "test_real_llama"
  ];

  pythonImportsCheck = [ "llama_cpp" ];

  passthru = {
    updateScript = gitUpdater {
      rev-prefix = "v";
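      # Only accept plain numeric versions, skipping suffixed tags such as
      # pre-releases.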
      allowedVersions = "^[.0-9]+$";
    };
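    # Make sure the CUDA variant keeps building (CUDA is Linux-only).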
    tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
      withCuda = llama-cpp-python.override {
        cudaSupport = true;
      };
    };
  };

  meta = {
    description = "Python bindings for llama.cpp";
    homepage = "https://github.com/abetlen/llama-cpp-python";
    changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      booxter
      kirillrdy
    ];
  };
}