{
  lib,
  stdenv,
  gcc13Stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,

  # nativeBuildInputs
  cmake,
  ninja,

  # build-system
  pathspec,
  pyproject-metadata,
  scikit-build-core,

  # dependencies
  diskcache,
  jinja2,
  numpy,
  typing-extensions,

  # tests
  scipy,
  huggingface-hub,

  # passthru
  gitUpdater,
  pytestCheckHook,
  llama-cpp-python,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },
}:
let
  # CUDA builds need a GCC version that nvcc supports, hence gcc13Stdenv;
  # everything else uses the default stdenv.
  stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
in
buildPythonPackage rec {
  pname = "llama-cpp-python";
  version = "0.3.16";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "abetlen";
    repo = "llama-cpp-python";
    tag = "v${version}";
    hash = "sha256-EUDtCv86J4bznsTqNsdgj1IYkAu83cf+RydFTUb2NEE=";
    fetchSubmodules = true;
  };

  patches = [
    # Fix a test failure on machines with no Metal devices (e.g. the
    # nix-community Darwin builders)
    # https://github.com/ggml-org/llama.cpp/pull/15531
    (fetchpatch {
      url = "https://github.com/ggml-org/llama.cpp/pull/15531/commits/63a83ffefe4d478ebadff89300a0a3c5d660f56a.patch";
      stripLen = 1;
      extraPrefix = "vendor/llama.cpp/";
      hash = "sha256-9LGnzviBgYYOOww8lhiLXf7xgd/EtxRXGQMredOO4qM=";
    })
  ];

  # scikit-build-core drives CMake itself, so disable the generic CMake
  # configure hook while keeping cmake available on PATH.
  dontUseCmakeConfigure = true;
  cmakeFlags = [
    # Set GGML_NATIVE=off. Otherwise, cmake attempts to build with
    # -march=native*, which is either a no-op (if cc-wrapper is able to
    # ignore it) or an attempt to build a non-reproducible binary.
    #
    # This issue was spotted when cmake rules appended feature modifiers to
    # -mcpu, breaking the Linux build as follows:
    #
    #   cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
    "-DGGML_NATIVE=off"
    # Upstream derives the build number from git metadata, which
    # fetchFromGitHub does not provide; pin it instead.
    "-DGGML_BUILD_NUMBER=1"
  ]
  ++ lib.optionals cudaSupport [
    "-DGGML_CUDA=on"
    "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
    "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
  ];

  enableParallelBuilding = true;

  nativeBuildInputs = [
    cmake
    ninja
  ];

  build-system = [
    pathspec
    pyproject-metadata
    scikit-build-core
  ];

  buildInputs = lib.optionals cudaSupport (
    with cudaPackages;
    [
      cuda_cudart # cuda_runtime.h
      cuda_cccl # <thrust/*>
      libcublas # cublas_v2.h
    ]
  );

  stdenv = stdenvTarget;

  dependencies = [
    diskcache
    jinja2
    numpy
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    scipy
    huggingface-hub
  ];

  disabledTests = [
    # these try to download models from huggingface-hub
    "test_real_model"
    "test_real_llama"
  ];

  pythonImportsCheck = [ "llama_cpp" ];

  passthru = {
    updateScript = gitUpdater {
      rev-prefix = "v";
      allowedVersions = "^[.0-9]+$";
    };
    tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
      withCuda = llama-cpp-python.override {
        cudaSupport = true;
      };
    };
  };

  meta = {
    description = "Python bindings for llama.cpp";
    homepage = "https://github.com/abetlen/llama-cpp-python";
    changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      booxter
      kirillrdy
    ];
  };
}
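# Illustrative usage (an assumption about the call site, not part of the
# expression above): the CUDA variant is obtained by overriding the
# cudaSupport flag threaded through the arguments, which is exactly what
# passthru.tests.withCuda exercises on Linux:
#
#   llama-cpp-python.override { cudaSupport = true; }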