python3Packages.triton: always support ROCm

ROCm packages are a runtime-only dependency for triton, and triton-llvm always supports AMD GPU
targets, so we can reduce how many different builds of triton are needed by teaching triton to
search for libamdhip64.so and ld.lld more reliably.

Luna Nova c74e5ffb 0e209ec1
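Concretely, one triton output can now serve both CPU-only and ROCm users: the HIP SDK is bound at run time rather than at build time. A minimal sketch of that binding, assuming nixpkgs is in scope (the shell itself is illustrative, not part of this commit):

  { pkgs ? import <nixpkgs> { } }:
  # The stock triton build plus a runtime HIP SDK; no ROCm-specific rebuild.
  pkgs.mkShell {
    packages = [ (pkgs.python3.withPackages (ps: [ ps.triton ])) ];
    # Plain mkShell attributes become environment variables inside the shell.
    HIP_PATH = "${pkgs.rocmPackages.clr}";
  }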

+2
doc/release-notes/rl-2511.section.md
···
The binary name remains `webfontkitgenerator`.
The `webfontkitgenerator` package is an alias to `webfont-bundler`.
+ - `python3Packages.triton` no longer takes an `enableRocm` argument and supports ROCm in all build configurations via runtime binding. In most cases no action will be needed. If triton is unable to find the HIP SDK, add `rocmPackages.clr` as a build input or set the environment variable `HIP_PATH="${rocmPackages.clr}"`.
+
- `inspircd` has been updated to the v4 release series. Please refer to the upstream documentation for [general information](https://docs.inspircd.org/4/overview/#v4-overview) and a list of [breaking changes](https://docs.inspircd.org/4/breaking-changes/).
- `lima` package now only includes the guest agent for the host's architecture by default. If your guest VM's architecture differs from your Lima host's, you'll need to enable the `lima-additional-guestagents` package by setting `withAdditionalGuestAgents = true` when overriding lima with this input.
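For the first remediation in the triton note above, a hedged override sketch (the `tritonWithHip` name is hypothetical; `overridePythonAttrs` is the standard python-package override hook):

  tritonWithHip = pkgs.python3Packages.triton.overridePythonAttrs (old: {
    # Make the HIP SDK visible to triton's runtime search without HIP_PATH.
    buildInputs = (old.buildInputs or [ ]) ++ [ pkgs.rocmPackages.clr ];
  });

The HIP_PATH route is sketched after the commit message above.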
+1 -7
pkgs/development/python-modules/torch/source/default.nix
···
# (dependencies without cuda support).
# Instead we should rely on overlays and nixpkgsFun.
# (@SomeoneSerge)
- _tritonEffective ?
-   if cudaSupport then
-     triton-cuda
-   else if rocmSupport then
-     rocmPackages.triton
-   else
-     triton,
+ _tritonEffective ? if cudaSupport then triton-cuda else triton,
triton-cuda,
# Disable MKLDNN on aarch64-darwin, it negatively impacts performance,
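The removed branch selected `rocmPackages.triton` whenever `rocmSupport` was set; per the comment above, that choice now belongs in overlays. A hedged overlay sketch reinstating it (assumes torch keeps accepting the `_tritonEffective` argument and that `rocmPackages.triton` is still built):

  final: prev: {
    pythonPackagesExtensions = prev.pythonPackagesExtensions ++ [
      (pyFinal: pyPrev: {
        # Explicitly pin torch's triton to the ROCm-specific build.
        torch = pyPrev.torch.override {
          _tritonEffective = final.rocmPackages.triton;
        };
      })
    ];
  }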
-28
pkgs/development/python-modules/triton/0002-nvidia-amd-driver-short-circuit-before-ldconfig.patch
···
- diff --git a/third_party/amd/backend/driver.py b/third_party/amd/backend/driver.py
- index ca712f904..0961d2dda 100644
- --- a/third_party/amd/backend/driver.py
- +++ b/third_party/amd/backend/driver.py
- @@ -79,6 +79,9 @@ def _get_path_to_hip_runtime_dylib():
-             return mmapped_path
-         raise RuntimeError(f"memory mapped '{mmapped_path}' in process does not point to a valid {lib_name}")
- 
- +    if os.path.isdir("@libhipDir@"):
- +        return ["@libhipDir@"]
- +
-     paths = []
- 
-     import site
- diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py
- index d088ec092..625de2db8 100644
- --- a/third_party/nvidia/backend/driver.py
- +++ b/third_party/nvidia/backend/driver.py
- @@ -23,6 +23,9 @@ def libcuda_dirs():
-     if env_libcuda_path:
-         return [env_libcuda_path]
- 
- +    if os.path.exists("@libcudaStubsDir@"):
- +        return ["@libcudaStubsDir@"]
- +
-     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode()
-     # each line looks like the following:
-     # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
+14
pkgs/development/python-modules/triton/0002-nvidia-driver-short-circuit-before-ldconfig.patch
···
+ diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py
+ index d088ec092..625de2db8 100644
+ --- a/third_party/nvidia/backend/driver.py
+ +++ b/third_party/nvidia/backend/driver.py
+ @@ -23,6 +23,9 @@ def libcuda_dirs():
+     if env_libcuda_path:
+         return [env_libcuda_path]
+ 
+ +    if os.path.exists("@libcudaStubsDir@"):
+ +        return ["@libcudaStubsDir@"]
+ +
+     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode()
+     # each line looks like the following:
+     # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
+60
pkgs/development/python-modules/triton/0005-amd-search-env-paths.patch
···
+ From 9e4e58b647c17c5fa098c8a74e221f88d3cb1a43 Mon Sep 17 00:00:00 2001
+ From: Luna Nova <git@lunnova.dev>
+ Date: Sun, 24 Aug 2025 07:41:30 -0700
+ Subject: [PATCH] [AMD] Search HIP_PATH, hipconfig, and ROCM_PATH for
+  libamdhip64
+ 
+ Search for libamdhip64 from HIP_PATH env var, hipconfig --path output,
+ and ROCM_PATH before looking in system-wide ldconfig or /opt/rocm.
+ 
+ The system-wide ROCm path isn't guaranteed to be where the ROCm
+ install we're building against is located, so follow typical ROCm
+ lib behavior and look under env paths first.
+ 
+ This is especially important for non-FHS distros like NixOS
+ where /opt/rocm never exists, but may be useful in more
+ typical distros if multiple ROCm installs are present
+ to ensure the right libamdhip64.so is picked up.
+ ---
+  third_party/amd/backend/driver.py | 28 ++++++++++++++++++++++++++++
+  1 file changed, 28 insertions(+)
+ 
+ diff --git a/third_party/amd/backend/driver.py b/third_party/amd/backend/driver.py
+ index af8e1a5c8097..57b0f7388c60 100644
+ --- a/third_party/amd/backend/driver.py
+ +++ b/third_party/amd/backend/driver.py
+ @@ -110,6 +110,34 @@ def _get_path_to_hip_runtime_dylib():
+                 return f
+             paths.append(f)
+ 
+ +    # HIP_PATH should point to HIP SDK root if set
+ +    env_hip_path = os.getenv("HIP_PATH")
+ +    if env_hip_path:
+ +        hip_lib_path = os.path.join(env_hip_path, "lib", lib_name)
+ +        if os.path.exists(hip_lib_path):
+ +            return hip_lib_path
+ +        paths.append(hip_lib_path)
+ +
+ +    # if available, `hipconfig --path` prints the HIP SDK root
+ +    try:
+ +        hip_root = subprocess.check_output(["hipconfig", "--path"]).decode().strip()
+ +        if hip_root:
+ +            hip_lib_path = os.path.join(hip_root, "lib", lib_name)
+ +            if os.path.exists(hip_lib_path):
+ +                return hip_lib_path
+ +            paths.append(hip_lib_path)
+ +    except (subprocess.CalledProcessError, FileNotFoundError):
+ +        # hipconfig may not be available
+ +        pass
+ +
+ +    # ROCm lib dir based on env var
+ +    env_rocm_path = os.getenv("ROCM_PATH")
+ +    if env_rocm_path:
+ +        rocm_lib_path = os.path.join(env_rocm_path, "lib", lib_name)
+ +        if os.path.exists(rocm_lib_path):
+ +            return rocm_lib_path
+ +        paths.append(rocm_lib_path)
+ +
+     # Afterwards try to search the loader dynamic library resolution paths.
+     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
+     # each line looks like the following:
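The patch above establishes the lookup order HIP_PATH, then `hipconfig --path`, then ROCM_PATH, before falling back to ldconfig and /opt/rocm, so existing ROCm conventions keep working. A hedged NixOS sketch using the conventional ROCM_PATH variable (whether `rocmPackages.clr` is the right SDK root for a given system is an assumption):

  { pkgs, ... }:
  {
    # driver.py's new fallback will find $ROCM_PATH/lib/libamdhip64.so here.
    environment.variables.ROCM_PATH = "${pkgs.rocmPackages.clr}";
  }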
+52 -3
pkgs/development/python-modules/triton/default.nix
···
torchWithRocm,
zlib,
cudaSupport ? config.cudaSupport,
- rocmSupport ? config.rocmSupport,
+ runCommand,
rocmPackages,
triton,
}:
···
(replaceVars ./0001-_build-allow-extra-cc-flags.patch {
ccCmdExtraFlags = "-Wl,-rpath,${addDriverRunpath.driverLink}/lib";
})
- (replaceVars ./0002-nvidia-amd-driver-short-circuit-before-ldconfig.patch {
-   libhipDir = if rocmSupport then "${lib.getLib rocmPackages.clr}/lib" else null;
+ (replaceVars ./0002-nvidia-driver-short-circuit-before-ldconfig.patch {
libcudaStubsDir =
if cudaSupport then "${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs" else null;
})
+ # Upstream PR: https://github.com/triton-lang/triton/pull/7959
+ ./0005-amd-search-env-paths.patch
]
++ lib.optionals cudaSupport [
(replaceVars ./0003-nvidia-cudart-a-systempath.patch {
···
substituteInPlace cmake/AddTritonUnitTest.cmake \
--replace-fail "include(\''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)" ""\
--replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)"
+ ''
+ # Don't use FHS path for ROCm LLD
+ # Remove this after `[AMD] Use lld library API #7548` makes it into a release
+ + ''
+   substituteInPlace third_party/amd/backend/compiler.py \
+     --replace-fail 'lld = Path("/opt/rocm/llvm/bin/ld.lld")' \
+       "import os;lld = Path(os.getenv('HIP_PATH', '/opt/rocm/')"' + "/llvm/bin/ld.lld")'
'';
build-system = [ setuptools ];
···
passthru.tests = {
# Ultimately, torch is our test suite:
inherit torchWithRocm;
+
+ # Test that _get_path_to_hip_runtime_dylib works when ROCm is available at runtime
+ rocm-libamdhip64-path =
+   runCommand "triton-rocm-libamdhip64-path-test"
+     {
+       buildInputs = [
+         triton
+         python
+         rocmPackages.clr
+       ];
+     }
+     ''
+       python -c "
+       import os
+       import triton
+       path = triton.backends.amd.driver._get_path_to_hip_runtime_dylib()
+       print(f'libamdhip64 path: {path}')
+       assert os.path.exists(path)
+       " && touch $out
+     '';
+
+ # Test that path_to_rocm_lld works when ROCm is available at runtime
+ # Remove this after `[AMD] Use lld library API #7548` makes it into a release
+ rocm-lld-path =
+   runCommand "triton-rocm-lld-test"
+     {
+       buildInputs = [
+         triton
+         python
+         rocmPackages.clr
+       ];
+     }
+     ''
+       python -c "
+       import os
+       import triton
+       path = triton.backends.backends['amd'].compiler.path_to_rocm_lld()
+       print(f'ROCm LLD path: {path}')
+       assert os.path.exists(path)
+       " && touch $out
+     '';
# Test as `nix run -f "<nixpkgs>" python3Packages.triton.tests.axpy-cuda`
# or, using `programs.nix-required-mounts`, as `nix build -f "<nixpkgs>" python3Packages.triton.tests.axpy-cuda.gpuCheck`
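To exercise the two new ROCm passthru tests directly, a small expression like the following can be saved as e.g. test.nix and built with `nix-build test.nix` (a sketch; assumes a <nixpkgs> channel is configured):

  let
    pkgs = import <nixpkgs> { };
    tests = pkgs.python3Packages.triton.tests;
  in
  # nix-build builds every derivation in the list.
  [
    tests.rocm-libamdhip64-path
    tests.rocm-lld-path
  ]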