llama-bench : use local GPUs along with RPC servers (#14917)

Currently, if RPC servers are specified with '--rpc' and a local GPU
(e.g. CUDA) is available, the benchmark is performed only on the RPC
device(s), yet the backend result column says "CUDA,RPC", which is
incorrect. This patch adds all local GPU devices as well, making
llama-bench consistent with llama-cli.
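For illustration (not part of the commit; the model path and RPC endpoint below are placeholders), a run such as the following on a host that also has a local CUDA GPU now benchmarks the CUDA device together with the RPC device(s), matching what the backend column reports:

    ./llama-bench -m model.gguf --rpc 192.168.1.2:50052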
Author: Radoslav Gerganov, 2025-07-28 18:59:04 +03:00 (committed by GitHub)
parent db16e2831c
commit c556418b60
1 changed file with 15 additions and 0 deletions


@@ -950,6 +950,7 @@ struct cmd_params_instance {
             }
             static std::vector<ggml_backend_dev_t> devices;
             devices.clear();
+            // RPC devices should always come first for performance reasons
             for (const std::string & server : rpc_servers) {
                 ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
                 if (dev) {
@@ -959,6 +960,20 @@ struct cmd_params_instance {
                     exit(1);
                 }
             }
+            // add local GPU devices if any
+            for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
+                ggml_backend_dev_t dev = ggml_backend_dev_get(i);
+                switch (ggml_backend_dev_type(dev)) {
+                    case GGML_BACKEND_DEVICE_TYPE_CPU:
+                    case GGML_BACKEND_DEVICE_TYPE_ACCEL:
+                        // skip CPU backends since they are handled separately
+                        break;
+
+                    case GGML_BACKEND_DEVICE_TYPE_GPU:
+                        devices.push_back(dev);
+                        break;
+                }
+            }
             devices.push_back(nullptr);
             mparams.devices = devices.data();
         }
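As a minimal standalone sketch (not part of the commit), the same ggml enumeration API used in the loop above can be called to list which local devices llama-bench will now pick up. It assumes the ggml headers are available and that ggml_backend_load_all() is the right way to load backends in your build (a statically linked build may not need it):

    // list_devices.cpp - print every device ggml exposes and its type
    #include <cstdio>
    #include "ggml-backend.h"

    int main() {
        ggml_backend_load_all();  // load available backends (CUDA, Metal, ...)

        for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            const char * kind = "other";
            switch (ggml_backend_dev_type(dev)) {
                case GGML_BACKEND_DEVICE_TYPE_CPU:   kind = "CPU";   break;
                case GGML_BACKEND_DEVICE_TYPE_ACCEL: kind = "ACCEL"; break;
                case GGML_BACKEND_DEVICE_TYPE_GPU:   kind = "GPU";   break;
            }
            printf("%zu: %s (%s)\n", i, ggml_backend_dev_name(dev), kind);
        }
        return 0;
    }

Only devices reported as GPU are appended by the patched loop; CPU and ACCEL devices are skipped because llama-bench handles them separately, and the RPC devices added earlier keep their position at the front of the list.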