From 538cc77f7f44dfa047dba6a06d90c86dda69cf1d Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 16 Jul 2025 12:13:57 +0300 Subject: [PATCH] server : fix handling of the ignore_eos flag (#14710) ggml-ci --- tools/server/server.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index d4dffb39c..1e7d64a28 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -127,7 +127,6 @@ struct slot_params { std::vector response_fields; bool timings_per_token = false; bool post_sampling_probs = false; - bool ignore_eos = false; struct common_params_sampling sampling; struct common_params_speculative speculative; @@ -441,7 +440,6 @@ struct server_task { { params.sampling.logit_bias.clear(); - params.ignore_eos = json_value(data, "ignore_eos", false); const auto & logit_bias = data.find("logit_bias"); if (logit_bias != data.end() && logit_bias->is_array()) { @@ -472,6 +470,16 @@ struct server_task { } } } + + params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos); + if (params.sampling.ignore_eos) { + for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) { + if (llama_vocab_is_eog(vocab, i)) { + //SRV_DBG("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(ctx, i).c_str(), -INFINITY); + params.sampling.logit_bias.push_back({i, -INFINITY}); + } + } + } } { @@ -2217,10 +2225,6 @@ struct server_context { slot.params.n_predict = slot.n_predict; } - if (slot.params.ignore_eos && has_eos_token) { - slot.params.sampling.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY}); - } - { if (slot.smpl != nullptr) { common_sampler_free(slot.smpl);