llama : remove llm_graph_input_one (#14603)

Xuan-Son Nguyen, 2025-07-09 23:09:28 +02:00 (committed by GitHub)
parent 4a5686da22
commit cb9178f885
3 changed files with 1 addition and 29 deletions

src/llama-graph.cpp

@@ -340,13 +340,6 @@ void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
     inp_rs->set_input(ubatch);
 }
 
-void llm_graph_input_one::set_input(const llama_ubatch * ubatch) {
-    GGML_UNUSED(ubatch);
-    GGML_ASSERT(one && ggml_nelements(one) == 1);
-    float f_one = 1.0f;
-    ggml_backend_tensor_set(one, &f_one, 0, sizeof(float));
-}
-
 //
 // llm_graph_context
 //
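(The removed set_input() callback existed only to write the constant 1.0f into a one-element graph-input tensor each time inputs were set; as the model-code change further below shows, that constant is now folded directly into the graph via ggml_scale_bias, so the input class has no remaining purpose.)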

src/llama-graph.h

@@ -341,17 +341,6 @@ public:
     const llama_memory_hybrid_context * mctx;
 };
 
-// TODO: remove this when ggml_scale_add is implemented
-class llm_graph_input_one : public llm_graph_input_i {
-public:
-    llm_graph_input_one() {}
-    virtual ~llm_graph_input_one() = default;
-
-    void set_input(const llama_ubatch * ubatch) override;
-
-    ggml_tensor * one = nullptr; // F32
-};
-
 //
 // llm_graph_result
 //
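(Note: the TODO above anticipated an op named ggml_scale_add; the equivalent fused scale-plus-constant operation this commit switches to is ggml_scale_bias.)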

src/llama-model.cpp

@@ -9485,8 +9485,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
     const int n_layer_sparsity = 10; // number of layers using activation sparsity
     const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
 
-    ggml_tensor * one; // containing single element 1.0f
-
     llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf)
         : llm_graph_context(params),
           model(model),
@@ -9498,14 +9496,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         ggml_tensor * cur;
         ggml_tensor * inpL;
 
-        // TODO: remove this when ggml_scale_add is implemented
-        one = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-        {
-            auto inp = std::make_unique<llm_graph_input_one>();
-            inp->one = one;
-            res->add_input(std::move(inp));
-        }
-
         inpL = build_inp_embd(model.tok_embd);
 
         // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
@@ -9895,7 +9885,7 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         cb(innovation, "innovation", il);
 
         ggml_tensor * all_coefs = build_lora_mm(model.layers[il].altup_correct_coef, modalities); // [n_altup, n_tokens]
-        all_coefs = ggml_add(ctx0, all_coefs, one);
+        all_coefs = ggml_scale_bias(ctx0, all_coefs, 1.0f, 1.0f); // + 1.0
         cb(all_coefs, "all_coefs", il);
         all_coefs = ggml_cont(ctx0, ggml_transpose(ctx0, all_coefs)); // [n_tokens, n_altup]
         all_coefs = ggml_reshape_3d(ctx0, all_coefs, 1, n_tokens, n_altup); // [1, n_tokens, n_altup]
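
For context: ggml_scale_bias(ctx, a, s, b) computes a*s + b elementwise, so calling it with s = 1.0f and b = 1.0f reproduces the old broadcast-add of a constant 1.0f without a dedicated graph input. A minimal sketch of the two graph-construction patterns (illustrative only; the helper function names are made up, only the ggml_* calls come from the diff above):

#include "ggml.h"

// Old pattern (before this commit): a 1-element F32 tensor is created as a
// graph input, filled with 1.0f later by llm_graph_input_one::set_input(),
// and broadcast-added to x.
static ggml_tensor * add_one_via_input(ggml_context * ctx, ggml_tensor * x) {
    ggml_tensor * one = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    return ggml_add(ctx, x, one); // the single element broadcasts over x
}

// New pattern (this commit): the constant is folded into a single fused op,
// y = x*1.0f + 1.0f, so no extra graph input or set_input callback is needed.
static ggml_tensor * add_one_via_scale_bias(ggml_context * ctx, ggml_tensor * x) {
    return ggml_scale_bias(ctx, x, 1.0f, 1.0f);
}

Besides the roughly 30 deleted lines, the new form drops one graph input and the per-ubatch host-side ggml_backend_tensor_set() write that set_input() used to perform.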