model : make rope_yarn_log_mul optional for deepseek2 (#14896)
* make rope_yarn_log_mul optional for deepseek2
* default rope_yarn_log_mul = 0.0f
commit 4762ad7316
parent 1dc9614e06
@@ -98,7 +98,7 @@ struct llama_hparams {
     float rope_freq_scale_train;
     float rope_freq_scale_train_swa;
     uint32_t n_ctx_orig_yarn;
-    float rope_yarn_log_mul;
+    float rope_yarn_log_mul = 0.0f;
 
     std::array<int, 4> rope_sections;
 
@@ -1369,7 +1369,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     // that have no expert_gating_func model parameter set
                     hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
                 }
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
 
                 switch (hparams.n_layer) {
                     case 27: type = LLM_TYPE_16B; break;
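Both hunks implement the same idea: rope_yarn_log_mul now has an in-class default of 0.0f, and the loader requests the GGUF key with required = false, so models whose metadata omit the key fall back to the default instead of failing to load. Below is a minimal, self-contained C++ sketch of that optional-key pattern; the loader type, map-backed metadata, and key string are illustrative stand-ins, not llama.cpp's actual llama_model_loader implementation.

    // Hypothetical stand-in for the optional-key pattern used above; names are
    // illustrative, not the real llama.cpp loader.
    #include <cstdio>
    #include <map>
    #include <stdexcept>
    #include <string>

    struct hparams_t {
        // In-class default, as introduced in the struct hunk: used whenever the
        // metadata does not provide a value.
        float rope_yarn_log_mul = 0.0f;
    };

    struct loader_t {
        std::map<std::string, float> metadata; // stands in for GGUF key/value pairs

        // Returns true if the key was found. With required == false, a missing
        // key leaves `out` untouched instead of aborting the load.
        bool get_key(const std::string & key, float & out, bool required = true) const {
            auto it = metadata.find(key);
            if (it == metadata.end()) {
                if (required) {
                    throw std::runtime_error("key not found in model: " + key);
                }
                return false;
            }
            out = it->second;
            return true;
        }
    };

    int main() {
        hparams_t hparams;
        loader_t  ml; // metadata has no yarn_log_multiplier entry

        // Optional lookup (key name is illustrative): rope_yarn_log_mul keeps 0.0f.
        ml.get_key("rope.scaling.yarn_log_multiplier", hparams.rope_yarn_log_mul, false);
        std::printf("rope_yarn_log_mul = %g\n", hparams.rope_yarn_log_mul);
        return 0;
    }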