This commit is contained in:
bowei.fw 2025-07-13 22:12:54 +08:00
parent 932f9b9232
commit ae20d51cce
3 changed files with 3 additions and 7 deletions

View File

@@ -63,8 +63,8 @@ actor:
reward_bias: -0.5 reward_bias: -0.5
kl_ctl: 0.0 kl_ctl: 0.0
ppo_n_minibatches: 1 ppo_n_minibatches: 1
recompute_logprob: false recompute_logprob: true
use_decoupled_loss: false use_decoupled_loss: true
behav_imp_weight_cap: 5.0 behav_imp_weight_cap: 5.0
ref: ref:

View File

@@ -32,8 +32,6 @@ dependencies = [
"datasets", "datasets",
"accelerate", "accelerate",
"transformers==4.51.1", "transformers==4.51.1",
"liger_kernel==0.6.0",
"triton<=3.1.0",
# Scientific computing # Scientific computing
"numpy<2.0.0", "numpy<2.0.0",

View File

@@ -73,6 +73,4 @@ swanlab[dashboard]
torchdata torchdata
autoflake autoflake
gymnasium gymnasium
tensordict tensordict
liger_kernel==0.6.0
triton<=3.1.0