This commit is contained in:
bowei.fw 2025-07-13 22:12:54 +08:00
parent 932f9b9232
commit ae20d51cce
3 changed files with 3 additions and 7 deletions

View File

@@ -63,8 +63,8 @@ actor:
reward_bias: -0.5
kl_ctl: 0.0
ppo_n_minibatches: 1
-recompute_logprob: false
-use_decoupled_loss: false
+recompute_logprob: true
+use_decoupled_loss: true
behav_imp_weight_cap: 5.0
ref:

View File

@@ -32,8 +32,6 @@ dependencies = [
"datasets",
"accelerate",
"transformers==4.51.1",
-"liger_kernel==0.6.0",
-"triton<=3.1.0",
# Scientific computing
"numpy<2.0.0",

View File

@@ -74,5 +74,3 @@ torchdata
autoflake
gymnasium
tensordict
-liger_kernel==0.6.0
-triton<=3.1.0