mirror of https://github.com/inclusionAI/AReaL
This commit is contained in:
parent
932f9b9232
commit
ae20d51cce
|
@ -63,8 +63,8 @@ actor:
|
||||||
reward_bias: -0.5
|
reward_bias: -0.5
|
||||||
kl_ctl: 0.0
|
kl_ctl: 0.0
|
||||||
ppo_n_minibatches: 1
|
ppo_n_minibatches: 1
|
||||||
recompute_logprob: false
|
recompute_logprob: true
|
||||||
use_decoupled_loss: false
|
use_decoupled_loss: true
|
||||||
behav_imp_weight_cap: 5.0
|
behav_imp_weight_cap: 5.0
|
||||||
|
|
||||||
ref:
|
ref:
|
||||||
|
|
|
@ -32,8 +32,6 @@ dependencies = [
|
||||||
"datasets",
|
"datasets",
|
||||||
"accelerate",
|
"accelerate",
|
||||||
"transformers==4.51.1",
|
"transformers==4.51.1",
|
||||||
"liger_kernel==0.6.0",
|
|
||||||
"triton<=3.1.0",
|
|
||||||
|
|
||||||
# Scientific computing
|
# Scientific computing
|
||||||
"numpy<2.0.0",
|
"numpy<2.0.0",
|
||||||
|
|
|
@ -73,6 +73,4 @@ swanlab[dashboard]
|
||||||
torchdata
|
torchdata
|
||||||
autoflake
|
autoflake
|
||||||
gymnasium
|
gymnasium
|
||||||
tensordict
|
tensordict
|
||||||
liger_kernel==0.6.0
|
|
||||||
triton<=3.1.0
|
|
Loading…
Reference in New Issue