diff --git a/examples/arealite/configs/boba.yaml b/examples/arealite/configs/boba.yaml index 29eb291..d38f39c 100644 --- a/examples/arealite/configs/boba.yaml +++ b/examples/arealite/configs/boba.yaml @@ -63,8 +63,8 @@ actor: reward_bias: -0.5 kl_ctl: 0.0 ppo_n_minibatches: 1 - recompute_logprob: false - use_decoupled_loss: false + recompute_logprob: true + use_decoupled_loss: true behav_imp_weight_cap: 5.0 ref: diff --git a/pyproject.toml b/pyproject.toml index f752f47..962c9b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,8 +32,6 @@ dependencies = [ "datasets", "accelerate", "transformers==4.51.1", - "liger_kernel==0.6.0", - "triton<=3.1.0", # Scientific computing "numpy<2.0.0", diff --git a/requirements.txt b/requirements.txt index 93c2df0..0af83fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -73,6 +73,4 @@ swanlab[dashboard] torchdata autoflake gymnasium -tensordict -liger_kernel==0.6.0 -triton<=3.1.0 \ No newline at end of file +tensordict \ No newline at end of file