mirror of https://github.com/inclusionAI/AReaL
63 lines · 1.2 KiB · YAML
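# AReaL PPO experiment config: trains a Qwen3-1.7B actor with Ray orchestration
# on 32 GPUs (4 nodes x 8), using SGLang for rollout generation.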
experiment_name: ppo-1.7b-gpu32
trial_name: my-trial
mode: ray
cluster:
  fileroot: /storage/ray/experiments
wandb:
  mode: disabled
recover_mode: auto
recover_retries: 10
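# Annotation: reading the allocation string in AReaL's d{data}p{pipe}m{tensor}
# notation, sglang.d16p1m1 assigns 16 GPUs to SGLang generation and d8p2m1
# assigns 8 x 2 = 16 GPUs to PPO training, matching the n_nodes x n_gpus_per_node
# = 32 GPUs configured below.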
allocation_mode: sglang.d16p1m1+d8p2m1
n_nodes: 4
n_gpus_per_node: 8
cache_clear_freq: 1
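# Annotation: run 5 training epochs; save_freq_epochs exports model weights every
# epoch, while ckpt_freq_secs writes a restart checkpoint every 600 s, presumably
# what recover_mode: auto resumes from.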
exp_ctrl:
  total_train_epochs: 5
  save_freq_epochs: 1
  ckpt_freq_secs: 600
torch_cache_mysophobia: true
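# Annotation: prompts come from a local JSONL dump; prompt length is capped at
# 1024 tokens and each PPO iteration takes 512 prompts (train_bs_n_seqs). With
# group_size: 16, every prompt gets 16 sampled responses, i.e. 512 x 16 = 8192
# rollouts per iteration. group_adv_norm: false presumably normalizes advantages
# over the whole batch (see adv_norm below) rather than within each prompt group.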
dataset:
  path: /storage/datasets/boba_106k_0319.jsonl
  max_prompt_len: 1024
  train_bs_n_seqs: 512
group_size: 16
group_adv_norm: false
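# Annotation: the actor is Qwen3-1.7B loaded from local storage and trained with
# a constant 2e-5 learning rate; mem_fraction_static: 0.8 lets SGLang reserve 80%
# of GPU memory for model weights and KV cache on the rollout side.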
actor:
  type:
    _class: qwen3
  path: /storage/openpsi/models/Qwen3-1.7B/
  optimizer:
    lr: 2e-05
    lr_scheduler_type: constant
    eps: 1e-5
    warmup_steps_proportion: 0.001
    hysteresis: 2
  sglang:
    mem_fraction_static: 0.8
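# Annotation: each of the actor's roles (training, generation, and forward-only
# inference) packs its batches into microbatches of at most 30720 tokens to bound
# peak memory per pass.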
actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
actor_gen:
  mb_spec:
    max_tokens_per_mb: 30720
actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
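# Annotation: rollouts generate up to 27648 new tokens at temperature 1.0 with
# effectively unrestricted top-p/top-k; 1024 prompt + 27648 output = 28672 tokens,
# which fits under the 30720-token microbatch cap above. kl_ctl: 0.0 disables the
# KL penalty and disable_value: true drops the critic, consistent with a
# critic-free, group-sampling (GRPO-style) setup; reward_output_scaling: 5
# multiplies the reward signal by 5 before advantages are computed.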
ppo:
  gen:
    max_new_tokens: 27648
    min_new_tokens: 0
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
  value_eps_clip: 0.2
  disable_value: true
  reward_output_scaling: 5
  reward_output_bias: 0.0
  adv_norm: true
  value_norm: true