# mirror of https://github.com/inclusionAI/AReaL
experiment_name: ppo-32B-distill-gpus-128
trial_name: 512x32
mode: ray
wandb:
  mode: disabled
metric_discovery_port: 17997
recover_mode: auto
recover_retries: 10
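# Parallelism layout, as I read AReaL's allocation_mode notation: the part before
# '+' is the SGLang generation side, the part after is the training side, and
# d/m/p denote data-, tensor(model)- and pipeline-parallel degrees. Generation then
# uses 8*8*1 = 64 GPUs and training 4*4*4 = 64 GPUs, 128 GPUs in total, matching
# n_nodes * n_gpus_per_node = 16 * 8 below and the '128' in the experiment name.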
allocation_mode: 'sglang.d8m8p1+d4p4m4'
n_nodes: 16
n_gpus_per_node: 8
cache_clear_freq: 1
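# Run control: train for 5 epochs, keep a permanent save every epoch and, assuming
# AReaL's usual save/ckpt distinction, write a recovery checkpoint every 600 s.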
exp_ctrl:
  total_train_epochs: 5
  save_freq_epochs: 1
  ckpt_freq_secs: 600
torch_cache_mysophobia: true
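# Actor: the policy model being trained. DeepSeek-R1-Distill-Qwen-32B is loaded as
# a Qwen2-class model (_class: qwen2); the optimizer block applies to training and
# the sglang block tunes the generation servers that serve this model.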
actor:
  type:
    _class: qwen2
  path: '/storage/models/DeepSeek-R1-Distill-Qwen-32B'
  optimizer:
    lr: 2e-05
    lr_scheduler_type: constant
    eps: 1e-5
    warmup_steps_proportion: 0.001
    hysteresis: 2
  sglang:
    mem_fraction_static: 0.8
    triton_attention_num_kv_splits: 16
    enable_metrics: True
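# Reference model: a frozen copy of the same checkpoint, used for reference
# log-probs. Note that kl_ctl is set to 0.0 in the ppo block below, so the KL
# penalty is effectively disabled (my reading of this file, not a stated fact).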
ref:
  type:
    _class: qwen2
  path: '/storage/models/DeepSeek-R1-Distill-Qwen-32B'
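# Microbatch budget: each microbatch is capped at 30720 tokens. That is just above
# the longest possible sequence in this run (max_prompt_len 1024 + max_new_tokens
# 27648 = 28672 tokens), so even a full-length rollout fits in one microbatch.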
actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
ref_inf:
  mb_spec:
    max_tokens_per_mb: 30720
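# Data: 512 prompts per training batch; with ppo.group_size: 32 responses sampled
# per prompt, one step covers 512 * 32 = 16384 sequences, which is presumably
# where the trial_name '512x32' comes from.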
dataset:
  path: '/storage/datasets/boba_106k_0319.jsonl'
  max_prompt_len: 1024
  train_bs_n_seqs: 512
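# PPO settings: sampling is essentially unrestricted (temperature 1.0, top_p 1.0,
# and a top_k large enough to be a no-op). disable_value: true together with
# group_size and adv_norm points to a critic-free, group-baseline (GRPO-style)
# setup, and kl_ctl: 0.0 switches off the KL penalty. These are inferences from
# the keys, not statements made by the file itself.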
ppo:
  gen:
    max_new_tokens: 27648
    min_new_tokens: 0
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
  value_eps_clip: 0.2
  disable_value: true
  reward_output_scaling: 5
  reward_output_bias: 0.0
  adv_norm: true
  group_size: 32
  group_adv_norm: false

# Cluster configuration
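# Note (assumption): n_nodes / n_gpus_per_node near the top of this file describe
# what the experiment allocates (16 * 8 = 128 GPUs), whereas the cluster block
# below appears to describe the Ray cluster itself (32 nodes of 8 GPUs) and where
# experiment files are written.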
ray_temp_path: /tmp/ray
cluster:
  fileroot: /tmp/ray/experiments
  n_nodes: 32
  n_gpus_per_node: 8