# Mirror of https://github.com/inclusionAI/AReaL (73 lines, 1.4 KiB, YAML)
# PPO experiment configuration (Qwen2.5-7B, "zero" setting, 128 GPUs).
#
# NOTE(review): the copy this file was recovered from had lost all
# indentation and had stray `|` lines between entries. The nesting below is
# reconstructed from key grouping -- confirm it against the consumer's
# config schema before use.

experiment_name: ppo-7B-zero-gpus-128
trial_name: 512x64
mode: ray
wandb:
  mode: disabled
metric_discovery_port: 17997
recover_mode: auto
recover_retries: 10
allocation_mode: 'sglang.d64p1m1+d32p2m1'
# 16 nodes x 8 GPUs = 128 GPUs, matching the experiment name suffix.
n_nodes: 16
n_gpus_per_node: 8
cache_clear_freq: 1
exp_ctrl:
  total_train_epochs: 5
  save_freq_epochs: 1
  ckpt_freq_secs: 600
torch_cache_mysophobia: true

actor:
  type:
    _class: qwen2
  path: '/storage/models/Qwen2.5-7B'
  optimizer:
    # Written with an explicit decimal point so YAML 1.1 loaders (e.g.
    # PyYAML) resolve these as floats rather than strings.
    lr: 1.0e-5
    lr_scheduler_type: constant
    eps: 1.0e-5
    warmup_steps_proportion: 0.001
    hysteresis: 2
  sglang:
    mem_fraction_static: 0.8
    triton_attention_num_kv_splits: 16
    enable_metrics: true

ref:
  type:
    _class: qwen2
  path: '/storage/models/Qwen2.5-7B'

actor_train:
  mb_spec:
    max_tokens_per_mb: 19456
actor_inf:
  mb_spec:
    max_tokens_per_mb: 19456
ref_inf:
  mb_spec:
    max_tokens_per_mb: 19456

dataset:
  path: '/storage/datasets/orz-zero_56k_0319.jsonl'
  max_prompt_len: 2048
  train_bs_n_seqs: 512

ppo:
  gen:
    max_new_tokens: 16384
    min_new_tokens: 0
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
  value_eps_clip: 0.2
  disable_value: true
  reward_output_scaling: 0.5
  reward_output_bias: -1.0
  adv_norm: true
# NOTE(review): presumably top-level experiment options rather than `ppo`
# fields -- verify against the consumer's schema.
group_size: 64
group_adv_norm: false

# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
  fileroot: /tmp/ray/experiments
  n_nodes: 32
  n_gpus_per_node: 8