# Mirror of https://github.com/inclusionAI/AReaL (YAML, 129 lines, 2.6 KiB)
# Experiment identity. Other sections reference these two names through
# ${experiment_name} / ${trial_name} interpolation.
experiment_name: gsm8k-grpo
trial_name: trial0
# NOTE(review): presumably describes how GPUs are split between the SGLang
# inference servers and the training workers -- confirm against the launcher.
allocation_mode: sglang.d4p1t1+d4p1t1
# Cluster settings. `fileroot` is reused by the utility sections further down
# via ${cluster.fileroot}.
cluster:
  n_nodes: 1
  n_gpus_per_node: 8
  fileroot: /tmp/arealite/experiments
  # NOTE(review): nesting of name_resolve under cluster is reconstructed from
  # key order -- confirm against the config schema.
  name_resolve:
    type: nfs
    nfs_record_root: /tmp/areal/name_resolve
# Global run settings. `seed` is also forwarded to sglang.random_seed below.
seed: 1
total_train_epochs: 10
# The tokenizer is loaded from the same location as the actor model.
tokenizer_path: ${actor.path}
async_training: true

# Rollout settings.
rollout:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  max_concurrent_rollouts: 256
  queue_size: null
  # Kept equal to the training dataloader batch size via interpolation.
  consumer_batch_size: ${train_dataset.batch_size}
  max_head_offpolicyness: 4
  enable_rollout_tracing: false

# Generation settings. `n_samples` and `temperature` are referenced by the
# actor section as ${gconfig.n_samples} and ${gconfig.temperature}.
gconfig:
  n_samples: 4
  min_new_tokens: 0
  max_new_tokens: 512
  greedy: false
  temperature: 1.0

# Actor (trained policy) settings. `path` and `dtype` are the source of truth
# for tokenizer_path, ref.path, ref.dtype, sglang.model_path and sglang.dtype,
# which all interpolate from here.
actor:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  path: Qwen/Qwen2-1.5B-Instruct
  init_from_scratch: false
  disable_dropout: true
  gradient_checkpointing: false
  dtype: bfloat16
  mb_spec:
    max_tokens_per_mb: 10240
  optimizer:
    type: adam
    # Exponent literals carry an explicit ".0" so that YAML 1.1 loaders
    # (e.g. PyYAML) resolve them as floats instead of strings.
    lr: 1.0e-5
    weight_decay: 0.01
    beta1: 0.9
    beta2: 0.999
    eps: 1.0e-8
    lr_scheduler_type: constant
    gradient_clipping: 1.0
    warmup_steps_proportion: 0.001
  backend: fsdp

  # Algorithm-specific fields. Group size and temperature mirror the
  # generation settings through interpolation.
  group_size: ${gconfig.n_samples}
  group_adv_norm: false
  eps_clip: 0.4
  temperature: ${gconfig.temperature}
  reward_scaling: 10.0
  reward_bias: -0.5
  kl_ctl: 0.0
  ppo_n_minibatches: 1
  recompute_logprob: true
  use_decoupled_loss: true
  behav_imp_weight_cap: 5.0

# Reference model settings. Model path and dtype are taken from the actor
# section; no optimizer is configured for it.
ref:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  path: ${actor.path}
  init_from_scratch: false
  dtype: ${actor.dtype}
  mb_spec:
    max_tokens_per_mb: 10240
  optimizer: null
  backend: fsdp

# SGLang
server_only: false
# Inference server settings. Model path, dtype and seed are interpolated from
# the actor section and the global seed.
sglang:
  model_path: ${actor.path}
  random_seed: ${seed}
  skip_tokenizer_init: true
  dtype: ${actor.dtype}
  max_running_requests: null
  context_length: 32768
  mem_fraction_static: 0.9

# datasets
# `batch_size` here is also consumed by rollout.consumer_batch_size.
train_dataset:
  batch_size: 256
  shuffle: true
  pin_memory: true

# Validation dataloader settings (same values as train_dataset).
valid_dataset:
  batch_size: 256
  shuffle: true
  pin_memory: true

# Utilities
# Saver frequency: only the per-epoch trigger is set.
# NOTE(review): null presumably disables the step/time triggers -- confirm.
saver:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: null

# Checkpointer frequency: per-epoch and time-based triggers are both set.
# NOTE(review): freq_secs is presumably in seconds (3600 = one hour) -- confirm.
checkpointer:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: 3600

# Evaluator frequency: only the per-epoch trigger is set.
evaluator:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: null

# Statistics logging. Weights & Biases mode is set to "disabled".
stats_logger:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  wandb:
    mode: disabled