# Mirror of https://github.com/inclusionAI/AReaL
# Original file: 47 lines, 1011 B, YAML
---
# Supervised fine-tuning (SFT) experiment configuration for a qwen2-class
# 32B checkpoint, run in `ray` mode.
experiment_name: sft-32B-distill-gpus-128
trial_name: 512x32
mode: ray

# Weights & Biases logging is switched off for this run.
wandb:
  mode: disabled

# Recovery settings: mode `auto`, with up to 10 retries.
recover_mode: auto
recover_retries: 10

# 16 nodes x 8 GPUs = 128 GPUs, matching the "gpus-128" suffix of
# experiment_name.
# NOTE(review): 'd32p1m4' presumably encodes data=32, pipeline=1, model=4
# parallelism (32 * 1 * 4 = 128) -- confirm against the launcher's docs.
allocation_mode: 'd32p1m4'
n_nodes: 16
n_gpus_per_node: 8

# Experiment control: epoch budget and save/checkpoint cadence.
exp_ctrl:
  total_train_epochs: 200
  save_freq_epochs: 1
  ckpt_freq_secs: 600

torch_cache_mysophobia: true

dataset:
  train_path: /storage/datasets/boba-sft_200_0319.jsonl
  # NOTE(review): valid_path points at the same file as train_path, so any
  # validation metric is computed on training data -- confirm this is intended.
  valid_path: /storage/datasets/boba-sft_200_0319.jsonl
  max_seqlen: 32768
  train_bs_n_seqs: 64
  valid_bs_n_seqs: 64

model:
  type:
    _class: qwen2
  path: /storage/models/DeepSeek-R1-Distill-Qwen-32B
  optimizer:
    type: adam
    lr_scheduler_type: constant
    # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
    # resolve a bare `1e-5` as a string rather than a float.
    lr: 1.0e-5
    warmup_steps_proportion: 0.03
    # NOTE(review): the loss-scaling knobs below are presumably only relevant
    # to fp16 training, while bf16 is enabled for this model -- confirm they
    # are kept on purpose.
    initial_loss_scale: 262144.0
    loss_scale_window: 10
    hysteresis: 2
    weight_decay: 0.1
    # Same explicit-dot float spelling as `lr` above.
    eps: 1.0e-5
  bf16: true

allocation:
  mb_spec:
    # Equal to dataset.max_seqlen (32768).
    max_tokens_per_mb: 32768

# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
  fileroot: /tmp/ray/experiments
  # NOTE(review): 32 here differs from the top-level n_nodes (16); presumably
  # this describes the whole cluster rather than this job -- confirm.
  n_nodes: 32
  n_gpus_per_node: 8