# Mirror of https://github.com/inclusionAI/AReaL (YAML, 79 lines, 1.4 KiB)
# Experiment and trial identifiers. Other sections of this file reference
# them through the ${experiment_name} and ${trial_name} interpolations.
experiment_name: gsm8k-sft
trial_name: trial0

# Cluster layout: one node with eight GPUs per node.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# name_resolve (type, nfs_record_root) is assumed to be a child of cluster.
# Confirm against the consuming config schema.
cluster:
  n_nodes: 1
  n_gpus_per_node: 8
  name_resolve:
    type: nfs
    nfs_record_root: /tmp/areal/name_resolve
# Run-level settings. tokenizer_path is interpolated from model.path.
# NOTE(review): these three keys are assumed to be top-level (not children of
# the preceding mapping); the flattened source does not show their depth.
seed: 1
total_train_epochs: 1
tokenizer_path: ${model.path}

# Model settings: checkpoint path, dtype, micro-batch spec, optimizer, backend.
# NOTE(review): nesting was reconstructed from a flattened copy of this file.
# The keys from `type` through `gradient_clipping` are assumed to belong to
# optimizer, and `backend` to model. Confirm against the consuming schema.
model:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  path: Qwen/Qwen3-1.7B
  init_from_scratch: false
  gradient_checkpointing: false
  dtype: bfloat16
  mb_spec:
    max_tokens_per_mb: 4096
  optimizer:
    type: adam
    # Exponent floats carry an explicit ".0" so that YAML 1.1 loaders (which
    # require a decimal point in floats) do not read them as strings.
    lr: 2.0e-5
    weight_decay: 0.05
    beta1: 0.9
    beta2: 0.95
    eps: 1.0e-5
    lr_scheduler_type: cosine
    gradient_clipping: 1.0
  backend: fsdp

# Training data: openai/gsm8k loaded as type "sft", in shuffled batches of 128.
train_dataset:
  batch_size: 128
  shuffle: true
  pin_memory: true
  num_workers: 4
  path: openai/gsm8k
  type: sft

# Validation data: same path, type and loader settings as train_dataset.
# NOTE(review): shuffle is enabled for validation as well; confirm that this
# is intended.
valid_dataset:
  batch_size: 128
  shuffle: true
  pin_memory: true
  num_workers: 4
  path: openai/gsm8k
  type: sft

# Utilities

# Saver: frequency set to every 1 epoch; step and time frequencies are null
# (presumably meaning "disabled"; confirm with the consumer).
# NOTE(review): ${cluster.fileroot} is not set in the cluster section visible
# in this file; presumably it comes from schema defaults or an override.
saver:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: null

# Checkpointer: frequency set to every 1 epoch and every 3600 seconds; the
# step frequency is null (presumably "disabled"; confirm with the consumer).
# NOTE(review): ${cluster.fileroot} is not set in the cluster section visible
# in this file; presumably it comes from schema defaults or an override.
checkpointer:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: 3600

# Evaluator: frequency set to every 1 step; epoch and time frequencies are
# null (presumably "disabled"; confirm with the consumer).
# NOTE(review): ${cluster.fileroot} is not set in the cluster section visible
# in this file; presumably it comes from schema defaults or an override.
evaluator:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: null
  freq_steps: 1
  freq_secs: null

stats_logger:
|
|
experiment_name: ${experiment_name}
|
|
trial_name: ${trial_name}
|
|
fileroot: ${cluster.fileroot}
|