# Source: mirror of https://github.com/inclusionAI/AReaL (172 lines, 3.8 KiB, YAML, executable file)
# Basic experiment info
# Top-level identifiers for this run: experiment/trial names, the random seed,
# and the launch mode (`local` here).
experiment_name: gsm8k-test
trial_name: my-trial-3
seed: 1
mode: local
# wandb logging settings. `mode: disabled` with every other field null.
# NOTE(review): child keys were re-indented under `wandb` from a copy whose
# indentation was stripped — confirm against the config schema.
wandb:
  mode: disabled
  entity: null
  project: null
  name: null
  job_type: null
  group: null
  notes: null
  tags: null
  config: null
# tensorboard settings; no path is configured.
# NOTE(review): `path` re-indented under `tensorboard` from a copy whose
# indentation was stripped — confirm against the config schema.
tensorboard:
  path: null

# Experiment control: epoch count plus save / checkpoint / eval / benchmark
# frequencies. Only three knobs are set; everything else is null.
# NOTE(review): child keys re-indented under `exp_ctrl` from a copy whose
# indentation was stripped — confirm against the config schema.
exp_ctrl:
  total_train_epochs: 5
  save_freq_epochs: 1
  save_freq_steps: null
  save_freq_secs: null
  ckpt_freq_epochs: null
  ckpt_freq_steps: null
  # Presumably seconds, going by the `_secs` suffix — TODO confirm.
  ckpt_freq_secs: 600
  eval_freq_epochs: null
  eval_freq_steps: null
  eval_freq_secs: null
  benchmark_steps: null
  benchmark_n_seqs: null

# Whether to allow persistent servers.
# NOTE(review): `true` presumably means servers are torn down on exit (i.e.
# NOT persistent) — confirm the polarity against the consumer.
shutdown_server_on_exit: true

# Allocation and parallelism
# NOTE(review): `sglang.d4p1t1+d4p1t1` presumably splits the 8 GPUs of the
# single node 4 + 4 between the sglang servers and the trainer — confirm
# against the allocation-mode parser.
allocation_mode: sglang.d4p1t1+d4p1t1
n_nodes: 1
n_gpus_per_node: 8

# Cluster configuration
# NOTE(review): nesting below (including `name_resolve` under `cluster`) was
# reconstructed from a copy whose indentation was stripped — confirm against
# the config schema.
ray_temp_path: /tmp/ray
cluster:
  cluster_name: local
  fileroot: /tmp/arealite/
  # These repeat the top-level n_nodes / n_gpus_per_node values (1 and 8);
  # keep the two pairs in sync when editing.
  n_nodes: 1
  n_gpus_per_node: 8
  name_resolve:
    type: nfs
    nfs_record_root: /tmp/arealite/name_resolve/

# Datasets
# NOTE(review): nesting below was reconstructed from a copy whose indentation
# was stripped — confirm against the config schema.
train_dataset:
  path: json
  name: null
  split: train
  # Absolute, user-specific path; must be adjusted for other environments.
  # The same file is referenced by the rollout collector's `solution_path`.
  data_files: /storage/openpsi/users/xushusheng.xss/training_data/boba_106k_0319.jsonl
  batch_size: 32
  shuffle: true
  preprocessor:
    type: areal

# No validation dataset is configured.
valid_dataset: null

# Rollout config
# NOTE(review): nesting below was reconstructed from a copy whose indentation
# was stripped. In particular, the placement of `gconfig`, `llm_client`,
# `llm_service` and `sglang` as children of `rollout` is inferred — confirm
# against the config schema.
rollout:
  collector:
    type: rlvr
    rlvr:
      reward_type: areal-math
      # Same file as `train_dataset.data_files`.
      solution_path: /storage/openpsi/users/xushusheng.xss/training_data/boba_106k_0319.jsonl
  num_workers: 1
  max_concurrent_rollouts: null
  max_head_offpolicyness: 0
  # Reward filter bounds; at -10000 / 10000 they are presumably wide enough
  # to filter nothing — TODO confirm.
  filter_reward_lb: -10000
  filter_reward_ub: 10000
  server_backend: sglang
  # Same model directory as the trainer's actor `path`.
  model_path: /storage/openpsi/models/Qwen__Qwen3-1.7B/
  # Generation (sampling) settings.
  gconfig:
    n_samples: 16
    max_new_tokens: 512
    min_new_tokens: 0
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
  llm_client:
    schedule_policy: round_robin
    request_timeout: 3600
    request_retries: 3
  llm_service:
    served_model_name: null
    health_check_interval: 5
    startup_timeout: 300
    max_unhealth_count: 3
    graceful_shutdown_on_unhealthy: true
  # Options for the sglang server backend selected by `server_backend`.
  sglang:
    dtype: "bfloat16"
    enable_mixed_chunk: false
    enable_torch_compile: false
    torch_compile_max_bs: 32
    cuda_graph_max_bs: null
    cuda_graph_bs: null
    triton_attention_reduce_in_fp32: false
    triton_attention_num_kv_splits: 8
    num_continuous_decode_steps: 1
    attention_backend: "flashinfer"
    sampling_backend: null
    context_length: 32768
    mem_fraction_static: 0.9
    max_running_requests: null
    chunked_prefill_size: -1
    max_prefill_tokens: 32768
    schedule_policy: "lpm"
    schedule_conservativeness: 1.0
    cpu_offload_gb: 0
    kv_cache_dtype: "auto"
    log_level: "warning"
    log_level_http: "warning"
    log_requests: false
    log_requests_level: 0
    show_time_cost: false
    enable_metrics: true
    decode_log_interval: 1

# Trainer
# NOTE(review): nesting below was reconstructed from a copy whose indentation
# was stripped. The extent of `optimizer`, and the placement of `backend`
# under `actor` and of `ref` / `mb_spec` / the algorithm keys under `grpo`,
# is inferred — confirm against the config schema.
trainer:
  type: grpo
  grpo:
    async_training: true
    actor:
      # Same model directory as `rollout.model_path`.
      path: /storage/openpsi/models/Qwen__Qwen3-1.7B/
      init_from_scratch: false
      gradient_checkpointing: false
      bf16: true
      optimizer:
        type: adam
        lr: 1.0e-6
        weight_decay: 0.05
        beta1: 0.9
        beta2: 0.999
        eps: 1.0e-08
        min_lr_ratio: 0.0
        lr_scheduler_type: constant
        warmup_steps_proportion: 0.001
        # 4294967296.0 == 2**32
        initial_loss_scale: 4294967296.0
        min_loss_scale: 1.0
        loss_scale_window: 5.0
        hysteresis: 2
        gradient_clipping: 1.0
      backend:
        type: fsdp
    # No reference model is configured.
    ref: null
    mb_spec:
      max_tokens_per_mb: 10240
    # Algorithm
    group_adv_norm: false
    ppo_n_minibatches: 4
    eps_clip: 0.2
    c_clip: null
    reward_scaling: 10.0
    reward_bias: -0.5
    max_reward_clip: 20.0
    mask_no_eos_with_zero: false
    discount: 1.0
    gae_lambda: 1.0
    adv_norm: true
    kl_ctl: 0.0
    recompute_logprob: true
    use_decoupled_loss: true
    behav_imp_weight_cap: null