# Mirror of https://github.com/inclusionAI/AReaL
# (YAML, 205 lines, 4.3 KiB)
# Basic experiment info
# Top-level identifiers for this run. The stray `|` separator lines that
# followed every entry were extraction residue and are removed so the
# mapping parses.
experiment_name: async-ppo
trial_name: my-trial
seed: 1
mode: ray
metric_discovery_port: 17997
# Settings grouped under `wandb`. `mode: disabled` with every other field
# null — presumably this turns the integration off; confirm with the consumer.
# Children are nested here so they no longer collide with the top-level
# `mode` key.
wandb:
  mode: disabled
  entity: null
  project: null
  name: null
  job_type: null
  group: null
  notes: null
  tags: null
  config: null
# Settings grouped under `swanlab`. `mode: disabled` with every other field
# null — presumably this turns the integration off; confirm with the consumer.
# NOTE(review): keep `api_key` null in version control; supply any real key
# out of band.
swanlab:
  mode: disabled
  api_key: null
  project: null
  name: null
  config: null
  logdir: null
# Settings grouped under `tensorboard`; `path` is unset (null).
tensorboard:
  path: null
# Recovery settings (top-level keys).
# NOTE(review): the unit of `recover_after` is not visible here — confirm.
recover_mode: auto
recover_retries: 10
recover_after: 10
# Experiment control: number of training epochs plus save / ckpt / eval
# frequencies, each expressed per epochs, steps, or secs. A null entry is
# unset. Only `save_freq_epochs` (1) and `ckpt_freq_secs` (600) are set.
exp_ctrl:
  total_train_epochs: 5
  save_freq_epochs: 1
  save_freq_steps: null
  save_freq_secs: null
  ckpt_freq_epochs: null
  ckpt_freq_steps: null
  ckpt_freq_secs: 600
  eval_freq_epochs: null
  eval_freq_steps: null
  eval_freq_secs: null
  benchmark_steps: null
  benchmark_n_seqs: null
# Cache-clearing switches (top-level keys).
# NOTE(review): assumed to sit at top level rather than under `exp_ctrl` —
# confirm against the consumer's schema.
torch_cache_mysophobia: true
cache_clear_freq: 1
# Asynchronous RL options
# Null entries are unset; presumably the consumer derives them — confirm.
max_head_offpolicyness: 4
n_rollout_workers: null
max_concurrent_rollouts: null
flush_request_timeout: 300
# Asynchronous worker resources
# One cpus_* / mem_* pair per worker kind.
# NOTE(review): mem_* values look like MiB (61440 = 60 * 1024) — confirm unit.
cpus_per_generation_server: 4
mem_per_generation_server: 61440
cpus_per_gserver_manager: 4
mem_per_gserver_manager: 10240
cpus_per_rollout_worker: 4
mem_per_rollout_worker: 20480
# Allocation and parallelism
# NOTE(review): `sglang.d4p1m1+d2p2m1` presumably describes 4 generation
# GPUs plus 2 x 2 training GPUs, matching n_nodes * n_gpus_per_node = 8 —
# confirm against the allocation-mode grammar.
allocation_mode: sglang.d4p1m1+d2p2m1
n_nodes: 1
n_gpus_per_node: 8
# Cluster configuration
ray_temp_path: /tmp/ray
# `n_nodes` and `n_gpus_per_node` are nested under `cluster` so they no
# longer duplicate the top-level keys of the same name.
cluster:
  fileroot: /storage/ray/experiments
  n_nodes: 32
  n_gpus_per_node: 8
# Model
# Actor model: checkpoint path, optimizer, Megatron DDP options and SGLang
# options. Nesting is reconstructed from the parent-key lines (`type:`,
# `optimizer:`, `megatron:`, `ddp:`, `sglang:`) — confirm against the
# consumer's schema.
actor:
  type:
    _class: qwen3
  path: /storage/openpsi/models/Qwen3-1.7B/
  init_from_scratch: false
  gradient_checkpointing: true
  # bf16 is off here and sglang.dtype below is float16.
  bf16: false
  optimizer:
    type: adam
    lr: 2.0e-05
    weight_decay: 0.05
    beta1: 0.9
    beta2: 0.95
    eps: 1.0e-05
    min_lr_ratio: 0.0
    lr_scheduler_type: constant
    warmup_steps_proportion: 0.001
    # 4294967296.0 == 2**32
    initial_loss_scale: 4294967296.0
    min_loss_scale: 1.0
    loss_scale_window: 5.0
    hysteresis: 2
    gradient_clipping: 1.0
  megatron:
    ddp:
      grad_reduce_in_fp32: true
      overlap_grad_reduce: true
      use_distributed_optimizer: true
  sglang:
    disable_cuda_graph: false
    disable_radix_cache: false
    disable_cuda_graph_padding: false
    enable_nccl_nvls: false
    disable_outlines_disk_cache: false
    disable_custom_all_reduce: false
    disable_overlap_schedule: false
    enable_mixed_chunk: false
    enable_torch_compile: false
    torch_compile_max_bs: 32
    cuda_graph_max_bs: null
    cuda_graph_bs: null
    torchao_config: ''
    enable_nan_detection: false
    enable_p2p_check: false
    triton_attention_reduce_in_fp32: false
    triton_attention_num_kv_splits: 8
    num_continuous_decode_steps: 1
    enable_memory_saver: false
    allow_auto_truncate: false
    attention_backend: flashinfer
    sampling_backend: null
    context_length: 32768
    mem_fraction_static: 0.8
    max_running_requests: null
    chunked_prefill_size: -1
    max_prefill_tokens: 32768
    schedule_policy: lpm
    schedule_conservativeness: 1.0
    cpu_offload_gb: 0
    dtype: float16
    kv_cache_dtype: auto
    log_level: warning
    log_level_http: warning
    log_requests: false
    log_requests_level: 0
    show_time_cost: false
    enable_metrics: true
    decode_log_interval: 1
# Reference model: same `_class` and `path` values as `actor`.
ref:
  type:
    _class: qwen3
  path: /storage/openpsi/models/Qwen3-1.7B/
  init_from_scratch: false
  bf16: false
# Micro-batch spec for `actor_train`; 30720 == 30 * 1024 tokens.
actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
# Micro-batch spec for `ref_inf`; 30720 == 30 * 1024 tokens.
ref_inf:
  mb_spec:
    max_tokens_per_mb: 30720
# Micro-batch spec for `actor_inf`; 30720 == 30 * 1024 tokens.
actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
# Dataset
shuffle_dataset: true
# `path` is nested under `dataset` so it is distinct from the model paths.
dataset:
  path: /storage/datasets/boba_106k_0319.jsonl
  max_prompt_len: 1024
  train_bs_n_seqs: 512
# Algorithm
# Top-level algorithm switches.
# NOTE(review): success_rate_lb/ub of 0.0 and 1.0 span the full range, so
# presumably nothing is filtered — confirm.
group_size: 16
mask_too_long: false
group_adv_norm: false
rw_type: sparse
success_rate_ub: 1.0
success_rate_lb: 0.0
# PPO hyperparameters. `gen` holds the generation (sampling) settings; the
# remaining keys are direct children of `ppo`.
# NOTE(review): nesting is reconstructed from the key runs — confirm against
# the consumer's schema.
ppo:
  gen:
    n: 1
    # 27648 == 27 * 1024
    max_new_tokens: 27648
    min_new_tokens: 0
    greedy: false
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
  ppo_n_minibatches: 4
  eps_clip: 0.2
  c_clip: null
  value_eps_clip: 0.2
  early_stop_imp_ratio: 5.0
  actor_sample_reuse: 1
  critic_sample_reuse: 1
  max_reward_clip: 20.0
  reward_output_scaling: 5.0
  reward_output_bias: 0.0
  fuse_rew_ref: true
  discount: 1.0
  gae_lambda: 1.0
  adv_norm: true
  kl_ctl: 0.0
  use_adaptive_kl_ctl: false
  disable_value: true
  recompute_logprob: true
  use_decoupled_loss: true
  behav_imp_weight_cap: null
# Worker resources
# One cpus_* / mem_* pair per worker kind.
# NOTE(review): unit of mem_* is not visible here — confirm.
cpus_per_master_worker: 4
mem_per_master_worker: 20000
cpus_per_model_worker: 4
mem_per_model_worker: 90000