[Fix] Fix yaml configurations for v0.2 experiments. (#129)

* .

* fix
This commit is contained in:
Wei Fu 2025-06-24 13:48:02 +08:00 committed by GitHub
parent e3005d57f6
commit adeb8eb13f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 101 additions and 213 deletions

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8 mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16 triton_attention_num_kv_splits: 16
enable_metrics: True enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
init_critic_from_actor: true
ref: ref:
type: type:
_class: qwen2 _class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train: actor_train:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf: actor_inf:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0 top_p: 1.0
top_k: 1000000 top_k: 1000000
temperature: 1.0 temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4 ppo_n_minibatches: 4
kl_ctl: 0.0 kl_ctl: 0.0
discount: 1.0 discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5 reward_output_scaling: 5
reward_output_bias: 0.0 reward_output_bias: 0.0
adv_norm: true adv_norm: true
value_norm: true
group_size: 16 group_size: 16
group_adv_norm: false group_adv_norm: false
external_configs:
cluster_config: # Cluster configuration
fileroot: "/storage/ray/experiments" ray_temp_path: /tmp/ray
envs: cluster:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1" fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8 mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16 triton_attention_num_kv_splits: 16
enable_metrics: True enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
init_critic_from_actor: true
ref: ref:
type: type:
_class: qwen2 _class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train: actor_train:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf: actor_inf:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0 top_p: 1.0
top_k: 1000000 top_k: 1000000
temperature: 1.0 temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4 ppo_n_minibatches: 4
kl_ctl: 0.0 kl_ctl: 0.0
discount: 1.0 discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5 reward_output_scaling: 5
reward_output_bias: 0.0 reward_output_bias: 0.0
adv_norm: true adv_norm: true
value_norm: true
group_size: 16 group_size: 16
group_adv_norm: false group_adv_norm: false
external_configs:
cluster_config: # Cluster configuration
fileroot: "/storage/ray/experiments" ray_temp_path: /tmp/ray
envs: cluster:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1" fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8 mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16 triton_attention_num_kv_splits: 16
enable_metrics: True enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
init_critic_from_actor: true
ref: ref:
type: type:
_class: qwen2 _class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train: actor_train:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf: actor_inf:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0 top_p: 1.0
top_k: 1000000 top_k: 1000000
temperature: 1.0 temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4 ppo_n_minibatches: 4
kl_ctl: 0.0 kl_ctl: 0.0
discount: 1.0 discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5 reward_output_scaling: 5
reward_output_bias: 0.0 reward_output_bias: 0.0
adv_norm: true adv_norm: true
value_norm: true
group_size: 16 group_size: 16
group_adv_norm: false group_adv_norm: false
external_configs:
cluster_config: # Cluster configuration
fileroot: "/storage/ray/experiments" ray_temp_path: /tmp/ray
envs: cluster:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1" fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -27,17 +27,8 @@ actor:
hysteresis: 2 hysteresis: 2
sglang: sglang:
mem_fraction_static: 0.8 mem_fraction_static: 0.8
disable_radix_cache: false
triton_attention_num_kv_splits: 16 triton_attention_num_kv_splits: 16
max_running_requests: 128
context_length: 29696
enable_metrics: True enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-32B'
init_critic_from_actor: true
ref: ref:
type: type:
_class: qwen2 _class: qwen2
@ -45,15 +36,6 @@ ref:
actor_train: actor_train:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf: actor_inf:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
@ -71,8 +53,6 @@ ppo:
top_p: 1.0 top_p: 1.0
top_k: 1000000 top_k: 1000000
temperature: 1.0 temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4 ppo_n_minibatches: 4
kl_ctl: 0.0 kl_ctl: 0.0
discount: 1.0 discount: 1.0
@ -81,11 +61,12 @@ ppo:
reward_output_scaling: 5 reward_output_scaling: 5
reward_output_bias: 0.0 reward_output_bias: 0.0
adv_norm: true adv_norm: true
value_norm: true
group_size: 32 group_size: 32
group_adv_norm: false group_adv_norm: false
external_configs:
cluster_config: # Cluster configuration
fileroot: "/storage/ray/experiments" ray_temp_path: /tmp/ray
envs: cluster:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1" fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -37,8 +37,10 @@ model:
allocation: allocation:
mb_spec: mb_spec:
max_tokens_per_mb: 32768 max_tokens_per_mb: 32768
external_configs:
cluster_config: # Cluster configuration
fileroot: "/storage/ray/experiments" ray_temp_path: /tmp/ray
envs: cluster:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1" fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8 mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16 triton_attention_num_kv_splits: 16
enable_metrics: True enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
init_critic_from_actor: true
ref: ref:
type: type:
_class: qwen2 _class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train: actor_train:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf: actor_inf:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0 top_p: 1.0
top_k: 1000000 top_k: 1000000
temperature: 1.0 temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4 ppo_n_minibatches: 4
kl_ctl: 0.0 kl_ctl: 0.0
discount: 1.0 discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5 reward_output_scaling: 5
reward_output_bias: 0.0 reward_output_bias: 0.0
adv_norm: true adv_norm: true
value_norm: true
group_size: 16 group_size: 16
group_adv_norm: false group_adv_norm: false
external_configs:
cluster_config: # Cluster configuration
fileroot: "/storage/ray/experiments" ray_temp_path: /tmp/ray
envs: cluster:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1" fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8 mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16 triton_attention_num_kv_splits: 16
enable_metrics: True enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
init_critic_from_actor: true
ref: ref:
type: type:
_class: qwen2 _class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train: actor_train:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf: actor_inf:
mb_spec: mb_spec:
max_tokens_per_mb: 30720 max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0 top_p: 1.0
top_k: 1000000 top_k: 1000000
temperature: 1.0 temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4 ppo_n_minibatches: 4
kl_ctl: 0.0 kl_ctl: 0.0
discount: 1.0 discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5 reward_output_scaling: 5
reward_output_bias: 0.0 reward_output_bias: 0.0
adv_norm: true adv_norm: true
value_norm: true
group_size: 16 group_size: 16
group_adv_norm: false group_adv_norm: false
external_configs:
cluster_config: # Cluster configuration
fileroot: "/storage/ray/experiments" ray_temp_path: /tmp/ray
envs: cluster:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1" fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -27,19 +27,8 @@ actor:
hysteresis: 2 hysteresis: 2
sglang: sglang:
mem_fraction_static: 0.8 mem_fraction_static: 0.8
disable_radix_cache: false
triton_attention_num_kv_splits: 16 triton_attention_num_kv_splits: 16
max_running_requests: 128
context_length: 18432
enable_metrics: True enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/Qwen2.5-7B'
init_critic_from_actor: true
optimizer:
lr: 5e-6
ref: ref:
type: type:
_class: qwen2 _class: qwen2
@ -47,15 +36,6 @@ ref:
actor_train: actor_train:
mb_spec: mb_spec:
max_tokens_per_mb: 19456 max_tokens_per_mb: 19456
critic_train:
mb_spec:
max_tokens_per_mb: 19456
actor_gen:
mb_spec:
max_tokens_per_mb: 19456
critic_inf:
mb_spec:
max_tokens_per_mb: 19456
actor_inf: actor_inf:
mb_spec: mb_spec:
max_tokens_per_mb: 19456 max_tokens_per_mb: 19456
@ -73,8 +53,6 @@ ppo:
top_p: 1.0 top_p: 1.0
top_k: 1000000 top_k: 1000000
temperature: 1.0 temperature: 1.0
force_no_logits_mask: true
use_cuda_graph: true
ppo_n_minibatches: 4 ppo_n_minibatches: 4
kl_ctl: 0.0 kl_ctl: 0.0
discount: 1.0 discount: 1.0
@ -83,11 +61,12 @@ ppo:
reward_output_scaling: 0.5 reward_output_scaling: 0.5
reward_output_bias: -1.0 reward_output_bias: -1.0
adv_norm: true adv_norm: true
value_norm: true
group_size: 64 group_size: 64
group_adv_norm: false group_adv_norm: false
external_configs:
cluster_config: # Cluster configuration
fileroot: "/storage/ray/experiments" ray_temp_path: /tmp/ray
envs: cluster:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1" fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -154,9 +154,12 @@ def log_swanlab_wandb_tensorboard(data, step=None, summary_writer=None):
_LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step) _LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step)
# swanlab # swanlab
import swanlab try:
import swanlab
swanlab.log(data, step=step) swanlab.log(data, step=step)
except (ModuleNotFoundError, ImportError):
pass
# wandb # wandb
import wandb import wandb

View File

@ -452,9 +452,6 @@ class SGLangGenerationBackend(ModelBackend, SGLangConfig):
high=60000, high=60000,
experiment_name=constants.experiment_name(), experiment_name=constants.experiment_name(),
trial_name=constants.trial_name(), trial_name=constants.trial_name(),
lockfile_root=os.path.join(
constants.get_cache_path(self.args), "ports"
),
), ),
group=constants.data_parallel_group(), group=constants.data_parallel_group(),
) )

View File

@ -170,4 +170,4 @@ def make(args: "BaseExperimentConfig", **kwargs) -> SchedulerClient:
return LocalSchedulerClient(args) return LocalSchedulerClient(args)
else: else:
raise NotImplementedError(f"Scheduler {mode} not found") raise NotImplementedError(f"Scheduler {args.mode} not found")

View File

@ -8,13 +8,17 @@ import subprocess
import time import time
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
import swanlab
import wandb import wandb
import realhf.api.core.system_api as config_pkg import realhf.api.core.system_api as config_pkg
from realhf.api.cli_args import BaseExperimentConfig from realhf.api.cli_args import BaseExperimentConfig
from realhf.base import constants, logging from realhf.base import constants, logging
try:
import swanlab
except (ModuleNotFoundError, ImportError):
swanlab = None
logger = logging.getLogger("AutomaticEvaluator", "colored") logger = logging.getLogger("AutomaticEvaluator", "colored")
@ -132,7 +136,8 @@ class EvaluationStep:
for k, v in d.items(): for k, v in d.items():
log_data[f"{data_name}_{k}"] = v log_data[f"{data_name}_{k}"] = v
wandb.log(log_data, step=self.global_step) wandb.log(log_data, step=self.global_step)
swanlab.log(log_data, step=self.global_step) if swanlab is not None:
swanlab.log(log_data, step=self.global_step)
self.status = EvaluationStepStatus.LOGGED self.status = EvaluationStepStatus.LOGGED
logger.info(f"Logging eval result {log_data} to step {self.global_step}") logger.info(f"Logging eval result {log_data} to step {self.global_step}")
@ -236,21 +241,19 @@ class AutomaticEvaluator:
def __lazy_swanlab_init(self): def __lazy_swanlab_init(self):
if self.__swanlab_config.api_key: if self.__swanlab_config.api_key:
swanlab.login(self.__swanlab_config.api_key) swanlab.login(self.__swanlab_config.api_key)
if self.swanlab_config.config is None: if self.__swanlab_config.config is None:
import yaml import yaml
with open( with open(
os.path.join( os.path.join(
constants.LOG_ROOT, constants.get_log_path(self.args),
constants.experiment_name(),
constants.trial_name(),
"config.yaml", "config.yaml",
), ),
"r", "r",
) as f: ) as f:
__config = yaml.safe_load(f) __config = yaml.safe_load(f)
else: else:
__config = self.swanlab_config.config __config = self.__swanlab_config.config
__config["FRAMEWORK"] = "AReaL" __config["FRAMEWORK"] = "AReaL"
swanlab.init( swanlab.init(
project=self.__swanlab_config.project or constants.experiment_name(), project=self.__swanlab_config.project or constants.experiment_name(),
@ -259,9 +262,7 @@ class AutomaticEvaluator:
config=__config, config=__config,
logdir=self.__swanlab_config.logdir logdir=self.__swanlab_config.logdir
or os.path.join( or os.path.join(
constants.LOG_ROOT, constants.get_log_path(self.args),
constants.experiment_name(),
constants.trial_name(),
"swanlab", "swanlab",
), ),
mode=self.__swanlab_config.mode, mode=self.__swanlab_config.mode,
@ -329,7 +330,7 @@ class AutomaticEvaluator:
if not self.__wandb_initialized: if not self.__wandb_initialized:
self.__lazy_wandb_init() self.__lazy_wandb_init()
self.__wandb_initialized = True self.__wandb_initialized = True
if not self.__swanlab_initialized: if not self.__swanlab_initialized and swanlab is not None:
self.__lazy_swanlab_init() self.__lazy_swanlab_init()
self.__swanlab_initialized = True self.__swanlab_initialized = True
self.__eval_steps[log_step].log(self.__config) self.__eval_steps[log_step].log(self.__config)

View File

@ -12,7 +12,6 @@ from typing import Dict
import colorama import colorama
import networkx as nx import networkx as nx
import numpy as np import numpy as np
import swanlab
import wandb import wandb
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
@ -38,6 +37,11 @@ from realhf.system.buffer import AsyncIOSequenceBuffer
from realhf.system.function_executor import FunctionExecutor from realhf.system.function_executor import FunctionExecutor
from realhf.system.model_function_call import RPCCorountineControl from realhf.system.model_function_call import RPCCorountineControl
try:
import swanlab
except (ModuleNotFoundError, ImportError):
swanlab = None
logger = logging.getLogger("master worker", "system") logger = logging.getLogger("master worker", "system")
blogger = logging.getLogger("benchmark") blogger = logging.getLogger("benchmark")
@ -307,15 +311,18 @@ class MasterWorker(worker_base.AsyncWorker):
# swanlab init, connect to remote or local swanlab host # swanlab init, connect to remote or local swanlab host
if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key: if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key:
swanlab.login(self.swanlab_config.api_key) if swanlab is not None:
swanlab.login(self.swanlab_config.api_key)
else:
logger.warning(
"swanlab not installed but enabled. Ignore swanlab logging."
)
if self.swanlab_config.config is None: if self.swanlab_config.config is None:
import yaml import yaml
with open( with open(
os.path.join( os.path.join(
constants.LOG_ROOT, constants.get_log_path(self.args),
constants.experiment_name(),
constants.trial_name(),
"config.yaml", "config.yaml",
), ),
"r", "r",
@ -324,20 +331,19 @@ class MasterWorker(worker_base.AsyncWorker):
else: else:
__config = self.swanlab_config.config __config = self.swanlab_config.config
__config["FRAMEWORK"] = "AReaL" __config["FRAMEWORK"] = "AReaL"
swanlab.init( if swanlab is not None:
project=self.swanlab_config.project or constants.experiment_name(), swanlab.init(
experiment_name=self.swanlab_config.name project=self.swanlab_config.project or constants.experiment_name(),
or f"{constants.trial_name()}_train", experiment_name=self.swanlab_config.name
config=__config, or f"{constants.trial_name()}_train",
logdir=self.swanlab_config.logdir config=__config,
or os.path.join( logdir=self.swanlab_config.logdir
constants.LOG_ROOT, or os.path.join(
constants.experiment_name(), constants.get_log_path(self.args),
constants.trial_name(), "swanlab",
"swanlab", ),
), mode=self.swanlab_config.mode,
mode=self.swanlab_config.mode, )
)
# tensorboard logging # tensorboard logging
self.__summary_writer = None self.__summary_writer = None
if self.tensorboard_config.path is not None: if self.tensorboard_config.path is not None:
@ -567,7 +573,8 @@ class MasterWorker(worker_base.AsyncWorker):
) )
wandb.finish() wandb.finish()
swanlab.finish() if swanlab is not None:
swanlab.finish()
if self.__summary_writer is not None: if self.__summary_writer is not None:
self.__summary_writer.close() self.__summary_writer.close()
gc.collect() gc.collect()

View File

@ -10,8 +10,6 @@ import uuid
from collections import defaultdict from collections import defaultdict
from typing import Dict, Hashable, List, Set, Tuple from typing import Dict, Hashable, List, Set, Tuple
import swanlab
import wandb
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
import realhf.api.core.config as config_api import realhf.api.core.config as config_api