[Fix] Fix yaml configurations for v0.2 experiments. (#129)

* .

* fix
This commit is contained in:
Wei Fu 2025-06-24 13:48:02 +08:00 committed by GitHub
parent e3005d57f6
commit adeb8eb13f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 101 additions and 213 deletions

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16
enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
init_critic_from_actor: true
ref:
type:
_class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train:
mb_spec:
max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf:
mb_spec:
max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0
top_k: 1000000
temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4
kl_ctl: 0.0
discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5
reward_output_bias: 0.0
adv_norm: true
value_norm: true
group_size: 16
group_adv_norm: false
external_configs:
cluster_config:
fileroot: "/storage/ray/experiments"
envs:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16
enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
init_critic_from_actor: true
ref:
type:
_class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train:
mb_spec:
max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf:
mb_spec:
max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0
top_k: 1000000
temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4
kl_ctl: 0.0
discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5
reward_output_bias: 0.0
adv_norm: true
value_norm: true
group_size: 16
group_adv_norm: false
external_configs:
cluster_config:
fileroot: "/storage/ray/experiments"
envs:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16
enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
init_critic_from_actor: true
ref:
type:
_class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train:
mb_spec:
max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf:
mb_spec:
max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0
top_k: 1000000
temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4
kl_ctl: 0.0
discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5
reward_output_bias: 0.0
adv_norm: true
value_norm: true
group_size: 16
group_adv_norm: false
external_configs:
cluster_config:
fileroot: "/storage/ray/experiments"
envs:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -27,17 +27,8 @@ actor:
hysteresis: 2
sglang:
mem_fraction_static: 0.8
disable_radix_cache: false
triton_attention_num_kv_splits: 16
max_running_requests: 128
context_length: 29696
enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-32B'
init_critic_from_actor: true
ref:
type:
_class: qwen2
@ -45,15 +36,6 @@ ref:
actor_train:
mb_spec:
max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf:
mb_spec:
max_tokens_per_mb: 30720
@ -71,8 +53,6 @@ ppo:
top_p: 1.0
top_k: 1000000
temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4
kl_ctl: 0.0
discount: 1.0
@ -81,11 +61,12 @@ ppo:
reward_output_scaling: 5
reward_output_bias: 0.0
adv_norm: true
value_norm: true
group_size: 32
group_adv_norm: false
external_configs:
cluster_config:
fileroot: "/storage/ray/experiments"
envs:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -37,8 +37,10 @@ model:
allocation:
mb_spec:
max_tokens_per_mb: 32768
external_configs:
cluster_config:
fileroot: "/storage/ray/experiments"
envs:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16
enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
init_critic_from_actor: true
ref:
type:
_class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train:
mb_spec:
max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf:
mb_spec:
max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0
top_k: 1000000
temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4
kl_ctl: 0.0
discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5
reward_output_bias: 0.0
adv_norm: true
value_norm: true
group_size: 16
group_adv_norm: false
external_configs:
cluster_config:
fileroot: "/storage/ray/experiments"
envs:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -29,12 +29,6 @@ actor:
mem_fraction_static: 0.8
triton_attention_num_kv_splits: 16
enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
init_critic_from_actor: true
ref:
type:
_class: qwen2
@ -42,15 +36,6 @@ ref:
actor_train:
mb_spec:
max_tokens_per_mb: 30720
critic_train:
mb_spec:
max_tokens_per_mb: 30720
actor_gen:
mb_spec:
max_tokens_per_mb: 30720
critic_inf:
mb_spec:
max_tokens_per_mb: 30720
actor_inf:
mb_spec:
max_tokens_per_mb: 30720
@ -68,8 +53,6 @@ ppo:
top_p: 1.0
top_k: 1000000
temperature: 1.0
force_no_logits_mask: True
use_cuda_graph: True
ppo_n_minibatches: 4
kl_ctl: 0.0
discount: 1.0
@ -78,11 +61,12 @@ ppo:
reward_output_scaling: 5
reward_output_bias: 0.0
adv_norm: true
value_norm: true
group_size: 16
group_adv_norm: false
external_configs:
cluster_config:
fileroot: "/storage/ray/experiments"
envs:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -27,19 +27,8 @@ actor:
hysteresis: 2
sglang:
mem_fraction_static: 0.8
disable_radix_cache: false
triton_attention_num_kv_splits: 16
max_running_requests: 128
context_length: 18432
enable_metrics: True
critic:
type:
_class: qwen2
is_critic: true
path: '/storage/models/Qwen2.5-7B'
init_critic_from_actor: true
optimizer:
lr: 5e-6
ref:
type:
_class: qwen2
@ -47,15 +36,6 @@ ref:
actor_train:
mb_spec:
max_tokens_per_mb: 19456
critic_train:
mb_spec:
max_tokens_per_mb: 19456
actor_gen:
mb_spec:
max_tokens_per_mb: 19456
critic_inf:
mb_spec:
max_tokens_per_mb: 19456
actor_inf:
mb_spec:
max_tokens_per_mb: 19456
@ -73,8 +53,6 @@ ppo:
top_p: 1.0
top_k: 1000000
temperature: 1.0
force_no_logits_mask: true
use_cuda_graph: true
ppo_n_minibatches: 4
kl_ctl: 0.0
discount: 1.0
@ -83,11 +61,12 @@ ppo:
reward_output_scaling: 0.5
reward_output_bias: -1.0
adv_norm: true
value_norm: true
group_size: 64
group_adv_norm: false
external_configs:
cluster_config:
fileroot: "/storage/ray/experiments"
envs:
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
# Cluster configuration
ray_temp_path: /tmp/ray
cluster:
fileroot: /tmp/ray/experiments
n_nodes: 32
n_gpus_per_node: 8

View File

@ -154,9 +154,12 @@ def log_swanlab_wandb_tensorboard(data, step=None, summary_writer=None):
_LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step)
# swanlab
import swanlab
try:
import swanlab
swanlab.log(data, step=step)
swanlab.log(data, step=step)
except (ModuleNotFoundError, ImportError):
pass
# wandb
import wandb

View File

@ -452,9 +452,6 @@ class SGLangGenerationBackend(ModelBackend, SGLangConfig):
high=60000,
experiment_name=constants.experiment_name(),
trial_name=constants.trial_name(),
lockfile_root=os.path.join(
constants.get_cache_path(self.args), "ports"
),
),
group=constants.data_parallel_group(),
)

View File

@ -170,4 +170,4 @@ def make(args: "BaseExperimentConfig", **kwargs) -> SchedulerClient:
return LocalSchedulerClient(args)
else:
raise NotImplementedError(f"Scheduler {mode} not found")
raise NotImplementedError(f"Scheduler {args.mode} not found")

View File

@ -8,13 +8,17 @@ import subprocess
import time
from typing import Any, Dict, Optional
import swanlab
import wandb
import realhf.api.core.system_api as config_pkg
from realhf.api.cli_args import BaseExperimentConfig
from realhf.base import constants, logging
try:
import swanlab
except (ModuleNotFoundError, ImportError):
swanlab = None
logger = logging.getLogger("AutomaticEvaluator", "colored")
@ -132,7 +136,8 @@ class EvaluationStep:
for k, v in d.items():
log_data[f"{data_name}_{k}"] = v
wandb.log(log_data, step=self.global_step)
swanlab.log(log_data, step=self.global_step)
if swanlab is not None:
swanlab.log(log_data, step=self.global_step)
self.status = EvaluationStepStatus.LOGGED
logger.info(f"Logging eval result {log_data} to step {self.global_step}")
@ -236,21 +241,19 @@ class AutomaticEvaluator:
def __lazy_swanlab_init(self):
if self.__swanlab_config.api_key:
swanlab.login(self.__swanlab_config.api_key)
if self.swanlab_config.config is None:
if self.__swanlab_config.config is None:
import yaml
with open(
os.path.join(
constants.LOG_ROOT,
constants.experiment_name(),
constants.trial_name(),
constants.get_log_path(self.args),
"config.yaml",
),
"r",
) as f:
__config = yaml.safe_load(f)
else:
__config = self.swanlab_config.config
__config = self.__swanlab_config.config
__config["FRAMEWORK"] = "AReaL"
swanlab.init(
project=self.__swanlab_config.project or constants.experiment_name(),
@ -259,9 +262,7 @@ class AutomaticEvaluator:
config=__config,
logdir=self.__swanlab_config.logdir
or os.path.join(
constants.LOG_ROOT,
constants.experiment_name(),
constants.trial_name(),
constants.get_log_path(self.args),
"swanlab",
),
mode=self.__swanlab_config.mode,
@ -329,7 +330,7 @@ class AutomaticEvaluator:
if not self.__wandb_initialized:
self.__lazy_wandb_init()
self.__wandb_initialized = True
if not self.__swanlab_initialized:
if not self.__swanlab_initialized and swanlab is not None:
self.__lazy_swanlab_init()
self.__swanlab_initialized = True
self.__eval_steps[log_step].log(self.__config)

View File

@ -12,7 +12,6 @@ from typing import Dict
import colorama
import networkx as nx
import numpy as np
import swanlab
import wandb
from tensorboardX import SummaryWriter
@ -38,6 +37,11 @@ from realhf.system.buffer import AsyncIOSequenceBuffer
from realhf.system.function_executor import FunctionExecutor
from realhf.system.model_function_call import RPCCorountineControl
try:
import swanlab
except (ModuleNotFoundError, ImportError):
swanlab = None
logger = logging.getLogger("master worker", "system")
blogger = logging.getLogger("benchmark")
@ -307,15 +311,18 @@ class MasterWorker(worker_base.AsyncWorker):
# swanlab init, connect to remote or local swanlab host
if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key:
swanlab.login(self.swanlab_config.api_key)
if swanlab is not None:
swanlab.login(self.swanlab_config.api_key)
else:
logger.warning(
"swanlab not installed but enabled. Ignore swanlab logging."
)
if self.swanlab_config.config is None:
import yaml
with open(
os.path.join(
constants.LOG_ROOT,
constants.experiment_name(),
constants.trial_name(),
constants.get_log_path(self.args),
"config.yaml",
),
"r",
@ -324,20 +331,19 @@ class MasterWorker(worker_base.AsyncWorker):
else:
__config = self.swanlab_config.config
__config["FRAMEWORK"] = "AReaL"
swanlab.init(
project=self.swanlab_config.project or constants.experiment_name(),
experiment_name=self.swanlab_config.name
or f"{constants.trial_name()}_train",
config=__config,
logdir=self.swanlab_config.logdir
or os.path.join(
constants.LOG_ROOT,
constants.experiment_name(),
constants.trial_name(),
"swanlab",
),
mode=self.swanlab_config.mode,
)
if swanlab is not None:
swanlab.init(
project=self.swanlab_config.project or constants.experiment_name(),
experiment_name=self.swanlab_config.name
or f"{constants.trial_name()}_train",
config=__config,
logdir=self.swanlab_config.logdir
or os.path.join(
constants.get_log_path(self.args),
"swanlab",
),
mode=self.swanlab_config.mode,
)
# tensorboard logging
self.__summary_writer = None
if self.tensorboard_config.path is not None:
@ -567,7 +573,8 @@ class MasterWorker(worker_base.AsyncWorker):
)
wandb.finish()
swanlab.finish()
if swanlab is not None:
swanlab.finish()
if self.__summary_writer is not None:
self.__summary_writer.close()
gc.collect()

View File

@ -10,8 +10,6 @@ import uuid
from collections import defaultdict
from typing import Dict, Hashable, List, Set, Tuple
import swanlab
import wandb
from tensorboardX import SummaryWriter
import realhf.api.core.config as config_api