mirror of https://github.com/inclusionAI/AReaL
parent
e3005d57f6
commit
adeb8eb13f
|
@ -29,12 +29,6 @@ actor:
|
|||
mem_fraction_static: 0.8
|
||||
triton_attention_num_kv_splits: 16
|
||||
enable_metrics: True
|
||||
critic:
|
||||
type:
|
||||
_class: qwen2
|
||||
is_critic: true
|
||||
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
|
||||
init_critic_from_actor: true
|
||||
ref:
|
||||
type:
|
||||
_class: qwen2
|
||||
|
@ -42,15 +36,6 @@ ref:
|
|||
actor_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_gen:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
|
@ -68,8 +53,6 @@ ppo:
|
|||
top_p: 1.0
|
||||
top_k: 1000000
|
||||
temperature: 1.0
|
||||
force_no_logits_mask: True
|
||||
use_cuda_graph: True
|
||||
ppo_n_minibatches: 4
|
||||
kl_ctl: 0.0
|
||||
discount: 1.0
|
||||
|
@ -78,11 +61,12 @@ ppo:
|
|||
reward_output_scaling: 5
|
||||
reward_output_bias: 0.0
|
||||
adv_norm: true
|
||||
value_norm: true
|
||||
group_size: 16
|
||||
group_adv_norm: false
|
||||
external_configs:
|
||||
cluster_config:
|
||||
fileroot: "/storage/ray/experiments"
|
||||
envs:
|
||||
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
|
||||
|
||||
# Cluster configuration
|
||||
ray_temp_path: /tmp/ray
|
||||
cluster:
|
||||
fileroot: /tmp/ray/experiments
|
||||
n_nodes: 32
|
||||
n_gpus_per_node: 8
|
|
@ -29,12 +29,6 @@ actor:
|
|||
mem_fraction_static: 0.8
|
||||
triton_attention_num_kv_splits: 16
|
||||
enable_metrics: True
|
||||
critic:
|
||||
type:
|
||||
_class: qwen2
|
||||
is_critic: true
|
||||
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
|
||||
init_critic_from_actor: true
|
||||
ref:
|
||||
type:
|
||||
_class: qwen2
|
||||
|
@ -42,15 +36,6 @@ ref:
|
|||
actor_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_gen:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
|
@ -68,8 +53,6 @@ ppo:
|
|||
top_p: 1.0
|
||||
top_k: 1000000
|
||||
temperature: 1.0
|
||||
force_no_logits_mask: True
|
||||
use_cuda_graph: True
|
||||
ppo_n_minibatches: 4
|
||||
kl_ctl: 0.0
|
||||
discount: 1.0
|
||||
|
@ -78,11 +61,12 @@ ppo:
|
|||
reward_output_scaling: 5
|
||||
reward_output_bias: 0.0
|
||||
adv_norm: true
|
||||
value_norm: true
|
||||
group_size: 16
|
||||
group_adv_norm: false
|
||||
external_configs:
|
||||
cluster_config:
|
||||
fileroot: "/storage/ray/experiments"
|
||||
envs:
|
||||
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
|
||||
|
||||
# Cluster configuration
|
||||
ray_temp_path: /tmp/ray
|
||||
cluster:
|
||||
fileroot: /tmp/ray/experiments
|
||||
n_nodes: 32
|
||||
n_gpus_per_node: 8
|
|
@ -29,12 +29,6 @@ actor:
|
|||
mem_fraction_static: 0.8
|
||||
triton_attention_num_kv_splits: 16
|
||||
enable_metrics: True
|
||||
critic:
|
||||
type:
|
||||
_class: qwen2
|
||||
is_critic: true
|
||||
path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
|
||||
init_critic_from_actor: true
|
||||
ref:
|
||||
type:
|
||||
_class: qwen2
|
||||
|
@ -42,15 +36,6 @@ ref:
|
|||
actor_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_gen:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
|
@ -68,8 +53,6 @@ ppo:
|
|||
top_p: 1.0
|
||||
top_k: 1000000
|
||||
temperature: 1.0
|
||||
force_no_logits_mask: True
|
||||
use_cuda_graph: True
|
||||
ppo_n_minibatches: 4
|
||||
kl_ctl: 0.0
|
||||
discount: 1.0
|
||||
|
@ -78,11 +61,12 @@ ppo:
|
|||
reward_output_scaling: 5
|
||||
reward_output_bias: 0.0
|
||||
adv_norm: true
|
||||
value_norm: true
|
||||
group_size: 16
|
||||
group_adv_norm: false
|
||||
external_configs:
|
||||
cluster_config:
|
||||
fileroot: "/storage/ray/experiments"
|
||||
envs:
|
||||
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
|
||||
|
||||
# Cluster configuration
|
||||
ray_temp_path: /tmp/ray
|
||||
cluster:
|
||||
fileroot: /tmp/ray/experiments
|
||||
n_nodes: 32
|
||||
n_gpus_per_node: 8
|
||||
|
|
|
@ -27,17 +27,8 @@ actor:
|
|||
hysteresis: 2
|
||||
sglang:
|
||||
mem_fraction_static: 0.8
|
||||
disable_radix_cache: false
|
||||
triton_attention_num_kv_splits: 16
|
||||
max_running_requests: 128
|
||||
context_length: 29696
|
||||
enable_metrics: True
|
||||
critic:
|
||||
type:
|
||||
_class: qwen2
|
||||
is_critic: true
|
||||
path: '/storage/models/DeepSeek-R1-Distill-Qwen-32B'
|
||||
init_critic_from_actor: true
|
||||
ref:
|
||||
type:
|
||||
_class: qwen2
|
||||
|
@ -45,15 +36,6 @@ ref:
|
|||
actor_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_gen:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
|
@ -71,8 +53,6 @@ ppo:
|
|||
top_p: 1.0
|
||||
top_k: 1000000
|
||||
temperature: 1.0
|
||||
force_no_logits_mask: True
|
||||
use_cuda_graph: True
|
||||
ppo_n_minibatches: 4
|
||||
kl_ctl: 0.0
|
||||
discount: 1.0
|
||||
|
@ -81,11 +61,12 @@ ppo:
|
|||
reward_output_scaling: 5
|
||||
reward_output_bias: 0.0
|
||||
adv_norm: true
|
||||
value_norm: true
|
||||
group_size: 32
|
||||
group_adv_norm: false
|
||||
external_configs:
|
||||
cluster_config:
|
||||
fileroot: "/storage/ray/experiments"
|
||||
envs:
|
||||
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
|
||||
|
||||
# Cluster configuration
|
||||
ray_temp_path: /tmp/ray
|
||||
cluster:
|
||||
fileroot: /tmp/ray/experiments
|
||||
n_nodes: 32
|
||||
n_gpus_per_node: 8
|
||||
|
|
|
@ -37,8 +37,10 @@ model:
|
|||
allocation:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 32768
|
||||
external_configs:
|
||||
cluster_config:
|
||||
fileroot: "/storage/ray/experiments"
|
||||
envs:
|
||||
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
|
||||
|
||||
# Cluster configuration
|
||||
ray_temp_path: /tmp/ray
|
||||
cluster:
|
||||
fileroot: /tmp/ray/experiments
|
||||
n_nodes: 32
|
||||
n_gpus_per_node: 8
|
||||
|
|
|
@ -29,12 +29,6 @@ actor:
|
|||
mem_fraction_static: 0.8
|
||||
triton_attention_num_kv_splits: 16
|
||||
enable_metrics: True
|
||||
critic:
|
||||
type:
|
||||
_class: qwen2
|
||||
is_critic: true
|
||||
path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
|
||||
init_critic_from_actor: true
|
||||
ref:
|
||||
type:
|
||||
_class: qwen2
|
||||
|
@ -42,15 +36,6 @@ ref:
|
|||
actor_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_gen:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
|
@ -68,8 +53,6 @@ ppo:
|
|||
top_p: 1.0
|
||||
top_k: 1000000
|
||||
temperature: 1.0
|
||||
force_no_logits_mask: True
|
||||
use_cuda_graph: True
|
||||
ppo_n_minibatches: 4
|
||||
kl_ctl: 0.0
|
||||
discount: 1.0
|
||||
|
@ -78,11 +61,12 @@ ppo:
|
|||
reward_output_scaling: 5
|
||||
reward_output_bias: 0.0
|
||||
adv_norm: true
|
||||
value_norm: true
|
||||
group_size: 16
|
||||
group_adv_norm: false
|
||||
external_configs:
|
||||
cluster_config:
|
||||
fileroot: "/storage/ray/experiments"
|
||||
envs:
|
||||
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
|
||||
|
||||
# Cluster configuration
|
||||
ray_temp_path: /tmp/ray
|
||||
cluster:
|
||||
fileroot: /tmp/ray/experiments
|
||||
n_nodes: 32
|
||||
n_gpus_per_node: 8
|
||||
|
|
|
@ -29,12 +29,6 @@ actor:
|
|||
mem_fraction_static: 0.8
|
||||
triton_attention_num_kv_splits: 16
|
||||
enable_metrics: True
|
||||
critic:
|
||||
type:
|
||||
_class: qwen2
|
||||
is_critic: true
|
||||
path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
|
||||
init_critic_from_actor: true
|
||||
ref:
|
||||
type:
|
||||
_class: qwen2
|
||||
|
@ -42,15 +36,6 @@ ref:
|
|||
actor_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_gen:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
critic_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
actor_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 30720
|
||||
|
@ -68,8 +53,6 @@ ppo:
|
|||
top_p: 1.0
|
||||
top_k: 1000000
|
||||
temperature: 1.0
|
||||
force_no_logits_mask: True
|
||||
use_cuda_graph: True
|
||||
ppo_n_minibatches: 4
|
||||
kl_ctl: 0.0
|
||||
discount: 1.0
|
||||
|
@ -78,11 +61,12 @@ ppo:
|
|||
reward_output_scaling: 5
|
||||
reward_output_bias: 0.0
|
||||
adv_norm: true
|
||||
value_norm: true
|
||||
group_size: 16
|
||||
group_adv_norm: false
|
||||
external_configs:
|
||||
cluster_config:
|
||||
fileroot: "/storage/ray/experiments"
|
||||
envs:
|
||||
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
|
||||
|
||||
# Cluster configuration
|
||||
ray_temp_path: /tmp/ray
|
||||
cluster:
|
||||
fileroot: /tmp/ray/experiments
|
||||
n_nodes: 32
|
||||
n_gpus_per_node: 8
|
||||
|
|
|
@ -27,19 +27,8 @@ actor:
|
|||
hysteresis: 2
|
||||
sglang:
|
||||
mem_fraction_static: 0.8
|
||||
disable_radix_cache: false
|
||||
triton_attention_num_kv_splits: 16
|
||||
max_running_requests: 128
|
||||
context_length: 18432
|
||||
enable_metrics: True
|
||||
critic:
|
||||
type:
|
||||
_class: qwen2
|
||||
is_critic: true
|
||||
path: '/storage/models/Qwen2.5-7B'
|
||||
init_critic_from_actor: true
|
||||
optimizer:
|
||||
lr: 5e-6
|
||||
ref:
|
||||
type:
|
||||
_class: qwen2
|
||||
|
@ -47,15 +36,6 @@ ref:
|
|||
actor_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 19456
|
||||
critic_train:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 19456
|
||||
actor_gen:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 19456
|
||||
critic_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 19456
|
||||
actor_inf:
|
||||
mb_spec:
|
||||
max_tokens_per_mb: 19456
|
||||
|
@ -73,8 +53,6 @@ ppo:
|
|||
top_p: 1.0
|
||||
top_k: 1000000
|
||||
temperature: 1.0
|
||||
force_no_logits_mask: true
|
||||
use_cuda_graph: true
|
||||
ppo_n_minibatches: 4
|
||||
kl_ctl: 0.0
|
||||
discount: 1.0
|
||||
|
@ -83,11 +61,12 @@ ppo:
|
|||
reward_output_scaling: 0.5
|
||||
reward_output_bias: -1.0
|
||||
adv_norm: true
|
||||
value_norm: true
|
||||
group_size: 64
|
||||
group_adv_norm: false
|
||||
external_configs:
|
||||
cluster_config:
|
||||
fileroot: "/storage/ray/experiments"
|
||||
envs:
|
||||
REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
|
||||
|
||||
# Cluster configuration
|
||||
ray_temp_path: /tmp/ray
|
||||
cluster:
|
||||
fileroot: /tmp/ray/experiments
|
||||
n_nodes: 32
|
||||
n_gpus_per_node: 8
|
||||
|
|
|
@ -154,9 +154,12 @@ def log_swanlab_wandb_tensorboard(data, step=None, summary_writer=None):
|
|||
_LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step)
|
||||
|
||||
# swanlab
|
||||
try:
|
||||
import swanlab
|
||||
|
||||
swanlab.log(data, step=step)
|
||||
except (ModuleNotFoundError, ImportError):
|
||||
pass
|
||||
|
||||
# wandb
|
||||
import wandb
|
||||
|
|
|
@ -452,9 +452,6 @@ class SGLangGenerationBackend(ModelBackend, SGLangConfig):
|
|||
high=60000,
|
||||
experiment_name=constants.experiment_name(),
|
||||
trial_name=constants.trial_name(),
|
||||
lockfile_root=os.path.join(
|
||||
constants.get_cache_path(self.args), "ports"
|
||||
),
|
||||
),
|
||||
group=constants.data_parallel_group(),
|
||||
)
|
||||
|
|
|
@ -170,4 +170,4 @@ def make(args: "BaseExperimentConfig", **kwargs) -> SchedulerClient:
|
|||
|
||||
return LocalSchedulerClient(args)
|
||||
else:
|
||||
raise NotImplementedError(f"Scheduler {mode} not found")
|
||||
raise NotImplementedError(f"Scheduler {args.mode} not found")
|
||||
|
|
|
@ -8,13 +8,17 @@ import subprocess
|
|||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import swanlab
|
||||
import wandb
|
||||
|
||||
import realhf.api.core.system_api as config_pkg
|
||||
from realhf.api.cli_args import BaseExperimentConfig
|
||||
from realhf.base import constants, logging
|
||||
|
||||
try:
|
||||
import swanlab
|
||||
except (ModuleNotFoundError, ImportError):
|
||||
swanlab = None
|
||||
|
||||
logger = logging.getLogger("AutomaticEvaluator", "colored")
|
||||
|
||||
|
||||
|
@ -132,6 +136,7 @@ class EvaluationStep:
|
|||
for k, v in d.items():
|
||||
log_data[f"{data_name}_{k}"] = v
|
||||
wandb.log(log_data, step=self.global_step)
|
||||
if swanlab is not None:
|
||||
swanlab.log(log_data, step=self.global_step)
|
||||
self.status = EvaluationStepStatus.LOGGED
|
||||
logger.info(f"Logging eval result {log_data} to step {self.global_step}")
|
||||
|
@ -236,21 +241,19 @@ class AutomaticEvaluator:
|
|||
def __lazy_swanlab_init(self):
|
||||
if self.__swanlab_config.api_key:
|
||||
swanlab.login(self.__swanlab_config.api_key)
|
||||
if self.swanlab_config.config is None:
|
||||
if self.__swanlab_config.config is None:
|
||||
import yaml
|
||||
|
||||
with open(
|
||||
os.path.join(
|
||||
constants.LOG_ROOT,
|
||||
constants.experiment_name(),
|
||||
constants.trial_name(),
|
||||
constants.get_log_path(self.args),
|
||||
"config.yaml",
|
||||
),
|
||||
"r",
|
||||
) as f:
|
||||
__config = yaml.safe_load(f)
|
||||
else:
|
||||
__config = self.swanlab_config.config
|
||||
__config = self.__swanlab_config.config
|
||||
__config["FRAMEWORK"] = "AReaL"
|
||||
swanlab.init(
|
||||
project=self.__swanlab_config.project or constants.experiment_name(),
|
||||
|
@ -259,9 +262,7 @@ class AutomaticEvaluator:
|
|||
config=__config,
|
||||
logdir=self.__swanlab_config.logdir
|
||||
or os.path.join(
|
||||
constants.LOG_ROOT,
|
||||
constants.experiment_name(),
|
||||
constants.trial_name(),
|
||||
constants.get_log_path(self.args),
|
||||
"swanlab",
|
||||
),
|
||||
mode=self.__swanlab_config.mode,
|
||||
|
@ -329,7 +330,7 @@ class AutomaticEvaluator:
|
|||
if not self.__wandb_initialized:
|
||||
self.__lazy_wandb_init()
|
||||
self.__wandb_initialized = True
|
||||
if not self.__swanlab_initialized:
|
||||
if not self.__swanlab_initialized and swanlab is not None:
|
||||
self.__lazy_swanlab_init()
|
||||
self.__swanlab_initialized = True
|
||||
self.__eval_steps[log_step].log(self.__config)
|
||||
|
|
|
@ -12,7 +12,6 @@ from typing import Dict
|
|||
import colorama
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
import swanlab
|
||||
import wandb
|
||||
from tensorboardX import SummaryWriter
|
||||
|
||||
|
@ -38,6 +37,11 @@ from realhf.system.buffer import AsyncIOSequenceBuffer
|
|||
from realhf.system.function_executor import FunctionExecutor
|
||||
from realhf.system.model_function_call import RPCCorountineControl
|
||||
|
||||
try:
|
||||
import swanlab
|
||||
except (ModuleNotFoundError, ImportError):
|
||||
swanlab = None
|
||||
|
||||
logger = logging.getLogger("master worker", "system")
|
||||
blogger = logging.getLogger("benchmark")
|
||||
|
||||
|
@ -307,15 +311,18 @@ class MasterWorker(worker_base.AsyncWorker):
|
|||
|
||||
# swanlab init, connect to remote or local swanlab host
|
||||
if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key:
|
||||
if swanlab is not None:
|
||||
swanlab.login(self.swanlab_config.api_key)
|
||||
else:
|
||||
logger.warning(
|
||||
"swanlab not installed but enabled. Ignore swanlab logging."
|
||||
)
|
||||
if self.swanlab_config.config is None:
|
||||
import yaml
|
||||
|
||||
with open(
|
||||
os.path.join(
|
||||
constants.LOG_ROOT,
|
||||
constants.experiment_name(),
|
||||
constants.trial_name(),
|
||||
constants.get_log_path(self.args),
|
||||
"config.yaml",
|
||||
),
|
||||
"r",
|
||||
|
@ -324,6 +331,7 @@ class MasterWorker(worker_base.AsyncWorker):
|
|||
else:
|
||||
__config = self.swanlab_config.config
|
||||
__config["FRAMEWORK"] = "AReaL"
|
||||
if swanlab is not None:
|
||||
swanlab.init(
|
||||
project=self.swanlab_config.project or constants.experiment_name(),
|
||||
experiment_name=self.swanlab_config.name
|
||||
|
@ -331,9 +339,7 @@ class MasterWorker(worker_base.AsyncWorker):
|
|||
config=__config,
|
||||
logdir=self.swanlab_config.logdir
|
||||
or os.path.join(
|
||||
constants.LOG_ROOT,
|
||||
constants.experiment_name(),
|
||||
constants.trial_name(),
|
||||
constants.get_log_path(self.args),
|
||||
"swanlab",
|
||||
),
|
||||
mode=self.swanlab_config.mode,
|
||||
|
@ -567,6 +573,7 @@ class MasterWorker(worker_base.AsyncWorker):
|
|||
)
|
||||
|
||||
wandb.finish()
|
||||
if swanlab is not None:
|
||||
swanlab.finish()
|
||||
if self.__summary_writer is not None:
|
||||
self.__summary_writer.close()
|
||||
|
|
|
@ -10,8 +10,6 @@ import uuid
|
|||
from collections import defaultdict
|
||||
from typing import Dict, Hashable, List, Set, Tuple
|
||||
|
||||
import swanlab
|
||||
import wandb
|
||||
from tensorboardX import SummaryWriter
|
||||
|
||||
import realhf.api.core.config as config_api
|
||||
|
|
Loading…
Reference in New Issue