diff --git a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-128.yaml b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-128.yaml index 254716f..dd422b4 100644 --- a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-128.yaml +++ b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-128.yaml @@ -29,12 +29,6 @@ actor: mem_fraction_static: 0.8 triton_attention_num_kv_splits: 16 enable_metrics: True -critic: - type: - _class: qwen2 - is_critic: true - path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B' - init_critic_from_actor: true ref: type: _class: qwen2 @@ -42,15 +36,6 @@ ref: actor_train: mb_spec: max_tokens_per_mb: 30720 -critic_train: - mb_spec: - max_tokens_per_mb: 30720 -actor_gen: - mb_spec: - max_tokens_per_mb: 30720 -critic_inf: - mb_spec: - max_tokens_per_mb: 30720 actor_inf: mb_spec: max_tokens_per_mb: 30720 @@ -68,8 +53,6 @@ ppo: top_p: 1.0 top_k: 1000000 temperature: 1.0 - force_no_logits_mask: True - use_cuda_graph: True ppo_n_minibatches: 4 kl_ctl: 0.0 discount: 1.0 @@ -78,11 +61,12 @@ ppo: reward_output_scaling: 5 reward_output_bias: 0.0 adv_norm: true - value_norm: true group_size: 16 group_adv_norm: false -external_configs: - cluster_config: - fileroot: "/storage/ray/experiments" - envs: - REAL_GPU_MEMORY_KILL_THRESHOLD: "1" + +# Cluster configuration +ray_temp_path: /tmp/ray +cluster: + fileroot: /tmp/ray/experiments + n_nodes: 32 + n_gpus_per_node: 8 \ No newline at end of file diff --git a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-32.yaml b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-32.yaml index cc0b8dd..ec101b4 100644 --- a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-32.yaml +++ b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-32.yaml @@ -29,12 +29,6 @@ actor: mem_fraction_static: 0.8 triton_attention_num_kv_splits: 16 enable_metrics: True -critic: - type: - _class: qwen2 - is_critic: true - path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B' - init_critic_from_actor: true ref: type: _class: qwen2 @@ -42,15 +36,6 @@ ref: actor_train: mb_spec: max_tokens_per_mb: 30720 -critic_train: - mb_spec: - max_tokens_per_mb: 30720 -actor_gen: - mb_spec: - max_tokens_per_mb: 30720 -critic_inf: - mb_spec: - max_tokens_per_mb: 30720 actor_inf: mb_spec: max_tokens_per_mb: 30720 @@ -68,8 +53,6 @@ ppo: top_p: 1.0 top_k: 1000000 temperature: 1.0 - force_no_logits_mask: True - use_cuda_graph: True ppo_n_minibatches: 4 kl_ctl: 0.0 discount: 1.0 @@ -78,11 +61,12 @@ ppo: reward_output_scaling: 5 reward_output_bias: 0.0 adv_norm: true - value_norm: true group_size: 16 group_adv_norm: false -external_configs: - cluster_config: - fileroot: "/storage/ray/experiments" - envs: - REAL_GPU_MEMORY_KILL_THRESHOLD: "1" + +# Cluster configuration +ray_temp_path: /tmp/ray +cluster: + fileroot: /tmp/ray/experiments + n_nodes: 32 + n_gpus_per_node: 8 \ No newline at end of file diff --git a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-8.yaml b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-8.yaml index 4ffa1ce..680c443 100644 --- a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-8.yaml +++ b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-8.yaml @@ -29,12 +29,6 @@ actor: mem_fraction_static: 0.8 triton_attention_num_kv_splits: 16 enable_metrics: True -critic: - type: - _class: qwen2 - is_critic: true - path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B' - init_critic_from_actor: true ref: type: _class: qwen2 @@ -42,15 +36,6 @@ ref: actor_train: mb_spec: max_tokens_per_mb: 30720 -critic_train: - mb_spec: - max_tokens_per_mb: 30720 -actor_gen: - mb_spec: - max_tokens_per_mb: 30720 -critic_inf: - mb_spec: - max_tokens_per_mb: 30720 actor_inf: mb_spec: max_tokens_per_mb: 30720 @@ -68,8 +53,6 @@ ppo: top_p: 1.0 top_k: 1000000 temperature: 1.0 - force_no_logits_mask: True - use_cuda_graph: True ppo_n_minibatches: 4 kl_ctl: 0.0 discount: 1.0 @@ -78,11 +61,12 @@ ppo: reward_output_scaling: 5 reward_output_bias: 0.0 adv_norm: true - value_norm: true group_size: 16 group_adv_norm: false -external_configs: - cluster_config: - fileroot: "/storage/ray/experiments" - envs: - REAL_GPU_MEMORY_KILL_THRESHOLD: "1" + +# Cluster configuration +ray_temp_path: /tmp/ray +cluster: + fileroot: /tmp/ray/experiments + n_nodes: 32 + n_gpus_per_node: 8 diff --git a/examples/configs/v0.2-qwen2-math/32B-distill/ppo-32B-distill-gpus-128.yaml b/examples/configs/v0.2-qwen2-math/32B-distill/ppo-32B-distill-gpus-128.yaml index 0da7d8c..f6cf41b 100644 --- a/examples/configs/v0.2-qwen2-math/32B-distill/ppo-32B-distill-gpus-128.yaml +++ b/examples/configs/v0.2-qwen2-math/32B-distill/ppo-32B-distill-gpus-128.yaml @@ -27,17 +27,8 @@ actor: hysteresis: 2 sglang: mem_fraction_static: 0.8 - disable_radix_cache: false triton_attention_num_kv_splits: 16 - max_running_requests: 128 - context_length: 29696 enable_metrics: True -critic: - type: - _class: qwen2 - is_critic: true - path: '/storage/models/DeepSeek-R1-Distill-Qwen-32B' - init_critic_from_actor: true ref: type: _class: qwen2 @@ -45,15 +36,6 @@ ref: actor_train: mb_spec: max_tokens_per_mb: 30720 -critic_train: - mb_spec: - max_tokens_per_mb: 30720 -actor_gen: - mb_spec: - max_tokens_per_mb: 30720 -critic_inf: - mb_spec: - max_tokens_per_mb: 30720 actor_inf: mb_spec: max_tokens_per_mb: 30720 @@ -71,8 +53,6 @@ ppo: top_p: 1.0 top_k: 1000000 temperature: 1.0 - force_no_logits_mask: True - use_cuda_graph: True ppo_n_minibatches: 4 kl_ctl: 0.0 discount: 1.0 @@ -81,11 +61,12 @@ ppo: reward_output_scaling: 5 reward_output_bias: 0.0 adv_norm: true - value_norm: true group_size: 32 group_adv_norm: false -external_configs: - cluster_config: - fileroot: "/storage/ray/experiments" - envs: - REAL_GPU_MEMORY_KILL_THRESHOLD: "1" + +# Cluster configuration +ray_temp_path: /tmp/ray +cluster: + fileroot: /tmp/ray/experiments + n_nodes: 32 + n_gpus_per_node: 8 diff --git a/examples/configs/v0.2-qwen2-math/32B-distill/sft-32B-distill-gpus-128.yaml b/examples/configs/v0.2-qwen2-math/32B-distill/sft-32B-distill-gpus-128.yaml index a8d7ef5..b4c19cf 100644 --- a/examples/configs/v0.2-qwen2-math/32B-distill/sft-32B-distill-gpus-128.yaml +++ b/examples/configs/v0.2-qwen2-math/32B-distill/sft-32B-distill-gpus-128.yaml @@ -37,8 +37,10 @@ model: allocation: mb_spec: max_tokens_per_mb: 32768 -external_configs: - cluster_config: - fileroot: "/storage/ray/experiments" - envs: - REAL_GPU_MEMORY_KILL_THRESHOLD: "1" + +# Cluster configuration +ray_temp_path: /tmp/ray +cluster: + fileroot: /tmp/ray/experiments + n_nodes: 32 + n_gpus_per_node: 8 diff --git a/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-128.yaml b/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-128.yaml index e521f80..d930d35 100644 --- a/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-128.yaml +++ b/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-128.yaml @@ -29,12 +29,6 @@ actor: mem_fraction_static: 0.8 triton_attention_num_kv_splits: 16 enable_metrics: True -critic: - type: - _class: qwen2 - is_critic: true - path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B' - init_critic_from_actor: true ref: type: _class: qwen2 @@ -42,15 +36,6 @@ ref: actor_train: mb_spec: max_tokens_per_mb: 30720 -critic_train: - mb_spec: - max_tokens_per_mb: 30720 -actor_gen: - mb_spec: - max_tokens_per_mb: 30720 -critic_inf: - mb_spec: - max_tokens_per_mb: 30720 actor_inf: mb_spec: max_tokens_per_mb: 30720 @@ -68,8 +53,6 @@ ppo: top_p: 1.0 top_k: 1000000 temperature: 1.0 - force_no_logits_mask: True - use_cuda_graph: True ppo_n_minibatches: 4 kl_ctl: 0.0 discount: 1.0 @@ -78,11 +61,12 @@ ppo: reward_output_scaling: 5 reward_output_bias: 0.0 adv_norm: true - value_norm: true group_size: 16 group_adv_norm: false -external_configs: - cluster_config: - fileroot: "/storage/ray/experiments" - envs: - REAL_GPU_MEMORY_KILL_THRESHOLD: "1" + +# Cluster configuration +ray_temp_path: /tmp/ray +cluster: + fileroot: /tmp/ray/experiments + n_nodes: 32 + n_gpus_per_node: 8 diff --git a/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-32.yaml b/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-32.yaml index 9218181..74a13cc 100644 --- a/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-32.yaml +++ b/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-32.yaml @@ -29,12 +29,6 @@ actor: mem_fraction_static: 0.8 triton_attention_num_kv_splits: 16 enable_metrics: True -critic: - type: - _class: qwen2 - is_critic: true - path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B' - init_critic_from_actor: true ref: type: _class: qwen2 @@ -42,15 +36,6 @@ ref: actor_train: mb_spec: max_tokens_per_mb: 30720 -critic_train: - mb_spec: - max_tokens_per_mb: 30720 -actor_gen: - mb_spec: - max_tokens_per_mb: 30720 -critic_inf: - mb_spec: - max_tokens_per_mb: 30720 actor_inf: mb_spec: max_tokens_per_mb: 30720 @@ -68,8 +53,6 @@ ppo: top_p: 1.0 top_k: 1000000 temperature: 1.0 - force_no_logits_mask: True - use_cuda_graph: True ppo_n_minibatches: 4 kl_ctl: 0.0 discount: 1.0 @@ -78,11 +61,12 @@ ppo: reward_output_scaling: 5 reward_output_bias: 0.0 adv_norm: true - value_norm: true group_size: 16 group_adv_norm: false -external_configs: - cluster_config: - fileroot: "/storage/ray/experiments" - envs: - REAL_GPU_MEMORY_KILL_THRESHOLD: "1" + +# Cluster configuration +ray_temp_path: /tmp/ray +cluster: + fileroot: /tmp/ray/experiments + n_nodes: 32 + n_gpus_per_node: 8 diff --git a/examples/configs/v0.2-qwen2-math/7B-zero/ppo-7B-zero-gpus-128.yaml b/examples/configs/v0.2-qwen2-math/7B-zero/ppo-7B-zero-gpus-128.yaml index a93e1c6..73c6824 100644 --- a/examples/configs/v0.2-qwen2-math/7B-zero/ppo-7B-zero-gpus-128.yaml +++ b/examples/configs/v0.2-qwen2-math/7B-zero/ppo-7B-zero-gpus-128.yaml @@ -27,19 +27,8 @@ actor: hysteresis: 2 sglang: mem_fraction_static: 0.8 - disable_radix_cache: false triton_attention_num_kv_splits: 16 - max_running_requests: 128 - context_length: 18432 enable_metrics: True -critic: - type: - _class: qwen2 - is_critic: true - path: '/storage/models/Qwen2.5-7B' - init_critic_from_actor: true - optimizer: - lr: 5e-6 ref: type: _class: qwen2 @@ -47,15 +36,6 @@ ref: actor_train: mb_spec: max_tokens_per_mb: 19456 -critic_train: - mb_spec: - max_tokens_per_mb: 19456 -actor_gen: - mb_spec: - max_tokens_per_mb: 19456 -critic_inf: - mb_spec: - max_tokens_per_mb: 19456 actor_inf: mb_spec: max_tokens_per_mb: 19456 @@ -73,8 +53,6 @@ ppo: top_p: 1.0 top_k: 1000000 temperature: 1.0 - force_no_logits_mask: true - use_cuda_graph: true ppo_n_minibatches: 4 kl_ctl: 0.0 discount: 1.0 @@ -83,11 +61,12 @@ ppo: reward_output_scaling: 0.5 reward_output_bias: -1.0 adv_norm: true - value_norm: true group_size: 64 group_adv_norm: false -external_configs: - cluster_config: - fileroot: "/storage/ray/experiments" - envs: - REAL_GPU_MEMORY_KILL_THRESHOLD: "1" + +# Cluster configuration +ray_temp_path: /tmp/ray +cluster: + fileroot: /tmp/ray/experiments + n_nodes: 32 + n_gpus_per_node: 8 diff --git a/realhf/base/logging.py b/realhf/base/logging.py index 4f21e47..8ae8c06 100644 --- a/realhf/base/logging.py +++ b/realhf/base/logging.py @@ -154,9 +154,12 @@ def log_swanlab_wandb_tensorboard(data, step=None, summary_writer=None): _LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step) # swanlab - import swanlab + try: + import swanlab - swanlab.log(data, step=step) + swanlab.log(data, step=step) + except (ModuleNotFoundError, ImportError): + pass # wandb import wandb diff --git a/realhf/impl/model/backend/sglang.py b/realhf/impl/model/backend/sglang.py index 55ff9cb..c0ef278 100644 --- a/realhf/impl/model/backend/sglang.py +++ b/realhf/impl/model/backend/sglang.py @@ -452,9 +452,6 @@ class SGLangGenerationBackend(ModelBackend, SGLangConfig): high=60000, experiment_name=constants.experiment_name(), trial_name=constants.trial_name(), - lockfile_root=os.path.join( - constants.get_cache_path(self.args), "ports" - ), ), group=constants.data_parallel_group(), ) diff --git a/realhf/scheduler/client.py b/realhf/scheduler/client.py index fb2f20e..71cfdf8 100644 --- a/realhf/scheduler/client.py +++ b/realhf/scheduler/client.py @@ -170,4 +170,4 @@ def make(args: "BaseExperimentConfig", **kwargs) -> SchedulerClient: return LocalSchedulerClient(args) else: - raise NotImplementedError(f"Scheduler {mode} not found") + raise NotImplementedError(f"Scheduler {args.mode} not found") diff --git a/realhf/scheduler/evaluator.py b/realhf/scheduler/evaluator.py index 597a75b..c12864d 100644 --- a/realhf/scheduler/evaluator.py +++ b/realhf/scheduler/evaluator.py @@ -8,13 +8,17 @@ import subprocess import time from typing import Any, Dict, Optional -import swanlab import wandb import realhf.api.core.system_api as config_pkg from realhf.api.cli_args import BaseExperimentConfig from realhf.base import constants, logging +try: + import swanlab +except (ModuleNotFoundError, ImportError): + swanlab = None + logger = logging.getLogger("AutomaticEvaluator", "colored") @@ -132,7 +136,8 @@ class EvaluationStep: for k, v in d.items(): log_data[f"{data_name}_{k}"] = v wandb.log(log_data, step=self.global_step) - swanlab.log(log_data, step=self.global_step) + if swanlab is not None: + swanlab.log(log_data, step=self.global_step) self.status = EvaluationStepStatus.LOGGED logger.info(f"Logging eval result {log_data} to step {self.global_step}") @@ -236,21 +241,19 @@ class AutomaticEvaluator: def __lazy_swanlab_init(self): if self.__swanlab_config.api_key: swanlab.login(self.__swanlab_config.api_key) - if self.swanlab_config.config is None: + if self.__swanlab_config.config is None: import yaml with open( os.path.join( - constants.LOG_ROOT, - constants.experiment_name(), - constants.trial_name(), + constants.get_log_path(self.args), "config.yaml", ), "r", ) as f: __config = yaml.safe_load(f) else: - __config = self.swanlab_config.config + __config = self.__swanlab_config.config __config["FRAMEWORK"] = "AReaL" swanlab.init( project=self.__swanlab_config.project or constants.experiment_name(), @@ -259,9 +262,7 @@ class AutomaticEvaluator: config=__config, logdir=self.__swanlab_config.logdir or os.path.join( - constants.LOG_ROOT, - constants.experiment_name(), - constants.trial_name(), + constants.get_log_path(self.args), "swanlab", ), mode=self.__swanlab_config.mode, @@ -329,7 +330,7 @@ class AutomaticEvaluator: if not self.__wandb_initialized: self.__lazy_wandb_init() self.__wandb_initialized = True - if not self.__swanlab_initialized: + if not self.__swanlab_initialized and swanlab is not None: self.__lazy_swanlab_init() self.__swanlab_initialized = True self.__eval_steps[log_step].log(self.__config) diff --git a/realhf/system/master_worker.py b/realhf/system/master_worker.py index cabed4c..20c7793 100644 --- a/realhf/system/master_worker.py +++ b/realhf/system/master_worker.py @@ -12,7 +12,6 @@ from typing import Dict import colorama import networkx as nx import numpy as np -import swanlab import wandb from tensorboardX import SummaryWriter @@ -38,6 +37,11 @@ from realhf.system.buffer import AsyncIOSequenceBuffer from realhf.system.function_executor import FunctionExecutor from realhf.system.model_function_call import RPCCorountineControl +try: + import swanlab +except (ModuleNotFoundError, ImportError): + swanlab = None + logger = logging.getLogger("master worker", "system") blogger = logging.getLogger("benchmark") @@ -307,15 +311,18 @@ class MasterWorker(worker_base.AsyncWorker): # swanlab init, connect to remote or local swanlab host if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key: - swanlab.login(self.swanlab_config.api_key) + if swanlab is not None: + swanlab.login(self.swanlab_config.api_key) + else: + logger.warning( + "swanlab not installed but enabled. Ignore swanlab logging." + ) if self.swanlab_config.config is None: import yaml with open( os.path.join( - constants.LOG_ROOT, - constants.experiment_name(), - constants.trial_name(), + constants.get_log_path(self.args), "config.yaml", ), "r", @@ -324,20 +331,19 @@ class MasterWorker(worker_base.AsyncWorker): else: __config = self.swanlab_config.config __config["FRAMEWORK"] = "AReaL" - swanlab.init( - project=self.swanlab_config.project or constants.experiment_name(), - experiment_name=self.swanlab_config.name - or f"{constants.trial_name()}_train", - config=__config, - logdir=self.swanlab_config.logdir - or os.path.join( - constants.LOG_ROOT, - constants.experiment_name(), - constants.trial_name(), - "swanlab", - ), - mode=self.swanlab_config.mode, - ) + if swanlab is not None: + swanlab.init( + project=self.swanlab_config.project or constants.experiment_name(), + experiment_name=self.swanlab_config.name + or f"{constants.trial_name()}_train", + config=__config, + logdir=self.swanlab_config.logdir + or os.path.join( + constants.get_log_path(self.args), + "swanlab", + ), + mode=self.swanlab_config.mode, + ) # tensorboard logging self.__summary_writer = None if self.tensorboard_config.path is not None: @@ -567,7 +573,8 @@ class MasterWorker(worker_base.AsyncWorker): ) wandb.finish() - swanlab.finish() + if swanlab is not None: + swanlab.finish() if self.__summary_writer is not None: self.__summary_writer.close() gc.collect() diff --git a/realhf/system/model_function_call.py b/realhf/system/model_function_call.py index 87fb6e4..d927455 100644 --- a/realhf/system/model_function_call.py +++ b/realhf/system/model_function_call.py @@ -10,8 +10,6 @@ import uuid from collections import defaultdict from typing import Dict, Hashable, List, Set, Tuple -import swanlab -import wandb from tensorboardX import SummaryWriter import realhf.api.core.config as config_api