[Fix] Fix yaml configurations for v0.2 experiments. (#129)

* .
* fix
2025-06-24 13:48:02 +08:00 · 2025-06-24 13:48:02 +08:00 · adeb8eb13f
parent e3005d57f6
commit adeb8eb13f
14 changed files with 101 additions and 213 deletions
--- a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-128.yaml
+++ b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-128.yaml
@@ -29,12 +29,6 @@ actor:
    mem_fraction_static: 0.8
    triton_attention_num_kv_splits: 16
    enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
-  init_critic_from_actor: true
 ref:
  type:
    _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
@@ -78,11 +61,12 @@ ppo:
  reward_output_scaling: 5
  reward_output_bias: 0.0
  adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8
--- a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-32.yaml
+++ b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-32.yaml
@@ -29,12 +29,6 @@ actor:
    mem_fraction_static: 0.8
    triton_attention_num_kv_splits: 16
    enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
-  init_critic_from_actor: true
 ref:
  type:
    _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
@@ -78,11 +61,12 @@ ppo:
  reward_output_scaling: 5
  reward_output_bias: 0.0
  adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8
--- a/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-8.yaml
+++ b/examples/configs/v0.2-qwen2-math/1.5B-distill/ppo-1.5B-distill-gpus-8.yaml
@@ -29,12 +29,6 @@ actor:
    mem_fraction_static: 0.8
    triton_attention_num_kv_splits: 16
    enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
-  init_critic_from_actor: true
 ref:
  type:
    _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
@@ -78,11 +61,12 @@ ppo:
  reward_output_scaling: 5
  reward_output_bias: 0.0
  adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8
--- a/examples/configs/v0.2-qwen2-math/32B-distill/ppo-32B-distill-gpus-128.yaml
+++ b/examples/configs/v0.2-qwen2-math/32B-distill/ppo-32B-distill-gpus-128.yaml
@@ -27,17 +27,8 @@ actor:
    hysteresis: 2
  sglang:
    mem_fraction_static: 0.8
-    disable_radix_cache: false
    triton_attention_num_kv_splits: 16
-    max_running_requests: 128
-    context_length: 29696
    enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-32B'
-  init_critic_from_actor: true
 ref:
  type:
    _class: qwen2
@@ -45,15 +36,6 @@ ref:
 actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
@@ -71,8 +53,6 @@ ppo:
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
@@ -81,11 +61,12 @@ ppo:
  reward_output_scaling: 5
  reward_output_bias: 0.0
  adv_norm: true
-  value_norm: true
 group_size: 32
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8
--- a/examples/configs/v0.2-qwen2-math/32B-distill/sft-32B-distill-gpus-128.yaml
+++ b/examples/configs/v0.2-qwen2-math/32B-distill/sft-32B-distill-gpus-128.yaml
@@ -37,8 +37,10 @@ model:
 allocation:
  mb_spec:
    max_tokens_per_mb: 32768
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8
--- a/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-128.yaml
+++ b/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-128.yaml
@@ -29,12 +29,6 @@ actor:
    mem_fraction_static: 0.8
    triton_attention_num_kv_splits: 16
    enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
-  init_critic_from_actor: true
 ref:
  type:
    _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
@@ -78,11 +61,12 @@ ppo:
  reward_output_scaling: 5
  reward_output_bias: 0.0
  adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8
--- a/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-32.yaml
+++ b/examples/configs/v0.2-qwen2-math/7B-distill/ppo-7B-distill-gpus-32.yaml
@@ -29,12 +29,6 @@ actor:
    mem_fraction_static: 0.8
    triton_attention_num_kv_splits: 16
    enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
-  init_critic_from_actor: true
 ref:
  type:
    _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
  mb_spec:
    max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
  mb_spec:
    max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
@@ -78,11 +61,12 @@ ppo:
  reward_output_scaling: 5
  reward_output_bias: 0.0
  adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8
--- a/examples/configs/v0.2-qwen2-math/7B-zero/ppo-7B-zero-gpus-128.yaml
+++ b/examples/configs/v0.2-qwen2-math/7B-zero/ppo-7B-zero-gpus-128.yaml
@@ -27,19 +27,8 @@ actor:
    hysteresis: 2
  sglang:
    mem_fraction_static: 0.8
-    disable_radix_cache: false
    triton_attention_num_kv_splits: 16
-    max_running_requests: 128
-    context_length: 18432
    enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/Qwen2.5-7B'
-  init_critic_from_actor: true
-  optimizer:
-    lr: 5e-6
 ref:
  type:
    _class: qwen2
@@ -47,15 +36,6 @@ ref:
 actor_train:
  mb_spec:
    max_tokens_per_mb: 19456
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 19456
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 19456
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 19456
 actor_inf:
  mb_spec:
    max_tokens_per_mb: 19456
@@ -73,8 +53,6 @@ ppo:
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
-    force_no_logits_mask: true
-    use_cuda_graph: true
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
@@ -83,11 +61,12 @@ ppo:
  reward_output_scaling: 0.5
  reward_output_bias: -1.0
  adv_norm: true
-  value_norm: true
 group_size: 64
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8
--- a/realhf/base/logging.py
+++ b/realhf/base/logging.py
@@ -154,9 +154,12 @@ def log_swanlab_wandb_tensorboard(data, step=None, summary_writer=None):
        _LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step)

    # swanlab
+    try:
        import swanlab

        swanlab.log(data, step=step)
+    except (ModuleNotFoundError, ImportError):
+        pass

    # wandb
    import wandb
--- a/realhf/impl/model/backend/sglang.py
+++ b/realhf/impl/model/backend/sglang.py
@@ -452,9 +452,6 @@ class SGLangGenerationBackend(ModelBackend, SGLangConfig):
                    high=60000,
                    experiment_name=constants.experiment_name(),
                    trial_name=constants.trial_name(),
-                    lockfile_root=os.path.join(
-                        constants.get_cache_path(self.args), "ports"
-                    ),
                ),
                group=constants.data_parallel_group(),
            )
--- a/realhf/scheduler/client.py
+++ b/realhf/scheduler/client.py
@@ -170,4 +170,4 @@ def make(args: "BaseExperimentConfig", **kwargs) -> SchedulerClient:

        return LocalSchedulerClient(args)
    else:
-        raise NotImplementedError(f"Scheduler {mode} not found")
+        raise NotImplementedError(f"Scheduler {args.mode} not found")
--- a/realhf/scheduler/evaluator.py
+++ b/realhf/scheduler/evaluator.py
@@ -8,13 +8,17 @@ import subprocess
 import time
 from typing import Any, Dict, Optional

-import swanlab
 import wandb

 import realhf.api.core.system_api as config_pkg
 from realhf.api.cli_args import BaseExperimentConfig
 from realhf.base import constants, logging

+try:
+    import swanlab
+except (ModuleNotFoundError, ImportError):
+    swanlab = None
+
 logger = logging.getLogger("AutomaticEvaluator", "colored")


@@ -132,6 +136,7 @@ class EvaluationStep:
            for k, v in d.items():
                log_data[f"{data_name}_{k}"] = v
        wandb.log(log_data, step=self.global_step)
+        if swanlab is not None:
            swanlab.log(log_data, step=self.global_step)
        self.status = EvaluationStepStatus.LOGGED
        logger.info(f"Logging eval result {log_data} to step {self.global_step}")
@@ -236,21 +241,19 @@ class AutomaticEvaluator:
    def __lazy_swanlab_init(self):
        if self.__swanlab_config.api_key:
            swanlab.login(self.__swanlab_config.api_key)
-        if self.swanlab_config.config is None:
+        if self.__swanlab_config.config is None:
            import yaml

            with open(
                os.path.join(
-                    constants.LOG_ROOT,
-                    constants.experiment_name(),
-                    constants.trial_name(),
+                    constants.get_log_path(self.args),
                    "config.yaml",
                ),
                "r",
            ) as f:
                __config = yaml.safe_load(f)
        else:
-            __config = self.swanlab_config.config
+            __config = self.__swanlab_config.config
        __config["FRAMEWORK"] = "AReaL"
        swanlab.init(
            project=self.__swanlab_config.project or constants.experiment_name(),
@@ -259,9 +262,7 @@ class AutomaticEvaluator:
            config=__config,
            logdir=self.__swanlab_config.logdir
            or os.path.join(
-                constants.LOG_ROOT,
-                constants.experiment_name(),
-                constants.trial_name(),
+                constants.get_log_path(self.args),
                "swanlab",
            ),
            mode=self.__swanlab_config.mode,
@@ -329,7 +330,7 @@ class AutomaticEvaluator:
                if not self.__wandb_initialized:
                    self.__lazy_wandb_init()
                    self.__wandb_initialized = True
-                if not self.__swanlab_initialized:
+                if not self.__swanlab_initialized and swanlab is not None:
                    self.__lazy_swanlab_init()
                    self.__swanlab_initialized = True
                self.__eval_steps[log_step].log(self.__config)
--- a/realhf/system/master_worker.py
+++ b/realhf/system/master_worker.py
@@ -12,7 +12,6 @@ from typing import Dict
 import colorama
 import networkx as nx
 import numpy as np
-import swanlab
 import wandb
 from tensorboardX import SummaryWriter

@@ -38,6 +37,11 @@ from realhf.system.buffer import AsyncIOSequenceBuffer
 from realhf.system.function_executor import FunctionExecutor
 from realhf.system.model_function_call import RPCCorountineControl

+try:
+    import swanlab
+except (ModuleNotFoundError, ImportError):
+    swanlab = None
+
 logger = logging.getLogger("master worker", "system")
 blogger = logging.getLogger("benchmark")

@@ -307,15 +311,18 @@ class MasterWorker(worker_base.AsyncWorker):

        # swanlab init, connect to remote or local swanlab host
        if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key:
+            if swanlab is not None:
                swanlab.login(self.swanlab_config.api_key)
+            else:
+                logger.warning(
+                    "swanlab not installed but enabled. Ignore swanlab logging."
+                )
        if self.swanlab_config.config is None:
            import yaml

            with open(
                os.path.join(
-                    constants.LOG_ROOT,
-                    constants.experiment_name(),
-                    constants.trial_name(),
+                    constants.get_log_path(self.args),
                    "config.yaml",
                ),
                "r",
@@ -324,6 +331,7 @@ class MasterWorker(worker_base.AsyncWorker):
        else:
            __config = self.swanlab_config.config
        __config["FRAMEWORK"] = "AReaL"
+        if swanlab is not None:
            swanlab.init(
                project=self.swanlab_config.project or constants.experiment_name(),
                experiment_name=self.swanlab_config.name
@@ -331,9 +339,7 @@ class MasterWorker(worker_base.AsyncWorker):
                config=__config,
                logdir=self.swanlab_config.logdir
                or os.path.join(
-                constants.LOG_ROOT,
-                constants.experiment_name(),
-                constants.trial_name(),
+                    constants.get_log_path(self.args),
                    "swanlab",
                ),
                mode=self.swanlab_config.mode,
@@ -567,6 +573,7 @@ class MasterWorker(worker_base.AsyncWorker):
        )

        wandb.finish()
+        if swanlab is not None:
            swanlab.finish()
        if self.__summary_writer is not None:
            self.__summary_writer.close()
--- a/realhf/system/model_function_call.py
+++ b/realhf/system/model_function_call.py
@@ -10,8 +10,6 @@ import uuid
 from collections import defaultdict
 from typing import Dict, Hashable, List, Set, Tuple

-import swanlab
-import wandb
 from tensorboardX import SummaryWriter

 import realhf.api.core.config as config_api