Support using SwanLab for experiment tracking (#98)

* Support using SwanLab for experiment tracking

* docs: improve WandB and SwanLab integration documentation
- Added official links for better user reference
- Used backticks to quote commands and parameters
- Unified mode settings to use "online" / "cloud" convention
- Merged WandB and SwanLab descriptions into a single concise statement
- Added note on using `swanlab.mode="local"` when server connection is unavailable

* refactor: update default value of api_key

* fix: correct help description from WandB to SwanLab in SwanLabConfig

* refactor: merge log_swanlab_tensorboard and log_wandb_tensorboard into log_swanlab_wandb_tensorboard

 - Unified logging logic for SwanLab, WandB, and TensorBoard to reduce code duplication

* chore: update swanlab version in dependency config files

 - Updated SwanLab version in pyproject.toml
 - Updated SwanLab version in requirements.txt

* refactor: enhance SwanLab config handling for logging purposes
- Config now uses provided arguments first
- Falls back to reading from config.yaml if no input is given

* docs: add note on using `swanlab.mode="local"` when server connection is unavailable

* refactor: merge _LATEST_WANDB_STEP and _LATEST_SWANLAB_STEP into _LATEST_LOG_STEP

* Format code with black and isort

* chore: update swanlab version in dependency config files
- Updated SwanLab version in requirements.txt

* refactor: rename swanlab_wandb_data to log_data

---------

Co-authored-by: dubingnan <dubingnan@360.cn>
This commit is contained in:
xichengpro 2025-06-16 19:51:31 +08:00 committed by GitHub
parent f2f4b67bcd
commit bb14f022dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 160 additions and 22 deletions

View File

@ -97,12 +97,15 @@ python3 training/main_sync_ppo.py --help
## Monitoring the Training Process ## Monitoring the Training Process
We recommend using Weights & Biases (wandb) for monitoring. Run `wandb login` or set the `WANDB_API_KEY` environment variable. Set `wandb.mode=online` in your configuration to upload training statistics. + We recommend using [Weights & Biases (wandb)](https://github.com/wandb/wandb) or [SwanLab](https://github.com/SwanHubX/SwanLab) for monitoring—run `wandb login` or `swanlab login`, or set the corresponding environment variable API key (`WANDB_API_KEY` or `SWANLAB_API_KEY`). Set `wandb.mode="online"` or `swanlab.mode="cloud"` in your configuration to upload training statistics. If you cannot connect to the server, you can also use `wandb.mode="offline"` or `swanlab.mode="local"` to save data locally without uploading.
You can also use TensorBoard by setting the `tensorboard.path` parameter. You can also use TensorBoard by setting the `tensorboard.path` parameter.
The main log will be saved to `${fileroot}/logs/${USER}/${experiment_name}/${trial_name}/main.log` and contains the statistics uploaded to wandb. The main log will be saved to `${fileroot}/logs/${USER}/${experiment_name}/${trial_name}/main.log` and contains the statistics uploaded to wandb.
If SwanLab is enabled, logs will be saved to the directory specified by `swanlab.logdir`.
### Key Training Statistics ### Key Training Statistics
- **`Epoch 1/5`**: Indicates the total epochs required and the current epoch being trained. - **`Epoch 1/5`**: Indicates the total epochs required and the current epoch being trained.

View File

@ -15,3 +15,4 @@ prettytable
timeout-decorator timeout-decorator
timeout_decorator timeout_decorator
wandb wandb
swanlab[dashboard]

View File

@ -61,6 +61,7 @@ dependencies = [
"colorlog", "colorlog",
"psutil", "psutil",
"pynvml", "pynvml",
"swanlab[dashboard]",
# Performance and compression # Performance and compression
"ninja", "ninja",

View File

@ -848,6 +848,16 @@ class WandBConfig:
config: Optional[Dict] = None config: Optional[Dict] = None
@dataclass
class SwanlabConfig:
project: Optional[str] = None
name: Optional[str] = None
config: Optional[Dict] = None
logdir: Optional[str] = None
mode: Optional[str] = "local"
api_key: Optional[str] = os.getenv("SWANLAB_API_KEY", None)
@dataclass @dataclass
class TensorBoardConfig: class TensorBoardConfig:
path: Optional[str] = None path: Optional[str] = None
@ -951,6 +961,10 @@ class BaseExperimentConfig:
default_factory=WandBConfig, default_factory=WandBConfig,
metadata={"help": "Weights & Biases configuration."}, metadata={"help": "Weights & Biases configuration."},
) )
swanlab: SwanlabConfig = field(
default_factory=SwanlabConfig,
metadata={"help": "SwanLab configuration."},
)
tensorboard: TensorBoardConfig = field( tensorboard: TensorBoardConfig = field(
default_factory=TensorBoardConfig, default_factory=TensorBoardConfig,
metadata={"help": "TensorBoard configuration. Only 'path' field required."}, metadata={"help": "TensorBoard configuration. Only 'path' field required."},
@ -1026,7 +1040,7 @@ class BaseExperimentConfig:
default=False, default=False,
metadata={ metadata={
"help": "Enable automatic evaluation during training. " "help": "Enable automatic evaluation during training. "
"Results logged to disk and WandB (if active)." "Results logged to disk and WandB or SwanLab (if active)."
}, },
) )
auto_eval_config: AutomaticEvaluator = field( auto_eval_config: AutomaticEvaluator = field(

View File

@ -11,6 +11,7 @@ import realhf.api.core.dfg as dfg
from realhf.api.cli_args import ( from realhf.api.cli_args import (
AutomaticEvaluator, AutomaticEvaluator,
ExperimentSaveEvalControl, ExperimentSaveEvalControl,
SwanlabConfig,
TensorBoardConfig, TensorBoardConfig,
WandBConfig, WandBConfig,
) )
@ -254,6 +255,7 @@ class ExperimentScheduling:
class ExperimentConfig: class ExperimentConfig:
exp_ctrl: ExperimentSaveEvalControl exp_ctrl: ExperimentSaveEvalControl
wandb: WandBConfig wandb: WandBConfig
swanlab: SwanlabConfig
tensorboard: TensorBoardConfig tensorboard: TensorBoardConfig
# dataflow # dataflow
model_rpcs: List[dfg.MFCDef] model_rpcs: List[dfg.MFCDef]

View File

@ -94,7 +94,7 @@ def main_start(args, job_group_id: str = "", recover_count: int = 0):
raise RuntimeError("Experiment initial setup failed.") from e raise RuntimeError("Experiment initial setup failed.") from e
evaluator = ( evaluator = (
AutomaticEvaluator(exp_cfg.evaluator, exp_cfg.wandb) AutomaticEvaluator(exp_cfg.evaluator, exp_cfg.wandb, exp_cfg.swanlab)
if exp_cfg.auto_eval if exp_cfg.auto_eval
else None else None
) )

View File

@ -141,19 +141,29 @@ def getLogger(
return logging.getLogger(name) return logging.getLogger(name)
_LATEST_WANDB_STEP = 0 _LATEST_LOG_STEP = 0
def log_wandb_tensorboard(data, step=None, summary_writer=None): def log_swanlab_wandb_tensorboard(data, step=None, summary_writer=None):
# Logs data to SwanLab, wandb, TensorBoard.
global _LATEST_LOG_STEP
if step is None:
step = _LATEST_LOG_STEP
else:
_LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step)
# swanlab
import swanlab
swanlab.log(data, step=step)
# wandb
import wandb import wandb
global _LATEST_WANDB_STEP
if step is None:
step = _LATEST_WANDB_STEP
else:
_LATEST_WANDB_STEP = max(_LATEST_WANDB_STEP, step)
wandb.log(data, step=step) wandb.log(data, step=step)
# tensorboard
if summary_writer is not None: if summary_writer is not None:
for key, val in data.items(): for key, val in data.items():
summary_writer.add_scalar(f"{key}", val, step) summary_writer.add_scalar(f"{key}", val, step)

View File

@ -331,6 +331,7 @@ class AsyncRLExperimentConfig(CommonExperimentConfig, AsyncRLOptions):
return ExperimentConfig( return ExperimentConfig(
exp_ctrl=self.exp_ctrl, exp_ctrl=self.exp_ctrl,
wandb=self.wandb, wandb=self.wandb,
swanlab=self.swanlab,
tensorboard=self.tensorboard, tensorboard=self.tensorboard,
# NOTE: master and model worker only see RPCs without generation # NOTE: master and model worker only see RPCs without generation
model_rpcs=[ model_rpcs=[

View File

@ -564,6 +564,7 @@ class CommonExperimentConfig(BaseExperimentConfig, Experiment):
return ExperimentConfig( return ExperimentConfig(
exp_ctrl=self.exp_ctrl, exp_ctrl=self.exp_ctrl,
wandb=self.wandb, wandb=self.wandb,
swanlab=self.swanlab,
tensorboard=self.tensorboard, tensorboard=self.tensorboard,
model_rpcs=[rpc_alloc.rpc for rpc_alloc in rpc_allocs], model_rpcs=[rpc_alloc.rpc for rpc_alloc in rpc_allocs],
model_worker=model_worker, model_worker=model_worker,

View File

@ -370,6 +370,7 @@ class PPOMATHConfig(CommonExperimentConfig, PPOMATHExperimentOptions):
return ExperimentConfig( return ExperimentConfig(
exp_ctrl=self.exp_ctrl, exp_ctrl=self.exp_ctrl,
wandb=self.wandb, wandb=self.wandb,
swanlab=self.swanlab,
tensorboard=self.tensorboard, tensorboard=self.tensorboard,
model_rpcs=[rpc_alloc.rpc for rpc_alloc in rpc_allocs], model_rpcs=[rpc_alloc.rpc for rpc_alloc in rpc_allocs],
model_worker=model_worker, model_worker=model_worker,

View File

@ -8,6 +8,7 @@ import subprocess
import time import time
from typing import Dict, Optional from typing import Dict, Optional
import swanlab
import wandb import wandb
import realhf.api.core.system_api as config_pkg import realhf.api.core.system_api as config_pkg
@ -125,13 +126,15 @@ class EvaluationStep:
self.status = EvaluationStepStatus.FAILED self.status = EvaluationStepStatus.FAILED
return False return False
wandb_data = {} log_data = {}
for data_name, d in data.items(): for data_name, d in data.items():
for k, v in d.items(): for k, v in d.items():
wandb_data[f"{data_name}_{k}"] = v log_data[f"{data_name}_{k}"] = v
wandb.log(wandb_data, step=self.global_step) wandb.log(log_data, step=self.global_step)
swanlab.log(log_data, step=self.global_step)
self.status = EvaluationStepStatus.LOGGED self.status = EvaluationStepStatus.LOGGED
logger.info(f"Logging eval result {wandb_data} to step {self.global_step}") logger.info(f"Logging eval result {log_data} to step {self.global_step}")
return True return True
def check(self): def check(self):
@ -154,13 +157,15 @@ class AutomaticEvaluator:
self, self,
config: config_pkg.AutomaticEvaluator, config: config_pkg.AutomaticEvaluator,
wandb_config: config_pkg.WandBConfig, wandb_config: config_pkg.WandBConfig,
swanlab_config: config_pkg.SwanlabConfig,
): ):
self.__eval_steps: Dict[int, EvaluationStep] = {} self.__eval_steps: Dict[int, EvaluationStep] = {}
self.__max_concurrent_jobs = config.max_concurrent_jobs self.__max_concurrent_jobs = config.max_concurrent_jobs
self.__wandb_config = wandb_config self.__wandb_config = wandb_config
self.__swanlab_config = swanlab_config
self.__config = config self.__config = config
self.__wandb_initialized = False self.__wandb_initialized = False
self.__swanlab_initialized = False
# Check evaluated checkpoints by logs in recover # Check evaluated checkpoints by logs in recover
# NOTE: All previous evaluation steps with output will be marked # NOTE: All previous evaluation steps with output will be marked
# as logged, even if it is not really logged in wandb. # as logged, even if it is not really logged in wandb.
@ -228,6 +233,40 @@ class AutomaticEvaluator:
settings=wandb.Settings(start_method="fork"), settings=wandb.Settings(start_method="fork"),
) )
def __lazy_swanlab_init(self):
if self.__swanlab_config.api_key:
swanlab.login(self.__swanlab_config.api_key)
if self.swanlab_config.config is None:
import yaml
with open(
os.path.join(
constants.LOG_ROOT,
constants.experiment_name(),
constants.trial_name(),
"config.yaml",
),
"r",
) as f:
__config = yaml.safe_load(f)
else:
__config = self.swanlab_config.config
__config["FRAMEWORK"] = "AReaL"
swanlab.init(
project=self.__swanlab_config.project or constants.experiment_name(),
experiment_name=self.__swanlab_config.name
or f"{constants.trial_name()}_eval",
config=__config,
logdir=self.__swanlab_config.logdir
or os.path.join(
constants.LOG_ROOT,
constants.experiment_name(),
constants.trial_name(),
"swanlab",
),
mode=self.__swanlab_config.mode,
)
def step(self): def step(self):
# Check whether a new evaluation step should be created # Check whether a new evaluation step should be created
ckpt_parent = os.path.join( ckpt_parent = os.path.join(
@ -292,6 +331,9 @@ class AutomaticEvaluator:
if not self.__wandb_initialized: if not self.__wandb_initialized:
self.__lazy_wandb_init() self.__lazy_wandb_init()
self.__wandb_initialized = True self.__wandb_initialized = True
if not self.__swanlab_initialized:
self.__lazy_swanlab_init()
self.__swanlab_initialized = True
self.__eval_steps[log_step].log(self.__config) self.__eval_steps[log_step].log(self.__config)
@property @property

View File

@ -12,6 +12,7 @@ from typing import Dict
import colorama import colorama
import networkx as nx import networkx as nx
import numpy as np import numpy as np
import swanlab
import wandb import wandb
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
@ -312,6 +313,40 @@ class MasterWorker(worker_base.AsyncWorker):
resume="allow", resume="allow",
settings=wandb.Settings(start_method="fork"), settings=wandb.Settings(start_method="fork"),
) )
# swanlab init, connect to remote or local swanlab host
if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key:
swanlab.login(self.swanlab_config.api_key)
if self.swanlab_config.config is None:
import yaml
with open(
os.path.join(
constants.LOG_ROOT,
constants.experiment_name(),
constants.trial_name(),
"config.yaml",
),
"r",
) as f:
__config = yaml.safe_load(f)
else:
__config = self.swanlab_config.config
__config["FRAMEWORK"] = "AReaL"
swanlab.init(
project=self.swanlab_config.project or constants.experiment_name(),
experiment_name=self.swanlab_config.name
or f"{constants.trial_name()}_train",
config=__config,
logdir=self.swanlab_config.logdir
or os.path.join(
constants.LOG_ROOT,
constants.experiment_name(),
constants.trial_name(),
"swanlab",
),
mode=self.swanlab_config.mode,
)
# tensorboard logging # tensorboard logging
self.__summary_writer = None self.__summary_writer = None
if self.tensorboard_config.path is not None: if self.tensorboard_config.path is not None:
@ -487,7 +522,7 @@ class MasterWorker(worker_base.AsyncWorker):
s += f"(global step {global_step}) finishes. " s += f"(global step {global_step}) finishes. "
s += f"#End to end# execution time: *{e2e_time:.3f}*s. " s += f"#End to end# execution time: *{e2e_time:.3f}*s. "
s += f"Total time consumption: {time_since_configure:.3f}s. " s += f"Total time consumption: {time_since_configure:.3f}s. "
logging.log_wandb_tensorboard({"timeperf/e2e": e2e_time}) logging.log_swanlab_wandb_tensorboard({"timeperf/e2e": e2e_time})
if len(self.e2e_time_history) > 2: if len(self.e2e_time_history) > 2:
remaining_steps = self._steps_per_epoch - epoch_step remaining_steps = self._steps_per_epoch - epoch_step
remaining_epochs = self.__total_train_epochs - epoch remaining_epochs = self.__total_train_epochs - epoch
@ -540,6 +575,7 @@ class MasterWorker(worker_base.AsyncWorker):
) )
wandb.finish() wandb.finish()
swanlab.finish()
if self.__summary_writer is not None: if self.__summary_writer is not None:
self.__summary_writer.close() self.__summary_writer.close()
gc.collect() gc.collect()

View File

@ -10,6 +10,7 @@ import uuid
from collections import defaultdict from collections import defaultdict
from typing import Dict, Hashable, List, Set, Tuple from typing import Dict, Hashable, List, Set, Tuple
import swanlab
import wandb import wandb
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
@ -442,7 +443,7 @@ class ModelFunctionCall:
logger.info( logger.info(
f"RPC name {rpc.name} returns\n{data_api.tabulate_stats(res)}" f"RPC name {rpc.name} returns\n{data_api.tabulate_stats(res)}"
) )
logging.log_wandb_tensorboard( logging.log_swanlab_wandb_tensorboard(
res, res,
step=ctrl.step_info.global_step, step=ctrl.step_info.global_step,
summary_writer=self.summary_writer, summary_writer=self.summary_writer,
@ -453,7 +454,7 @@ class ModelFunctionCall:
f"RPC name {rpc.name} returns ({j + 1}/{len(res)})\n{data_api.tabulate_stats(r)}" f"RPC name {rpc.name} returns ({j + 1}/{len(res)})\n{data_api.tabulate_stats(r)}"
) )
offset = len(res) * ctrl.step_info.global_step offset = len(res) * ctrl.step_info.global_step
logging.log_wandb_tensorboard( logging.log_swanlab_wandb_tensorboard(
r, r,
step=offset + j, step=offset + j,
summary_writer=self.summary_writer, summary_writer=self.summary_writer,
@ -465,11 +466,10 @@ class ModelFunctionCall:
for time_record in time_records: for time_record in time_records:
stats_tracker.scalar(**time_record) stats_tracker.scalar(**time_record)
time_stats = stats_tracker.export() time_stats = stats_tracker.export()
logging.log_wandb_tensorboard( logging.log_swanlab_wandb_tensorboard(
time_stats, time_stats,
summary_writer=self.summary_writer, summary_writer=self.summary_writer,
) )
logger.info( logger.info(
f"Model rpc {rpc.name} finished. " f"Model rpc {rpc.name} finished. "
f"Request-reply time {time.perf_counter() - tik:.4f}s. " f"Request-reply time {time.perf_counter() - tik:.4f}s. "

View File

@ -580,7 +580,9 @@ class Worker:
) )
expr_config.lazy_init() expr_config.lazy_init()
self.wandb_config = expr_config.wandb self.wandb_config = expr_config.wandb
self.swanlab_config = expr_config.swanlab
os.environ["WANDB_MODE"] = self.wandb_config.mode os.environ["WANDB_MODE"] = self.wandb_config.mode
os.environ["SWANLAB_MODE"] = self.swanlab_config.mode
self.tensorboard_config = expr_config.tensorboard self.tensorboard_config = expr_config.tensorboard
config = expr_config.resolve_worker_config( config = expr_config.resolve_worker_config(
self.__worker_type, self.__worker_index self.__worker_type, self.__worker_index

View File

@ -68,4 +68,5 @@ python_dateutil
word2number word2number
Pebble Pebble
timeout-decorator timeout-decorator
prettytable prettytable
swanlab[dashboard]

View File

@ -14,6 +14,13 @@ wandb:
notes: null notes: null
tags: null tags: null
config: null config: null
swanlab:
mode: disabled
api_key: null
project: null
name: null
config: null
logdir: null
tensorboard: tensorboard:
path: null path: null
recover_mode: auto recover_mode: auto

View File

@ -14,6 +14,13 @@ wandb:
notes: null notes: null
tags: null tags: null
config: null config: null
swanlab:
mode: disabled
api_key: null
project: null
name: null
config: null
logdir: null
tensorboard: tensorboard:
path: null path: null
recover_mode: auto recover_mode: auto

View File

@ -14,6 +14,13 @@ wandb:
notes: null notes: null
tags: null tags: null
config: null config: null
swanlab:
mode: disabled
api_key: null
project: null
name: null
config: null
logdir: null
tensorboard: tensorboard:
path: null path: null
recover_mode: auto recover_mode: auto

View File

@ -90,6 +90,7 @@ class RayWorker:
worker_info.experiment_name, worker_info.trial_name worker_info.experiment_name, worker_info.trial_name
) )
self.worker.wandb_config = expr_config.wandb self.worker.wandb_config = expr_config.wandb
self.worker.swanlab_config = expr_config.swanlab
self.worker.tensorboard_config = expr_config.tensorboard self.worker.tensorboard_config = expr_config.tensorboard
self.logger = logging.getLogger(f"{self.worker_type} {idx}", "benchmark") self.logger = logging.getLogger(f"{self.worker_type} {idx}", "benchmark")
self.logger.info(f"Configuring {self.worker_type}...") self.logger.info(f"Configuring {self.worker_type}...")
@ -125,6 +126,7 @@ def _run_experiment(exp_cfg, expr_name, trial_name):
# Initialize ray in the Ray cluster # Initialize ray in the Ray cluster
env_vars = constants.get_env_vars( env_vars = constants.get_env_vars(
WADNB_MODE=exp_cfg.wandb.mode, WADNB_MODE=exp_cfg.wandb.mode,
SWANLAB_MODE=exp_cfg.swanlab.mode,
REAL_MODE="ray", REAL_MODE="ray",
REAL_RECOVER_RUN="0", REAL_RECOVER_RUN="0",
REAL_SAVE_RECOVER_STATES="1", REAL_SAVE_RECOVER_STATES="1",