From bb14f022dc0ae2b1e8501256447318e76c1be344 Mon Sep 17 00:00:00 2001
From: xichengpro <bingnandu9@gmail.com>
Date: Mon, 16 Jun 2025 19:51:31 +0800
Subject: [PATCH] Support using SwanLab for experiment tracking (#98)

* Support using SwanLab for experiment tracking

* docs: improve WandB and SwanLab integration documentation
- Added official links for better user reference
- Used backticks to quote commands and parameters
- Unified mode settings to use "online" / "cloud" convention
- Merged WandB and SwanLab descriptions into a single concise statement
- Added note on using `swanlab.mode="local"` when server connection is unavailable

* refactor: update default value of api_key

* fix: correct help description from WandB to SwanLab in SwanLabConfig

* refactor: merge log_swanlab_tensorboard and log_wandb_tensorboard into log_swanlab_wandb_tensorboard

 - Unified logging logic for SwanLab, WandB, and TensorBoard to reduce code duplication

* chore: update swanlab version in dependency config files

 - Updated SwanLab version in pyproject.toml
 - Updated SwanLab version in requirements.txt

* refactor: enhance SwanLab config handling for logging purposes
- Config now uses provided arguments first
- Falls back to reading from config.yaml if no input is given

* docs: add note on using `swanlab.mode="local"` when server connection is unavailable

* refactor: merge _LATEST_WANDB_STEP and _LATEST_SWANLAB_STEP into _LATEST_LOG_STEP

* Format code with black and isort

* chore: update swanlab version in dependency config files
- Updated SwanLab version in requirements.txt

* refactor: rename swanlab_wandb_data to log_data

---------

Co-authored-by: dubingnan <dubingnan@360.cn>
---
 docs/tutorial/quickstart.md                  |  5 +-
 evaluation/requirements.txt                  |  1 +
 pyproject.toml                               |  1 +
 realhf/api/cli_args.py                       | 16 +++++-
 realhf/api/core/system_api.py                |  2 +
 realhf/apps/main.py                          |  2 +-
 realhf/base/logging.py                       | 26 +++++++---
 realhf/experiments/async_exp/async_rl_exp.py |  1 +
 realhf/experiments/common/common.py          |  1 +
 realhf/experiments/common/ppo_math_exp.py    |  1 +
 realhf/scheduler/evaluator.py                | 52 ++++++++++++++++++--
 realhf/system/master_worker.py               | 38 +++++++++++++-
 realhf/system/model_function_call.py         |  8 +--
 realhf/system/worker_base.py                 |  2 +
 requirements.txt                             |  3 +-
 training/configs/async-ppo.yaml              |  7 +++
 training/configs/sft.yaml                    |  7 +++
 training/configs/sync-ppo.yaml               |  7 +++
 training/utils.py                            |  2 +
 19 files changed, 160 insertions(+), 22 deletions(-)

diff --git a/docs/tutorial/quickstart.md b/docs/tutorial/quickstart.md
index b871cd7..899b377 100644
--- a/docs/tutorial/quickstart.md
+++ b/docs/tutorial/quickstart.md
@@ -97,12 +97,15 @@ python3 training/main_sync_ppo.py --help
 
 ## Monitoring the Training Process
 
-We recommend using Weights & Biases (wandb) for monitoring. Run `wandb login` or set the `WANDB_API_KEY` environment variable. Set `wandb.mode=online` in your configuration to upload training statistics.
+We recommend using [Weights & Biases (wandb)](https://github.com/wandb/wandb) or [SwanLab](https://github.com/SwanHubX/SwanLab) for monitoring. Run `wandb login` or `swanlab login`, or set the corresponding API key environment variable (`WANDB_API_KEY` or `SWANLAB_API_KEY`). Set `wandb.mode="online"` or `swanlab.mode="cloud"` in your configuration to upload training statistics. If you cannot connect to the server, you can also use `wandb.mode="offline"` or `swanlab.mode="local"` to save data locally without uploading.
+
 
 You can also use TensorBoard by setting the `tensorboard.path` parameter.
 
 The main log will be saved to `${fileroot}/logs/${USER}/${experiment_name}/${trial_name}/main.log` and contains the statistics uploaded to wandb.
 
+If SwanLab is enabled, logs will be saved to the directory specified by `swanlab.logdir`.
+
 ### Key Training Statistics
 
 - **`Epoch 1/5`**: Indicates the total epochs required and the current epoch being trained.
diff --git a/evaluation/requirements.txt b/evaluation/requirements.txt
index 3b211d4..7f8c6cb 100644
--- a/evaluation/requirements.txt
+++ b/evaluation/requirements.txt
@@ -15,3 +15,4 @@ prettytable
 timeout-decorator
 timeout_decorator
 wandb
+swanlab[dashboard]
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 2433b17..7597e13 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,6 +61,7 @@ dependencies = [
     "colorlog",
     "psutil",
     "pynvml",
+    "swanlab[dashboard]",
     
     # Performance and compression
     "ninja",
diff --git a/realhf/api/cli_args.py b/realhf/api/cli_args.py
index 22ebd08..8c4cb28 100644
--- a/realhf/api/cli_args.py
+++ b/realhf/api/cli_args.py
@@ -848,6 +848,16 @@ class WandBConfig:
     config: Optional[Dict] = None
 
 
+@dataclass
+class SwanlabConfig:
+    project: Optional[str] = None  # if unset, callers use the experiment name
+    name: Optional[str] = None  # if unset, callers use "<trial_name>_train" / "_eval"
+    config: Optional[Dict] = None  # if None, callers load the trial's config.yaml
+    logdir: Optional[str] = None  # if unset, callers use a "swanlab" dir under the trial logs
+    mode: Optional[str] = "local"  # forwarded to swanlab.init(mode=...) and SWANLAB_MODE
+    api_key: Optional[str] = os.getenv("SWANLAB_API_KEY", None)  # env is read at import time
+
+
 @dataclass
 class TensorBoardConfig:
     path: Optional[str] = None
@@ -951,6 +961,10 @@ class BaseExperimentConfig:
         default_factory=WandBConfig,
         metadata={"help": "Weights & Biases configuration."},
     )
+    swanlab: SwanlabConfig = field(
+        default_factory=SwanlabConfig,
+        metadata={"help": "SwanLab configuration."},
+    )
     tensorboard: TensorBoardConfig = field(
         default_factory=TensorBoardConfig,
         metadata={"help": "TensorBoard configuration. Only 'path' field required."},
@@ -1026,7 +1040,7 @@ class BaseExperimentConfig:
         default=False,
         metadata={
             "help": "Enable automatic evaluation during training. "
-            "Results logged to disk and WandB (if active)."
+            "Results logged to disk and WandB or Swanlab(if active)."
         },
     )
     auto_eval_config: AutomaticEvaluator = field(
diff --git a/realhf/api/core/system_api.py b/realhf/api/core/system_api.py
index 05825d7..6dda819 100644
--- a/realhf/api/core/system_api.py
+++ b/realhf/api/core/system_api.py
@@ -11,6 +11,7 @@ import realhf.api.core.dfg as dfg
 from realhf.api.cli_args import (
     AutomaticEvaluator,
     ExperimentSaveEvalControl,
+    SwanlabConfig,
     TensorBoardConfig,
     WandBConfig,
 )
@@ -254,6 +255,7 @@ class ExperimentScheduling:
 class ExperimentConfig:
     exp_ctrl: ExperimentSaveEvalControl
     wandb: WandBConfig
+    swanlab: SwanlabConfig
     tensorboard: TensorBoardConfig
     # dataflow
     model_rpcs: List[dfg.MFCDef]
diff --git a/realhf/apps/main.py b/realhf/apps/main.py
index d44d32c..146655e 100644
--- a/realhf/apps/main.py
+++ b/realhf/apps/main.py
@@ -94,7 +94,7 @@ def main_start(args, job_group_id: str = "", recover_count: int = 0):
         raise RuntimeError("Experiment initial setup failed.") from e
 
     evaluator = (
-        AutomaticEvaluator(exp_cfg.evaluator, exp_cfg.wandb)
+        AutomaticEvaluator(exp_cfg.evaluator, exp_cfg.wandb, exp_cfg.swanlab)
         if exp_cfg.auto_eval
         else None
     )
diff --git a/realhf/base/logging.py b/realhf/base/logging.py
index 7b46ea9..4f21e47 100644
--- a/realhf/base/logging.py
+++ b/realhf/base/logging.py
@@ -141,19 +141,29 @@ def getLogger(
     return logging.getLogger(name)
 
 
-_LATEST_WANDB_STEP = 0
+_LATEST_LOG_STEP = 0
 
 
-def log_wandb_tensorboard(data, step=None, summary_writer=None):
+def log_swanlab_wandb_tensorboard(data, step=None, summary_writer=None):
+    # Log data to SwanLab, wandb, and TensorBoard.
+
+    global _LATEST_LOG_STEP
+    if step is None:
+        step = _LATEST_LOG_STEP
+    else:
+        _LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step)
+
+    # swanlab
+    import swanlab
+
+    swanlab.log(data, step=step)
+
+    # wandb
     import wandb
 
-    global _LATEST_WANDB_STEP
-    if step is None:
-        step = _LATEST_WANDB_STEP
-    else:
-        _LATEST_WANDB_STEP = max(_LATEST_WANDB_STEP, step)
-
     wandb.log(data, step=step)
+
+    # tensorboard
     if summary_writer is not None:
         for key, val in data.items():
             summary_writer.add_scalar(f"{key}", val, step)
diff --git a/realhf/experiments/async_exp/async_rl_exp.py b/realhf/experiments/async_exp/async_rl_exp.py
index 5caf25b..b1a2252 100755
--- a/realhf/experiments/async_exp/async_rl_exp.py
+++ b/realhf/experiments/async_exp/async_rl_exp.py
@@ -331,6 +331,7 @@ class AsyncRLExperimentConfig(CommonExperimentConfig, AsyncRLOptions):
         return ExperimentConfig(
             exp_ctrl=self.exp_ctrl,
             wandb=self.wandb,
+            swanlab=self.swanlab,
             tensorboard=self.tensorboard,
             # NOTE: master and model worker only see RPCs without generation
             model_rpcs=[
diff --git a/realhf/experiments/common/common.py b/realhf/experiments/common/common.py
index 4ff9a63..ef47549 100644
--- a/realhf/experiments/common/common.py
+++ b/realhf/experiments/common/common.py
@@ -564,6 +564,7 @@ class CommonExperimentConfig(BaseExperimentConfig, Experiment):
         return ExperimentConfig(
             exp_ctrl=self.exp_ctrl,
             wandb=self.wandb,
+            swanlab=self.swanlab,
             tensorboard=self.tensorboard,
             model_rpcs=[rpc_alloc.rpc for rpc_alloc in rpc_allocs],
             model_worker=model_worker,
diff --git a/realhf/experiments/common/ppo_math_exp.py b/realhf/experiments/common/ppo_math_exp.py
index e3ef786..9b8810e 100644
--- a/realhf/experiments/common/ppo_math_exp.py
+++ b/realhf/experiments/common/ppo_math_exp.py
@@ -370,6 +370,7 @@ class PPOMATHConfig(CommonExperimentConfig, PPOMATHExperimentOptions):
         return ExperimentConfig(
             exp_ctrl=self.exp_ctrl,
             wandb=self.wandb,
+            swanlab=self.swanlab,
             tensorboard=self.tensorboard,
             model_rpcs=[rpc_alloc.rpc for rpc_alloc in rpc_allocs],
             model_worker=model_worker,
diff --git a/realhf/scheduler/evaluator.py b/realhf/scheduler/evaluator.py
index a24029e..7c5fef0 100644
--- a/realhf/scheduler/evaluator.py
+++ b/realhf/scheduler/evaluator.py
@@ -8,6 +8,7 @@ import subprocess
 import time
 from typing import Dict, Optional
 
+import swanlab
 import wandb
 
 import realhf.api.core.system_api as config_pkg
@@ -125,13 +126,15 @@ class EvaluationStep:
             self.status = EvaluationStepStatus.FAILED
             return False
 
-        wandb_data = {}
+        log_data = {}
         for data_name, d in data.items():
             for k, v in d.items():
-                wandb_data[f"{data_name}_{k}"] = v
-        wandb.log(wandb_data, step=self.global_step)
+                log_data[f"{data_name}_{k}"] = v
+        wandb.log(log_data, step=self.global_step)
+        swanlab.log(log_data, step=self.global_step)
         self.status = EvaluationStepStatus.LOGGED
-        logger.info(f"Logging eval result {wandb_data} to step {self.global_step}")
+        logger.info(f"Logging eval result {log_data} to step {self.global_step}")
+
         return True
 
     def check(self):
@@ -154,13 +157,15 @@ class AutomaticEvaluator:
         self,
         config: config_pkg.AutomaticEvaluator,
         wandb_config: config_pkg.WandBConfig,
+        swanlab_config: config_pkg.SwanlabConfig,
     ):
         self.__eval_steps: Dict[int, EvaluationStep] = {}
         self.__max_concurrent_jobs = config.max_concurrent_jobs
         self.__wandb_config = wandb_config
+        self.__swanlab_config = swanlab_config
         self.__config = config
         self.__wandb_initialized = False
-
+        self.__swanlab_initialized = False
         # Check evaluated checkpoints by logs in recover
         # NOTE: All previous evaluation steps with output will be marked
         # as logged, even if it is not really logged in wandb.
@@ -228,6 +233,40 @@ class AutomaticEvaluator:
             settings=wandb.Settings(start_method="fork"),
         )
 
+    def __lazy_swanlab_init(self):
+        """Start the SwanLab run used for logging evaluation results.
+
+        Logs in first when an API key is configured and SwanLab is not
+        disabled. The run config is the user-provided ``swanlab.config``
+        or, if that is None, the trial's ``config.yaml`` under the log
+        root; a ``FRAMEWORK`` entry is added to it in either case.
+        """
+        trial_log_dir = os.path.join(
+            constants.LOG_ROOT,
+            constants.experiment_name(),
+            constants.trial_name(),
+        )
+        if self.__swanlab_config.mode != "disabled" and self.__swanlab_config.api_key:
+            swanlab.login(self.__swanlab_config.api_key)
+        if self.__swanlab_config.config is None:
+            import yaml
+
+            # No explicit config given: fall back to the trial's config.yaml.
+            with open(os.path.join(trial_log_dir, "config.yaml"), "r") as f:
+                run_config = yaml.safe_load(f)
+        else:
+            run_config = self.__swanlab_config.config
+        run_config["FRAMEWORK"] = "AReaL"
+        swanlab.init(
+            project=self.__swanlab_config.project or constants.experiment_name(),
+            experiment_name=self.__swanlab_config.name
+            or f"{constants.trial_name()}_eval",
+            config=run_config,
+            logdir=self.__swanlab_config.logdir
+            or os.path.join(trial_log_dir, "swanlab"),
+            mode=self.__swanlab_config.mode,
+        )
+
     def step(self):
         # Check whether a new evaluation step should be created
         ckpt_parent = os.path.join(
@@ -292,6 +331,9 @@ class AutomaticEvaluator:
                 if not self.__wandb_initialized:
                     self.__lazy_wandb_init()
                     self.__wandb_initialized = True
+                if not self.__swanlab_initialized:
+                    self.__lazy_swanlab_init()
+                    self.__swanlab_initialized = True
                 self.__eval_steps[log_step].log(self.__config)
 
     @property
diff --git a/realhf/system/master_worker.py b/realhf/system/master_worker.py
index 9a80c7c..ef4d1ed 100644
--- a/realhf/system/master_worker.py
+++ b/realhf/system/master_worker.py
@@ -12,6 +12,7 @@ from typing import Dict
 import colorama
 import networkx as nx
 import numpy as np
+import swanlab
 import wandb
 from tensorboardX import SummaryWriter
 
@@ -312,6 +313,40 @@ class MasterWorker(worker_base.AsyncWorker):
             resume="allow",
             settings=wandb.Settings(start_method="fork"),
         )
+
+        # swanlab init, connect to remote or local swanlab host
+        if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key:
+            swanlab.login(self.swanlab_config.api_key)
+        if self.swanlab_config.config is None:
+            import yaml
+
+            with open(
+                os.path.join(
+                    constants.LOG_ROOT,
+                    constants.experiment_name(),
+                    constants.trial_name(),
+                    "config.yaml",
+                ),
+                "r",
+            ) as f:
+                __config = yaml.safe_load(f)
+        else:
+            __config = self.swanlab_config.config
+        __config["FRAMEWORK"] = "AReaL"
+        swanlab.init(
+            project=self.swanlab_config.project or constants.experiment_name(),
+            experiment_name=self.swanlab_config.name
+            or f"{constants.trial_name()}_train",
+            config=__config,
+            logdir=self.swanlab_config.logdir
+            or os.path.join(
+                constants.LOG_ROOT,
+                constants.experiment_name(),
+                constants.trial_name(),
+                "swanlab",
+            ),
+            mode=self.swanlab_config.mode,
+        )
         # tensorboard logging
         self.__summary_writer = None
         if self.tensorboard_config.path is not None:
@@ -487,7 +522,7 @@ class MasterWorker(worker_base.AsyncWorker):
         s += f"(global step {global_step}) finishes. "
         s += f"#End to end# execution time: *{e2e_time:.3f}*s. "
         s += f"Total time consumption: {time_since_configure:.3f}s. "
-        logging.log_wandb_tensorboard({"timeperf/e2e": e2e_time})
+        logging.log_swanlab_wandb_tensorboard({"timeperf/e2e": e2e_time})
         if len(self.e2e_time_history) > 2:
             remaining_steps = self._steps_per_epoch - epoch_step
             remaining_epochs = self.__total_train_epochs - epoch
@@ -540,6 +575,7 @@ class MasterWorker(worker_base.AsyncWorker):
         )
 
         wandb.finish()
+        swanlab.finish()
         if self.__summary_writer is not None:
             self.__summary_writer.close()
         gc.collect()
diff --git a/realhf/system/model_function_call.py b/realhf/system/model_function_call.py
index 659dc6e..8bda855 100644
--- a/realhf/system/model_function_call.py
+++ b/realhf/system/model_function_call.py
@@ -10,6 +10,7 @@ import uuid
 from collections import defaultdict
 from typing import Dict, Hashable, List, Set, Tuple
 
+import swanlab
 import wandb
 from tensorboardX import SummaryWriter
 
@@ -442,7 +443,7 @@ class ModelFunctionCall:
                 logger.info(
                     f"RPC name {rpc.name} returns\n{data_api.tabulate_stats(res)}"
                 )
-                logging.log_wandb_tensorboard(
+                logging.log_swanlab_wandb_tensorboard(
                     res,
                     step=ctrl.step_info.global_step,
                     summary_writer=self.summary_writer,
@@ -453,7 +454,7 @@ class ModelFunctionCall:
                         f"RPC name {rpc.name} returns ({j + 1}/{len(res)})\n{data_api.tabulate_stats(r)}"
                     )
                     offset = len(res) * ctrl.step_info.global_step
-                    logging.log_wandb_tensorboard(
+                    logging.log_swanlab_wandb_tensorboard(
                         r,
                         step=offset + j,
                         summary_writer=self.summary_writer,
@@ -465,11 +466,10 @@ class ModelFunctionCall:
         for time_record in time_records:
             stats_tracker.scalar(**time_record)
         time_stats = stats_tracker.export()
-        logging.log_wandb_tensorboard(
+        logging.log_swanlab_wandb_tensorboard(
             time_stats,
             summary_writer=self.summary_writer,
         )
-
         logger.info(
             f"Model rpc {rpc.name} finished. "
             f"Request-reply time {time.perf_counter() - tik:.4f}s. "
diff --git a/realhf/system/worker_base.py b/realhf/system/worker_base.py
index bbd8060..d673f50 100644
--- a/realhf/system/worker_base.py
+++ b/realhf/system/worker_base.py
@@ -580,7 +580,9 @@ class Worker:
         )
         expr_config.lazy_init()
         self.wandb_config = expr_config.wandb
+        self.swanlab_config = expr_config.swanlab
         os.environ["WANDB_MODE"] = self.wandb_config.mode
+        os.environ["SWANLAB_MODE"] = self.swanlab_config.mode
         self.tensorboard_config = expr_config.tensorboard
         config = expr_config.resolve_worker_config(
             self.__worker_type, self.__worker_index
diff --git a/requirements.txt b/requirements.txt
index 5f775f6..4ffab7e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -68,4 +68,5 @@ python_dateutil
 word2number
 Pebble
 timeout-decorator
-prettytable
\ No newline at end of file
+prettytable
+swanlab[dashboard]
\ No newline at end of file
diff --git a/training/configs/async-ppo.yaml b/training/configs/async-ppo.yaml
index bb4cd6d..7c6e609 100644
--- a/training/configs/async-ppo.yaml
+++ b/training/configs/async-ppo.yaml
@@ -14,6 +14,13 @@ wandb:
   notes: null
   tags: null
   config: null
+swanlab:
+  mode: disabled
+  api_key: null
+  project: null
+  name: null
+  config: null
+  logdir: null
 tensorboard:
   path: null
 recover_mode: auto
diff --git a/training/configs/sft.yaml b/training/configs/sft.yaml
index 822369b..109ce97 100644
--- a/training/configs/sft.yaml
+++ b/training/configs/sft.yaml
@@ -14,6 +14,13 @@ wandb:
   notes: null
   tags: null
   config: null
+swanlab:
+  mode: disabled
+  api_key: null
+  project: null
+  name: null
+  config: null
+  logdir: null
 tensorboard:
   path: null
 recover_mode: auto
diff --git a/training/configs/sync-ppo.yaml b/training/configs/sync-ppo.yaml
index 88ae35f..cef7523 100644
--- a/training/configs/sync-ppo.yaml
+++ b/training/configs/sync-ppo.yaml
@@ -14,6 +14,13 @@ wandb:
   notes: null
   tags: null
   config: null
+swanlab:
+  mode: disabled
+  api_key: null
+  project: null
+  name: null
+  config: null
+  logdir: null
 tensorboard:
   path: null
 recover_mode: auto
diff --git a/training/utils.py b/training/utils.py
index 515f675..a20ecbf 100644
--- a/training/utils.py
+++ b/training/utils.py
@@ -90,6 +90,7 @@ class RayWorker:
             worker_info.experiment_name, worker_info.trial_name
         )
         self.worker.wandb_config = expr_config.wandb
+        self.worker.swanlab_config = expr_config.swanlab
         self.worker.tensorboard_config = expr_config.tensorboard
         self.logger = logging.getLogger(f"{self.worker_type} {idx}", "benchmark")
         self.logger.info(f"Configuring {self.worker_type}...")
@@ -125,6 +126,7 @@ def _run_experiment(exp_cfg, expr_name, trial_name):
     # Initialize ray in the Ray cluster
     env_vars = constants.get_env_vars(
         WADNB_MODE=exp_cfg.wandb.mode,
+        SWANLAB_MODE=exp_cfg.swanlab.mode,
         REAL_MODE="ray",
         REAL_RECOVER_RUN="0",
         REAL_SAVE_RECOVER_STATES="1",