mirror of https://github.com/inclusionAI/AReaL
parent e3005d57f6
commit adeb8eb13f

@@ -29,12 +29,6 @@ actor:
     mem_fraction_static: 0.8
     triton_attention_num_kv_splits: 16
     enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
-  init_critic_from_actor: true
 ref:
   type:
     _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
   mb_spec:
     max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
   mb_spec:
     max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
     top_p: 1.0
     top_k: 1000000
     temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
   ppo_n_minibatches: 4
   kl_ctl: 0.0
   discount: 1.0
@@ -78,11 +61,12 @@ ppo:
   reward_output_scaling: 5
   reward_output_bias: 0.0
   adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8

@@ -29,12 +29,6 @@ actor:
     mem_fraction_static: 0.8
     triton_attention_num_kv_splits: 16
     enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
-  init_critic_from_actor: true
 ref:
   type:
     _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
   mb_spec:
     max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
   mb_spec:
     max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
     top_p: 1.0
     top_k: 1000000
     temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
   ppo_n_minibatches: 4
   kl_ctl: 0.0
   discount: 1.0
@@ -78,11 +61,12 @@ ppo:
   reward_output_scaling: 5
   reward_output_bias: 0.0
   adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8

@@ -29,12 +29,6 @@ actor:
     mem_fraction_static: 0.8
     triton_attention_num_kv_splits: 16
     enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-1.5B'
-  init_critic_from_actor: true
 ref:
   type:
     _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
   mb_spec:
     max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
   mb_spec:
     max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
     top_p: 1.0
     top_k: 1000000
     temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
  ppo_n_minibatches: 4
   kl_ctl: 0.0
   discount: 1.0
@@ -78,11 +61,12 @@ ppo:
   reward_output_scaling: 5
   reward_output_bias: 0.0
   adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8

@@ -27,17 +27,8 @@ actor:
     hysteresis: 2
   sglang:
     mem_fraction_static: 0.8
-    disable_radix_cache: false
     triton_attention_num_kv_splits: 16
-    max_running_requests: 128
-    context_length: 29696
     enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-32B'
-  init_critic_from_actor: true
 ref:
   type:
     _class: qwen2
@@ -45,15 +36,6 @@ ref:
 actor_train:
   mb_spec:
     max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
   mb_spec:
     max_tokens_per_mb: 30720
@@ -71,8 +53,6 @@ ppo:
     top_p: 1.0
     top_k: 1000000
     temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
   ppo_n_minibatches: 4
   kl_ctl: 0.0
   discount: 1.0
@@ -81,11 +61,12 @@ ppo:
   reward_output_scaling: 5
   reward_output_bias: 0.0
   adv_norm: true
-  value_norm: true
 group_size: 32
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8

@@ -37,8 +37,10 @@ model:
 allocation:
   mb_spec:
     max_tokens_per_mb: 32768
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8

@@ -29,12 +29,6 @@ actor:
     mem_fraction_static: 0.8
     triton_attention_num_kv_splits: 16
     enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
-  init_critic_from_actor: true
 ref:
   type:
     _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
   mb_spec:
     max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
   mb_spec:
     max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
     top_p: 1.0
     top_k: 1000000
     temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
   ppo_n_minibatches: 4
   kl_ctl: 0.0
   discount: 1.0
@@ -78,11 +61,12 @@ ppo:
   reward_output_scaling: 5
   reward_output_bias: 0.0
   adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8

@@ -29,12 +29,6 @@ actor:
     mem_fraction_static: 0.8
     triton_attention_num_kv_splits: 16
     enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/DeepSeek-R1-Distill-Qwen-7B'
-  init_critic_from_actor: true
 ref:
   type:
     _class: qwen2
@@ -42,15 +36,6 @@ ref:
 actor_train:
   mb_spec:
     max_tokens_per_mb: 30720
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 30720
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 30720
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 30720
 actor_inf:
   mb_spec:
     max_tokens_per_mb: 30720
@@ -68,8 +53,6 @@ ppo:
     top_p: 1.0
     top_k: 1000000
     temperature: 1.0
-    force_no_logits_mask: True
-    use_cuda_graph: True
   ppo_n_minibatches: 4
   kl_ctl: 0.0
   discount: 1.0
@@ -78,11 +61,12 @@ ppo:
   reward_output_scaling: 5
   reward_output_bias: 0.0
   adv_norm: true
-  value_norm: true
 group_size: 16
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8

@@ -27,19 +27,8 @@ actor:
     hysteresis: 2
   sglang:
     mem_fraction_static: 0.8
-    disable_radix_cache: false
     triton_attention_num_kv_splits: 16
-    max_running_requests: 128
-    context_length: 18432
     enable_metrics: True
-critic:
-  type:
-    _class: qwen2
-    is_critic: true
-  path: '/storage/models/Qwen2.5-7B'
-  init_critic_from_actor: true
-  optimizer:
-    lr: 5e-6
 ref:
   type:
     _class: qwen2
@@ -47,15 +36,6 @@ ref:
 actor_train:
   mb_spec:
     max_tokens_per_mb: 19456
-critic_train:
-  mb_spec:
-    max_tokens_per_mb: 19456
-actor_gen:
-  mb_spec:
-    max_tokens_per_mb: 19456
-critic_inf:
-  mb_spec:
-    max_tokens_per_mb: 19456
 actor_inf:
   mb_spec:
     max_tokens_per_mb: 19456
@@ -73,8 +53,6 @@ ppo:
     top_p: 1.0
     top_k: 1000000
     temperature: 1.0
-    force_no_logits_mask: true
-    use_cuda_graph: true
   ppo_n_minibatches: 4
   kl_ctl: 0.0
   discount: 1.0
@@ -83,11 +61,12 @@ ppo:
   reward_output_scaling: 0.5
   reward_output_bias: -1.0
   adv_norm: true
-  value_norm: true
 group_size: 64
 group_adv_norm: false
-external_configs:
-  cluster_config:
-    fileroot: "/storage/ray/experiments"
-  envs:
-    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"
+# Cluster configuration
+ray_temp_path: /tmp/ray
+cluster:
+  fileroot: /tmp/ray/experiments
+  n_nodes: 32
+  n_gpus_per_node: 8

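Across the config hunks above the recurring change is the same: the critic/value-model sections and the old external_configs.cluster_config / envs block are dropped, and the cluster settings move to a top-level cluster section alongside ray_temp_path. A minimal sketch of reading such a relocated section, assuming a plain YAML file and a hypothetical helper name (load_cluster_spec is not part of the repository):

import yaml


def load_cluster_spec(path: str) -> dict:
    """Illustrative only: read the relocated cluster settings shown in the diffs above."""
    with open(path, "r") as f:
        cfg = yaml.safe_load(f)
    cluster = cfg.get("cluster", {})
    return {
        "ray_temp_path": cfg.get("ray_temp_path", "/tmp/ray"),
        "fileroot": cluster.get("fileroot", "/tmp/ray/experiments"),
        # with the values above: 32 nodes x 8 GPUs per node = 256 GPUs
        "total_gpus": cluster.get("n_nodes", 1) * cluster.get("n_gpus_per_node", 1),
    }
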
@@ -154,9 +154,12 @@ def log_swanlab_wandb_tensorboard(data, step=None, summary_writer=None):
         _LATEST_LOG_STEP = max(_LATEST_LOG_STEP, step)

     # swanlab
-    import swanlab
-
-    swanlab.log(data, step=step)
+    try:
+        import swanlab
+
+        swanlab.log(data, step=step)
+    except (ModuleNotFoundError, ImportError):
+        pass

     # wandb
     import wandb

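The hunk above makes swanlab logging best-effort: the import and the swanlab.log call sit inside a try/except, so a missing package no longer aborts the run. A self-contained sketch of the same pattern, with an illustrative function name and signature (not the repository's):

from typing import Optional


def log_metrics_optional(data: dict, step: Optional[int] = None) -> None:
    """Log to swanlab when it is installed; silently skip otherwise."""
    try:
        import swanlab  # optional dependency

        swanlab.log(data, step=step)
    except (ModuleNotFoundError, ImportError):
        # swanlab is absent: drop the metrics instead of crashing the caller
        pass
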
@@ -452,9 +452,6 @@ class SGLangGenerationBackend(ModelBackend, SGLangConfig):
                 high=60000,
                 experiment_name=constants.experiment_name(),
                 trial_name=constants.trial_name(),
-                lockfile_root=os.path.join(
-                    constants.get_cache_path(self.args), "ports"
-                ),
             ),
             group=constants.data_parallel_group(),
         )

@@ -170,4 +170,4 @@ def make(args: "BaseExperimentConfig", **kwargs) -> SchedulerClient:

         return LocalSchedulerClient(args)
     else:
-        raise NotImplementedError(f"Scheduler {mode} not found")
+        raise NotImplementedError(f"Scheduler {args.mode} not found")

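The one-line fix above references args.mode rather than an undefined local name mode when no scheduler matches. A rough, self-contained sketch of that dispatch shape (the placeholder class and the set of supported modes are assumptions, not the repository's code):

class LocalSchedulerClient:
    """Stand-in for the real local scheduler client."""

    def __init__(self, args):
        self.args = args


def make(args, **kwargs):
    # Dispatch on the experiment's scheduler mode; unknown modes fail loudly
    # with the requested mode echoed back in the error message.
    if args.mode == "local":
        return LocalSchedulerClient(args)
    raise NotImplementedError(f"Scheduler {args.mode} not found")
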
@@ -8,13 +8,17 @@ import subprocess
 import time
 from typing import Any, Dict, Optional

-import swanlab
 import wandb

 import realhf.api.core.system_api as config_pkg
 from realhf.api.cli_args import BaseExperimentConfig
 from realhf.base import constants, logging

+try:
+    import swanlab
+except (ModuleNotFoundError, ImportError):
+    swanlab = None
+
 logger = logging.getLogger("AutomaticEvaluator", "colored")

@@ -132,7 +136,8 @@ class EvaluationStep:
         for k, v in d.items():
             log_data[f"{data_name}_{k}"] = v
         wandb.log(log_data, step=self.global_step)
-        swanlab.log(log_data, step=self.global_step)
+        if swanlab is not None:
+            swanlab.log(log_data, step=self.global_step)
         self.status = EvaluationStepStatus.LOGGED
         logger.info(f"Logging eval result {log_data} to step {self.global_step}")

@@ -236,21 +241,19 @@ class AutomaticEvaluator:
     def __lazy_swanlab_init(self):
         if self.__swanlab_config.api_key:
             swanlab.login(self.__swanlab_config.api_key)
-        if self.swanlab_config.config is None:
+        if self.__swanlab_config.config is None:
             import yaml

             with open(
                 os.path.join(
-                    constants.LOG_ROOT,
-                    constants.experiment_name(),
-                    constants.trial_name(),
+                    constants.get_log_path(self.args),
                     "config.yaml",
                 ),
                 "r",
             ) as f:
                 __config = yaml.safe_load(f)
         else:
-            __config = self.swanlab_config.config
+            __config = self.__swanlab_config.config
         __config["FRAMEWORK"] = "AReaL"
         swanlab.init(
             project=self.__swanlab_config.project or constants.experiment_name(),
@@ -259,9 +262,7 @@ class AutomaticEvaluator:
             config=__config,
             logdir=self.__swanlab_config.logdir
             or os.path.join(
-                constants.LOG_ROOT,
-                constants.experiment_name(),
-                constants.trial_name(),
+                constants.get_log_path(self.args),
                 "swanlab",
             ),
             mode=self.__swanlab_config.mode,
@@ -329,7 +330,7 @@ class AutomaticEvaluator:
         if not self.__wandb_initialized:
             self.__lazy_wandb_init()
             self.__wandb_initialized = True
-        if not self.__swanlab_initialized:
+        if not self.__swanlab_initialized and swanlab is not None:
             self.__lazy_swanlab_init()
             self.__swanlab_initialized = True
         self.__eval_steps[log_step].log(self.__config)

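The evaluator hunks combine two ideas: import swanlab once at module level with a None fallback, and gate both lazy initialization and logging on that sentinel. A compact sketch of the combined pattern, using an illustrative wrapper class rather than the repository's AutomaticEvaluator (the project name below is a placeholder):

try:
    import swanlab
except (ModuleNotFoundError, ImportError):
    swanlab = None  # sentinel: swanlab features become no-ops


class GuardedEvalLogger:
    """Illustrative wrapper showing initialize-once, only-if-available logging."""

    def __init__(self, project: str = "demo-project"):
        self._project = project
        self._initialized = False

    def log(self, data: dict, step: int) -> None:
        if not self._initialized and swanlab is not None:
            swanlab.init(project=self._project)
            self._initialized = True
        if swanlab is not None:
            swanlab.log(data, step=step)
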
@@ -12,7 +12,6 @@ from typing import Dict
 import colorama
 import networkx as nx
 import numpy as np
-import swanlab
 import wandb
 from tensorboardX import SummaryWriter

@@ -38,6 +37,11 @@ from realhf.system.buffer import AsyncIOSequenceBuffer
 from realhf.system.function_executor import FunctionExecutor
 from realhf.system.model_function_call import RPCCorountineControl

+try:
+    import swanlab
+except (ModuleNotFoundError, ImportError):
+    swanlab = None
+
 logger = logging.getLogger("master worker", "system")
 blogger = logging.getLogger("benchmark")

@@ -307,15 +311,18 @@ class MasterWorker(worker_base.AsyncWorker):

         # swanlab init, connect to remote or local swanlab host
         if self.swanlab_config.mode != "disabled" and self.swanlab_config.api_key:
-            swanlab.login(self.swanlab_config.api_key)
+            if swanlab is not None:
+                swanlab.login(self.swanlab_config.api_key)
+            else:
+                logger.warning(
+                    "swanlab not installed but enabled. Ignore swanlab logging."
+                )
         if self.swanlab_config.config is None:
             import yaml

             with open(
                 os.path.join(
-                    constants.LOG_ROOT,
-                    constants.experiment_name(),
-                    constants.trial_name(),
+                    constants.get_log_path(self.args),
                     "config.yaml",
                 ),
                 "r",
@@ -324,20 +331,19 @@ class MasterWorker(worker_base.AsyncWorker):
         else:
             __config = self.swanlab_config.config
         __config["FRAMEWORK"] = "AReaL"
-        swanlab.init(
-            project=self.swanlab_config.project or constants.experiment_name(),
-            experiment_name=self.swanlab_config.name
-            or f"{constants.trial_name()}_train",
-            config=__config,
-            logdir=self.swanlab_config.logdir
-            or os.path.join(
-                constants.LOG_ROOT,
-                constants.experiment_name(),
-                constants.trial_name(),
-                "swanlab",
-            ),
-            mode=self.swanlab_config.mode,
-        )
+        if swanlab is not None:
+            swanlab.init(
+                project=self.swanlab_config.project or constants.experiment_name(),
+                experiment_name=self.swanlab_config.name
+                or f"{constants.trial_name()}_train",
+                config=__config,
+                logdir=self.swanlab_config.logdir
+                or os.path.join(
+                    constants.get_log_path(self.args),
+                    "swanlab",
+                ),
+                mode=self.swanlab_config.mode,
+            )

         # tensorboard logging
         self.__summary_writer = None
         if self.tensorboard_config.path is not None:
@@ -567,7 +573,8 @@ class MasterWorker(worker_base.AsyncWorker):
         )

         wandb.finish()
-        swanlab.finish()
+        if swanlab is not None:
+            swanlab.finish()
         if self.__summary_writer is not None:
             self.__summary_writer.close()
         gc.collect()

@@ -10,8 +10,6 @@ import uuid
 from collections import defaultdict
 from typing import Dict, Hashable, List, Set, Tuple

-import swanlab
-import wandb
 from tensorboardX import SummaryWriter

 import realhf.api.core.config as config_api