Clean up CI (#11)

* PullRequest: 67 Update v0.2.0 Dockerfile

Merge branch fw/v0.2.0-dockerfile of git@code.alipay.com:inclusionAI/AReaL.git into main
https://code.alipay.com/inclusionAI/AReaL/pull_requests/67

Signed-off-by: 温差 <xushusheng.xss@antgroup.com>


* fw/v0.2.0-dockerfile

* PullRequest: 66 Update v0.2.0 cover letter

Merge branch fw/v0.2.0-readme of git@code.alipay.com:inclusionAI/AReaL.git into main
https://code.alipay.com/inclusionAI/AReaL/pull_requests/66

Signed-off-by: 温差 <xushusheng.xss@antgroup.com>


* .
* .
* .
* .
* .
* update thpt fig
* update readme 20250329-20:16
* update
* update tutorial
* .

* upload 7B zero and 32B sft config

* clean ci

---------

Signed-off-by: 博惟 <bowei.fw@antgroup.com>
Co-authored-by: meijun <meijun.mei@antgroup.com>
This commit is contained in:
Wei Fu 2025-03-31 08:39:16 +08:00 committed by GitHub
parent 9c54166bf9
commit de3f66a90c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 138 additions and 28 deletions

View File

@ -1,28 +0,0 @@
# CI workflow: installs the project and runs the pytest suite, excluding
# tests marked `gpu`, on every pull request.
name: Run Pytest
on: [pull_request]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Kept quoted so the version stays the string "3.10" and is not
          # read as the float 3.1.
          python-version: '3.10'
      - name: Install dependencies
        # torch is installed before the editable install; presumably the
        # build needs it because --no-build-isolation is used (TODO confirm).
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install -r requirements.txt
          python3 -m pip install pytest
          python3 -m pip install torch==2.3.1
          python3 -m pip install -e . --no-build-isolation
      - name: Run tests
        # Deselects tests carrying the `gpu` marker.
        run: |
          pytest -m "not gpu"

View File

@ -26,7 +26,10 @@ actor:
hysteresis: 2
sglang:
mem_fraction_static: 0.8
disable_radix_cache: false
triton_attention_num_kv_splits: 16
max_running_requests: 128
context_length: 29696
critic:
type:
_class: qwen2

View File

@ -0,0 +1,44 @@
# SFT experiment configuration for a 32B qwen2-class model.
# Nesting below was reconstructed from key semantics; verify it against the
# consuming experiment schema before use.
experiment_name: sft-32B-distill-gpus-128
trial_name: 512x32
mode: ray
wandb:
  mode: disabled
recover_mode: auto
recover_retries: 10
# 32 * 1 * 4 = 128, matching n_nodes * n_gpus_per_node below.
# NOTE(review): presumably d/p/m are data/pipeline/model parallel degrees.
allocation_mode: 'd32p1m4'
# 16 nodes * 8 GPUs = 128 GPUs, as in the experiment name.
n_nodes: 16
n_gpus_per_node: 8
exp_ctrl:
  total_train_epochs: 200
  save_freq_epochs: 1
  ckpt_freq_secs: 600
torch_cache_mysophobia: true
dataset:
  train_path: /storage/datasets/boba-sft_200_0319.jsonl
  # NOTE(review): identical to train_path, so validation runs on the
  # training data -- confirm this is intended.
  valid_path: /storage/datasets/boba-sft_200_0319.jsonl
  max_seqlen: 32768
  train_bs_n_seqs: 64
  valid_bs_n_seqs: 64
model:
  type:
    _class: qwen2
  path: /storage/models/DeepSeek-R1-Distill-Qwen-32B
  optimizer:
    type: adam
    lr_scheduler_type: constant
    # Floats carry an explicit mantissa dot: YAML 1.1 loaders such as
    # PyYAML resolve a bare `1e-5` as a string, not a float.
    lr: 1.0e-5
    warmup_steps_proportion: 0.03
    initial_loss_scale: 262144.0
    loss_scale_window: 10
    hysteresis: 2
    weight_decay: 0.1
    eps: 1.0e-5
  bf16: true
allocation:
  mb_spec:
    # Same value as dataset.max_seqlen.
    max_tokens_per_mb: 32768
external_configs:
  cluster_config:
    fileroot: "/storage/ray/experiments"
  envs:
    # Quoted so the value stays a string.
    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"

View File

@ -0,0 +1,91 @@
# PPO experiment configuration for a 7B qwen2-class model.
# Nesting below was reconstructed from key semantics; verify it against the
# consuming experiment schema before use.
experiment_name: ppo-7B-zero-gpus-128
# NOTE(review): appears to encode dataset.train_bs_n_seqs (512) x
# group_size (64) -- confirm.
trial_name: 512x64
mode: ray
wandb:
  mode: disabled
recover_mode: auto
recover_retries: 10
# 64*1*1 + 32*2*1 = 128, matching n_nodes * n_gpus_per_node below.
# NOTE(review): presumably the `sglang.` part is the generation allocation
# and the remainder the training allocation -- confirm.
allocation_mode: 'sglang.d64p1m1+d32p2m1'
# 16 nodes * 8 GPUs = 128 GPUs, as in the experiment name.
n_nodes: 16
n_gpus_per_node: 8
cache_clear_freq: 1
exp_ctrl:
  total_train_epochs: 5
  save_freq_epochs: 1
  ckpt_freq_secs: 600
torch_cache_mysophobia: true
actor:
  type:
    _class: qwen2
  path: '/storage/models/Qwen2.5-7B'
  optimizer:
    # Floats carry an explicit mantissa dot: YAML 1.1 loaders such as
    # PyYAML resolve a bare `1e-5` as a string, not a float.
    lr: 1.0e-5
    lr_scheduler_type: constant
    eps: 1.0e-5
    warmup_steps_proportion: 0.001
    hysteresis: 2
  sglang:
    mem_fraction_static: 0.8
    disable_radix_cache: false
    triton_attention_num_kv_splits: 16
    max_running_requests: 128
    # 18432 = dataset.max_prompt_len (2048) + ppo.gen.max_new_tokens (16384).
    context_length: 18432
critic:
  type:
    _class: qwen2
    is_critic: true
  path: '/storage/models/Qwen2.5-7B'
  init_critic_from_actor: true
  optimizer:
    lr: 5.0e-6
ref:
  type:
    _class: qwen2
  path: '/storage/models/Qwen2.5-7B'
# Every stage below uses the same micro-batch token budget (19456), which is
# larger than actor.sglang.context_length (18432).
actor_train:
  mb_spec:
    max_tokens_per_mb: 19456
critic_train:
  mb_spec:
    max_tokens_per_mb: 19456
actor_gen:
  mb_spec:
    max_tokens_per_mb: 19456
critic_inf:
  mb_spec:
    max_tokens_per_mb: 19456
actor_inf:
  mb_spec:
    max_tokens_per_mb: 19456
ref_inf:
  mb_spec:
    max_tokens_per_mb: 19456
dataset:
  path: '/storage/datasets/orz-zero_56k_0319.jsonl'
  max_prompt_len: 2048
  train_bs_n_seqs: 512
ppo:
  gen:
    max_new_tokens: 16384
    min_new_tokens: 0
    top_p: 1.0
    top_k: 1000000
    temperature: 1.0
    force_no_logits_mask: true
    use_cuda_graph: true
  ppo_n_minibatches: 4
  kl_ctl: 0.0
  discount: 1.0
  value_eps_clip: 0.2
  disable_value: true
  reward_output_scaling: 0.5
  reward_output_bias: -1.0
  adv_norm: true
  value_norm: true
group_size: 64
group_adv_norm: false
external_configs:
  cluster_config:
    fileroot: "/storage/ray/experiments"
  envs:
    # Quoted so the value stays a string.
    REAL_GPU_MEMORY_KILL_THRESHOLD: "1"