mirror of https://github.com/inclusionAI/AReaL
Add CI for testing AReaLite (#150)
* ci: add test-arealite * ci: add checkout before running test-arealite * ci: add USERNAME * ci: add test script * ci: add GitHub mirror * ci: fix typo * ci: clone one commit * ci: fix condition * ci: set command timeout to 60m * ci: enable pip cache * ci: optimize container lifecycle * ci: split into many stages * ci(test-arealite): fix typo * ci: fix wrong env * ci: fix pytest * ci: uninstall transformer-engine * ci: uninstall transformer-engine * ci: fix model paths * ci: show stdout/stderr * ci: fix not clean up * ci: backup sglang * ci: remove tmp repo dir when run * ci: fix docker run exit 1 condition * ci(test-arealite): limit the concurrency and extend command timeout
This commit is contained in:
parent
89a8d8c46a
commit
078d3e1a44
|
@ -0,0 +1,50 @@
|
|||
# Runs the AReaLite tests on a remote CI node reached over SSH.
# Each SSH step executes one script from ci/ on that node.
name: Test AReaLite

on:
  push:
    paths:
      - ".github/workflows/test-arealite.yml"
      - "arealite/**"
      - "ci/**"
  workflow_dispatch:

jobs:
  test-arealite:
    runs-on: ubuntu-latest
    # Every run shares one concurrency group, so runs of this job are
    # serialized rather than executed in parallel.
    concurrency:
      group: test-arealite
    steps:
      # NOTE(review): presumably required so the ci/*.sh files named in
      # `script_path` below exist on the runner.
      - uses: actions/checkout@v4

      # Stage 1: fetch the commit under test onto the CI node.
      - uses: appleboy/ssh-action@v1
        with:
          host: ${{ secrets.CI_NODE_ADDR }}
          username: ${{ secrets.CI_NODE_USER }}
          key: ${{ secrets.REMOTE_SSH_KEY }}
          envs: GIT_REPO_URL,GIT_COMMIT_SHA
          script_path: ci/clone_repo.sh
        env:
          GIT_REPO_URL: https://github.bibk.top/${{ github.repository }}
          GIT_COMMIT_SHA: ${{ github.sha }}

      # Stage 2: build the environment image (the script skips the build
      # when the image already exists).
      - uses: appleboy/ssh-action@v1
        with:
          host: ${{ secrets.CI_NODE_ADDR }}
          username: ${{ secrets.CI_NODE_USER }}
          key: ${{ secrets.REMOTE_SSH_KEY }}
          command_timeout: 2h
          envs: GIT_COMMIT_SHA
          script_path: ci/build_env_image.sh
        env:
          GIT_COMMIT_SHA: ${{ github.sha }}

      # Stage 3: run the test suite inside the environment image.
      - uses: appleboy/ssh-action@v1
        with:
          host: ${{ secrets.CI_NODE_ADDR }}
          username: ${{ secrets.CI_NODE_USER }}
          key: ${{ secrets.REMOTE_SSH_KEY }}
          command_timeout: 1h
          envs: GIT_COMMIT_SHA
          script_path: ci/test_arealite.sh
        env:
          GIT_COMMIT_SHA: ${{ github.sha }}
|
|
@ -23,7 +23,7 @@ from arealite.utils import compute_varlen_position_indices
|
|||
from realhf.impl.model.utils.padding import unpad_input
|
||||
|
||||
VOCAB_SIZE = 100
|
||||
MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
|
||||
MODEL_PATH = "Qwen/Qwen2-0.5B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
|
|
|
@ -22,7 +22,7 @@ from realhf.base import constants, name_resolve, seeding
|
|||
|
||||
EXPR_NAME = "test_grpo"
|
||||
TRIAL_NAME = "test_grpo"
|
||||
MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
|
||||
MODEL_PATH = "Qwen/Qwen2-0.5B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
|
|
|
@ -27,7 +27,7 @@ from realhf.base import name_resolve, seeding
|
|||
|
||||
EXPR_NAME = "test_rollout"
|
||||
TRIAL_NAME = "test_rollout"
|
||||
MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
|
||||
MODEL_PATH = "Qwen/Qwen2-0.5B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
|
|
|
@ -22,7 +22,7 @@ from realhf.base import name_resolve, names, seeding
|
|||
|
||||
EXPR_NAME = "test_rollout_controller"
|
||||
TRIAL_NAME = "test_rollout_controller"
|
||||
MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
|
||||
MODEL_PATH = "Qwen/Qwen2-0.5B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
|
|
|
@ -60,7 +60,7 @@ def test_sft():
|
|||
)
|
||||
|
||||
engine_config = EngineConfig(
|
||||
path="/storage/openpsi/models/Qwen__Qwen3-1.7B/",
|
||||
path="Qwen/Qwen2-0.5B",
|
||||
gradient_checkpointing=False,
|
||||
optimizer=OptimizerConfig(),
|
||||
backend=EngineBackendConfig(type="hf"),
|
||||
|
|
|
@ -23,7 +23,7 @@ from realhf.base import name_resolve, seeding
|
|||
|
||||
EXPR_NAME = "test_sglang_client"
|
||||
TRIAL_NAME = "test_sglang_client"
|
||||
MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
|
||||
MODEL_PATH = "Qwen/Qwen2-0.5B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
#!/usr/bin/env bash
#
# Build the `areal-env:latest` Docker image used by the AReaLite CI.
#
# The image is produced by installing the project's pip dependencies inside
# an NVIDIA PyTorch container and committing the resulting container. If an
# image with that exact name already exists, the build is skipped.
#
# Required environment:
#   GIT_COMMIT_SHA  Commit under test. The checkout is expected at
#                   /tmp/areal-$GIT_COMMIT_SHA, and the same string is used
#                   as the temporary container name.

set -e

GIT_COMMIT_SHA=${GIT_COMMIT_SHA:?"GIT_COMMIT_SHA is not set"}

echo "GIT_COMMIT_SHA: $GIT_COMMIT_SHA"

# If there is already an image named areal-env, skip.
# -F -x: fixed-string, whole-line match, so names that merely contain
# "areal-env:latest" (e.g. "registry/areal-env:latest") do not count.
if docker images --format '{{.Repository}}:{{.Tag}}' | grep -Fxq 'areal-env:latest'; then
    echo "Image areal-env already exists, skipping build."
    exit 0
fi

RUN_ID="areal-$GIT_COMMIT_SHA"
cd "/tmp/$RUN_ID"

# Best-effort removal of the temporary build container.
remove_container() {
    docker rm -f "$RUN_ID" >/dev/null 2>&1 || true
}

# Remove a stale container left behind by an earlier run of the same commit.
# Whole-line match so a different container whose name merely contains
# $RUN_ID is not mistaken for ours.
if docker ps -a --format '{{.Names}}' | grep -Fxq -- "$RUN_ID"; then
    docker rm -f "$RUN_ID"
fi

# From here on, always remove the container on exit, including when
# `docker run` or `docker commit` fails.
trap remove_container EXIT

# The inner script runs with `set -e` so that a failed install aborts the
# build instead of being committed as a broken, permanently cached image.
docker run \
    --name "$RUN_ID" \
    --gpus all \
    --shm-size=8g \
    -v "$(pwd):/workspace" \
    -w /workspace \
    nvcr.io/nvidia/pytorch:25.01-py3 \
    bash -c '
        set -e
        python -m pip install --upgrade pip
        pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
        # Fails when the key is not set; that is not an error for us.
        pip config unset global.extra-index-url || true
        bash examples/env/scripts/setup-pip-deps.sh
        pip uninstall -y transformer-engine
        # Move sglang out of the bind-mounted workspace into the container
        # filesystem; bind-mounted data is not captured by `docker commit`.
        mv ./sglang /sglang
    ' || exit 1

docker commit "$RUN_ID" areal-env:latest
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/env bash
#
# Fetch a single commit of the repository into a fresh directory under /tmp.
#
# Required environment:
#   GIT_REPO_URL    URL of the repository to fetch from.
#   GIT_COMMIT_SHA  Commit to check out; also names the work directory
#                   (/tmp/areal-$GIT_COMMIT_SHA).

set -e

# Abort with a message if a required variable is unset or empty.
: "${GIT_REPO_URL:?"GIT_REPO_URL is not set"}"
: "${GIT_COMMIT_SHA:?"GIT_COMMIT_SHA is not set"}"

printf '%s\n' "GIT_REPO_URL: $GIT_REPO_URL"
printf '%s\n' "GIT_COMMIT_SHA: $GIT_COMMIT_SHA"

RUN_ID="areal-$GIT_COMMIT_SHA"
WORK_DIR="/tmp/$RUN_ID"

# Start from an empty directory; anything left by a previous run is discarded.
rm -rf "$WORK_DIR"
mkdir -p "$WORK_DIR"

# Shallow-fetch exactly the requested commit instead of cloning full history.
git init "$WORK_DIR"
git -C "$WORK_DIR" remote add origin "$GIT_REPO_URL"
git -C "$WORK_DIR" fetch --depth 1 origin "$GIT_COMMIT_SHA"
git -C "$WORK_DIR" checkout FETCH_HEAD
|
|
@ -0,0 +1,28 @@
|
|||
#!/usr/bin/env bash
#
# Run the AReaLite test suite inside the `areal-env:latest` image.
#
# The checkout at /tmp/areal-$GIT_COMMIT_SHA is bind-mounted at /workspace
# and pytest is executed against arealite/.
#
# Required environment:
#   GIT_COMMIT_SHA  Commit under test; determines the work directory and the
#                   temporary container name.

set -e

GIT_COMMIT_SHA=${GIT_COMMIT_SHA:?"GIT_COMMIT_SHA is not set"}

echo "GIT_COMMIT_SHA: $GIT_COMMIT_SHA"

RUN_ID="areal-$GIT_COMMIT_SHA"
cd "/tmp/$RUN_ID"

# Best-effort removal of the temporary test container.
remove_container() {
    docker rm -f "$RUN_ID" >/dev/null 2>&1 || true
}

# Remove a stale container left behind by an earlier run of the same commit.
# -F -x: fixed-string, whole-line match, so a different container whose name
# merely contains $RUN_ID is not mistaken for ours.
if docker ps -a --format '{{.Names}}' | grep -Fxq -- "$RUN_ID"; then
    docker rm -f "$RUN_ID"
fi

# Always remove the container on exit, whether the tests pass or fail.
trap remove_container EXIT

# The inner script runs with `set -e` so that a failure to restore sglang
# aborts immediately instead of running pytest on an incomplete workspace.
docker run \
    --name "$RUN_ID" \
    --gpus all \
    --shm-size=8g \
    -v "$(pwd):/workspace" \
    -w /workspace \
    areal-env:latest \
    bash -c '
        set -e
        # Put sglang (stored at /sglang in the image) back into the workspace.
        mv /sglang ./sglang
        HF_ENDPOINT=https://hf-mirror.com python -m pytest -s arealite/
    ' || exit 1
|
Loading…
Reference in New Issue