# Source: mirror of https://github.com/inclusionAI/AReaL (original file: 128 lines, 4.5 KiB, YAML)
# Workflow that checks the documented installation procedure still works.
name: Installation Validation

on:
  # Push trigger: limited to a branch literally named "none" and to the files
  # that define or document the installation procedure.
  push:
    branches:
      - none
    paths:
      - 'examples/env/scripts/setup-pip-deps.sh'
      - 'docs/tutorial/installation.md'
      - 'examples/env/validate_installation.py'
      - 'setup.py'
      - 'requirements*.txt'
      - '.github/workflows/installation-validation.yml'

  # Pull-request trigger: same branch and path filters as the push trigger.
  pull_request:
    branches:
      - none
    paths:
      - 'examples/env/scripts/setup-pip-deps.sh'
      - 'docs/tutorial/installation.md'
      - 'examples/env/validate_installation.py'
      - 'setup.py'
      - 'requirements*.txt'
      - '.github/workflows/installation-validation.yml'

  # Manual runs from the Actions tab or the API.
  workflow_dispatch:

jobs:
  # Copies the checked-out repository to a remote machine over SSH, then runs
  # the pip installation script and the validation script inside an NVIDIA
  # PyTorch Docker container on that machine.
  validate-installation:
    runs-on: ubuntu-latest

    # Connection details for the remote machine, shared by every step below.
    env:
      REMOTE_HOST: "101.6.96.205"
      REMOTE_PORT: "8107"
      REMOTE_USER: fuwei

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          lfs: true

      - name: Set up SSH key
        # The secret is handed to the script through the environment instead of
        # being interpolated into the script text, so characters inside the key
        # can never be interpreted by the shell.
        env:
          REMOTE_SSH_KEY: ${{ secrets.REMOTE_SSH_KEY }}
        run: |
          # Create the directory and key file with owner-only permissions.
          umask 077
          mkdir -p ~/.ssh
          printf '%s\n' "$REMOTE_SSH_KEY" > ~/.ssh/id_rsa
          chmod 600 ~/.ssh/id_rsa
          # NOTE(review): the host key is trusted as scanned at run time;
          # pinning a known key would be stricter - confirm whether acceptable.
          ssh-keyscan -p "$REMOTE_PORT" "$REMOTE_HOST" >> ~/.ssh/known_hosts

      - name: Synchronize repository to remote machine
        # NOTE(review): every run syncs into the same remote directory, so
        # overlapping runs would interfere with each other - confirm runs are
        # serialized.
        run: |
          # Use rsync to synchronize repository to remote machine
          rsync -avz --delete \
            --exclude='.git' \
            --exclude='__pycache__' \
            --exclude='*.pyc' \
            --exclude='*.pyo' \
            --exclude='*.egg-info' \
            --exclude='build/' \
            --exclude='dist/' \
            --exclude='.pytest_cache' \
            --exclude='.coverage' \
            --exclude='*.so' \
            --exclude='*.dylib' \
            --exclude='node_modules/' \
            --exclude='.env' \
            --exclude='.venv' \
            -e "ssh -p ${REMOTE_PORT}" . "${REMOTE_USER}@${REMOTE_HOST}:/tmp/areal-validation/"

      - name: Run installation validation on remote machine
        # The heredoc delimiter is quoted, so nothing below is expanded on the
        # runner; the whole script is evaluated by the remote shell.
        run: |
          ssh -p "$REMOTE_PORT" "${REMOTE_USER}@${REMOTE_HOST}" << 'EOF'
          set -e

          # Navigate to the synchronized repository
          cd /tmp/areal-validation

          # Create persistent pip cache directory
          mkdir -p /tmp/pip-cache

          # Generate a unique container name
          CONTAINER_NAME="areal-validation-$(date +%s)"

          # Always stop/remove the container and delete the synced tree, even
          # when a command below fails under `set -e`. Cleanup is best-effort
          # and the script's original exit code is preserved.
          cleanup() {
            exit_code=$?
            trap - EXIT
            docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
            docker rm "$CONTAINER_NAME" >/dev/null 2>&1 || true
            cd ~
            rm -rf /tmp/areal-validation || true
            exit "$exit_code"
          }
          trap cleanup EXIT

          # Stop and remove any existing container with the same name
          docker stop "$CONTAINER_NAME" 2>/dev/null || true
          docker rm "$CONTAINER_NAME" 2>/dev/null || true

          echo "=== Starting Docker container ==="
          # Launch Docker container with NVIDIA PyTorch image; `sleep infinity`
          # keeps it alive so the following `docker exec` calls can use it.
          docker run -d \
            --name "$CONTAINER_NAME" \
            --gpus all \
            --shm-size=8g \
            -v "$(pwd)":/workspace \
            -v /tmp/pip-cache:/root/.cache/pip \
            -w /workspace \
            nvcr.io/nvidia/pytorch:25.01-py3 \
            sleep infinity

          echo "=== Verifying CUDA environment in container ==="
          docker exec "$CONTAINER_NAME" nvidia-smi
          docker exec "$CONTAINER_NAME" nvcc --version

          echo "=== Verifying workspace contents ==="
          docker exec "$CONTAINER_NAME" pwd
          docker exec "$CONTAINER_NAME" ls -la /workspace
          docker exec "$CONTAINER_NAME" ls -la /workspace/examples/env/ || echo "examples/env directory not found"

          echo "=== Checking pip cache before installation ==="
          du -sh /tmp/pip-cache 2>/dev/null || echo "Cache directory empty"

          echo "=== Installing dependencies ==="
          # NOTE(review): the inner script has no `set -e`, so only the exit
          # status of validate_installation.py decides success - confirm that a
          # failing setup-pip-deps.sh should not fail the job by itself.
          docker exec "$CONTAINER_NAME" bash -c "
            python -m pip install --upgrade pip
            pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
            pip config unset global.extra-index-url
            # Run the installation script
            bash examples/env/scripts/setup-pip-deps.sh
            python examples/env/validate_installation.py
          "

          echo "=== Checking pip cache after installation ==="
          du -sh /tmp/pip-cache 2>/dev/null || echo "Cache directory still empty"

          echo "=== Installation validation completed successfully ==="
          EOF

      - name: Cleanup SSH key
        # Runs even when an earlier step failed so the key never lingers.
        if: always()
        run: |
          rm -f ~/.ssh/id_rsa